Skip to content

Commit

Permalink
find protein names
Browse files Browse the repository at this point in the history
  • Loading branch information
gamcil committed Jul 16, 2021
1 parent b89e47d commit 58a6616
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions synthaser/genbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord


# Module-level logger, named after this module via __name__.
LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -38,14 +39,23 @@ def get_NRPS_PKS(features):
if ftype == "PKS":
pks.append(feature)
elif ftype == "NRPS":
nrps.append(ftype)
nrps.append(feature)
return pks, nrps


def get_feature_label(
    feature,
    tags=("protein_id", "locus_tag", "ID", "Name", "gene"),
):
    """Return the first identifying annotation found on a feature.

    The tags are tried in order and the value stored under the first one
    present in ``feature.annotations`` is returned unchanged.

    Args:
        feature: Object exposing an ``annotations`` mapping.
        tags: Annotation keys to try, in priority order. Defaults to
            ``protein_id``, ``locus_tag``, ``ID``, ``Name``, ``gene``.

    Returns:
        The annotation value stored under the first matching tag.

    Raises:
        KeyError: If none of the tags is present in the annotations.
    """
    annotations = feature.annotations
    for tag in tags:
        try:
            return annotations[tag]
        except KeyError:
            # Tag is absent; fall through to the next candidate.
            continue
    raise KeyError(f"Could not find a label for feature:\n {feature}")


def write(path, features):
    """Write features to a FASTA file at ``path``.

    Args:
        path: Destination file, given as a ``str`` or ``pathlib.Path``.
        features: Sized collection of records passed straight to
            ``SeqIO.write`` (presumably ``SeqRecord`` objects; confirm
            against callers).
    """
    # Coerce so that ``.name`` works whether a str or a Path was passed in.
    path = Path(path)
    LOG.info("Writing %i feature(s) to file: %s", len(features), path.name)
    # Write exactly once through a managed handle so the file is always closed.
    with path.open("w") as fp:
        SeqIO.write(features, fp, "fasta")


def convert(path, antismash=False):
Expand All @@ -66,7 +76,8 @@ def convert(path, antismash=False):
# Blank out descriptions for clean FASTA headers
for feature in features:
feature.description = ""

feature.id = get_feature_label(feature)

# If antismash=True, look for PKS and NRPS sequences only
if antismash:
LOG.info("Finding antiSMASH PKS and NRPS features")
Expand Down

0 comments on commit 58a6616

Please sign in to comment.