Merge pull request #876 from uclahs-cds/czhu-fix-split-fasta

Fix splitFasta: NovelORF peptides from coding transcripts not recognized correctly
uclahs-cds · Jun 23, 2024 · 305ffbb · 305ffbb
2 parents b4da46e + 2593943
commit 305ffbb
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 6 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 
 ## [Unreleased]
 
+## [1.4.2] - 2024-06-23
+
+- Fixed `splitFasta` so that NovelORF peptides from coding transcripts are recognized correctly.
+
 ## [1.4.1] - 2024-05-26
 
 - Fixed `VariantPeptidePool` that old versions of `SeqUtils.molecular_weight` don't handle `SeqRecord` objects. #874

diff --git a/moPepGen/__init__.py b/moPepGen/__init__.py
@@ -8,7 +8,7 @@
 from . import constant
 
 
-__version__ = '1.4.1'
+__version__ = '1.4.2'
 
 ## Error messages
 ERROR_INDEX_IN_INTRON = 'The genomic index seems to be in an intron'

diff --git a/moPepGen/aa/VariantPeptideLabel.py b/moPepGen/aa/VariantPeptideLabel.py
@@ -203,7 +203,7 @@ def from_variant_peptide(peptide:AminoAcidSeqRecord,
             info = VariantPeptideInfo(str(variant_id), gene_ids, var_ids, variant_id.index)
 
             if check_source:
-                if tx_id not in coding_tx:
+                if variant_id.orf_id is not None:
                     info.sources.add(constant.SOURCE_NOVEL_ORF, group_map=group_map)
 
                 for gene_id, _ids in var_ids.items():

diff --git a/test/unit/test_peptide_pool_splitter.py b/test/unit/test_peptide_pool_splitter.py
@@ -283,7 +283,7 @@ def test_from_variant_peptide_noncoding(self):
         infos = VariantPeptideInfo.from_variant_peptide(peptide, tx2gene, coding_tx, label_map)
         self.assertIn('NovelORF', infos[0].sources)
 
-        peptide = create_aa_record('KHIRJ','ENST0004|1')
+        peptide = create_aa_record('KHIRJ','ENST0004|ORF1|1')
         infos = VariantPeptideInfo.from_variant_peptide(peptide, tx2gene, coding_tx, label_map)
         self.assertIn('NovelORF', infos[0].sources)
 
@@ -543,7 +543,7 @@ def test_split_database_source_comb_order(self):
         peptides_data = [
             [
                 'SSSSSSSR',
-                'CIRC-ENST0002-E1-E2|1 ENST0005|SE-2100|1'
+                'CIRC-ENST0002-E1-E2|1 ENST0005|SE-2100|ORF2|1'
             ]
         ]
         peptides = VariantPeptidePool({create_aa_record(*x) for x in peptides_data})
@@ -576,7 +576,7 @@ def test_split_database_source_comb_order_case2(self):
         peptides_data = [
             [
                 'SSSSFSSR',
-                'CIRC-ENST0002-E1-E2|1 ENST0005|SE-2100|W2F-5|1'
+                'CIRC-ENST0002-E1-E2|1 ENST0005|SE-2100|W2F-5|ORF-2|1'
             ]
         ]
         peptides = VariantPeptidePool({create_aa_record(*x) for x in peptides_data})

diff --git a/test/unit/test_peptide_pool_summarizer.py b/test/unit/test_peptide_pool_summarizer.py
@@ -50,7 +50,7 @@ def test_summarize_fasta_source_comb_order(self):
         peptides_data = [
             [
                 'SSSSSSSR',
-                'CIRC-ENST0002-E1-E2|1 ENST0005|SE-2100|1'
+                'CIRC-ENST0002-E1-E2|1 ENST0005|SE-2100|ORF2|1'
             ]
         ]
         peptides = VariantPeptidePool({create_aa_record(*x) for x in peptides_data})