Skip to content

Commit

Permalink
Check if MANE transcript exists in UTA, if not select longest compatible
Browse files Browse the repository at this point in the history
  • Loading branch information
jarbesfeld committed Nov 12, 2024
1 parent 6cf9577 commit a9c7422
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 8 deletions.
1 change: 1 addition & 0 deletions src/cool_seq_tool/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def __init__(
self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
self.seqrepo_access,
self.uta_db,
self.mane_transcript,
self.mane_transcript_mappings,
self.liftover,
)
24 changes: 16 additions & 8 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
from cool_seq_tool.mappers.liftover import LiftOver
from cool_seq_tool.mappers.mane_transcript import ManeTranscript
from cool_seq_tool.schemas import (
AnnotationLayer,
Assembly,
BaseModelForbidExtra,
ServiceMeta,
Expand Down Expand Up @@ -232,6 +234,7 @@ def __init__(
self,
seqrepo_access: SeqRepoAccess,
uta_db: UtaDatabase,
mane_transcript: ManeTranscript,
mane_transcript_mappings: ManeTranscriptMappings,
liftover: LiftOver,
) -> None:
Expand Down Expand Up @@ -261,6 +264,7 @@ def __init__(
"""
self.seqrepo_access = seqrepo_access
self.uta_db = uta_db
self.mane_transcript = mane_transcript
self.mane_transcript_mappings = mane_transcript_mappings
self.liftover = liftover

Expand Down Expand Up @@ -796,18 +800,23 @@ async def _genomic_to_tx_segment(
mane_transcripts = self.mane_transcript_mappings.get_gene_mane_data(
gene
)

if mane_transcripts:
if mane_transcripts and await self.uta_db.validate_mane_transcript_acc(
mane_transcripts
):
transcript = mane_transcripts[0]["RefSeq_nuc"]
else:
# Attempt to find a coding transcript if a MANE transcript
# cannot be found
results = await self.uta_db.get_transcripts(
gene=gene, alt_ac=genomic_ac
results = (
await self.mane_transcript.get_longest_compatible_transcript(
start_pos=genomic_pos,
end_pos=genomic_pos,
start_annotation_layer=AnnotationLayer.GENOMIC,
gene=gene,
)
)

if not results.is_empty():
transcript = results[0]["tx_ac"][0]
if results:
transcript = results.refseq
else:
# Run if gene is for a noncoding transcript
query = f"""
Expand All @@ -826,7 +835,6 @@ async def _genomic_to_tx_segment(
f"Could not find a transcript for {gene} on {genomic_ac}"
]
)

tx_exons = await self._get_all_exon_coords(
tx_ac=transcript, genomic_ac=genomic_ac
)
Expand Down
15 changes: 15 additions & 0 deletions src/cool_seq_tool/sources/uta_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,21 @@ async def validate_genomic_ac(self, ac: str) -> bool:
result = await self.execute_query(query)
return result[0][0]

async def validate_mane_transcript_acc(self, transcript_list: list[dict]) -> bool:
"""Return whether or not a MANE transcript exists in UTA
:param transcript_list: A list of MANE transcripts
:return ``True`` if transcript accession exists in UTA. ``False`` otherwise
"""
transcript = transcript_list[0]["RefSeq_nuc"]
query = f"""
SELECT *
FROM {self.schema}.tx_exon_aln_v
WHERE tx_ac = '{transcript}'
""" # noqa: S608
results = await self.execute_query(query)
return bool(results)

async def get_ac_descr(self, ac: str) -> str | None:
"""Return accession description. This is typically available only for accessions
from older (pre-GRCh38) builds.
Expand Down
10 changes: 10 additions & 0 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,16 @@ async def test_genomic_to_transcript(test_egc_mapper, tpm3_exon1, tpm3_exon8):
)
genomic_tx_seg_checks(resp, tpm3_exon8)

# Check if transcript exists in UTA. If not, return longest compatible transcript
resp = await test_egc_mapper._genomic_to_tx_segment(
22513522,
genomic_ac="NC_000016.10",
is_seg_start=False,
gene="NPIPB5",
get_nearest_transcript_junction=True,
)
assert resp.tx_ac == "NM_001135865.2"


@pytest.mark.asyncio()
async def test_tpm3(
Expand Down

0 comments on commit a9c7422

Please sign in to comment.