Skip to content

Commit

Permalink
revert to basic tuple (lightweight)
Browse files: browse the repository at this point in the history
  • Loading branch information
korikuzma committed Aug 21, 2024
1 parent d0f7a04 commit 9ed9a47
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 24 deletions.
29 changes: 9 additions & 20 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,6 @@
_logger = logging.getLogger(__name__)


class _Grch38Data(BaseModelForbidExtra):
"""Model representing GRCh38 accession and position, with errors"""

accession: StrictStr | None = Field(
None, description="GRCh38 genomic RefSeq accession."
)
position: StrictInt | None = Field(
None, description="GRCh38 genomic position on `genomic_ac`."
)


class ExonCoord(BaseModelForbidExtra):
"""Model for representing exon coordinate data"""

Expand Down Expand Up @@ -727,12 +716,11 @@ async def _genomic_to_tx_segment(
genomic_ac = genomic_acs[0]

# Always liftover to GRCh38
grch38_data, err_msg = await self._get_grch38_ac_pos(
genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(
genomic_ac, genomic_pos
)
if err_msg:
return GenomicTxSeg(errors=[err_msg])
genomic_ac, genomic_pos = grch38_data.accession, grch38_data.position

if not transcript:
# Select a transcript if not provided
Expand Down Expand Up @@ -875,20 +863,20 @@ async def _genomic_to_tx_segment(

async def _get_grch38_ac_pos(
self, genomic_ac: str, genomic_pos: int, grch38_ac: str | None = None
) -> tuple[_Grch38Data | None, str | None]:
) -> tuple[str | None, int | None, str | None]:
"""Get GRCh38 genomic representation for accession and position
:param genomic_ac: RefSeq genomic accession (GRCh37 or GRCh38 assembly)
:param genomic_pos: Genomic position on ``genomic_ac``
:param grch38_ac: A valid GRCh38 genomic accession for ``genomic_ac``. If not
provided, will attempt to retrieve associated GRCh38 accession from UTA.
:return: Tuple containing GRCh38 accession and position, and errors if unable to
get GRCh38 representation
:return: Tuple containing GRCh38 accession, GRCh38 position, and error message
if unable to get GRCh38 representation
"""
if not grch38_ac:
grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
if not grch38_ac:
return None, f"Unrecognized genomic accession: {genomic_ac}."
return None, None, f"Unrecognized genomic accession: {genomic_ac}."

grch38_ac = grch38_ac[0]

Expand All @@ -904,6 +892,7 @@ async def _get_grch38_ac_pos(
genomic_ac,
)
return (
None,
None,
f"`genomic_ac` must use {Assembly.GRCH37.value} or {Assembly.GRCH38.value} assembly.",
)
Expand All @@ -914,14 +903,15 @@ async def _get_grch38_ac_pos(
)
if liftover_data is None:
return (
None,
None,
f"Lifting over {genomic_pos} on {genomic_ac} from {Assembly.GRCH37.value} to {Assembly.GRCH38.value} was unsuccessful.",
)

genomic_pos = liftover_data[1]
genomic_ac = grch38_ac

return _Grch38Data(accession=genomic_ac, position=genomic_pos), None
return genomic_ac, genomic_pos, None

async def _get_genomic_ac_gene(
self,
Expand Down Expand Up @@ -1070,12 +1060,11 @@ async def _get_tx_seg_genomic_metadata(
grch38_ac = mane_data["GRCh38_chr"]

# Always liftover to GRCh38
grch38_data, err_msg = await self._get_grch38_ac_pos(
genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(
genomic_ac, genomic_pos, grch38_ac=grch38_ac
)
if err_msg:
return GenomicTxSeg(errors=[err_msg])
genomic_ac, genomic_pos = grch38_data.accession, grch38_data.position

tx_exons = await self._get_all_exon_coords(tx_ac, genomic_ac=grch38_ac)
if not tx_exons:
Expand Down
12 changes: 8 additions & 4 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
ExonCoord,
GenomicTxSeg,
GenomicTxSegService,
_Grch38Data,
)
from cool_seq_tool.schemas import (
Strand,
Expand Down Expand Up @@ -708,7 +707,7 @@ async def test_get_grch38_ac_pos(test_egc_mapper):
"""Test that _get_grch38_ac_pos works correctly"""
grch38_ac = "NC_000001.11"
grch38_pos = 154192135
expected = _Grch38Data(accession=grch38_ac, position=grch38_pos), None
expected = grch38_ac, grch38_pos, None

# GRCh37 provided
grch38_data = await test_egc_mapper._get_grch38_ac_pos("NC_000001.10", 154164611)
Expand All @@ -727,17 +726,22 @@ async def test_get_grch38_ac_pos(test_egc_mapper):
# Unrecognized accession
invalid_ac = "NC_0000026.10"
grch38_data = await test_egc_mapper._get_grch38_ac_pos(invalid_ac, 154164611)
assert grch38_data == (None, f"Unrecognized genomic accession: {invalid_ac}.")
assert grch38_data == (None, None, f"Unrecognized genomic accession: {invalid_ac}.")

# GRCh36 used
grch38_data = await test_egc_mapper._get_grch38_ac_pos("NC_000001.9", 154164611)
assert grch38_data == (None, "`genomic_ac` must use GRCh37 or GRCh38 assembly.")
assert grch38_data == (
None,
None,
"`genomic_ac` must use GRCh37 or GRCh38 assembly.",
)

# Unsuccessful liftover
grch38_data = await test_egc_mapper._get_grch38_ac_pos(
"NC_000001.10", 9999999999999999999
)
assert grch38_data == (
None,
None,
"Lifting over 9999999999999999999 on NC_000001.10 from GRCh37 to GRCh38 was unsuccessful.",
)
Expand Down

0 comments on commit 9ed9a47

Please sign in to comment.