From 9ed9a47c086dbd34940de7e76d82f89984c8c406 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 21 Aug 2024 10:59:48 -0400 Subject: [PATCH] revert to basic tuple (lightweight) --- .../mappers/exon_genomic_coords.py | 29 ++++++------------- tests/mappers/test_exon_genomic_coords.py | 12 +++++--- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index dae6ced..25f0235 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -20,17 +20,6 @@ _logger = logging.getLogger(__name__) -class _Grch38Data(BaseModelForbidExtra): - """Model representing GRCh38 accession and position, with errors""" - - accession: StrictStr | None = Field( - None, description="GRCh38 genomic RefSeq accession." - ) - position: StrictInt | None = Field( - None, description="GRCh38 genomic position on `genomic_ac`." - ) - - class ExonCoord(BaseModelForbidExtra): """Model for representing exon coordinate data""" @@ -727,12 +716,11 @@ async def _genomic_to_tx_segment( genomic_ac = genomic_acs[0] # Always liftover to GRCh38 - grch38_data, err_msg = await self._get_grch38_ac_pos( + genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos( genomic_ac, genomic_pos ) if err_msg: return GenomicTxSeg(errors=[err_msg]) - genomic_ac, genomic_pos = grch38_data.accession, grch38_data.position if not transcript: # Select a transcript if not provided @@ -875,20 +863,20 @@ async def _genomic_to_tx_segment( async def _get_grch38_ac_pos( self, genomic_ac: str, genomic_pos: int, grch38_ac: str | None = None - ) -> tuple[_Grch38Data | None, str | None]: + ) -> tuple[str | None, int | None, str | None]: """Get GRCh38 genomic representation for accession and position :param genomic_ac: RefSeq genomic accession (GRCh37 or GRCh38 assembly) :param genomic_pos: Genomic position on ``genomic_ac`` :param grch38_ac: A valid GRCh38 genomic accession for ``genomic_ac``. If not provided, will attempt to retrieve associated GRCh38 accession from UTA. - :return: Tuple containing GRCh38 accession and position, and errors if unable to - get GRCh38 representation + :return: Tuple containing GRCh38 accession, GRCh38 position, and error message + if unable to get GRCh38 representation """ if not grch38_ac: grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac) if not grch38_ac: - return None, f"Unrecognized genomic accession: {genomic_ac}." + return None, None, f"Unrecognized genomic accession: {genomic_ac}." grch38_ac = grch38_ac[0] @@ -904,6 +892,7 @@ async def _get_grch38_ac_pos( genomic_ac, ) return ( + None, None, f"`genomic_ac` must use {Assembly.GRCH37.value} or {Assembly.GRCH38.value} assembly.", ) @@ -914,6 +903,7 @@ async def _get_grch38_ac_pos( ) if liftover_data is None: return ( + None, None, f"Lifting over {genomic_pos} on {genomic_ac} from {Assembly.GRCH37.value} to {Assembly.GRCH38.value} was unsuccessful.", ) @@ -921,7 +911,7 @@ async def _get_grch38_ac_pos( genomic_pos = liftover_data[1] genomic_ac = grch38_ac - return _Grch38Data(accession=genomic_ac, position=genomic_pos), None + return genomic_ac, genomic_pos, None async def _get_genomic_ac_gene( self, @@ -1070,12 +1060,11 @@ async def _get_tx_seg_genomic_metadata( grch38_ac = mane_data["GRCh38_chr"] # Always liftover to GRCh38 - grch38_data, err_msg = await self._get_grch38_ac_pos( + genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos( genomic_ac, genomic_pos, grch38_ac=grch38_ac ) if err_msg: return GenomicTxSeg(errors=[err_msg]) - genomic_ac, genomic_pos = grch38_data.accession, grch38_data.position tx_exons = await self._get_all_exon_coords(tx_ac, genomic_ac=grch38_ac) if not tx_exons: diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py index f0e39b1..b2f5d8a 100644 --- a/tests/mappers/test_exon_genomic_coords.py +++ b/tests/mappers/test_exon_genomic_coords.py @@ -8,7 +8,6 @@ ExonCoord, GenomicTxSeg, GenomicTxSegService, - _Grch38Data, ) from cool_seq_tool.schemas import ( Strand, @@ -708,7 +707,7 @@ async def test_get_grch38_ac_pos(test_egc_mapper): """Test that _get_grch38_ac_pos works correctly""" grch38_ac = "NC_000001.11" grch38_pos = 154192135 - expected = _Grch38Data(accession=grch38_ac, position=grch38_pos), None + expected = grch38_ac, grch38_pos, None # GRCh37 provided grch38_data = await test_egc_mapper._get_grch38_ac_pos("NC_000001.10", 154164611) @@ -727,17 +726,22 @@ async def test_get_grch38_ac_pos(test_egc_mapper): # Unrecognized accession invalid_ac = "NC_0000026.10" grch38_data = await test_egc_mapper._get_grch38_ac_pos(invalid_ac, 154164611) - assert grch38_data == (None, f"Unrecognized genomic accession: {invalid_ac}.") + assert grch38_data == (None, None, f"Unrecognized genomic accession: {invalid_ac}.") # GRCh36 used grch38_data = await test_egc_mapper._get_grch38_ac_pos("NC_000001.9", 154164611) - assert grch38_data == (None, "`genomic_ac` must use GRCh37 or GRCh38 assembly.") + assert grch38_data == ( + None, + None, + "`genomic_ac` must use GRCh37 or GRCh38 assembly.", + ) # Unsuccessful liftover grch38_data = await test_egc_mapper._get_grch38_ac_pos( "NC_000001.10", 9999999999999999999 ) assert grch38_data == ( + None, None, "Lifting over 9999999999999999999 on NC_000001.10 from GRCh37 to GRCh38 was unsuccessful.", )