Skip to content

Commit

Permalink
change output + add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Aug 21, 2024
1 parent d29b41e commit d0f7a04
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 80 deletions.
125 changes: 45 additions & 80 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,6 @@
_logger = logging.getLogger(__name__)


def _check_errors(
values: dict,
required_fields: list[str],
either_or_fields: list[tuple[str, str]] | None = None,
) -> dict:
"""Ensure that required fields are set if ``errors`` field is empty
:param values: Values in model
:param required_fields: List of field names that are required if there are no errors
:param either_or_fields: List of field names where at least one field is required if
there are no errors
:raises ValueError: If required or either/or fields are not provided when there are
no errors
:return: Values in model
"""
if not values.get("errors"):
if any(
values.get(required_field) is None for required_field in required_fields
):
err_msg = f"The following fields are required: {required_fields}."
raise ValueError(err_msg)

if either_or_fields:
for field1, field2 in either_or_fields:
if values.get(field1) is None and values.get(field2) is None:
err_msg = f"Either `{field1}` or `{field2}` is required."
raise ValueError(err_msg)

return values


class _Grch38Data(BaseModelForbidExtra):
"""Model representing GRCh38 accession and position, with errors"""

Expand All @@ -60,17 +29,6 @@ class _Grch38Data(BaseModelForbidExtra):
position: StrictInt | None = Field(
None, description="GRCh38 genomic position on `genomic_ac`."
)
errors: list[StrictStr] = Field([], description="Error messages.")

@model_validator(mode="before")
def check_errors(cls, values: dict) -> dict: # noqa: N805
"""Ensure that fields are (un)set depending on errors
:param values: Values in model
:raises ValueError: If `accession` and `position` are not provided when there are no errors
:return: Values in model
"""
return _check_errors(values, required_fields=["accession", "position"])


class ExonCoord(BaseModelForbidExtra):
Expand Down Expand Up @@ -154,9 +112,17 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
provided when there are no errors
:return: Values in model
"""
return _check_errors(
values, required_fields=["seg", "gene", "genomic_ac", "tx_ac"]
)
if not values.get("errors") and not all(
(
values.get("seg"),
values.get("gene"),
values.get("genomic_ac"),
values.get("tx_ac"),
)
):
err_msg = "`seg`, `gene`, `genomic_ac` and `tx_ac` must be provided"
raise ValueError(err_msg)
return values

model_config = ConfigDict(
json_schema_extra={
Expand Down Expand Up @@ -204,11 +170,18 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
:return: Values in model, including service metadata
"""
values["service_meta"] = service_meta()
return _check_errors(
values,
required_fields=["gene", "genomic_ac", "tx_ac"],
either_or_fields=[("seg_start", "seg_end")],
)
if not values.get("errors") and not all(
(
values.get("gene"),
values.get("genomic_ac"),
values.get("tx_ac"),
values.get("seg_start") or values.get("seg_end"),
)
):
err_msg = "`gene`, `genomic_ac`, `tx_ac` and `seg_start` or `seg_end` must be provided"
raise ValueError(err_msg)

return values

model_config = ConfigDict(
json_schema_extra={
Expand Down Expand Up @@ -754,9 +727,11 @@ async def _genomic_to_tx_segment(
genomic_ac = genomic_acs[0]

# Always liftover to GRCh38
grch38_data = await self._get_grch38_ac_pos(genomic_ac, genomic_pos)
if grch38_data.errors:
return GenomicTxSeg(errors=grch38_data.errors)
grch38_data, err_msg = await self._get_grch38_ac_pos(
genomic_ac, genomic_pos
)
if err_msg:
return GenomicTxSeg(errors=[err_msg])
genomic_ac, genomic_pos = grch38_data.accession, grch38_data.position

if not transcript:
Expand Down Expand Up @@ -900,24 +875,20 @@ async def _genomic_to_tx_segment(

async def _get_grch38_ac_pos(
self, genomic_ac: str, genomic_pos: int, grch38_ac: str | None = None
) -> _Grch38Data:
) -> tuple[_Grch38Data | None, str | None]:
"""Get GRCh38 genomic representation for accession and position
:param genomic_ac: RefSeq genomic accession
:param genomic_ac: RefSeq genomic accession (GRCh37 or GRCh38 assembly)
:param genomic_pos: Genomic position on ``genomic_ac``
:param grch38_ac: GRCh38 genomic accession for ``genomic_ac``. If not provided,
will attempt to retrieve associated GRCh38 accession from UTA.
:return: GRCh38 accession, GRCh38 position, and errors if unable to get GRCh38
representation
:param grch38_ac: A valid GRCh38 genomic accession for ``genomic_ac``. If not
provided, will attempt to retrieve associated GRCh38 accession from UTA.
:return: Tuple containing GRCh38 accession and position, and errors if unable to
get GRCh38 representation
"""
if not grch38_ac:
grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
if not grch38_ac:
return _Grch38Data(
accession=None,
position=None,
errors=[f"Unrecognized genomic accession: {genomic_ac}."],
)
return None, f"Unrecognized genomic accession: {genomic_ac}."

grch38_ac = grch38_ac[0]

Expand All @@ -932,31 +903,25 @@ async def _get_grch38_ac_pos(
Assembly.GRCH37.value,
genomic_ac,
)
return _Grch38Data(
accession=None,
position=None,
errors=[
f"`genomic_ac` must use {Assembly.GRCH37.value} or {Assembly.GRCH38.value} assembly."
],
return (
None,
f"`genomic_ac` must use {Assembly.GRCH37.value} or {Assembly.GRCH38.value} assembly.",
)

chromosome = chromosome[-1].split(":")[-1]
liftover_data = self.liftover.get_liftover(
chromosome, genomic_pos, Assembly.GRCH38
)
if liftover_data is None:
return _Grch38Data(
accession=None,
position=None,
errors=[
f"Lifting over {genomic_pos} on {genomic_ac} from {Assembly.GRCH37.value} to {Assembly.GRCH38.value} was unsuccessful."
],
return (
None,
f"Lifting over {genomic_pos} on {genomic_ac} from {Assembly.GRCH37.value} to {Assembly.GRCH38.value} was unsuccessful.",
)

genomic_pos = liftover_data[1]
genomic_ac = grch38_ac

return _Grch38Data(accession=genomic_ac, position=genomic_pos)
return _Grch38Data(accession=genomic_ac, position=genomic_pos), None

async def _get_genomic_ac_gene(
self,
Expand Down Expand Up @@ -1105,11 +1070,11 @@ async def _get_tx_seg_genomic_metadata(
grch38_ac = mane_data["GRCh38_chr"]

# Always liftover to GRCh38
grch38_data = await self._get_grch38_ac_pos(
grch38_data, err_msg = await self._get_grch38_ac_pos(
genomic_ac, genomic_pos, grch38_ac=grch38_ac
)
if grch38_data.errors:
return GenomicTxSeg(errors=grch38_data.errors)
if err_msg:
return GenomicTxSeg(errors=[err_msg])
genomic_ac, genomic_pos = grch38_data.accession, grch38_data.position

tx_exons = await self._get_all_exon_coords(tx_ac, genomic_ac=grch38_ac)
Expand Down
41 changes: 41 additions & 0 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
ExonCoord,
GenomicTxSeg,
GenomicTxSegService,
_Grch38Data,
)
from cool_seq_tool.schemas import (
Strand,
Expand Down Expand Up @@ -702,6 +703,46 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
assert len(actual.errors) > 0


@pytest.mark.asyncio()
async def test_get_grch38_ac_pos(test_egc_mapper):
"""Test that _get_grch38_ac_pos works correctly"""
grch38_ac = "NC_000001.11"
grch38_pos = 154192135
expected = _Grch38Data(accession=grch38_ac, position=grch38_pos), None

# GRCh37 provided
grch38_data = await test_egc_mapper._get_grch38_ac_pos("NC_000001.10", 154164611)
assert grch38_data == expected

# GRCh38 provided, no grch38_ac
grch38_data = await test_egc_mapper._get_grch38_ac_pos(grch38_ac, grch38_pos)
assert grch38_data == expected

# GRCh38 and grch38_ac provided
grch38_data = await test_egc_mapper._get_grch38_ac_pos(
grch38_ac, grch38_pos, grch38_ac=grch38_ac
)
assert grch38_data == expected

# Unrecognized accession
invalid_ac = "NC_0000026.10"
grch38_data = await test_egc_mapper._get_grch38_ac_pos(invalid_ac, 154164611)
assert grch38_data == (None, f"Unrecognized genomic accession: {invalid_ac}.")

# GRCh36 used
grch38_data = await test_egc_mapper._get_grch38_ac_pos("NC_000001.9", 154164611)
assert grch38_data == (None, "`genomic_ac` must use GRCh37 or GRCh38 assembly.")

# Unsuccessful liftover
grch38_data = await test_egc_mapper._get_grch38_ac_pos(
"NC_000001.10", 9999999999999999999
)
assert grch38_data == (
None,
"Lifting over 9999999999999999999 on NC_000001.10 from GRCh37 to GRCh38 was unsuccessful.",
)


@pytest.mark.asyncio()
async def test_get_all_exon_coords(
test_egc_mapper, nm_152263_exons, nm_152263_exons_genomic_coords
Expand Down

0 comments on commit d0f7a04

Please sign in to comment.