Skip to content

Commit

Permalink
fix: make gene optional param for converting genomic coords
Browse files Browse the repository at this point in the history

---------

Co-authored-by: Kori Kuzma <korikuzma@gmail.com>
  • Loading branch information
katiestahl and korikuzma authored Sep 26, 2024
1 parent 1444346 commit 6fa7efb
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 10 deletions.
45 changes: 39 additions & 6 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,19 +97,18 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
"""Ensure that fields are (un)set depending on errors
:param values: Values in model
:raises ValueError: If `seg`, `gene`, `genomic_ac` and `tx_ac` are not
:raises ValueError: If `seg`, `genomic_ac` and `tx_ac` are not
provided when there are no errors
:return: Values in model
"""
if not values.get("errors") and not all(
(
values.get("seg"),
values.get("gene"),
values.get("genomic_ac"),
values.get("tx_ac"),
)
):
err_msg = "`seg`, `gene`, `genomic_ac` and `tx_ac` must be provided"
err_msg = "`seg`, `genomic_ac` and `tx_ac` must be provided"
raise ValueError(err_msg)
return values

Expand Down Expand Up @@ -154,20 +153,21 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
on errors
:param values: Values in model
:raises ValueError: If `gene`, `genomic_ac`, `tx_ac` and `seg_start` or `seg_end`
:raises ValueError: If `genomic_ac`, `tx_ac` and `seg_start` or `seg_end`
not provided when there are no errors
:return: Values in model, including service metadata
"""
values["service_meta"] = service_meta()
if not values.get("errors") and not all(
(
values.get("gene"),
values.get("genomic_ac"),
values.get("tx_ac"),
values.get("seg_start") or values.get("seg_end"),
)
):
err_msg = "`gene`, `genomic_ac`, `tx_ac` and `seg_start` or `seg_end` must be provided"
err_msg = (
"`genomic_ac`, `tx_ac` and `seg_start` or `seg_end` must be provided"
)
raise ValueError(err_msg)

return values
Expand Down Expand Up @@ -866,6 +866,13 @@ async def _genomic_to_tx_segment(
if err_msg:
return GenomicTxSeg(errors=[err_msg])

# gene is not required to liftover coordinates if tx_ac and genomic_ac are given, but we should set the associated gene
if not gene:
_gene, err_msg = await self._get_tx_ac_gene(transcript)
if err_msg:
return GenomicTxSeg(errors=[err_msg])
gene = _gene

return GenomicTxSeg(
gene=gene,
genomic_ac=genomic_ac,
Expand Down Expand Up @@ -1011,6 +1018,32 @@ async def _get_genomic_ac_gene(

return results[0]["hgnc"], None

async def _get_tx_ac_gene(
self,
tx_ac: str,
) -> tuple[str | None, str | None]:
"""Get gene given a transcript.
If multiple genes are found for a given ``tx_ac``, only one
gene will be returned.
:param tx_ac: RefSeq transcript, e.g. ``"NM_004333.6"``
:return: HGNC gene symbol associated to transcript and
warning
"""
query = f"""
SELECT DISTINCT hgnc
FROM {self.uta_db.schema}.tx_exon_aln_v
WHERE tx_ac = '{tx_ac}'
ORDER BY hgnc
LIMIT 1;
""" # noqa: S608
results = await self.uta_db.execute_query(query)
if not results:
return None, f"No gene(s) found given {tx_ac}"

return results[0]["hgnc"], None

async def _get_tx_seg_genomic_metadata(
self,
genomic_ac: str,
Expand Down
5 changes: 1 addition & 4 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,10 +934,7 @@ async def test_genomic_to_transcript_fusion_context(
"get_nearest_transcript_junction": True,
}
resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
assert (
resp.errors[0]
== "`gene` or `transcipt` must be provided to select the adjacent transcript junction"
)
assert resp.errors[0] == "Must provide either `gene` or `transcript`"

inputs = { # Test when transcript is provided
"chromosome": "5",
Expand Down

0 comments on commit 6fa7efb

Please sign in to comment.