Skip to content

Commit

Permalink
Add support for mito m. in translate_from (#362)
Browse files Browse the repository at this point in the history
  • Loading branch information
larrybabb authored Mar 20, 2024
1 parent cef704d commit dc115d1
Show file tree
Hide file tree
Showing 19 changed files with 941 additions and 495 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ extras =
psycopg2-binary
biocommons.seqrepo>=0.5.1
bioutils>=0.5.2
hgvs>=1.4
hgvs@git+https://github.com/biocommons/hgvs@225-uncertain-ranges
requests
dill~=0.3.7
click
Expand Down
47 changes: 31 additions & 16 deletions src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,9 @@ class Translator:
r"(?P<chr>[^-]+)-(?P<pos>\d+)-(?P<ref>[ACGTURYKMSWBDHVN]+)-(?P<alt>[ACGTURYKMSWBDHVN]+)",
re.IGNORECASE
)
hgvs_re = re.compile(r"[^:]+:[cgnpr]\.")
hgvs_re = re.compile(r"[^:]+:[cgmnpr]\.")
spdi_re = re.compile(r"(?P<ac>[^:]+):(?P<pos>\d+):(?P<del_len_or_seq>\w*):(?P<ins_seq>\w*)")


def __init__(
self,
data_proxy,
Expand Down Expand Up @@ -86,17 +85,34 @@ def _get_hgvs_refget_ac(self, sv: hgvs.sequencevariant.SequenceVariant):

@staticmethod
def _ir_stype(a):
"""Get accession's sequence type"""
if a.startswith("refseq:NM_"):
return "n"
if a.startswith("refseq:NP_"):
return "p"
if a.startswith("refseq:NG_"):
return "g"
if a.startswith("refseq:NC_"):
return "g"
if a.startswith("GRCh"):
return "g"
"""
The purpose of this function is to provide a convenient way to extract the sequence type from an accession by matching its prefix to a known set of prefixes.
Args:
a (str): The accession string.
Returns:
str or None: The sequence type associated with the accession string, or None if no matching prefix is found.
"""

prefix_dict = {
"refseq:NM_": "n",
"refseq:NC_012920": "m",
"refseq:NG_": "g",
"refseq:NC_00": "g",
"refseq:NW_": "g",
"refseq:NT_": "g",
"refseq:NR_": "n",
"refseq:NP_": "p",
"refseq:XM_": "n",
"refseq:XR_": "n",
"refseq:XP_": "p",
"GRCh": "g",
}

for prefix, stype in prefix_dict.items():
if a.startswith(prefix):
return stype
return None

def translate_from(self, var, fmt=None, **kwargs):
Expand Down Expand Up @@ -152,7 +168,6 @@ def translate_to(self, vo, fmt):
t = self.to_translators[fmt]
return t(vo)


############################################################################
# INTERNAL

Expand Down Expand Up @@ -580,7 +595,7 @@ def _to_hgvs(self, vo, namespace="refseq"):
if ns.startswith("GRC") and namespace is None:
continue

if not (any(a.startswith(pfx) for pfx in ("NM", "NP", "NC", "NG"))):
if not (any(a.startswith(pfx) for pfx in ("NM", "NP", "NC", "NG", "NR", "NW", "NT", "XM", "XR", "XP"))):
continue

var.ac = a
Expand Down Expand Up @@ -727,7 +742,7 @@ def _post_process_imported_cnv(self, copy_number):

from ga4gh.vrs.dataproxy import create_dataproxy
# dp = create_dataproxy("seqrepo+file:///usr/local/share/seqrepo/latest")
dp = create_dataproxy("seqrepo + http://localhost:5555/seqrepo")
dp = create_dataproxy("seqrepo + http://localhost:5000/seqrepo")
tlr = Translator(data_proxy=dp)

expressions = [
Expand Down
51 changes: 49 additions & 2 deletions tests/extras/cassettes/test_from_beacon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/GRCh38:19
response:
Expand All @@ -35,7 +35,54 @@ interactions:
Content-Type:
- application/json
Date:
- Mon, 19 Feb 2024 07:31:12 GMT
- Wed, 13 Mar 2024 11:29:22 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/GRCh38:MT
response:
body:
string: "{\n \"added\": \"2016-08-24T06:13:07Z\",\n \"aliases\": [\n \"Ensembl:MT\",\n
\ \"ensembl:MT\",\n \"GRCh37.p10:MT\",\n \"GRCh37.p10:chrM\",\n \"GRCh37.p11:MT\",\n
\ \"GRCh37.p11:chrM\",\n \"GRCh37.p12:MT\",\n \"GRCh37.p12:chrM\",\n
\ \"GRCh37.p13:MT\",\n \"GRCh37.p13:chrM\",\n \"GRCh37.p2:MT\",\n
\ \"GRCh37.p2:chrM\",\n \"GRCh37.p5:MT\",\n \"GRCh37.p5:chrM\",\n
\ \"GRCh37.p9:MT\",\n \"GRCh37.p9:chrM\",\n \"GRCh38:MT\",\n \"GRCh38:chrM\",\n
\ \"GRCh38.p1:MT\",\n \"GRCh38.p1:chrM\",\n \"GRCh38.p10:MT\",\n \"GRCh38.p10:chrM\",\n
\ \"GRCh38.p11:MT\",\n \"GRCh38.p11:chrM\",\n \"GRCh38.p12:MT\",\n
\ \"GRCh38.p12:chrM\",\n \"GRCh38.p2:MT\",\n \"GRCh38.p2:chrM\",\n
\ \"GRCh38.p3:MT\",\n \"GRCh38.p3:chrM\",\n \"GRCh38.p4:MT\",\n \"GRCh38.p4:chrM\",\n
\ \"GRCh38.p5:MT\",\n \"GRCh38.p5:chrM\",\n \"GRCh38.p6:MT\",\n \"GRCh38.p6:chrM\",\n
\ \"GRCh38.p7:MT\",\n \"GRCh38.p7:chrM\",\n \"GRCh38.p8:MT\",\n \"GRCh38.p8:chrM\",\n
\ \"GRCh38.p9:MT\",\n \"GRCh38.p9:chrM\",\n \"MD5:c68f52674c9fb33aef52dcf399755519\",\n
\ \"NCBI:NC_012920.1\",\n \"refseq:NC_012920.1\",\n \"SEGUID:eQNFYXnsCzhp/MkfBUBVnuFZzTA\",\n
\ \"SHA1:7903456179ec0b3869fcc91f0540559ee159cd30\",\n \"VMC:GS_k3grVkjY-hoWcCUojHw6VU6GE3MZ8Sct\",\n
\ \"sha512t24u:k3grVkjY-hoWcCUojHw6VU6GE3MZ8Sct\",\n \"ga4gh:SQ.k3grVkjY-hoWcCUojHw6VU6GE3MZ8Sct\",\n
\ \"hs37-1kg:MT\",\n \"hs37d5:MT\"\n ],\n \"alphabet\": \"ACGNT\",\n
\ \"length\": 16569\n}\n"
headers:
Connection:
- close
Content-Length:
- '1355'
Content-Type:
- application/json
Date:
- Wed, 13 Mar 2024 11:29:22 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand Down
Loading

0 comments on commit dc115d1

Please sign in to comment.