Skip to content

Commit

Permalink
Merge pull request #230 from ga4gh/2-alpha-allele-translator
Browse files Browse the repository at this point in the history
Updates for spdi and hgvs to/from
  • Loading branch information
theferrit32 authored Aug 18, 2023
2 parents db1486c + 89331e9 commit 5ffdbb7
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 29 deletions.
36 changes: 22 additions & 14 deletions src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

from collections.abc import Mapping
from typing import Union
import logging
import re

Expand Down Expand Up @@ -87,7 +88,8 @@ def translate_to(self, vo, fmt):


############################################################################
## INTERNAL
# INTERNAL


def _from_beacon(self, beacon_expr, assembly_name=None):
"""Parse beacon expression into VRS Allele
Expand Down Expand Up @@ -324,7 +326,7 @@ def ir_stype(a):
if not self.is_valid_allele(vo):
raise ValueError("_to_hgvs requires a VRS Allele with SequenceLocation and LiteralSequenceExpression")

sequence = str(vo.location.sequence)
sequence = str(export_sequencelocation_sequence_id(vo.location.sequence))
aliases = self.data_proxy.translate_sequence_identifier(sequence, namespace)

# infer type of sequence based on accession
Expand All @@ -350,9 +352,10 @@ def ir_stype(a):
ref = self.data_proxy.get_sequence(sequence, start, end)
start += 1
ival = hgvs.location.Interval(
start=hgvs.location.start,
end=hgvs.location.end)
alt = str(vo.state.sequence) or None # "" => None
start=str(vo.location.start),
end=str(vo.location.end)
)
alt = str(vo.state.sequence.root) or None # "" => None
edit = hgvs.edit.NARefAlt(ref=ref, alt=alt)

posedit = hgvs.posedit.PosEdit(pos=ival, edit=edit)
Expand Down Expand Up @@ -389,7 +392,6 @@ def ir_stype(a):

return list(set(hgvs_exprs))


def _to_spdi(self, vo, namespace="refseq"):
"""generates a *list* of SPDI expressions for VRS Allele.
Expand All @@ -411,11 +413,11 @@ def _to_spdi(self, vo, namespace="refseq"):
if not self.is_valid_allele(vo):
raise ValueError("_to_spdi requires a VRS Allele with SequenceLocation and LiteralSequenceExpression")

sequence = str(vo.location.sequence)
sequence = str(export_sequencelocation_sequence_id(vo.location.sequence))
aliases = self.data_proxy.translate_sequence_identifier(sequence, namespace)
aliases = [a.split(":")[1] for a in aliases]
start, end = vo.location.start, vo.location.end
spdi_tail = f":{start}:{end-start}:{vo.state.sequence}"
spdi_tail = f":{start}:{end-start}:{vo.state.sequence.root}"
spdis = [a + spdi_tail for a in aliases]
return spdis

Expand All @@ -434,13 +436,13 @@ def _hgvs_parser(self):


def _post_process_imported_allele(self, allele):
"""Provide common post-processing for imported Alleles IN-PLACE.
"""
Provide common post-processing for imported Alleles IN-PLACE.
"""

if self.translate_sequence_identifiers:
seq_id = self.data_proxy.translate_sequence_identifier(allele.location.sequence.root, "ga4gh")[0]
allele.location.sequence = seq_id
allele.location.sequence.root = seq_id

if self.normalize:
allele = normalize(allele, self.data_proxy)
Expand Down Expand Up @@ -469,10 +471,16 @@ def _seq_id_mapper(self, ir):
to_translators = {
"hgvs": _to_hgvs,
"spdi": _to_spdi,
#"gnomad": to_gnomad,
# "gnomad": to_gnomad,
}


def export_sequencelocation_sequence_id(
        location_sequence: Union[models.IRI, models.SequenceReference]):
    """Extract the bare sequence identifier from a location's sequence value.

    :param location_sequence: either a ``models.IRI`` or a
        ``models.SequenceReference``.
    :returns: the ``root`` value of an ``IRI``, or the ``refgetAccession``
        of a ``SequenceReference``.  Any other kind of input matches neither
        branch and yields ``None``.
    """
    sequence_id = None
    if isinstance(location_sequence, models.IRI):
        sequence_id = location_sequence.root
    elif isinstance(location_sequence, models.SequenceReference):
        sequence_id = location_sequence.refgetAccession
    return sequence_id


if __name__ == "__main__":
Expand Down Expand Up @@ -504,8 +512,8 @@ def _seq_id_mapper(self, ir):
},
"type": "Allele"
}, {
"end": 22,
"start": 21,
"end": 22,
"start": 21,
}
]
formats = ["hgvs", "gnomad", "beacon", "spdi", "vrs", None]
Expand Down
28 changes: 24 additions & 4 deletions src/ga4gh/vrs/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,28 @@
_logger = logging.getLogger(__name__)


def _normalize_allele(allele, data_proxy):
sequence = SequenceProxy(data_proxy, allele.location.sequence)
def _normalize_allele(input_allele, data_proxy):
"""
Converts .location.sequence into an IRI if it is a SequenceReference because it makes the code simpler.
If it started as a sequence reference, put it back as one at the end.
"""
allele = pydantic_copy(input_allele)
sequence_reference = None
if isinstance(allele.location.sequence, models.SequenceReference):
sequence_reference = allele.location.sequence
allele.location.sequence = models.IRI(sequence_reference.refgetAccession)

sequence = SequenceProxy(data_proxy, allele.location.sequence.root)

ival = (allele.location.start, allele.location.end)

_allele_state = allele.state.type
_states_with_sequence = ["ReferenceLengthExpression", "LiteralSequenceExpression"]
if _allele_state in _states_with_sequence:
alleles = (None, allele.state.sequence)
alleles = (None, allele.state.sequence.root)
elif _allele_state == "RepeatedSequenceExpression" and \
allele.state.seq_expr.type in _states_with_sequence:
alleles = (None, allele.state.seq_expr.sequence.root)
else:
alleles = (None, "")

Expand All @@ -40,15 +53,22 @@ def _normalize_allele(allele, data_proxy):
new_allele.location.end = new_ival[1]

if new_allele.state.type in _states_with_sequence:
new_allele.state.sequence = new_alleles[1]
new_allele.state.sequence = models.SequenceString(new_alleles[1])
except ValueError:
# Occurs for ref agree Alleles (when alt = ref)
pass

if sequence_reference:
new_allele.location.sequence = sequence_reference

return new_allele


# TODO _normalize_genotype?


def _normalize_haplotype(o, data_proxy=None):
    """Sort the members of ``o`` using ``ga4gh_digest`` as the sort key.

    ``o.members`` is rebound to the newly sorted list and the same object
    ``o`` is returned.  ``data_proxy`` is accepted but not used by this
    function.
    """
    ordered_members = sorted(o.members, key=ga4gh_digest)
    o.members = ordered_members
    return o

Expand Down
7 changes: 5 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@ def dataproxy():

@pytest.fixture(scope="session")
def rest_dataproxy():
return SeqRepoRESTDataProxy(base_url="http://localhost:5000/seqrepo")
return SeqRepoRESTDataProxy(
base_url=os.environ.get(
"SEQREPO_REST_URL",
"http://localhost:5000/seqrepo"))


@pytest.fixture(scope="session")
def tlr(rest_dataproxy):
return Translator(
data_proxy=rest_dataproxy,
default_assembly_name="GRCh38",
# TODO: Set these to defaults and adjust relevant tests
# TODO: Set these to defaults and adjust relevant tests
identify=False,
normalize=False,
translate_sequence_identifiers=True,
Expand Down
60 changes: 51 additions & 9 deletions tests/extras/cassettes/test_to_spdi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,49 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000013.11
response:
body:
string: "{\n \"added\": \"2016-08-27T23:50:14Z\",\n \"aliases\": [\n \"GRCh38:13\",\n
\ \"GRCh38:chr13\",\n \"GRCh38.p1:13\",\n \"GRCh38.p1:chr13\",\n \"GRCh38.p10:13\",\n
\ \"GRCh38.p10:chr13\",\n \"GRCh38.p11:13\",\n \"GRCh38.p11:chr13\",\n
\ \"GRCh38.p12:13\",\n \"GRCh38.p12:chr13\",\n \"GRCh38.p2:13\",\n
\ \"GRCh38.p2:chr13\",\n \"GRCh38.p3:13\",\n \"GRCh38.p3:chr13\",\n
\ \"GRCh38.p4:13\",\n \"GRCh38.p4:chr13\",\n \"GRCh38.p5:13\",\n \"GRCh38.p5:chr13\",\n
\ \"GRCh38.p6:13\",\n \"GRCh38.p6:chr13\",\n \"GRCh38.p7:13\",\n \"GRCh38.p7:chr13\",\n
\ \"GRCh38.p8:13\",\n \"GRCh38.p8:chr13\",\n \"GRCh38.p9:13\",\n \"GRCh38.p9:chr13\",\n
\ \"MD5:a5437debe2ef9c9ef8f3ea2874ae1d82\",\n \"NCBI:NC_000013.11\",\n
\ \"refseq:NC_000013.11\",\n \"SEGUID:2oDBty0yKV9wHo7gg+Bt+fPgi5o\",\n
\ \"SHA1:da80c1b72d32295f701e8ee083e06df9f3e08b9a\",\n \"VMC:GS__0wi-qoDrvram155UmcSC-zA5ZK4fpLT\",\n
\ \"sha512t24u:_0wi-qoDrvram155UmcSC-zA5ZK4fpLT\",\n \"ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT\"\n
\ ],\n \"alphabet\": \"ACGKNTY\",\n \"length\": 114364328\n}\n"
headers:
Connection:
- close
Content-Length:
- '1002'
Content-Type:
- application/json
Date:
- Thu, 17 Aug 2023 03:03:25 GMT
Server:
- Werkzeug/2.2.3 Python/3.11.4
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT
response:
Expand All @@ -35,9 +77,9 @@ interactions:
Content-Type:
- application/json
Date:
- Mon, 19 Sep 2022 15:07:31 GMT
- Thu, 17 Aug 2023 03:03:25 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
- Werkzeug/2.2.3 Python/3.11.4
status:
code: 200
message: OK
Expand All @@ -51,7 +93,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32936731&end=32936732
response:
Expand All @@ -65,9 +107,9 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Sep 2022 15:07:31 GMT
- Thu, 17 Aug 2023 03:03:25 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
- Werkzeug/2.2.3 Python/3.11.4
status:
code: 200
message: OK
Expand All @@ -81,7 +123,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT
response:
Expand All @@ -107,9 +149,9 @@ interactions:
Content-Type:
- application/json
Date:
- Mon, 19 Sep 2022 15:07:32 GMT
- Thu, 17 Aug 2023 03:03:25 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
- Werkzeug/2.2.3 Python/3.11.4
status:
code: 200
message: OK
Expand Down

0 comments on commit 5ffdbb7

Please sign in to comment.