Skip to content

Commit

Permalink
build: update cool-seq-tool + ga4gh.vrs versions
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Jan 10, 2024
1 parent e0a7709 commit 84fe73d
Show file tree
Hide file tree
Showing 27 changed files with 171 additions and 133 deletions.
4 changes: 2 additions & 2 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ black = "*"
fastapi = "*"
uvicorn = "*"
pydantic = "==2.*"
"ga4gh.vrs" = {version = "~=2.0.0a1", extras = ["extras"]}
"ga4gh.vrs" = {version = "~=2.0.0a2", extras = ["extras"]}
gene-normalizer = "~=0.3.0.dev1"
boto3 = "*"
cool-seq-tool = "~=0.3.0.dev1"
cool-seq-tool = "~=0.4.0.dev1"
bioutils = "*"
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ install_requires =
fastapi
uvicorn
pydantic ==2.*
ga4gh.vrs[extras] ~= 2.0.0a1
ga4gh.vrs[extras] ~= 2.0.0a2
gene-normalizer ~=0.3.0.dev1
boto3
cool-seq-tool ~=0.3.0.dev1
cool-seq-tool ~=0.4.0.dev1
bioutils

tests_require =
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/validators.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ genomic_delins:
- query: X-70350063-AG-AGGCAGCGCATAAAGCGCATTCTCCG
- query: 16-2138199-GTGAG-G
- query: 1-55509715-AC-A
- query: chr6-31239170-C-CA
should_not_match:
- query: NC_000023.21:g.32386323delinsGA
- query: NC_000007.13:g.159138664delinsAT
Expand Down Expand Up @@ -197,7 +198,6 @@ genomic_insertion:
- query: NC_000022.10:g.30051593_30051594insT
- query: NC_000017.10:g.37880993_37880994insGCTTACGTGATG
- query: ERBB2 g.37880993_37880994insGCTTACGTGATG
- query: chr6-31239170-C-CA
should_not_match:
- query: NC_000022.10:g.51304566_51304567insT
- query: NC_000022.10:g.51304567_51304568insT
Expand Down
4 changes: 0 additions & 4 deletions tests/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,10 +573,6 @@ async def test_protein_substitution(test_handler, braf_v600e, dis3_p63a, tp53_g2
resp = await test_handler.normalize("DIS3 P63A")
assertion_checks(resp, dis3_p63a)

# Case where NA priority
resp = await test_handler.normalize("TP53 G262C")
assertion_checks(resp, tp53_g262c)


@pytest.mark.asyncio
async def test_polypeptide_truncation(test_handler, vhl):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_amplification_to_cx_var(
assert resp.copy_number_change is None
assert resp.amplification_label == "BRAF Amplification"
assert resp.warnings == [
"End inter-residue coordinate (9955599320) is out of " "index on NC_000007.13"
"End inter-residue coordinate (9955599321) is out of index on NC_000007.13"
]

# invalid gene
Expand Down
27 changes: 14 additions & 13 deletions variation/gnomad_vcf_to_protein_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from typing import Dict, List, Optional, Tuple

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.mappers import MANETranscript
from cool_seq_tool.mappers import ManeTranscript
from cool_seq_tool.schemas import ResidueMode
from cool_seq_tool.sources import (
MANETranscriptMappings,
UTADatabase,
ManeTranscriptMappings,
UtaDatabase,
)

from variation.classify import Classify
Expand Down Expand Up @@ -125,9 +125,9 @@ def __init__(
classifier: Classify,
validator: Validate,
translator: Translate,
uta: UTADatabase,
mane_transcript: MANETranscript,
mane_transcript_mappings: MANETranscriptMappings,
uta: UtaDatabase,
mane_transcript: ManeTranscript,
mane_transcript_mappings: ManeTranscriptMappings,
) -> None:
"""Initialize the GnomadVcfToProteinVariation class
Expand Down Expand Up @@ -401,7 +401,7 @@ async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService:
g_start_pos = classification_token.pos
g_end_pos = classification_token.pos
ref_seq, w = self.seqrepo_access.get_reference_sequence(
alt_ac, g_start_pos
alt_ac, start=g_start_pos, end=g_start_pos
)
if not ref_seq:
all_warnings.add(w)
Expand Down Expand Up @@ -476,9 +476,9 @@ async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService:
current_mane_data,
(mane_c_pos_change[0] + 1, mane_c_pos_change[1] + 1),
)
if mane_p["pos"][0] > mane_p["pos"][1]:
mane_p["pos"] = (mane_p["pos"][1], mane_p["pos"][0])
p_ac = mane_p["refseq"]
if mane_p.pos[0] > mane_p.pos[1]:
mane_p.pos = (mane_p.pos[1], mane_p.pos[0])
p_ac = mane_p.refseq
aa_alt = self._get_gnomad_vcf_protein_alt(
classification_token,
alt_type,
Expand All @@ -493,12 +493,13 @@ async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService:
# mane_p is 0-based, but to_vrs allele takes 1-based
variation = self.to_vrs_allele(
p_ac,
mane_p["pos"][0],
mane_p["pos"][1],
mane_p.pos[0],
mane_p.pos[1],
"p",
alt_type,
[],
alt=aa_alt,
residue_mode=ResidueMode.INTER_RESIDUE,
)
if variation:
translation_result = TranslationResult(
Expand All @@ -508,7 +509,7 @@ async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService:

tr_copy = deepcopy(translation_result)
tr_copy.vrs_seq_loc_ac = p_ac
tr_copy.vrs_seq_loc_ac_status = mane_p["status"]
tr_copy.vrs_seq_loc_ac_status = mane_p.status

try:
vrs_variation = tr_copy.vrs_variation
Expand Down
7 changes: 4 additions & 3 deletions variation/hgvs_dup_del_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Dict, List, Optional, Union

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.schemas import ResidueMode
from ga4gh.core import ga4gh_identify
from ga4gh.vrs import models, normalize

Expand Down Expand Up @@ -138,11 +139,11 @@ def allele_mode(
return None

if alt_type == AltType.DUPLICATION:
# start is start - 1, end is end
ref, _ = self.seqrepo_access.get_reference_sequence(
vrs_seq_loc_ac,
location["start"] + 1,
location["end"] + 1,
start=location["start"],
end=location["end"],
residue_mode=ResidueMode.INTER_RESIDUE,
)

if ref:
Expand Down
6 changes: 3 additions & 3 deletions variation/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from urllib.parse import unquote

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.sources import UTADatabase
from cool_seq_tool.sources import UtaDatabase
from ga4gh.vrs import models

from variation.classify import Classify
Expand Down Expand Up @@ -38,7 +38,7 @@ def __init__(
classifier: Classify,
validator: Validate,
translator: Translate,
uta: UTADatabase,
uta: UtaDatabase,
) -> None:
"""Initialize Normalize class.
Expand All @@ -47,7 +47,7 @@ def __init__(
:param classifier: Classifier class for classifying tokens
:param validator: Validator class for validating valid inputs
:param translator: Translating valid inputs
:param UTADatabase uta: Access to db containing alignment data
:param UtaDatabase uta: Access to db containing alignment data
"""
super().__init__(
seqrepo_access,
Expand Down
6 changes: 3 additions & 3 deletions variation/to_copy_number_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from urllib.parse import unquote

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.sources import UTADatabase
from cool_seq_tool.sources import UtaDatabase
from ga4gh.core import ga4gh_identify
from ga4gh.vrs import models
from gene.query import QueryHandler as GeneQueryHandler
Expand Down Expand Up @@ -80,7 +80,7 @@ def __init__(
validator: Validate,
translator: Translate,
gene_normalizer: GeneQueryHandler,
uta: UTADatabase,
uta: UtaDatabase,
) -> None:
"""Initialize the ToCopyNumberVariation class
Expand Down Expand Up @@ -673,7 +673,7 @@ def amplification_to_cx_var(
else:
# Validate start/end are actually on the sequence
_, w = self.seqrepo_access.get_reference_sequence(
sequence_id, start, end
sequence_id, start=start, end=end
)
if w:
warnings.append(w)
Expand Down
8 changes: 4 additions & 4 deletions variation/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from typing import List, Optional

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.mappers import MANETranscript
from cool_seq_tool.sources import UTADatabase
from cool_seq_tool.mappers import ManeTranscript
from cool_seq_tool.sources import UtaDatabase
from ga4gh.vrs import models

from variation.hgvs_dup_del_mode import HGVSDupDelMode
Expand Down Expand Up @@ -43,8 +43,8 @@ class Translate:
def __init__(
self,
seqrepo_access: SeqRepoAccess,
mane_transcript: MANETranscript,
uta: UTADatabase,
mane_transcript: ManeTranscript,
uta: UtaDatabase,
vrs: VRSRepresentation,
hgvs_dup_del_mode: HGVSDupDelMode,
) -> None:
Expand Down
39 changes: 25 additions & 14 deletions variation/translators/genomic_del_dup_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ async def translate(
grch38_data = None
vrs_variation = None
vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA
residue_mode = ResidueMode.RESIDUE

if do_liftover or endpoint_name == Endpoint.NORMALIZE:
errors = []
Expand All @@ -126,18 +127,23 @@ async def translate(
warnings += errors
return None

pos0 = grch38_data.pos0
pos1 = grch38_data.pos1
pos0 = grch38_data.pos0 - 1
if grch38_data.pos1 is None:
pos1 = grch38_data.pos0
else:
pos1 = grch38_data.pos1
residue_mode = ResidueMode.INTER_RESIDUE
ac = grch38_data.ac

if alt_type == AltType.DELETION:
if classification.nomenclature == Nomenclature.GNOMAD_VCF:
ref = classification.matching_tokens[0].ref
invalid_ref_msg = self.validate_reference_sequence(
ac,
pos0 - 1,
pos0 - 1 + len(ref),
pos0,
pos0 + (len(ref) - 1),
ref,
residue_mode=residue_mode,
)
if invalid_ref_msg:
warnings.append(invalid_ref_msg)
Expand All @@ -146,6 +152,7 @@ async def translate(
pos0 = classification.pos0
pos1 = classification.pos1
ac = validation_result.accession
grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1)

assembly = ClinVarAssembly.GRCH38
else:
Expand All @@ -168,10 +175,13 @@ async def translate(
warnings += errors
return None

pos0 = grch38_data.pos0
ac = grch38_data.ac
pos0 = grch38_data.pos0 - 1
if grch38_data.pos1 is None:
pos1 = grch38_data.pos0
else:
pos1 = grch38_data.pos1
ac = grch38_data.ac

residue_mode = ResidueMode.INTER_RESIDUE
self.is_valid(classification.gene_token, ac, pos0, pos1, errors)

if errors:
Expand All @@ -181,21 +191,22 @@ async def translate(
mane = await self.mane_transcript.get_mane_transcript(
ac,
pos0,
pos1,
"g",
end_pos=pos1,
try_longest_compatible=True,
residue_mode=ResidueMode.RESIDUE,
residue_mode=residue_mode,
gene=classification.gene_token.token
if classification.gene_token
else None,
)

if mane:
# mane is 0-based; positions are kept as inter-residue coordinates
ac = mane["refseq"]
vrs_seq_loc_ac_status = mane["status"]
pos0 = mane["pos"][0] + mane["coding_start_site"] + 1
pos1 = mane["pos"][1] + mane["coding_start_site"] + 1
ac = mane.refseq
vrs_seq_loc_ac_status = mane.status
pos0 = mane.pos[0] + mane.coding_start_site
pos1 = mane.pos[1] + mane.coding_start_site
residue_mode = ResidueMode.INTER_RESIDUE
else:
return None

Expand All @@ -209,7 +220,7 @@ async def translate(
if alt_type == AltType.INSERTION:
alt = classification.inserted_sequence

start = pos0 - 1
start = pos0 if residue_mode == ResidueMode.INTER_RESIDUE else pos0 - 1
end = pos1 if pos1 else pos0

refget_accession = get_refget_accession(self.seqrepo_access, ac, warnings)
Expand Down
24 changes: 14 additions & 10 deletions variation/translators/genomic_delins.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,40 +66,43 @@ async def translate(
mane = await self.mane_transcript.get_mane_transcript(
validation_result.accession,
classification.pos0,
classification.pos1
if classification.pos1 is not None
else classification.pos0,
AnnotationLayer.GENOMIC,
end_pos=classification.pos1,
try_longest_compatible=True,
residue_mode=ResidueMode.RESIDUE.value,
residue_mode=ResidueMode.RESIDUE,
gene=gene,
)

if mane:
vrs_seq_loc_ac_status = mane["status"]
vrs_seq_loc_ac_status = mane.status
if gene:
classification = CdnaDelInsClassification(
matching_tokens=classification.matching_tokens,
nomenclature=classification.nomenclature,
gene_token=classification.gene_token,
pos0=mane["pos"][0] + 1,
pos1=mane["pos"][1] + 1,
pos0=mane.pos[0] + 1, # 1-based for classification
pos1=mane.pos[1] + 1, # 1-based for classification
inserted_sequence=classification.inserted_sequence,
)
vrs_seq_loc_ac = mane["refseq"]
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
else:
vrs_seq_loc_ac = mane["alt_ac"]
vrs_seq_loc_ac = mane.alt_ac
coord_type = AnnotationLayer.GENOMIC

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
mane["pos"][0] + 1,
mane["pos"][1] + 1,
mane.pos[0],
mane.pos[1],
coord_type,
AltType.DELINS,
warnings,
alt=classification.inserted_sequence,
cds_start=mane["coding_start_site"] if gene else None,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand All @@ -111,6 +114,7 @@ async def translate(
AltType.DELINS,
warnings,
alt=classification.inserted_sequence,
residue_mode=ResidueMode.RESIDUE,
)

if vrs_allele and vrs_seq_loc_ac:
Expand Down
Loading

0 comments on commit 84fe73d

Please sign in to comment.