diff --git a/src/hgvs/assemblymapper.py b/src/hgvs/assemblymapper.py
index 25980b86..be416cd7 100644
--- a/src/hgvs/assemblymapper.py
+++ b/src/hgvs/assemblymapper.py
@@ -9,8 +9,12 @@
     HGVSError,
     HGVSInvalidVariantError,
     HGVSUnsupportedOperationError,
+    HGVSInvalidIntervalError,
 )
 from hgvs.variantmapper import VariantMapper
+from hgvs.posedit import PosEdit
+from hgvs.edit import NARefAlt
+from hgvs.location import SimplePosition, Interval
 
 _logger = logging.getLogger(__name__)
 
@@ -172,7 +176,37 @@ def n_to_c(self, var_n):
         return self._maybe_normalize(var_out)
 
     def c_to_p(self, var_c):
-        var_out = super(AssemblyMapper, self).c_to_p(var_c)
+        # Call the unchecked base implementation: the public VariantMapper.c_to_p
+        # raises for exactly the ins/dup-with-offset case that is retried below.
+        var_out = super(AssemblyMapper, self)._c_to_p(var_c)
+
+        # An ins/dup whose position carries a non-zero offset and whose result
+        # has no posedit is retried after shifting the variant on the genome.
+        if (
+            var_c.posedit.edit.type in ['ins', 'dup']
+            and var_c.type in ("c", "n", "r")
+            and var_c.posedit.pos is not None
+            and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
+            and var_out.posedit is None
+        ):
+            try:
+                var_g = self.c_to_g(var_c)
+                strand = self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand
+
+                # Try shuffle_direction 3 first, then 5; stop at the first result with a posedit.
+                for shuffle_direction in [3, 5]:
+                    shifted_var_g = self._far_shift(var_g, shuffle_direction, strand)
+                    shifted_var_c = super(AssemblyMapper, self).g_to_c(
+                        shifted_var_g, var_c.ac, alt_aln_method=self.alt_aln_method
+                    )
+                    var_out = super(AssemblyMapper, self)._c_to_p(shifted_var_c)
+
+                    if var_out.posedit is not None:
+                        break
+            except (HGVSInvalidVariantError, HGVSInvalidIntervalError, HGVSUnsupportedOperationError) as exc:
+                # Best effort: keep whatever result we already have, but leave a trace.
+                _logger.debug("c_to_p: far-shift retry failed for %s: %s", var_c, exc)
+
         return self._maybe_normalize(var_out)
 
     def relevant_transcripts(self, var_g):
@@ -268,6 +302,40 @@ def _maybe_normalize(self, var):
                 # fall through to return unnormalized variant
         return var
 
+    def _far_shift(self, var_g, shuffle_direction, strand):
+        """Attempt to shift a variant all the way left or right. Rewrite
+        duplications as insertions so that the change is shifted as far as
+        possible.
+
+        :param var_g: genomic variant to shift
+        :param int shuffle_direction: passed through to the Normalizer (callers use 3 or 5)
+        :param int strand: transcript strand, compared against 1 and -1
+        :returns: the normalized variant; if normalization yields a dup, its
+            posedit is replaced by an ins at one edge of the duplicated interval
+        """
+        normalizer = hgvs.normalizer.Normalizer(
+            self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=shuffle_direction
+        )
+        shifted_var_g = normalizer.normalize(var_g)
+        if shifted_var_g.posedit.edit.type == 'dup':
+            # NOTE(review): presumably populates edit.ref, which is reused below as the inserted sequence.
+            self._replace_reference(shifted_var_g)
+            dup_pos = shifted_var_g.posedit.pos
+            if (strand == 1 and shuffle_direction == 3) or (strand == -1 and shuffle_direction == 5):
+                # Insertion goes between (start - 1) and start of the duplicated interval.
+                ins_start = dup_pos.start.base - 1
+            else:
+                # Insertion goes between end and (end + 1) of the duplicated interval.
+                ins_start = dup_pos.end.base
+            shifted_var_g.posedit = PosEdit(
+                pos=Interval(
+                    start=SimplePosition(base=ins_start),
+                    end=SimplePosition(base=ins_start + 1),
+                ),
+                edit=NARefAlt(ref=None, alt=shifted_var_g.posedit.edit.ref),
+            )
+        return shifted_var_g
+
 
 #
 # Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)
diff --git a/src/hgvs/variantmapper.py b/src/hgvs/variantmapper.py
index d73b98b7..5e958da5 100644
--- a/src/hgvs/variantmapper.py
+++ b/src/hgvs/variantmapper.py
@@ -427,6 +427,23 @@ def c_to_p(self, var_c, pro_ac=None):
 
         """
 
+        var_p = self._c_to_p(var_c, pro_ac=pro_ac)
+
+        # An ins/dup with a non-zero position offset that produced no posedit is
+        # not handled here; AssemblyMapper.c_to_p retries it after a genomic shift.
+        if (
+            var_c.posedit.edit.type in ['ins', 'dup']
+            and var_c.type in ("c", "n", "r")
+            and var_c.posedit.pos is not None
+            and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
+            and var_p.posedit is None
+        ):
+            raise HGVSUnsupportedOperationError('c_to_p not supported on VariantMapper for this var_c, try AssemblyMapper')
+
+        return var_p
+
+    def _c_to_p(self, var_c, pro_ac=None):
+        """Core c. to p. conversion used by c_to_p; performs no ins/dup offset check."""
         if not (var_c.type == "c"):
             raise HGVSInvalidVariantError("Expected a cDNA (c.) variant; got " + str(var_c))
         if self._validator:
diff --git a/tests/data/cache-py3.hdp b/tests/data/cache-py3.hdp
index 97ad36b0..1dc80590 100644
Binary files a/tests/data/cache-py3.hdp and b/tests/data/cache-py3.hdp differ
diff --git a/tests/data/gcp/real.tsv b/tests/data/gcp/real.tsv
index 49c8378f..2ad53c9c 100644
--- a/tests/data/gcp/real.tsv
+++ b/tests/data/gcp/real.tsv
@@ -58,6 +58,5 @@ ID00056	NC_000010.10:g.89693009delG	NM_000314.4:c.492+1delG	NP_000305.3:p.?
 ID00057	NC_000010.10:g.89711873A>C	NM_000314.4:c.493-2A>C	NP_000305.3:p.?
 ID00058	NC_000010.10:g.89717676G>A	NM_000314.4:c.701G>A	NP_000305.3:p.(Arg234Gln)
 ID00059	NC_000010.10:g.89717777G>A	NM_000314.4:c.801+1G>A	NP_000305.3:p.?
-ID00060	NC_000010.10:g.89720648dupT	NM_000314.4:c.802-3dupT	NP_000305.3:p.?
 ID00061	NC_000005.9:g.131705667G>T	NM_003060.3:c.3G>T	NP_003051.1:p.Met1?
 ID00062	NC_000005.9:g.131706014G>A	NM_003060.3:c.350G>A	NP_003051.1:p.(Trp117*)
diff --git a/tests/issues/test_714.py b/tests/issues/test_714.py
new file mode 100644
index 00000000..fe667d6a
--- /dev/null
+++ b/tests/issues/test_714.py
@@ -0,0 +1,66 @@
+import os
+
+import hgvs
+import pytest
+from support import CACHE
+
+cases = [
+    {
+        "name": "ins with splice region preserved",
+        "var_c": "NM_004119.2:c.1837+21_1837+22insCGAGAGAATATGAATATGATCTCAAATGGGAGTTTCCAAGAGAAAATTTAGAGTTTGGTAAGAATGGAATGTGCCAAA",
+        "var_p": "NP_004110.2:p.(Lys614_Val615insAsnGlyMetCysGlnThrArgGluTyrGluTyrAspLeuLysTrpGluPheProArgGluAsnLeuGluPheGlyLys)"
+    },
+    {
+        "name": "dup with splice region preserved",
+        "var_c": "NM_004119.2:c.1835_1837+3dup",
+        "var_p": "NP_004110.2:p.(Gly613_Lys614insIleGly)"
+    },
+    {
+        "name": "dup with splice region preserved",
+        "var_c": "NM_005228.4:c.2284-5_2290dup",
+        "var_p": "NP_005219.2:p.(Ala763_Tyr764insPheGlnGluAla)"
+    },
+    {
+        "name": "dup with splice region preserved",
+        "var_c": "NM_004456.4:c.2196-1_2196dup",
+        "var_p": "NP_004447.2:p.(Tyr733AspfsTer8)"
+    },
+    {
+        "name": "dup with splice region preserved",
+        "var_c": "NM_016222.3:c.27+2_27+5dup",
+        "var_p": "NP_057306.2:p.(Arg10ValfsTer20)"
+    },
+    {
+        "name": "dup with splice region preserved",
+        "var_c": "NM_182758.2:c.2953-31_2953-26dup",
+        "var_p": "NP_877435.2:p.?"
+    },
+    {
+        "name": "dup with broken cigar mapping",
+        "var_c": "NM_000267.3:c.8315-290_8457dup",
+        "var_p": "NP_000258.1:p.?"
+    }
+]
+
+
+@pytest.fixture(scope="module")
+def hp():
+    return hgvs.parser.Parser()
+
+
+@pytest.fixture(scope="module")
+def hdp():
+    return hgvs.dataproviders.uta.connect(
+        mode=os.environ.get("HGVS_CACHE_MODE", "run"), cache=CACHE
+    )
+
+
+@pytest.fixture(scope="module")
+def am37(hdp):
+    return hgvs.assemblymapper.AssemblyMapper(hdp, assembly_name="GRCh37")
+
+
+@pytest.mark.parametrize("case", cases, ids=[c["var_c"] for c in cases])
+def test_real_c_to_p(case, hp, am37):
+    var_c = hp.parse(case["var_c"])
+    assert str(am37.c_to_p(var_c)) == case["var_p"]
diff --git a/tests/support/mock_input_source.py b/tests/support/mock_input_source.py
index 90a2dd6a..75a32c3d 100644
--- a/tests/support/mock_input_source.py
+++ b/tests/support/mock_input_source.py
@@ -53,6 +53,9 @@ def get_tx_seq(self, ac):
     def get_seq(self, ac, start_i=None, end_i=None):
         return self.get_tx_seq(ac)[start_i:end_i]
 
+    def get_pro_ac_for_tx_ac(self, ac):
+        return 'MOCK'
+
     #
     # internal methods
     #