Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ins or dups where splice region is preserved #719

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion src/hgvs/assemblymapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
HGVSError,
HGVSInvalidVariantError,
HGVSUnsupportedOperationError,
HGVSInvalidIntervalError,
)
from hgvs.variantmapper import VariantMapper
from hgvs.posedit import PosEdit
from hgvs.edit import NARefAlt
from hgvs.location import SimplePosition, Interval

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -172,7 +176,31 @@ def n_to_c(self, var_n):
return self._maybe_normalize(var_out)

def c_to_p(self, var_c):
    """Project ``var_c`` to a protein variant, retrying intronic ins/dup.

    The variant is first projected with the parent ``_c_to_p``. If that
    gives no protein consequence (``posedit is None``) for an ``ins`` or
    ``dup`` whose start or end has a non-zero offset, the variant is mapped
    to the genome, shifted with ``_far_shift`` in shuffle direction 3 and
    then 5, mapped back to the transcript and projected again. The first
    re-projection that carries a ``posedit`` is used.

    :param var_c: variant to project
    :returns: the projected variant, passed through ``_maybe_normalize``
    """
    # Call _c_to_p directly: the parent's public c_to_p raises
    # HGVSUnsupportedOperationError for exactly the variants rescued below.
    var_out = super(AssemblyMapper, self)._c_to_p(var_c)

    pos = var_c.posedit.pos
    needs_rescue = (
        var_c.posedit.edit.type in ["ins", "dup"]
        and var_c.type in "cnr"
        and pos is not None
        and (pos.start.offset != 0 or pos.end.offset != 0)
        and var_out.posedit is None
    )
    if not needs_rescue:
        return self._maybe_normalize(var_out)

    rescue_errors = (
        HGVSInvalidVariantError,
        HGVSInvalidIntervalError,
        HGVSUnsupportedOperationError,
    )

    try:
        var_g = self.c_to_g(var_c)
        strand = self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand
    except rescue_errors as exc:
        # Best effort only: keep the direct projection if the variant
        # cannot be placed on the genome.
        _logger.debug("c_to_p: cannot map %s to genome for shifting: %s", var_c, exc)
        return self._maybe_normalize(var_out)

    for shuffle_direction in (3, 5):
        # Guard each direction separately so a failure while shifting one
        # way does not prevent trying the other.
        try:
            shifted_var_g = self._far_shift(var_g, shuffle_direction, strand)
            shifted_var_c = super(AssemblyMapper, self).g_to_c(
                shifted_var_g, var_c.ac, alt_aln_method=self.alt_aln_method
            )
            candidate = super(AssemblyMapper, self)._c_to_p(shifted_var_c)
        except rescue_errors as exc:
            _logger.debug(
                "c_to_p: shift in direction %s failed for %s: %s",
                shuffle_direction, var_c, exc,
            )
            continue

        var_out = candidate
        if var_out.posedit is not None:
            break

    return self._maybe_normalize(var_out)

def relevant_transcripts(self, var_g):
Expand Down Expand Up @@ -268,6 +296,34 @@ def _maybe_normalize(self, var):
# fall through to return unnormalized variant
return var

def _far_shift(self, var_g, shuffle_direction, strand):
    """Normalize ``var_g`` in one shuffle direction, turning a dup into an ins.

    A ``Normalizer`` is built with validation disabled and the requested
    ``shuffle_direction`` and applied to ``var_g``. If the result is not a
    ``dup`` it is returned as is. Otherwise its reference sequence is filled
    in with ``_replace_reference`` and its ``posedit`` is replaced by an
    insertion of that sequence: immediately before the dup start when
    ``(strand, shuffle_direction)`` is ``(1, 3)`` or ``(-1, 5)``, and
    immediately after the dup end in every other case.

    :param var_g: variant to shift
    :param shuffle_direction: direction handed to the ``Normalizer``
    :param strand: transcript strand, compared against ``1`` and ``-1``
    :returns: the normalized (and possibly rewritten) variant
    """
    shifter = hgvs.normalizer.Normalizer(
        self._norm.hdp,
        alt_aln_method=self.alt_aln_method,
        validate=False,
        shuffle_direction=shuffle_direction,
    )
    shifted_var_g = shifter.normalize(var_g)

    if shifted_var_g.posedit.edit.type != 'dup':
        return shifted_var_g

    # The duplicated sequence becomes the inserted sequence, so make sure
    # the edit's reference is filled in before reading it.
    self._replace_reference(shifted_var_g)

    dup_pos = shifted_var_g.posedit.pos
    insert_before_start = (strand == 1 and shuffle_direction == 3) or (
        strand == -1 and shuffle_direction == 5
    )
    if insert_before_start:
        left_flank = dup_pos.start.base - 1
        right_flank = dup_pos.start.base
    else:
        left_flank = dup_pos.end.base
        right_flank = dup_pos.end.base + 1

    shifted_var_g.posedit = PosEdit(
        pos=Interval(
            start=SimplePosition(base=left_flank),
            end=SimplePosition(base=right_flank),
        ),
        edit=NARefAlt(ref=None, alt=shifted_var_g.posedit.edit.ref),
    )
    return shifted_var_g


# <LICENSE>
# Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)
Expand Down
15 changes: 15 additions & 0 deletions src/hgvs/variantmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,21 @@ def c_to_p(self, var_c, pro_ac=None):

"""

var_p = self._c_to_p(var_c, pro_ac=pro_ac)

if (
var_c.posedit.edit.type in ['ins', 'dup']
and var_c.type in "cnr"
and var_c.posedit.pos is not None
and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
and var_p.posedit is None
):
raise HGVSUnsupportedOperationError('c_to_p not supported on VariantMapper for this var_c, try AssemblyMapper')

return var_p


def _c_to_p(self, var_c, pro_ac=None):
if not (var_c.type == "c"):
raise HGVSInvalidVariantError("Expected a cDNA (c.) variant; got " + str(var_c))
if self._validator:
Expand Down
Binary file modified tests/data/cache-py3.hdp
Binary file not shown.
1 change: 0 additions & 1 deletion tests/data/gcp/real.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,5 @@ ID00056 NC_000010.10:g.89693009delG NM_000314.4:c.492+1delG NP_000305.3:p.?
ID00057 NC_000010.10:g.89711873A>C NM_000314.4:c.493-2A>C NP_000305.3:p.?
ID00058 NC_000010.10:g.89717676G>A NM_000314.4:c.701G>A NP_000305.3:p.(Arg234Gln)
ID00059 NC_000010.10:g.89717777G>A NM_000314.4:c.801+1G>A NP_000305.3:p.?
ID00060 NC_000010.10:g.89720648dupT NM_000314.4:c.802-3dupT NP_000305.3:p.?
ID00061 NC_000005.9:g.131705667G>T NM_003060.3:c.3G>T NP_003051.1:p.Met1?
ID00062 NC_000005.9:g.131706014G>A NM_003060.3:c.350G>A NP_003051.1:p.(Trp117*)
66 changes: 66 additions & 0 deletions tests/issues/test_714.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os

import hgvs
import pytest
from support import CACHE

# Regression cases for test_real_c_to_p. Each entry holds:
#   name  - human-readable label (not read by the test in this module)
#   var_c - HGVS string that is parsed and passed to AssemblyMapper.c_to_p
#   var_p - exact string the resulting protein variant must render to
cases = [
{
"name": "ins with splice region preserved",
"var_c": "NM_004119.2:c.1837+21_1837+22insCGAGAGAATATGAATATGATCTCAAATGGGAGTTTCCAAGAGAAAATTTAGAGTTTGGTAAGAATGGAATGTGCCAAA",
"var_p": "NP_004110.2:p.(Lys614_Val615insAsnGlyMetCysGlnThrArgGluTyrGluTyrAspLeuLysTrpGluPheProArgGluAsnLeuGluPheGlyLys)"
},
{
"name": "dup with splice region preserved",
"var_c": "NM_004119.2:c.1835_1837+3dup",
"var_p": "NP_004110.2:p.(Gly613_Lys614insIleGly)"
},
{
"name": "dup with splice region preserved",
"var_c": "NM_005228.4:c.2284-5_2290dup",
"var_p": "NP_005219.2:p.(Ala763_Tyr764insPheGlnGluAla)"
},
{
"name": "dup with splice region preserved",
"var_c": "NM_004456.4:c.2196-1_2196dup",
"var_p": "NP_004447.2:p.(Tyr733AspfsTer8)"
},
{
"name": "dup with splice region preserved",
"var_c": "NM_016222.3:c.27+2_27+5dup",
"var_p": "NP_057306.2:p.(Arg10ValfsTer20)"
},
{
"name": "dup with splice region preserved",
"var_c": "NM_182758.2:c.2953-31_2953-26dup",
"var_p": "NP_877435.2:p.?"
},
{
"name": "dup with broken cigar mapping",
"var_c": "NM_000267.3:c.8315-290_8457dup",
"var_p": "NP_000258.1:p.?"
}
]


@pytest.fixture(scope="module")
def hp():
    """Return one HGVS parser shared by all tests in this module."""
    # Import the submodule explicitly so the fixture does not depend on
    # some other module having already imported ``hgvs.parser``.
    import hgvs.parser

    return hgvs.parser.Parser()


@pytest.fixture(scope="module")
def hdp():
    """Return a UTA data provider connected through the test cache.

    The cache mode comes from the ``HGVS_CACHE_MODE`` environment variable
    and defaults to ``"run"``; the cache location is ``CACHE`` from the
    test support package.
    """
    # Import the submodule explicitly so the fixture does not depend on
    # some other module having already imported ``hgvs.dataproviders.uta``.
    import hgvs.dataproviders.uta

    cache_mode = os.environ.get("HGVS_CACHE_MODE", "run")
    return hgvs.dataproviders.uta.connect(mode=cache_mode, cache=CACHE)


@pytest.fixture(scope="module")
def am37(hdp):
    """Return an AssemblyMapper for GRCh37 built on the ``hdp`` fixture."""
    # Import the submodule explicitly so the fixture does not depend on
    # some other module having already imported ``hgvs.assemblymapper``.
    import hgvs.assemblymapper

    return hgvs.assemblymapper.AssemblyMapper(hdp, assembly_name="GRCh37")


@pytest.mark.parametrize("case", cases)
def test_real_c_to_p(case, hp, am37):
    """Check that ``am37.c_to_p`` renders each case's ``var_c`` as its ``var_p``."""
    expected_p = case["var_p"]
    projected = am37.c_to_p(hp.parse(case["var_c"]))
    assert str(projected) == expected_p
3 changes: 3 additions & 0 deletions tests/support/mock_input_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def get_tx_seq(self, ac):
def get_seq(self, ac, start_i=None, end_i=None):
    """Return the ``[start_i:end_i]`` slice of ``get_tx_seq(ac)``.

    Either bound may be ``None``, in which case the slice is open on
    that side.
    """
    tx_seq = self.get_tx_seq(ac)
    return tx_seq[start_i:end_i]

def get_pro_ac_for_tx_ac(self, ac):
    """Return the fixed placeholder protein accession ``'MOCK'``.

    ``ac`` is not consulted; every transcript accession gets the same
    answer.
    """
    placeholder_pro_ac = 'MOCK'
    return placeholder_pro_ac

#
# internal methods
#
Expand Down
Loading