Skip to content

Commit

Permalink
Rewrite dup as ins and try both shuffle directions
Browse files Browse the repository at this point in the history
  • Loading branch information
b0d0nne11 committed Feb 8, 2024
1 parent 586f430 commit f2f89da
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 10 deletions.
52 changes: 42 additions & 10 deletions src/hgvs/assemblymapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
HGVSUnsupportedOperationError,
)
from hgvs.variantmapper import VariantMapper
from hgvs.posedit import PosEdit
from hgvs.edit import NARefAlt
from hgvs.location import SimplePosition, Interval

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -180,17 +183,18 @@ def c_to_p(self, var_c):
and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
and var_out.posedit is None
):
if self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand == 1:
normalizer = hgvs.normalizer.Normalizer(
self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=5
)
else:
normalizer = hgvs.normalizer.Normalizer(
self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=3
var_g = self.c_to_g(var_c)
strand = self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand

for shuffle_direction in [3, 5]:
shifted_var_g = self._far_shift(var_g, shuffle_direction, strand)
shifted_var_c = super(AssemblyMapper, self).g_to_c(
shifted_var_g, var_c.ac, alt_aln_method=self.alt_aln_method
)
var_g = normalizer.normalize(self.c_to_g(var_c))
var_c = self.g_to_c(var_g, var_c.ac)
var_out = super(AssemblyMapper, self)._c_to_p(var_c)
var_out = super(AssemblyMapper, self)._c_to_p(shifted_var_c)

if var_out.posedit is not None:
break

return self._maybe_normalize(var_out)

Expand Down Expand Up @@ -287,6 +291,34 @@ def _maybe_normalize(self, var):
# fall through to return unnormalized variant
return var

def _far_shift(self, var_g, shuffle_direction, strand):
    """Normalize ``var_g`` in one shuffle direction, turning dups into insertions.

    A normalizer is built with the requested ``shuffle_direction`` (validation
    disabled) and applied to ``var_g``.  If the normalized variant is not a
    duplication it is returned unchanged.  Otherwise its reference sequence is
    filled in via ``_replace_reference`` and its ``posedit`` is replaced by an
    insertion of that sequence, so that the change is shifted as far as
    possible.

    :param var_g: genomic variant to shift
    :param shuffle_direction: direction handed to the normalizer (callers in
        this module pass 3 or 5)
    :param strand: transcript strand, compared against 1 and -1
    :returns: the normalized variant, with any duplication rewritten as an
        insertion
    """
    norm = hgvs.normalizer.Normalizer(
        self._norm.hdp,
        alt_aln_method=self.alt_aln_method,
        validate=False,
        shuffle_direction=shuffle_direction,
    )
    shifted_var_g = norm.normalize(var_g)

    # Only duplications need rewriting; everything else is already shifted.
    if shifted_var_g.posedit.edit.type != 'dup':
        return shifted_var_g

    # Populate the duplicated sequence so it can be reused as the inserted bases.
    self._replace_reference(shifted_var_g)
    dup_posedit = shifted_var_g.posedit

    # Choose which side of the duplicated interval the insertion flanks:
    # immediately before its start, or immediately after its end.
    anchor_at_start = (strand, shuffle_direction) in ((1, 3), (-1, 5))
    if anchor_at_start:
        left_base = dup_posedit.pos.start.base - 1
        right_base = dup_posedit.pos.start.base
    else:
        left_base = dup_posedit.pos.end.base
        right_base = dup_posedit.pos.end.base + 1

    shifted_var_g.posedit = PosEdit(
        pos=Interval(
            start=SimplePosition(base=left_base),
            end=SimplePosition(base=right_base),
        ),
        edit=NARefAlt(ref=None, alt=dup_posedit.edit.ref),
    )
    return shifted_var_g


# <LICENSE>
# Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)
Expand Down
Binary file modified tests/data/cache-py3.hdp
Binary file not shown.
41 changes: 41 additions & 0 deletions tests/test_hgvs_assemblymapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,47 @@ def test_map_of_ins_splice_region_preserved(self):

self.assertEqual(str(var_p), hgvs_p)

def test_map_of_dup_splice_region_preserved(self):
    """c_to_p gives the expected protein change for dups with intronic offsets.

    Each case pairs a coding duplication whose coordinates carry a ``+`` or
    ``-`` offset with the protein-level string expected from ``c_to_p``.
    Cases are checked in order and the test stops at the first mismatch.
    """
    expected_by_variant = (
        ("NM_004119.3:c.1835_1837+3dup", "NP_004110.2:p.(Gly613_Lys614insIleGly)"),
        ("NM_005228.5:c.2284-5_2290dup", "NP_005219.2:p.(Ala763_Tyr764insPheGlnGluAla)"),
        ("NM_004456.4:c.2196-1_2196dup", "NP_004447.2:p.(Tyr733AspfsTer8)"),
        ("NM_024529.4:c.130_131+1dup", "NP_078805.3:p.(Gly44dup)"),
        ("NM_016222.3:c.27+2_27+5dup", "NP_057306.2:p.(Arg10ValfsTer20)"),
    )

    for hgvs_c, hgvs_p in expected_by_variant:
        parsed_c = self.hp.parse_hgvs_variant(hgvs_c)
        projected_p = self.am.c_to_p(parsed_c)
        self.assertEqual(str(projected_p), hgvs_p)


class Test_RefReplacement(unittest.TestCase):
test_cases = [
Expand Down

0 comments on commit f2f89da

Please sign in to comment.