From f2f89da03b8757c0af8cb2da23a2fd477f5e1ec1 Mon Sep 17 00:00:00 2001 From: Brendan ODonnell Date: Wed, 7 Feb 2024 08:36:54 -0500 Subject: [PATCH] Rewrite dup as ins and try both shuffle directions --- src/hgvs/assemblymapper.py | 52 ++++++++++++++++++++++++------ tests/data/cache-py3.hdp | Bin 945102 -> 1012073 bytes tests/test_hgvs_assemblymapper.py | 41 +++++++++++++++++++++++ 3 files changed, 83 insertions(+), 10 deletions(-) diff --git a/src/hgvs/assemblymapper.py b/src/hgvs/assemblymapper.py index 3900548e..78f35eec 100644 --- a/src/hgvs/assemblymapper.py +++ b/src/hgvs/assemblymapper.py @@ -13,6 +13,9 @@ HGVSUnsupportedOperationError, ) from hgvs.variantmapper import VariantMapper +from hgvs.posedit import PosEdit +from hgvs.edit import NARefAlt +from hgvs.location import SimplePosition, Interval _logger = logging.getLogger(__name__) @@ -180,17 +183,18 @@ def c_to_p(self, var_c): and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0) and var_out.posedit is None ): - if self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand == 1: - normalizer = hgvs.normalizer.Normalizer( - self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=5 - ) - else: - normalizer = hgvs.normalizer.Normalizer( - self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=3 + var_g = self.c_to_g(var_c) + strand = self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand + + for shuffle_direction in [3, 5]: + shifted_var_g = self._far_shift(var_g, shuffle_direction, strand) + shifted_var_c = super(AssemblyMapper, self).g_to_c( + shifted_var_g, var_c.ac, alt_aln_method=self.alt_aln_method ) - var_g = normalizer.normalize(self.c_to_g(var_c)) - var_c = self.g_to_c(var_g, var_c.ac) - var_out = super(AssemblyMapper, self)._c_to_p(var_c) + var_out = super(AssemblyMapper, self)._c_to_p(shifted_var_c) + + if var_out.posedit is not None: + break return self._maybe_normalize(var_out) @@ -287,6 +291,34 @@ def _maybe_normalize(self, var): # fall through to return unnormalized variant return var + def _far_shift(self, var_g, shuffle_direction, strand): + """Attempt to shift a variant all the way left or right. Rewrite + duplications as insertions so that the change is shifted as far as + possible.""" + normalizer = hgvs.normalizer.Normalizer( + self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=shuffle_direction + ) + shifted_var_g = normalizer.normalize(var_g) + if shifted_var_g.posedit.edit.type == 'dup': + self._replace_reference(shifted_var_g) + if (strand == 1 and shuffle_direction == 3) or (strand == -1 and shuffle_direction == 5): + shifted_var_g.posedit = PosEdit( + pos=Interval( + start=SimplePosition(base=shifted_var_g.posedit.pos.start.base-1), + end=SimplePosition(base=shifted_var_g.posedit.pos.start.base), + ), + edit=NARefAlt(ref=None, alt=shifted_var_g.posedit.edit.ref) + ) + else: + shifted_var_g.posedit = PosEdit( + pos=Interval( + start=SimplePosition(base=shifted_var_g.posedit.pos.end.base), + end=SimplePosition(base=shifted_var_g.posedit.pos.end.base+1), + ), + edit=NARefAlt(ref=None, alt=shifted_var_g.posedit.edit.ref) + ) + return shifted_var_g + # # Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs) diff --git a/tests/data/cache-py3.hdp b/tests/data/cache-py3.hdp index 9cb5277a95527fb2e447133d88dd8f64cc004e9f..6a00c4783accddca744df8bfdff8ac49475a9922 100644 GIT binary patch delta 55636 zcmdU&378yZ)wX9}GRcI5O$6CQ1jUfaN>qdpAW}_(Vo*>KS;A86kPtR)z$h+p!Q}r2 z5k&8_n{KW0oPvyVw^H!&;x`V=hd67v^ {Sob#OLJn!2* zJzX_Dw|#fPuYR)o`766u_N?q(*|)NP<-p3pl|w6sS58_vvU2juDIXo(Z`*|p0;M`YmYqSq0z3c8xEW{`q1dqHHk`IM2;W6?daHG^WfAk_KpRs?%Cs4 zZ#TOABC2EIn?Uoex^GQ~o;kYhC&4 z-E`+~aMQE=HEWJsS}t$as(~|3J@NQsPhJzW$2XcWu+`@K?!RPqYP}-O_}ZeIT`Hj3 z!x;P%dcP7m3?RU;;!tVv6pl$y=cur zYnY7u8iB9Xd`NR0CVQA$tphX%ple-_0}1nncfW9GdiVIf^G2sNiEUQz9f4bR#tMl2 z2(zkhZ05W+V#_3Um&BHoiG58vw`#tw`35Et>j2Gf!&f0zKsK6~#YBFbI41H2H{JOs z-1Ow1bkm#P=%z3Kl$-wi({2Xx&$t=PKkH^F|D2oQ{PS)mW~Yhp(ba0jVm1IiXY;*rdMer26IK_v|#fX`{7jPgjO-E5iY08Sb_D z`!qk%{1lU9NGOxCpP!4i2cP%f-TBY(Rb*&`T!#uLJsyi=-D_jx`bUQmBHR8V_`d_g z+L$%FZEc*UDBn_)y~?8eS}A^``K{(bOcJF7H2)pGiYNu7Ya=k%p~~YS$|F2trq|~T zBTP2;QdIdEMpbreQ{_}u`KGG8u&m0T)#5Lj|Eu{cCaKZ^n*R-7MU?_lRRVKDsWLFF ze&0}i&iLnd8QrAxh&e?W?ofuk%QAE?r}-XDuci-^WJoB(*;emD+fy)kKfa0#ZIJ6w zVLd)#HmWZkLWu14tKfh92V!l^ozu29PF9rL6=hLbl*vjlMKh|IibhphG*7`K84}81_OhGN z_RE-jM|>3-+921V!g_qfOsPLGfDqYsUxferFszN)?LT8qP?WDL%7JB3o}m;oG`nhM zVv;Bwp!pcSiYNu7Ya=k%p~`xD#BA4S3?NJPcQrZome6G8{I+KiUjb*gs>vZ`O?Fd> z1)67So`p%8Os=By2S(~&@2@w;FPKw#yd19#UsHw` zlx0|C^LuOd(d>&!G9;9N?0sKF+e5#`<7F|viVSU#>ri1mK3*o(5AR2a>}9vY|5gl- zmzfLN*2Zy)@>NCIuPn*|N^zj(d79^Ak|-UZ`9b(9q7;xmUIKF+s;sxi%QlVS`d|7G zCfoB4RJjVHD(ydak5!egsLG4VsvN2oFV?(7^HNMwr2{lS3|~c+0#a21b7HA7R6n2( zVY0XWFRDE2@1e@P_O)`1s(e{ho?BMsXth|XIYx6VCaKZ^njeR+qDld&DuFq%R2i&) zv=?Er`yWP?3%Wk&TIqO=wp3NVq$-Qcs+^)0r)o~qoQ_GVbb#h(;H#)oK&nb$PApXh z>O1u!O!gB5$xiMLRpzw64{)@q+@dO3S(S6t;#|#nnpa?wDjlHtEAdrSDIirPFejEO z{q?JQ5GK2}2UTYDhAJJu>m8*kUsRQS%Bs9tEncI!Nb_1uQl$ele;vMxDg~sf1m?t2 zrLVqY55i=7PDYh4V0_KizE+M@m77)N;Ib-jQHzT;Z`G{EBvm><^ELP?suYl_5||T9 zmEQV?yAdYq*#=e4#GuMp`}>PWsLB^qrCwI$?P_tE<{g@MVv;Hyp!wzaDykHasuGwJ zOO>A5{pgZiJrhxuk15Kmne8v{U#=)ODa!N8qP$Nj-mkez^8ri}r5!WB8dpV=wTO)b zxt=sx_bb^(ckQIw{n?8TLXtZ%tdm)@+FrpOt|XsVl6}jPT(1xx)_g?sQB0Dg4Kn{2 zo{AzRz=nwHO_8py{*HH{$M3y;q_1&OcE*>H)k zpTi_ccz3;4g3LdUry@xkU}HT>q9P66@T*NLpUd%NOSZMY1nz)|^Emn0jB(~^)xeU4 zixw?h*gWeLp-*0CXx`o3)m5AwYK86AfjE29g$oxgS+r!~!i7t?xw~ZX;%0^7gwfnt zvv!*?H=R?;mo6LM?rnXWG-ei~)tJipwt9beecn{g@zf9Nu3b|5MD3#5KD8CKH`SKc z8nvwUlG;~lug1Q!_Eg5stsPanv36N)|Jv%>k+rL8GixW+fBuA}tNQj?yzfD4)}Af8 z&H284dDkXg`5o-068SeB{nm2N+BLrJzGv;4vK?~$j$K^~7cX42c*&wgix)3mv~ckv zZ|ucB8wO%mk3r#g2-gz67UuyWZ_QY#C$sLaSSbSTmXZl6wz3{jxzo3 zc#n;P92qWRpv4dei&-`_$2V(7t_(FKDJOabbBTv?1Wx|0GBTvwTyaEMm@xoR8_z=1 zlkli-mysWfG^#G~kwD*gi$u^kT)3oR3FEm3JCFli_|y|6%(SY?g&NY8Cg}FbxELOC zV>ike-z*13FkorB+#-RYhIwFsNxT&1XtK3Qr1mS&Z zgVR}1J8L^dl5@f&7K*aVNHiP}>`GBm&I1l3BRFb@RG~$NCm|1@W=PEJS=zOmix-orp_WV-+_LO%XClC2Xp)i1`mnP-*sU@pwR*%2Ht8 zl`SJEFzt*0;S|nN%>o} zag!OjF2Wc~_%b(6OqeK*YnBC~HViPdko70GbQHUxa?~>0o0RN9ejYj;GMg4R`Z0(@ z!POH0J<=7x_(aS}~;D#G0LE^y)`*Zm9;#|*PB{U*{z%dq8_=+IZdF)CC8uRr3V6teQ`D0OqjHEE6ad~f z*vMb@=GjnFa!NhWsSCOz9kt_6b)Rbe6TVj^S>ZlWf< zmGIhnF)7S{(l**td%n3@8W@fV1Vkpn#!RG%cVd;vG3qBq&c1+z#{d8z+MT)@FJ`C* z9s;obTn65CgjRBl7GhqFHdYD=CUq4{M1FQiE|!T)?Fr)`ltV!GoErcPppdNP+K5cB zXgf#{;EWJqvNw5J#+kH?Y`j?kG~*prRl$i1?GhSEksG4Zp5T#49{3A={Q*br>?EJ1 ztsW&omVzX#8gLTu&JCxj8j=jv*c)5EgmKcN3m261Rnl5E#PcsUl#0d~G~Rq$u{;L_ zSu;$sxH)bm6FPaMQUfZI)h!jQz>EmnkUu4qAedQ>RiR8e z?A5k7a^KeU0p==e@Xe84XIC8CcMPlgL9eKyi#!IS6k%d{i)L+DxX9x`*03YV$kG-J zYr!hse?bfvMce|x=F^utNzo1bWEbt)Sfm7hj}j%D;49EAjdEgR+<}54iFPze{g5KD zmVw-Yx1~glQuL9&q9cHG1m)1ju4NJ}NYsjHJ(@3M0qR_c%%CZ}{tBT@j9C{tk-@@) zFA(tBhk^#HXp#Wf1aQo8L}TRd++xC7$jD-A7B@hxZ@N=MUQn`HArw>1SjdE>q#2+Z zYVQff$EpiW;?Xqya?GxsI|2?h2vP?kV*@ul@%~fHWRNz~+_YdKf`iC0T4@5%DTciG z-~

Jzbt*b#fy&8jKe>hL}?j#mn;_fDYJ=LkC zQK{G!Tl;p`_Idpy5lLW-gCT%!6(*9RC3ith0K#!(bCD#Nh~^Zf0?%OpE$H+pzJ5bh zg&|-6)2LW!AxS*kj3ErMQb|9ToIeZ2?3?e*8x=3}Iqx=xQC;T?KCw|eGa;tps3w+R9;%i1*z*9ky#7s@aYQl*ArdsG zWJ^KI$q7nPTV|>yoV?t`YKd0EMPV02FpQZ6E7~DdCT6h5`ctK70n8{)Qy@nR$QCYp zwOmLd3~6c>{0ISrTq~%N0+0@6tH@jwXUlGN1sNfE{)teQlF4diULloYQ^Z1%X94_) zNKC1SQPeo{U9bcY0eDPGP$Z9P!NY0PXJOud4iwStLF`ae3JFJeJ774t$=sJL400ziRJM`<__h*8Ui3`!O4 z**0N1j3ZA?0FshD+5zuBn82P>s|Ez&H^9Ltrq5*wlSIOCM@Uq1Zj88361c%GDU?uj zS(F5uAT&4|%=(Mg98)*ql*gS|V!=BI1uBK?UgHW@FI9`ts7MsN8Ing(!wP2ecIeAZ zN08Dj>yKdjRT3Hyi_|2Br1+A#K_PH+2~ec(|nbcOqjl zNRkbVjSU-3MhZhVrJM%D^t*A~SxEds70|pH0wTWj{+F#dp!T2*;nQu%aGF+euk4mdWdi`NdvW*N^qo;mWj@o z9G8gG8%L2!id{1#|K{w06+Q6!TQA}9cr(@nxkra7N-~bcgv-GJYnm%h7Y`T|cj90u zrdYHaWm93BjUkfBAc5y!ks?J&MCIFQ69@+Z3B=YAi1InrCh|q8H~56q0&x5Naar}c8v>?F;k9_3LR3T2?CNZp{+bq zZA-(+DMNp|0a}&`Arz##xltv0aNorA)5jPx*yz;8NKo`JEwrMk(2^m!T9jiGDDBTb z+qZR-FlkN$FNuc|;DnWw+9;lq+o`xtyG1w>I$^}T0dKR0+Okx|QaI<>_aDr#e2@+0 z5;9foS$PN$HvQZ+L$SaV3Zq~2AZaXg>`OjrHXR^D>WQIQV0!*Dc8VOl5fx(rFI7^k zB;Tl(M(9OYtJ^$*wh=_wS0*z;J00D3W84QMTsATtkq3Vr@>#c-j+I z0gFdLiU=-_K`aRdQ!s=iId$QYYS>YF|IL61j8+tnk$_2S3n%na>$n1td_Ywr8M_dd zA#(QIPlii(M1KmI5IKU?iApcZ8Z95e&xxn-DP8nB2WZYXbA-UB-lmxBvs> zYLprg#r;6{f{|fFhYZb223r@$omX5_IC8-A_!Nz~v&I(8nAI88ST;U$zuwLJ<7&Uv zr~PJ!$-nUOy8AW1T<(Wh?Asq#2aezOK=<~IhaCTd=8u@>^`^gNzs8?~yXnq<=ce-U z6+tSpiVv9}|B^)SxT(iYKWiKM)8j(J3iRp=w%w+|#f#a{YZmurOD|g7yQ**Yob<;u z*$bw0WeY#b=UQyWXCP$XcsAFl?tNnEb1g>Cr0i79Mw)4uS~r)cc2xlL>5OxQ>LbG4 zaK#IUHtJqCxvd)AfYtxhoxSnc{d%*f9Je0?$L6N%id%@AOYk-cKBr9Z>C)Oo^9;=l z3v*Pehvd!85y?Uw#ZtfkNsROogUnp0M29)h1=_tV(EiF& z*BqcZ5Q9J+fcf)qR0JyFH3VFL3n;GK|JM((e$=V|PjB|_HzUv!-V#e_to>PXOBMQ- z3hiH3=tb&sh~`kui!mtFftbGpM@69)ULye4pF)q~NoW9h>aXm}Ui^Ph=$9BCWOLe| zrngd|Z>!LPvO-6y%Tb!6HA^ul)Pa~EgQKEQ3$GD?6I!7`Y3O#^9p|SMm z@2yAKlT_$V6{?jLI!Rql)|{d_6@x+@i1}$aDhjpm8UZ+=6&gaG`sw}IoXb(@BN!B# z)vnOiDs-0$Eh;N?mb#p+IY)CY28B8h^Yd_26l&o$0&qerG>km;Pxoh=y$gk2j#hnGIc~)7UD^=yan)hkmk3pZw$X!9qufkE$r;In@Uo1|c znfm7jvn8KGp$}o!@eq5m0)1bBb}I|?A!WHvbG_!n7zFA7%s+yoB2Wo$;y=VjQKx>` zPJtS#H{L_-TrF&DGKyM1zKDd=oV%9lIF{ruV4_U12F$8j*37f zyotYrHbR~H^x^Exdl0Da-dI8%ucmiYp&zNxer1L3P?v9NzNPs#28B8h^E+`=6l&o$ z0&qevp=rodf8%hr?N3qY?HFGxroX^%J;5 z{wEw2g<5!x0G!ZEXcOeA?>mycZ>Ov`+iT}gXl}bgPg9}$RA`^FLVs76e`x-xc^HF2 z9f*0?iZ+E>c#Qy@&}BR2cpoQF{^s#v_sDwAM7m&YhJP~#3U!TIK5+TsS7sL`OK8L6EgMOw8-LFE=E-UmL zb=gC+r)D7rg*p)Py>L_%YT-2ka6&%^ZHYYf>vzm9It+zQJQIcb=gpqq{&IRuiGHa> zV`YiJW?;TV9S+9ukw*C*lh=MuQ7deamz$NaRh!0@7+!MWbt7CV z=%pcxDh0kxf#(L5YU;LLCSd}MylA5N-A=PO^FL?K+<7zRjBoe#zIlzbzxH2>gs1Ia ziriGcc-Vg_a%1hh+68>D(b=^(){d=xr1sLjS_qG5{&E{=(dT2d+qXrgh{3uodoAg`xUVJ~NNHocsM5vSGYj=R}WSgF%NTzBHG zmmo#4h`|qDx>|+nSrS<*Sud{3VV20TD8trI*Re!ZMxDyIJ|-@p@%k4~m-FNGA{s8UF;&JM$b(~GW1j+EzXKK4 zmt1T@LXrWokc4ZX?8*+*#7K}*f>_)v6IVt_DK2MYIQRfXq|M43WI22J4{;916+*Hn z&S^@_f-|@h5AI0`<+7ZVEofnb41*vW)$-$PJfM(6R%6U)-Q-yqa@?Xq$~My;Kq8B{ zU@fo(jVE0MlDGveJ5mV7rS5?f3R8?{1W|ZoY?1~Utg;bIBbO(k&_wnNl6{Ei6 zB+yl8E{1{Dcb{>xa1lh3MXg{^)K$)afQdp_f>g={38j-g4$`HUvO^|9{5TJ9))HL- z6E~0Jp0@1cA_OkP#&p6Dd~-FhOL+V_XDEjW@*=91UV5UI zGarvhN>QF~sTg5dv%>2jrlS zDnwHq_ifnP208&86U|0vqzYMNLB=4l{BtJB2POF0nHzjS1tTYyaPrgAvlFMluG5Ht zxN&eplAz(fsNgV;jjT{J&5?}B_sAS$;FV!{!# zWDhQtBxIT-2E>+L=xY6_ggCL|96{L!Nm407;i6i!Ly-L9SS8~y=$MfD4-wT}Y|)eQ zPJq}T#gH?~pA*Q@VBy2%0il+XEPLsJqb3C5l{~Hj92ui)m3B&$Ey6;6*PVbml>#`1 zB8DjXPt(M>v_T55+!;|4OSHPJSEytMAzErgMN*g@^XEk7 z5<_gLXt9_|XNg$Ks2ItCX=Cm~FIo*X<;Y>&8Bcra5WoUbiXdfY)BxC#Bc>W$z;DUHh`Vcxmf@&D&vE2YkE(7;=p!0zE;QUG_S`rFPeK5`&^g0(oJ{%>J_VR`t=ivwAI%=uWh=V-pc&U_uKhP@ z(_6o|EIxeZ>{&DC%pN~=tb5b?$ISk?=7xU(flig{vtu^o+xqu{{z8)ey{{u@m)V2;+pK@g4Joy>E?9A-6LJuvR7d48(P|xZSlfqbuDfFufaPl zb(iKln(tyB0W`lGAHR>6ASahG)BhTr9^F3@)9Lkxhaj7s{9c&ej9JyYTe`Bgg?fgd z_eykWndtAO_mJifnm=L^(GJY~PxvZC&v-?nf?2$A@FU`Q82s2xcYcqXp8Q@nz4?7^ z`tqN+>Cb=aW+4BWo5B3&Ziez-xEapxcQYyfrJIrbS8gWf54f3<|Ju!H{+kv2gG{c^ z{;h`_H)SL-S9~ z!MtEatn8&%s}u&M(uJfgrA||+AFI^DvQmSzpATt< zHIpz&sSeD11Yc!Mofb;9FgF0Drq=&Hh*;TiA3~|GU^sJ-PCm7iI#s29q*9B^N=;Xv zjWwHSHpL{RIxzFi@KuyLHI!;$ZU9P+*7qAktn4+PK&b_vM5(^9`RQbMOR7_p>W4~I zD@*kx1=?D(jb>X+lBxqV-wt0zs#6+O%*S1-sXSRcwkN77^;-u}D_i(!r1~7j3oF`J z)ycN?1Et!zEY(g5w6o@Enx|uuR2`uCF8C@^og7lNFgF0HCfDCHfLhtbpGB%0F`Rg5 ze@;J1rM|CHdzF=%tv+)!b2alYNvRIZd_KO4QYXdZuZ8)zEA`la`i;~V4Iozb^qW!Y z^%$P1+FvJ}s8ZiksRPPNJx6`^(Cn#Mh)GIyVCH+_t0;A1DAmH;0F;_ke_KCdW&7NM zQoqCaOqKo#P3xKJ1f{xLsrD&LwVwiInwn;ROp>YtG_T{UNOeLy{92fgyHt<;nQFLx zRUc|)cYYbEZuyE*&FXmkzKT?NNY%pJ0HhkK z@85@7*&A*{s{1iK{@Pzt9j{W~QK|W5r4Co0mursD9EnLvbztU4;j1Whd_4YIn2)Q7Q#L5o-6-o^~kRE^SuN00`s=JhG-?CJ>0-c~aQF9U|N!0RU>+cUh`8D9|d+8#QmjB&j+; z^EczGNVPPiYGG~wQuWqv=tiyV?CyJpvIj9pRs2O^>j8MQQhigYUS5{!zZ7U(bBX3{ zm?TvPXnrZaid093R4vR6K&qblyl&LWUOgYF26hXnX0<=9AEi`xDAmSgsot$X@6lYL zxe}A4>Hy8(i?1TpQ6W_ea|4j7yLKaDW%XyG)K@V+{?gwVwieZqDs{U`9b8uGgX(jQ z=332%FiEL)%=|iB6`??Elp!B?p;mNFd9=SNkAMI6kx7jkvzxz$M6b9tBq}Z(Z%K57 z65XamQ_B+FpfI1%d{T2GCP~CQm$edP{wX{ah1vj{|JUtteABb?Rzkyf7AjXu`Tlvf z1#eepTVmphp8O2P{J(BJt#cv~=DXURVaHuv{6n`^%&r|!n{xh!(>Lj~y(g3S<_uQz zN~u=rRvoE5tt+-7OrBXsSow;rR@lrov}2Ftk}ZjFx`;DHYk4fL5E)o!g;kryDKAfk zan>kKx^bpyExGg_@~k;?XDvwAY0YgE2gTdVYmX)^JN<~0&p7Jz<4;|-ruK*Wu7mz7 zx-+|Ld2Qd?9<{l(Lu=pTpUxdvTUdK$ZHL++wfEGP*4|X#Wuv95`WNlJX#VWhMOxo- zHGW%jCnm7+yFC03X6fopwmGqJn}@e!s@G}F+uZ*>XD^(w^-P_oIHNzBbF4U#>*=vL z4QSVCEY26E5M}T*spk=mQV>I{%xK(UFx#`c3owpD9xwTUnSV1|5t?71aE26wC(B$X-(K+}Eegcfw}(y&*_Ls4sD)C+J;^v59Hgzo^uac55G zhV$e}9rcTrqU)#>HRKXGWZ8%NmX}Usm<2OSt4NZEijr3LL{N0O^R22b8uuaLbz-R% z!x?4+oUsH69(ROxvX z&V>*U9HQ-=13{=ENsb)~aExt6S3bprN`CAuED#41M8oA4S?(deg92E> zW`K!32MJBR0mG=|#kCsxqaJ(KvT8BwA`s0=!GT9GgV-S%z~G-)kwwi|=mdaZE(uCy z2KBsq5G3^&mPCwy`(Z2k<&-Fj$q>WdWoSnxO3X+-Dqin#|mNoiz7ms z-~_=uu~j`e$S7KOKZ;r*yaFWjC|OjnCFu|@dv+2vHZ}ze4lsEBm68@lDyMK{6`Djk z5V4?egjzurOdmrEf_2x9qwx?@FcK|qg@^(hfM6S2p8svb8Gx+oGNuM!bm;+p(E}#- z5_sxMPeLPEY0RM8q>*(23>v8`yfU^LfbQ`8ud+cxmX2A?hSD^+?*Z{HW->!fQi2=h z((GUvh{_BpFLy_*V`o;5X=iK4#w zjc|7&)SjUxN-=`|y6j!2ipxb%m6ANo#Vor=YP5roXmHX-1k=rG-Sh$Fb1zPnV-j#j zBu;%^r{^smT?!XORREE|Btn$XHUbmZREOA*5PW?xk+KCD>IG7?8$T9gVtsuf5^=o# zwRHb#dV9oP`d3XZB#$ReOd-U?%W5 z3^^lM!qOI3n&wF>rY8DBf2P#gvT(A+ksZiGEh0PiIZ$G-#4|;58V|UEhdT}_8#f6> zti_3qwPAQ%Crq}83WHP>w_jNm+Tw6ONQ$)=ImC9?ZIi31sfY=tJ^{OcCKU%^)!~N^koqG5!9jOlT%# z_z5MKe&I_n2NV(3TCh{Oq>{bp3Mm}4Q?iV9mpzgMGt4xObbubwy#BX><;4Jx1R~fA zKIYg#LWy~b_*hQypm&TL)^hCGh7^LjY@%QU83OQ}U86sJ6JUh-u`%8|&;M!9Eo;ihg2|9UVw-?}j70H(k%g;K zVRd?J%hXa|Z*JU+Z{ut$*fJ)s2*K;$NtxRmN-;uu4-T;h06Bhpi=r-6iyOtwSdiFD zQSk`!;ldqM+Xy1mi$RC@J2ms6ukyw?UpfS2Fy_Iz2wsH)30*HlGU_}PQ%hN?Hx#*N5Zy z?g$Rc5Y6Ae0!xmuSAY@#e&E`A6i(RI)}c1xkt*;QvE2#HJuF<5Hi5Ph6ZODr>%RZO z0oG~~i`mn~0V5uI;L?h0>oY^>6qD52+(N z4pKER1X5CVbTk(d*fLZO_M)bn*k_22Q;U&KK6)9=4WL+B9Qyh;IX#!6RJ$3B9tYhkEN=8)nv5QBFRN(QS1PEkXvytNAXpo8!szcJnu6s5c)_IAD zW{ja~Kp~4nWOHG~h?1LOeZ35lLrT=4OZW*ub-{63iX1y;AY|7UfVO;-2cfcTOeDGA3kGXc*mZz$?DbgVg6NWi|X1>ydN9y&U!Z6 zam4h=nkoMVW0O|9ero>MZVSd{%^3gv7kW3@aTD`5)ok`}Kw3mb4&V<=xB_;;So#B# zCz-yrW}AP3fgF`TFp2q)e(MiRJd@%dm~8o!;#^Febg7*CS+lB5hRt>cBAuLhniQU{ z+2!o`bCb0t@*N$${jBDnn@nz`b3Km;J!5Qa&01c3bOSTWNu=z(w_P}rZF$FqBmDVE z@hP*Pzl=XWx%q!Nv$^a-{x84@mmSlUb$w?+*V5*npFG>ryKA1K*#lGSp4owz?}>x6 zn~#8Z!)g2UPVauKw7Vf)zqdR4%lF}Z#1G^>HZxth+^Vx&-Y3d?N}2bevVF1UC7PFF zY7PHYKpSL!7@i98Ho)S%^*#c0<@>tn&KJAs$)D?{H{Z`qU!J+?&ueZ5^8MWm=5;qi z`2lX`4(A8Dos>V%%}D-yH zPxhGyk*@c5NY`6jblOtxOqDxH<$B7>9ZlhUspc5XvGV9{@Gk_~5cA{kRFrE2{0}NO zJm+y!t{2(rOM9}ddtWt@9fskf^V8?1w3J(+awn_YXj!>a)az8uX`0h9D%XLSpMj^M zTpQqqsazkj)i3SIu2Z?6hjMeh&7UMVi-QRIURte;uBRa&3Sc zrg8(wR{wf$cIZ|pH|t3#*Eg%UVy-3KIZAh$(oHH$_Z9`aSo2oRYK%W3Iw12kcq-Dh z0saT2dmMj645C|o*S_rgvypE1IWD^5<3wA^ovU)EtK3joxwos=Wtw+r-idM1bs*-K zieOkRfqxr1na~PHD zK+Hdnr=naN;D)K(2(s0m)Sta;2FiUA!)KGX{}RzqxieMnpLdmh`T4SXeMR$C&DSt0 z*MXSdil?Gn8{mei++<{{FYnKOzAMV@K2zm(OP@p8T65>C+zOR@xUAf_)a%=tJ2iJ< zRIURt{|=sta&3ScrgBq|&Hq~R^7B!y=K_@L8=IH@KC30&1xk08()E`gu0K++A8YQ> z+>7yLNe5(pAD)VIZGitl=^n?IC8Pd7lk*0$ZQh744kpZ;8=CEbNecec_Em!%h!=@Km&G1N_fv#|xwXpLa}{^vYiV_y%XT1@9PUTVdi_ z@%(4LTg+bo^zsIH^WEu)^PR6km2t4la;@$05(9907O?~U!^9k|(JH~h1oABv< zPM$z#&02lxmGc|1$6Mp2agWN^&evbjUH{sU7tdePT|1@ru3D|Od+l|#{c4wDuc)o8 zJ-2p5?Z2?6)DPzaPW$)X`;a+ft@p;KJC7S{Ho+vYd{d7%JEv>)^leUVjCwE?Q@wWn z+F?H9lxseCtDpD!Ij53Nl*E}HPouNXng}}Az_&iI2A>MILG-_IM>?L~@PbCC z!yLZjRE8t5AK`q)qAgO~Qyz&aCpix!_W~JrNdRw=G?stBtYCa1r*D|t|%EY6{R_fJC#I2TF{*bF%51} z;g4m2rW!H9gbZszBi4_FV|4_gBy1U)R+o^4*(3pDFlH=?9Uhz!;z0yx3Rtckf|^Sy zG)WsMk>KoY_7P)4&YTRKSho(!)I!|?1nEMKG_72_VtBMJfIKY6Xc9&$l#LY%Yyknd z8@t>rj^LK+&S3>Z(6jQKMKxjuNg^X?6&R zRRoS&M)yi84a-)|2tF`&`N1+8m8E-u z8Bw-K3|y2Qp5pq@@Y=~VgvD!B+mOFSZA%!wO_8~ZrkQkBLoNB?ivV$qlMU9E_zs)` zQ&@{~^V*RpETbYcxPuYZ3cfxcffi2y5f)hrlop^hqkL-Ac}#!>m1u!PtJ;iM!D-1J8h^f+G2fY}M2efU`x)3B$Dx}f{3)KNIo_`ou z;y8p?h+{9yJUd18SY}Fd1>xA4b&GGNE?3lsU8sOTm6pNOLw|xQ<8^?WcF#sCl){lAcabgS|niQI($^{98 zu_u(YRA7q2Ep>=8iAg*iTF|P21szDc8x6=ZJoZ*ch#(Le#v~c-%EsXXPdvucb7KMp zh(r6AaZFjPwngLDU*Eg=VDXYJuD+N%fA-9Itq=DsTygcqVDa*E{Jy95Y%%`Xk9QAG zeuggwXJ~dkCtY?CG2=JAZ1~Cb1&%*c^DIpBVu?BIb3S>lo9=v`o67YCQBlaX=kep! zW?WzJ#Ny>@dR&+sJ?Kh{;uKdlFQai%Kq|>HIr6Nn#Di=89Su;IU!$rCYKUy`XJ`b*SxGN`^*5p zEA4#wGrN{HuO~RfdWULWta%A0{ke1pbpBF&{I*nroLt6C*AzS=-|+{JpEA_=)*y_t z%ijsz(aWLB=P9K35?ZKl67Vqb$|=l*=#BT&Q^!CP~$SnXklGk*W#FX9Wc2`jTo0VvjRM#5ZQQazSc zHMzcefX=eoyV2@{7)~;#e-_q~>J3WuNu@fVEY*K0(75Ij&D$_Zst(NjQhXJunvkrj zz+7KaJ<3%T55SQ+r=C|2WV7CbROevU@c?|iQhh?Hb}CEtZUuUe<_gV~m?TvPX8vA$ z6{(t#NEMhHf>e|0U+zb(?6p@Q)#NMV5jea3$?A1#b%RtSYEUV%A4*kfL-HA-uiuZf>s)eOap; zROl0$Pik(&B&|9?^H1TcXjMR}RbWnVt;VmNJ!z=%_CD0gzJ3)_9sGf`tlFQgE>fzG zDb=*HR9{e_n>An5+=59`4b|H~^Dp76NVOKSkucYjRFCQ9!eG6-kIu3ykSW{!>dKW_uDJt~wCVuOzlpD+RRQU;3e5GS)nj@H4%9!!~nKLS@O)rXa8&$3iMRiK|~ey;fiCP~#_Zv)Nm$5)YR zEo38Mt}m$`@KD1o!9=W&8t-EdX?I(tki?*^E=J|Xnv1L zO7+#-F!P7-Rg`K$HUe|KDb+RbD1QVFHQwJtU)c+RL}B2){q5ttKPsPWIfgh$_C4~=CHtJLKmibvnP_Frr+ zP^u3pRlO`#-?<3XuNlw`VvqD!Kw-(Mi3yE3}!G>C0qgD&bT0K#Pw$yB;c@idR<=?cFG4ri) zRivsQHrBrL_V~XGa8^fZpP3aVD`e6c*u9RZRZY0<*LJpPCk@D~ ISw7?c0i;wrB>(^b delta 50 zcmaF)$oAZI%Z3)l7N!>F7M2#)7Pc1l7LFFq7OocV7M>Q~EqtB|%(dPO+kF)H8oZfG F^#Ih|5DfqT diff --git a/tests/test_hgvs_assemblymapper.py b/tests/test_hgvs_assemblymapper.py index 33d30013..d4e7ee5b 100644 --- a/tests/test_hgvs_assemblymapper.py +++ b/tests/test_hgvs_assemblymapper.py @@ -210,6 +210,47 @@ def test_map_of_ins_splice_region_preserved(self): self.assertEqual(str(var_p), hgvs_p) + def test_map_of_dup_splice_region_preserved(self): + hgvs_c = "NM_004119.3:c.1835_1837+3dup" + hgvs_p = "NP_004110.2:p.(Gly613_Lys614insIleGly)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + hgvs_c = "NM_005228.5:c.2284-5_2290dup" + hgvs_p = "NP_005219.2:p.(Ala763_Tyr764insPheGlnGluAla)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + hgvs_c = "NM_004456.4:c.2196-1_2196dup" + hgvs_p = "NP_004447.2:p.(Tyr733AspfsTer8)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + hgvs_c = "NM_024529.4:c.130_131+1dup" + hgvs_p = "NP_078805.3:p.(Gly44dup)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + hgvs_c = "NM_016222.3:c.27+2_27+5dup" + hgvs_p = "NP_057306.2:p.(Arg10ValfsTer20)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + class Test_RefReplacement(unittest.TestCase): test_cases = [