From 7f18cfd676c8efb32c57314bbecc27c27f654d8c Mon Sep 17 00:00:00 2001 From: Brendan ODonnell Date: Wed, 13 Dec 2023 16:44:24 -0500 Subject: [PATCH] Fix ins/dups where splice region is preserved --- src/hgvs/assemblymapper.py | 22 +++++++++++++++++++++- src/hgvs/variantmapper.py | 15 +++++++++++++++ tests/data/cache-py3.hdp | Bin 921577 -> 945102 bytes tests/data/gcp/real.tsv | 1 - tests/support/mock_input_source.py | 3 +++ tests/test_hgvs_assemblymapper.py | 10 ++++++++++ 6 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/hgvs/assemblymapper.py b/src/hgvs/assemblymapper.py index dcc5e0a6..3900548e 100644 --- a/src/hgvs/assemblymapper.py +++ b/src/hgvs/assemblymapper.py @@ -171,7 +171,27 @@ def n_to_c(self, var_n): return self._maybe_normalize(var_out) def c_to_p(self, var_c): - var_out = super(AssemblyMapper, self).c_to_p(var_c) + var_out = super(AssemblyMapper, self)._c_to_p(var_c) + + if ( + var_c.posedit.edit.type in ['ins', 'dup'] + and var_c.type in "cnr" + and var_c.posedit.pos is not None + and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0) + and var_out.posedit is None + ): + if self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand == 1: + normalizer = hgvs.normalizer.Normalizer( + self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=5 + ) + else: + normalizer = hgvs.normalizer.Normalizer( + self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=3 + ) + var_g = normalizer.normalize(self.c_to_g(var_c)) + var_c = self.g_to_c(var_g, var_c.ac) + var_out = super(AssemblyMapper, self)._c_to_p(var_c) + return self._maybe_normalize(var_out) def relevant_transcripts(self, var_g): diff --git a/src/hgvs/variantmapper.py b/src/hgvs/variantmapper.py index 17d6b145..6ee26076 100644 --- a/src/hgvs/variantmapper.py +++ b/src/hgvs/variantmapper.py @@ -429,6 +429,21 @@ def c_to_p(self, var_c, pro_ac=None): """ + var_p = self._c_to_p(var_c, pro_ac=None) + + if ( + var_c.posedit.edit.type in ['ins', 'dup'] + and var_c.type in "cnr" + and var_c.posedit.pos is not None + and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0) + and var_p.posedit is None + ): + raise HGVSUnsupportedOperationError('c_to_p not supported on VariantMapper for this var_c, try AssemblyMapper') + + return var_p + + + def _c_to_p(self, var_c, pro_ac=None): if not (var_c.type == "c"): raise HGVSInvalidVariantError("Expected a cDNA (c.) variant; got " + str(var_c)) if self._validator: diff --git a/tests/data/cache-py3.hdp b/tests/data/cache-py3.hdp index e7ed304c4ce67c901eece222d8d9475b312ba12f..9cb5277a95527fb2e447133d88dd8f64cc004e9f 100644 GIT binary patch delta 21114 zcmdU%33y!9b;q?YmRBsxEH=itY=P*Tk;YnT(x8Y&FvFN&%o=v=*uEIs^1@;Q4~WRB z7N|vs(5!9Pmy|XH$`S%Gs|ia&K3YOjND3xjO9E-X4}uBw|2y~1_`NqW{saP}@o?U9 z@BN>1e)qmv-Wl<`Z+AWZO!tKYl>=1+)dMvHwF7kn^#ct9jRWHbng+)IV8Zj$t50dD zy!N-#C%jRp>#dtqlS!w$3$q@p=?d6uZ5tYFnx8*8l`0;MTf6$y6>Cmky=>Lmq1=M} z8%-w;?VCG4w?4Niw>Eci?!w$)?yB7Nxuv-?bN`wz|S#G7y9I2M`a;tKSaxQ;K z_2R*rPan3heQ5EQh87G-z#Y+FQQ;O~j+8Oo2Dgyoqxv^YnYA*=vp-*EuztznwI{Dx z+&{GV%cmXNFsY)Vr?+=uUvE!eum1J)*``~x-7H@$2}U^v#>|U671<^uSX{pKu?bvY@qtHs}t-o z)VJ%D_WF7lKo5@P45hxFKs$D9y_6a;_DNqo7;Y&rS}aHgX^YKH5j0_ILs;fn`KW&> z6{%T7a%{3B2SJU|zl8z0p@~pbZyIv(n6GPsgcwct|2*lG{bh^U*)!M5B>?%vbPvD7IhNTKZw(VESY0D({LFh2LL~>i0C0*fXXLc?9hgD zjwgq>*Ppr( zpaQYwBmbpUh8yywk+?|EAj+RVPka_681bUXQeQlq3cS5%4InMJsY$&J4)d>38pBDP zk(&iX4kYGTNQY!dhoJDP9fsnIbZmJ5mtOp%E4?g3qQ1pHv-4&zU zGO^@Opr#_LU0LI)9VKk*!g4{Kc=6!%EdxszL7t#O%1dZ;pgl=|$+}3OxX%(%lTQ0Z zC4a!6U#Mg(5#tf*4Tt!WJO&%V-(vLC9QxWTmr~o{oskqQmNe2%V%sMm`-n zgMw7pAy0b)G<_O0i1fK6=&Qta6367EW~8|O$xdhKF4n9pp9`M6Upq;og&;BQq60Z~ z*EqTo0h+88B2p8QXjlq%5(nlDClcWOhgD?0Gz2A6V+hMc=Za@FRwo&4h;YR~^QwVD zNN*pf_Ic$YLx9rP0IO9Z1TJlzS*Z1=7|B?Pz)EtK6GQ1>(;`(G3?KH z9rjSdqwQhT1PAq$)oI###Gkmyp9douWKZqrNh0bAG60c`wR#vjAa>yd2{?$?lo-TF z`7Z9z4t%geLJsu)PqPvsUG=xgkqRAjN~!6j#@P&5-i$Y5YEef8U{h@YsD)O^D+s2E zfFvaq#i&D&5fEOPI=%lOLj*M8%9=DxyW}MmLXoCUcy&`i76U|J&Rs(_`vWQ?kWolL#Gr!Q4u1o`e*T}uUcgAqbF=^zE~DU z7oLa^j`G#|Q!sMLp#su_ZXOu|_(0NovX42!22fm)Je_P#QSfJ_tK^wjxapzp48#{?==dXLCh>2363E;B0Zv%VM2W9B%~4Er%%i~Ws(_R3dA2F0fipD z{u7{dp=R@_v&@1$=|QxHk?812lXiI-L0G#wHbLSEJKN&I1G;+J7=kyZmI^w`ACP#I z7AU~OjF<-o+7LB+A{wJ{YG;|#kql8+H|=21v6%3!Q9z`jNRi3c)p z2>H@x&D!`LazVR#5`p&YG;_&vASOJiUA=hX1(!<1XUHM#MAoPGKL(%%7;~;;veZ8K zHC$tThUzCL%n4bskab=6J@8EllF{^LZRu;NEm`X{i$gI;5Z| zOqXQ*#9B@|ZI7WV&d*iYV`=q%7wIc^_>FqQA2!dKrZ4v?ebtY*6jp7lPERR*?{Dhq z)m?y8I#WpJs#?Y`=ll4Xm=*nn%hQ#AeOR&M`1#f`m2PiMr-lY=PFsHJ+O1#o&tb^9 znDsJ?m(JR6fnr&s{al4x%cjz;>)&wSfA3uKToNoCv95Od-WM_WV$2t0-c4br@{kIZ z*{YVkzsl&ZVFtG&F|{bsL{{mp?zT*8;kEm#oA=(p@FC0<+tDb6(KNqL2s=|99feit z%4zv;F#4OAZ@r6zkW{Vv;?QY}qq9TsgLP|GEnl`|#gN?>DNJsyn=!}ks@Uz5#BG}r z#CAi_a-9UMpSzv_Zou5wzv1$0PoH87!{}Vi+4=p2BW|y3+3@3w7#>(vc!*8q{tm#grKQT0aeztjiWkvR?XPPRq=hscF$o9Xvs3Lp7 zRr4wq7jFaIg~Pis_h5b`lYK{zT$!l5S8c;d)_Kj_zk77UPL(r_$2Q|V_1UJDU}}>B z&aOFI(f&$iuqK`ANJRU2qFqI_Go#V|J2CzT=2^@yWqhZc3g0XCKHK{i$HAvV?SVKz1H5jM5%Q8smM6PtSX7@G$7Q#Ot6aW>=J6KtB?lWfMj zr`SwzKVvh|{hZAt_jG@~wv*j2`U|feJid7bH|4g8%cgf){`=#UUN*B{z^;@5SSkVR zJbUzis$FUfddWuk7g+G4;$$)KpYgMc+Yt-wYEtU3S5ablKz@jL-L z>hlIH-If5h9$+f~Ha80F_h9-1=2gsVG9Fl&sQaVZV!%R4fSKl)0X8+iu~E#jdwxm4 zzA2*>*4CZ?b}qoq1lX=oV1EMBo0u(_w`4r9GEw(uwZ(vik^nQ!F#~L;{K1W4rfC+i z+hhRNl>l}Qz?K6n69rbYT9|4vb(ne?53EeoHK;8HER+P8X^t6SQ}VAhh*@^VcLZ#u z%wTm_a)F%%2Vy>f`70R@tW4B>Qf)C{p(Mae zbIbr6pTD$D%(8C*Y{>(Dg>@!Y*lK`%4q#KGz&;J8KFlGQLuEX$GEtXRTMSqz2{6+f zGr*ei&((@q_L@fo?6^k_SZ8+vSUlQ1XCcwl9s?i97ffQ6C(GtDsr ztTA6-D`we)o)oa_WB``@Sg{gdrvt1d3Tz3OmSUD+oQwxnChE>mTMSqz2{6+fGr$`1 zC)J2q_V2a`*u1w4SVwY&tpM0*0PBtdTMec)n6;R7G9Fl&s5?t-F<_x2z)W+@0ISb$ ztQNEE=63|_qKcbXVPzj&&ji@10NW)B?0hg?fC(@c%6MR9qV6KK#ejv905i=o1FSCJ zRV`-O--uOqMWq4DB=7Ak2iPe9n-&H3WiWjO^Ht2(WIV7kQ8%cz7_d+hV5T`{fYs*j zs1md6EmZ>6T5Z6}zImSku#*9{XB5~5Fb!d@!2G?82UaHP3Tlf13nc+&nqvl7O}?W_ z%(4fK7qI(f_~4pcVGdv?0qnpiuy29s+nDcQzANK_m5I84R9g&KCSZ zS+;7HfSn~HVCm%j>16;r5nww`rdV+L4N?sXx{ z-jEWnb!m9DrIO!_mcr`QIR*AJj{8|H_Y+hshjq^7$=O)*?0#lTDMAY3EAonNWn z?!KNqet~E`Eu+=dmb^E;1X`bk)?QIs_rmDMnENpI%XnG|N%s@A#9&1P0~B`{tcr@- z5!?rBE72d(3w)fT1oD2z5?9>e@p#?w-{Cl`@)kEtg->j^SejC7~#oPM9}J~_UkBCK0lwDCTq=LywIOW#oMCDVgeUISdzFFTVtd`i=R-UG})7&!s0v zXW$t6EKl}PMr6Q`2>fqo_*KDHjFg1)ht%f}t#_yJBmC0*R#ukg*UH9c(ioPFwReqQ z21_R`-D&AG{mwu9+5d0_>iF*qT3*y;;Oo;balLHDyu6zFbjxs#?&vSvS3hBDFkHR2 zq&&AZJya^1`>U^5S6;B}(u(Xe%P*}M9M_>*Q~KDp&dA-N>(^F?CF=uTnXQjpn;N&4 ztY0UxH!y!%U2&j_Puksvw%ek%;COncsAqR1U3`gL^c!MBsP=WWHyuClr>ht4u+{< zzuN*RR-W%Ih~AtlJ-x^WQln?J=Z`^euPD7)^6d7&%*N~~lMU?W3T3Kpjv8b15}Kn# zZ%vnAe0cPVW$0C37rp0Y_;eci{9yE~5WOGt4v5nG7|iy^9Dw<_Oh~Ux)wQZIMlYfH z(V#c;UDsaWvfo!W2fnm@)Az({pR0{mw6tx!tPZ^|yv9X&&4p42rW4a8<1f`>X}cuq zxQ1f5N|J%s94%fWUTpr?ChzfDEtaGQe<*t6Z};>fb#9|)Md^K@*AS(55X^copThLY zgsZJg)g7$H7`}w&Xwmy{uhn9Kdd=OU_n=I?=xy+gWqV8}t#^5D1M+;s>=ZE=1S1ec0c|h>)kWvcFYHOA;AG)Ifxhr9HOwd<1~5xvJ{c$^%WCEBZchHki)AoQqj6 zO32tg1Z=Ue8BY+m%rII_4_OKgjsiRxFg4MBO*k7Qg2}DirTxwt0}s^{s~Io!(5HIM#l3h*2znv?pn3Qc$Fjruldp7HPWlI zSl^zt^JNv;hh=niMvLY~&nnzAq4#ujwcP@TMbE`~vb(X2Re^Fx$UqW-V=zXA9 zXHUR4RI8b=cJBtgInir%Bg`JaJcxNnCR}Z0s_tPm#_%OH zM~mJEy4vpBtFAfl_49eN#Op4Z!G?~xy8GIZyoY23?_FW`OmxLP0jVc3Phoy0<5yg< zhF%hOKUZ4}nMnp)HAjoth*n%v^8V#^-altg))neY=#TF!W}n&5uFz-+-PV}ZyLW+C zWps@_3#DITp2Pe~#`7wc&`YB3*J_LLDoF-jbBFK(SL`0rc6jvVBuT7_X9K;59!Qyhi$p7M8wWJ&X%9 zFL;dhI#{)qYCK-MEnD!czvQ;c9ja#R4$bB%KIA>JbQ z)$k=N#qR|fef)?P*lqDyJ-h{UZ%5H>0oYrZKV#mO5jov<%Oy13J8FuNODK*Cxe#3D zJ(p&-cc~sy*`nh5q8A}01F1-H-AGw2d>W*tMqk$I){0L(rUBC^7P<+-|Io z88X!3o92IpKWWdTJ5(6LgW!*DfU6Jmh7?tnrj`vT`ZENdj*Pq*$ zN7K>Oo$2f-{UyGSrY)73n;y1%s$*`tE0gG+>O4H%&HuQsTt1cR*01dn{oC47+mi3p zAC+WU(}iE(R?~9mFzDLaJ3Cu5F?4NhnU0Lk4dc8s-JJqO;nq*pH5V3LXp=RguA>tc zg%_sOPv70mV}1?{=VzkLPr9umlgcQJp~934>YI0q_wjSs-r3QG$Wym$qeH5*doCT0 z*tjh(`n&vY^^up#OhfaoWO^P{=8*%;Nu*AzYaq>AAMl%JfpHj7jM4{|`j5lXU<9 delta 51 zcmX?i-SXvpi-s1)7N!>F7M2#)7Pc1l7LFFq7OocV7M?A<55$=ne=sb6EYACbt%uQl HN~sC NM_000314.4:c.493-2A>C NP_000305.3:p.? ID00058 NC_000010.10:g.89717676G>A NM_000314.4:c.701G>A NP_000305.3:p.(Arg234Gln) ID00059 NC_000010.10:g.89717777G>A NM_000314.4:c.801+1G>A NP_000305.3:p.? -ID00060 NC_000010.10:g.89720648dupT NM_000314.4:c.802-3dupT NP_000305.3:p.? ID00061 NC_000005.9:g.131705667G>T NM_003060.3:c.3G>T NP_003051.1:p.Met1? ID00062 NC_000005.9:g.131706014G>A NM_003060.3:c.350G>A NP_003051.1:p.(Trp117*) diff --git a/tests/support/mock_input_source.py b/tests/support/mock_input_source.py index 184fbb49..5daf003e 100644 --- a/tests/support/mock_input_source.py +++ b/tests/support/mock_input_source.py @@ -55,6 +55,9 @@ def get_tx_seq(self, ac): def get_seq(self, ac, start_i=None, end_i=None): return self.get_tx_seq(ac)[start_i:end_i] + def get_pro_ac_for_tx_ac(self, ac): + return 'MOCK' + # # internal methods # diff --git a/tests/test_hgvs_assemblymapper.py b/tests/test_hgvs_assemblymapper.py index d2730104..33d30013 100644 --- a/tests/test_hgvs_assemblymapper.py +++ b/tests/test_hgvs_assemblymapper.py @@ -201,6 +201,16 @@ def test_c_to_p_with_stop_gain(self): self.assertEqual(str(var_p), hgvs_p) + def test_map_of_ins_splice_region_preserved(self): + hgvs_c = "NM_004119.2:c.1837+21_1837+22insCGAGAGAATATGAATATGATCTCAAATGGGAGTTTCCAAGAGAAAATTTAGAGTTTGGTAAGAATGGAATGTGCCAAA" + hgvs_p = "NP_004110.2:p.(Lys614_Val615insAsnGlyMetCysGlnThrArgGluTyrGluTyrAspLeuLysTrpGluPheProArgGluAsnLeuGluPheGlyLys)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + class Test_RefReplacement(unittest.TestCase): test_cases = [ # These casese attempt to test reference update in four dimensions: