diff --git a/src/variation/hgvs_dup_del_mode.py b/src/variation/hgvs_dup_del_mode.py index 722c221e..175a12d3 100644 --- a/src/variation/hgvs_dup_del_mode.py +++ b/src/variation/hgvs_dup_del_mode.py @@ -2,7 +2,7 @@ from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.schemas import ResidueMode -from ga4gh.core import ga4gh_identify +from ga4gh.core import entity_models, ga4gh_identify from ga4gh.vrs import models, normalize from variation.schemas.normalize_response_schema import HGVSDupDelModeOption @@ -49,6 +49,7 @@ def default_mode( baseline_copies: int | None = None, copy_change: models.CopyChange | None = None, alt: str | None = None, + extensions: list[entity_models.Extension] | None = None, ) -> dict | None: """Use default characteristics to return a variation. If baseline_copies not provided and endpoints are ambiguous - copy_number_change @@ -65,6 +66,7 @@ def default_mode( :param baseline_copies: Baseline copies for Copy Number Count variation :param copy_change: copy change for Copy Number Change Variation :param alt: Alteration + :param extensions: List of extensions for variation :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. :return: VRS Variation object represented as a dict """ @@ -72,11 +74,17 @@ def default_mode( variation = None if not baseline_copies and alt_type in AMBIGUOUS_REGIONS: - variation = self.copy_number_change_mode(alt_type, location, copy_change) + variation = self.copy_number_change_mode( + alt_type, location, copy_change, extensions=extensions + ) elif baseline_copies: - variation = self.copy_number_count_mode(alt_type, location, baseline_copies) + variation = self.copy_number_count_mode( + alt_type, location, baseline_copies, extensions=extensions + ) else: - variation = self.allele_mode(location, alt_type, vrs_seq_loc_ac, alt) + variation = self.allele_mode( + location, alt_type, vrs_seq_loc_ac, alt, extensions=extensions + ) return variation def copy_number_count_mode( @@ -84,12 +92,14 @@ def copy_number_count_mode( alt_type: AltType, location: dict, baseline_copies: int, + extensions: list[entity_models.Extension] | None = None, ) -> dict: """Return a VRS Copy Number Variation. :param alt_type: The type of alteration. Must be one of ``DELS_DUPS``. :param location: VRS SequenceLocation :param baseline_copies: Baseline copies number + :param extensions: List of extensions for variation :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. :return: VRS Copy Number object represented as a dict """ @@ -98,7 +108,9 @@ def copy_number_count_mode( copies = baseline_copies - 1 if alt_type in DELS else baseline_copies + 1 seq_loc = models.SequenceLocation(**location) seq_loc.id = ga4gh_identify(seq_loc) - cn = models.CopyNumberCount(copies=copies, location=seq_loc) + cn = models.CopyNumberCount( + copies=copies, location=seq_loc, extensions=extensions + ) cn.id = ga4gh_identify(cn) return cn.model_dump(exclude_none=True) @@ -107,12 +119,14 @@ def copy_number_change_mode( alt_type: AltType, location: dict, copy_change: models.CopyChange | None = None, + extensions: list[entity_models.Extension] | None = None, ) -> dict: """Return copy number change variation :param alt_type: The type of alteration. Must be one of ``DELS_DUPS``. :param location: VRS SequenceLocation :param copy_change: The copy change + :param extensions: List of extensions for variation :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. :return: Copy Number Change variation as a dict """ @@ -127,7 +141,9 @@ def copy_number_change_mode( seq_loc = models.SequenceLocation(**location) seq_loc.id = ga4gh_identify(seq_loc) - cx = models.CopyNumberChange(location=seq_loc, copyChange=copy_change) + cx = models.CopyNumberChange( + location=seq_loc, copyChange=copy_change, extensions=extensions + ) cx.id = ga4gh_identify(cx) return cx.model_dump(exclude_none=True) @@ -137,6 +153,7 @@ def allele_mode( alt_type: AltType, vrs_seq_loc_ac: str, alt: str, + extensions: list[entity_models.Extension] | None = None, ) -> dict | None: """Return a VRS Allele with a normalized LiteralSequenceExpression or ReferenceLengthExpression. @@ -145,6 +162,7 @@ def allele_mode( :param alt_type: Alteration type :param vrs_seq_loc_ac: Accession used in VRS Sequence Location :param alt: Alteration + :param extensions: List of extensions for variation :return: VRS Allele object represented as a dict """ if alt_type in AMBIGUOUS_REGIONS: @@ -168,6 +186,7 @@ def allele_mode( allele = models.Allele( location=models.SequenceLocation(**location), state=models.LiteralSequenceExpression(sequence=state), + extensions=extensions, ) try: @@ -189,6 +208,7 @@ def interpret_variation( baseline_copies: int | None = None, copy_change: models.CopyChange | None = None, alt: str | None = None, + extensions: list[entity_models.Extension] | None = None, ) -> dict: """Interpret variation using HGVSDupDelMode @@ -201,6 +221,7 @@ def interpret_variation( :param baseline_copies: Baseline copies number :param copy_change: The copy change :param alt: The alteration + :param extensions: List of extensions for variation :return: VRS Variation object """ variation = None @@ -212,13 +233,16 @@ def interpret_variation( baseline_copies=baseline_copies, copy_change=copy_change, alt=alt, + extensions=extensions, ) elif hgvs_dup_del_mode == HGVSDupDelModeOption.ALLELE: - variation = self.allele_mode(location, alt_type, vrs_seq_loc_ac, alt) + variation = self.allele_mode( + location, alt_type, vrs_seq_loc_ac, alt, extensions=extensions + ) elif hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT: if baseline_copies: variation = self.copy_number_count_mode( - alt_type, location, baseline_copies + alt_type, location, baseline_copies, extensions=extensions ) else: errors.append( @@ -226,7 +250,7 @@ def interpret_variation( ) elif hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_CHANGE: variation = self.copy_number_change_mode( - alt_type, location, copy_change=copy_change + alt_type, location, copy_change=copy_change, extensions=extensions ) if not variation: diff --git a/src/variation/translators/genomic_del_dup_base.py b/src/variation/translators/genomic_del_dup_base.py index 1d2ef807..1aba8f4e 100644 --- a/src/variation/translators/genomic_del_dup_base.py +++ b/src/variation/translators/genomic_del_dup_base.py @@ -2,7 +2,7 @@ from typing import NamedTuple -from cool_seq_tool.schemas import ResidueMode +from cool_seq_tool.schemas import ManeGeneData, ResidueMode from ga4gh.vrs import models from pydantic import StrictInt, StrictStr, ValidationError @@ -30,6 +30,7 @@ class DelDupData(NamedTuple): ac: StrictStr pos0: StrictInt pos1: StrictInt | None + mane_genes: list[ManeGeneData] | None class GenomicDelDupTranslator(Translator): @@ -51,32 +52,34 @@ async def get_grch38_data( :param ac: Genomic RefSeq accession :return: Data on GRCh38 assembly if successful liftover. Else, `None` """ - pos0, pos1, new_ac = None, None, None + pos0, pos1, new_ac, mane_genes = None, None, None, None if classification.pos1: # `g_to_grch38` return inter-residue, but we want residue here # so we increment start by 1 grch38_pos = await self.mane_transcript.g_to_grch38( - ac, classification.pos0 + 1, classification.pos1 + ac, classification.pos0 + 1, classification.pos1, get_mane_genes=True ) if grch38_pos: pos0, pos1 = grch38_pos.pos new_ac = grch38_pos.ac + mane_genes = grch38_pos.mane_genes else: # `g_to_grch38` return inter-residue, but we want residue here # so we increment start by 1 grch38_pos = await self.mane_transcript.g_to_grch38( - ac, classification.pos0 + 1, classification.pos0 + ac, classification.pos0 + 1, classification.pos0, get_mane_genes=True ) if grch38_pos: pos0, _ = grch38_pos.pos new_ac = grch38_pos.ac + mane_genes = grch38_pos.mane_genes if not new_ac: errors.append(f"Unable to find a GRCh38 accession for: {ac}") try: - data = DelDupData(ac=new_ac, pos0=pos0, pos1=pos1) + data = DelDupData(ac=new_ac, pos0=pos0, pos1=pos1, mane_genes=mane_genes) except ValidationError: data = None return data @@ -114,6 +117,7 @@ async def translate( vrs_variation = None vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA residue_mode = ResidueMode.RESIDUE + mane_genes = None if do_liftover or endpoint_name == Endpoint.NORMALIZE: errors = [] @@ -122,15 +126,16 @@ async def translate( warnings.append(w) return None + grch38_data = await self.get_grch38_data( + classification, errors, validation_result.accession + ) + if assembly == ClinVarAssembly.GRCH37 and errors: + warnings += errors + return None + mane_genes = grch38_data.mane_genes + # assembly is either 37 or 38 if assembly == ClinVarAssembly.GRCH37: - grch38_data = await self.get_grch38_data( - classification, errors, validation_result.accession - ) - if errors: - warnings += errors - return None - pos0 = grch38_data.pos0 - 1 if grch38_data.pos1 is None: pos1 = grch38_data.pos0 @@ -158,7 +163,9 @@ async def translate( pos0 = classification.pos0 pos1 = classification.pos1 ac = validation_result.accession - grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1) + grch38_data = DelDupData( + ac=ac, pos0=pos0, pos1=pos1, mane_genes=mane_genes + ) assembly = ClinVarAssembly.GRCH38 else: @@ -184,6 +191,7 @@ async def translate( ac = grch38_data.ac pos0 = grch38_data.pos0 - 1 pos1 = grch38_data.pos0 if grch38_data.pos1 is None else grch38_data.pos1 + mane_genes = grch38_data.mane_genes residue_mode = ResidueMode.INTER_RESIDUE self.is_valid(classification.gene_token, ac, pos0, pos1, errors) @@ -246,6 +254,7 @@ async def translate( baseline_copies=baseline_copies, copy_change=copy_change, alt=alt, + extensions=self._mane_gene_extensions(mane_genes), ) elif endpoint_name == Endpoint.HGVS_TO_COPY_NUMBER_COUNT: vrs_variation = self.hgvs_dup_del_mode.copy_number_count_mode( diff --git a/src/variation/translators/genomic_delins.py b/src/variation/translators/genomic_delins.py index abf2fc0f..b10908d1 100644 --- a/src/variation/translators/genomic_delins.py +++ b/src/variation/translators/genomic_delins.py @@ -88,9 +88,11 @@ async def translate( vrs_seq_loc_ac = mane.refseq coord_type = AnnotationLayer.CDNA validation_result.classification = classification + extensions = None else: vrs_seq_loc_ac = mane.ac coord_type = AnnotationLayer.GENOMIC + extensions = self._mane_gene_extensions(mane.mane_genes) vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, @@ -102,6 +104,7 @@ async def translate( alt=classification.inserted_sequence, cds_start=mane.coding_start_site if gene else None, residue_mode=ResidueMode.INTER_RESIDUE, + extensions=extensions, ) else: vrs_seq_loc_ac = validation_result.accession diff --git a/src/variation/translators/genomic_insertion.py b/src/variation/translators/genomic_insertion.py index 00dc6ee4..d14009d0 100644 --- a/src/variation/translators/genomic_insertion.py +++ b/src/variation/translators/genomic_insertion.py @@ -89,9 +89,11 @@ async def translate( vrs_seq_loc_ac = mane.refseq coord_type = AnnotationLayer.CDNA validation_result.classification = classification + extensions = None else: vrs_seq_loc_ac = mane.ac coord_type = AnnotationLayer.GENOMIC + extensions = self._mane_gene_extensions(mane.mane_genes) vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, @@ -103,6 +105,7 @@ async def translate( alt=classification.inserted_sequence, cds_start=mane.coding_start_site if gene else None, residue_mode=ResidueMode.INTER_RESIDUE, + extensions=extensions, ) else: vrs_seq_loc_ac = validation_result.accession diff --git a/src/variation/translators/genomic_reference_agree.py b/src/variation/translators/genomic_reference_agree.py index 61cae262..7ffefbae 100644 --- a/src/variation/translators/genomic_reference_agree.py +++ b/src/variation/translators/genomic_reference_agree.py @@ -87,9 +87,11 @@ async def translate( vrs_seq_loc_ac = mane.refseq coord_type = AnnotationLayer.CDNA validation_result.classification = classification + extensions = None else: vrs_seq_loc_ac = mane.ac coord_type = AnnotationLayer.GENOMIC + extensions = self._mane_gene_extensions(mane.mane_genes) vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, @@ -100,6 +102,7 @@ async def translate( warnings, cds_start=mane.coding_start_site if gene else None, residue_mode=ResidueMode.INTER_RESIDUE, + extensions=extensions, ) else: vrs_seq_loc_ac = validation_result.accession diff --git a/src/variation/translators/genomic_substitution.py b/src/variation/translators/genomic_substitution.py index d2f4f5f9..bc5813ab 100644 --- a/src/variation/translators/genomic_substitution.py +++ b/src/variation/translators/genomic_substitution.py @@ -109,9 +109,11 @@ async def translate( vrs_seq_loc_ac = mane.refseq coord_type = AnnotationLayer.CDNA validation_result.classification = classification + extensions = None else: vrs_seq_loc_ac = mane.ac coord_type = AnnotationLayer.GENOMIC + extensions = self._mane_gene_extensions(mane.mane_genes) vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, @@ -123,6 +125,7 @@ async def translate( alt=classification.alt, cds_start=mane.coding_start_site if gene else None, residue_mode=ResidueMode.INTER_RESIDUE, + extensions=extensions, ) else: vrs_seq_loc_ac = validation_result.accession diff --git a/src/variation/translators/translator.py b/src/variation/translators/translator.py index e8ddb481..5e8f6e1e 100644 --- a/src/variation/translators/translator.py +++ b/src/variation/translators/translator.py @@ -4,8 +4,9 @@ from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.mappers import ManeTranscript -from cool_seq_tool.schemas import AnnotationLayer, ResidueMode +from cool_seq_tool.schemas import AnnotationLayer, ManeGeneData, ResidueMode from cool_seq_tool.sources import UtaDatabase +from ga4gh.core import entity_models from ga4gh.vrs import models from variation.hgvs_dup_del_mode import HGVSDupDelMode @@ -253,3 +254,25 @@ async def get_p_or_cdna_translation_result( ) return None + + @staticmethod + def _mane_gene_extensions( + mane_genes: list[ManeGeneData], + ) -> list[entity_models.Extension] | None: + """Transform mane genes to list of extensions + + This is only used in Genomic translators + + :param mane_genes: Optional list of mane gene data + :return: List of extensions containing mane gene data if found. Otherwise, + ``None`` + """ + mane_genes_exts = None + if mane_genes: + mane_genes_exts = [ + entity_models.Extension( + name="mane_genes", + value=mane_genes, + ) + ] + return mane_genes_exts diff --git a/src/variation/vrs_representation.py b/src/variation/vrs_representation.py index 30ab1398..ca8c7c9d 100644 --- a/src/variation/vrs_representation.py +++ b/src/variation/vrs_representation.py @@ -2,7 +2,7 @@ from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.schemas import AnnotationLayer, ResidueMode -from ga4gh.core import ga4gh_identify +from ga4gh.core import entity_models, ga4gh_identify from ga4gh.vrs import models, normalize from pydantic import ValidationError @@ -97,6 +97,7 @@ def vrs_allele( sstate: models.LiteralSequenceExpression | models.ReferenceLengthExpression, alt_type: AltType, errors: list[str], + extensions: list[entity_models.Extension] | None = None, ) -> dict | None: """Create a VRS Allele object. @@ -106,6 +107,7 @@ def vrs_allele( :param sstate: State :param alt_type: Type of alteration :param errors: List of errors + :param extensions: List of extensions for variation :return: VRS Allele object represented as a Dict """ refget_accession = get_refget_accession(self.seqrepo_access, ac, errors) @@ -117,7 +119,7 @@ def vrs_allele( except ValueError as e: errors.append(f"Unable to get sequence location: {e}") return None - allele = models.Allele(location=location, state=sstate) + allele = models.Allele(location=location, state=sstate, extensions=extensions) # Ambiguous regions do not get normalized if alt_type not in AMBIGUOUS_REGIONS: try: @@ -152,6 +154,7 @@ def to_vrs_allele( cds_start: int | None = None, alt: str | None = None, residue_mode: ResidueMode = ResidueMode.RESIDUE, + extensions: list[entity_models.Extension] | None = None, ) -> dict | None: """Translate accession and position to VRS Allele Object. @@ -164,6 +167,7 @@ def to_vrs_allele( :param cds_start: Coding start site :param alt: Alteration :param residue_mode: Residue mode for ``start`` and ``end`` positions + :param extensions: List of extensions for variation :return: VRS Allele Object """ coords = self.get_start_end(coordinate, start, end, cds_start, errors) @@ -224,4 +228,6 @@ def to_vrs_allele( return None sstate = models.LiteralSequenceExpression(sequence=state) - return self.vrs_allele(ac, new_start, new_end, sstate, alt_type, errors) + return self.vrs_allele( + ac, new_start, new_end, sstate, alt_type, errors, extensions=extensions + ) diff --git a/tests/conftest.py b/tests/conftest.py index 67bae951..b82fd44b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -606,12 +606,25 @@ def _vrs_id_and_digest_existence_checks(vrs_obj_dict, prefix=None): assert location_vrs_id == f"ga4gh:SL.{location_vrs_digest}" -def assertion_checks(normalize_response, test_variation, check_vrs_id=False): +def assertion_checks( + normalize_response, test_variation, mane_genes_exts=False, check_vrs_id=False +): """Check that normalize_response and test_variation are equal.""" actual = normalize_response.variation.model_dump(exclude_none=True) if not check_vrs_id: _vrs_id_and_digest_existence_checks(actual) + # Check MANE genes existence + if mane_genes_exts: + extensions = actual.pop("extensions") + assert len(extensions) == 1 + + mane_genes_ext = extensions[0] + assert mane_genes_ext["name"] == "mane_genes" + for mane_gene in mane_genes_ext["value"]: + assert mane_gene["ncbi_gene_id"] + assert mane_gene["symbol"] + expected = test_variation.model_copy().model_dump(exclude_none=True) if not check_vrs_id: _delete_id_and_digest(expected) diff --git a/tests/test_hgvs_dup_del_mode.py b/tests/test_hgvs_dup_del_mode.py index b018ec6e..42f0d5ae 100644 --- a/tests/test_hgvs_dup_del_mode.py +++ b/tests/test_hgvs_dup_del_mode.py @@ -763,60 +763,60 @@ async def test_genomic_dup1( # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/allele?hgvsOrDescriptor=NC_000003.12%3Ag.49531262dup q = "NC_000003.12:g.49531262dup" # 38 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_dup1_38_cn, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_38_cn, check_vrs_id=True, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE, copy_change=models.CopyChange.EFO_0030072, ) - assertion_checks(resp, genomic_dup1_cx, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_cx, check_vrs_id=True, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True, mane_genes_exts=True) q = "NC_000003.11:g.49568695dup" # 37 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_dup1_38_cn, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_38_cn, check_vrs_id=True, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE, copy_change=models.CopyChange.EFO_0030072, ) - assertion_checks(resp, genomic_dup1_cx, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_cx, check_vrs_id=True, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True) + assertion_checks(resp, genomic_dup1_lse, check_vrs_id=True, mane_genes_exts=True) # Free Text for q in ["DAG1 g.49568695dup", "DAG1 g.49531262dup"]: # 37 # 38 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_dup1_free_text_lse) + assertion_checks(resp, genomic_dup1_free_text_lse, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_dup1_free_text_lse) + assertion_checks(resp, genomic_dup1_free_text_lse, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_dup1_free_text_cn) + assertion_checks(resp, genomic_dup1_free_text_cn, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_dup1_free_text_lse) + assertion_checks(resp, genomic_dup1_free_text_lse, mane_genes_exts=True) # Invalid invalid_queries = [ @@ -842,51 +842,51 @@ async def test_genomic_dup2( # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/allele?hgvsOrDescriptor=NM_004006.2%3Ac.20_23dup q = "NC_000023.11:g.33211290_33211293dup" # 38 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_dup2_lse) + assertion_checks(resp, genomic_dup2_lse, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_dup2_38_cn) + assertion_checks(resp, genomic_dup2_38_cn, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) - assertion_checks(resp, genomic_dup2_cx) + assertion_checks(resp, genomic_dup2_cx, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_dup2_lse) + assertion_checks(resp, genomic_dup2_lse, mane_genes_exts=True) q = "NC_000023.10:g.33229407_33229410dup" # 37 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_dup2_lse) + assertion_checks(resp, genomic_dup2_lse, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_dup2_38_cn) + assertion_checks(resp, genomic_dup2_38_cn, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) - assertion_checks(resp, genomic_dup2_cx) + assertion_checks(resp, genomic_dup2_cx, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_dup2_lse) + assertion_checks(resp, genomic_dup2_lse, mane_genes_exts=True) # Free text for q in ["DMD g.33211290_33211293dup", "DMD g.33229407_33229410dup"]: # 37 # 38 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_dup2_free_text_default) + assertion_checks(resp, genomic_dup2_free_text_default, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_dup2_free_text_cn) + assertion_checks(resp, genomic_dup2_free_text_cn, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_dup2_free_text_default) + assertion_checks(resp, genomic_dup2_free_text_default, mane_genes_exts=True) # Greater than 100 bps -> rse q = "NC_000023.11:g.33211290_33211490dup" resp = await test_handler.normalize(q) - assertion_checks(resp, genomic_dup2_rle2) + assertion_checks(resp, genomic_dup2_rle2, mane_genes_exts=True) # Invalid invalid_queries = [ @@ -1167,54 +1167,54 @@ async def test_genomic_del1( """Test that genomic deletion works correctly.""" q = "NC_000003.12:g.10149811del" # 38 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_del1_lse) + assertion_checks(resp, genomic_del1_lse, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_del1_38_cn) + assertion_checks(resp, genomic_del1_38_cn, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE, copy_change=models.CopyChange.EFO_0030064, ) - assertion_checks(resp, genomic_del1_cx) + assertion_checks(resp, genomic_del1_cx, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_del1_lse) + assertion_checks(resp, genomic_del1_lse, mane_genes_exts=True) q = "NC_000003.11:g.10191495del" # 37 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_del1_lse) + assertion_checks(resp, genomic_del1_lse, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_del1_38_cn) + assertion_checks(resp, genomic_del1_38_cn, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE, copy_change=models.CopyChange.EFO_0030064, ) - assertion_checks(resp, genomic_del1_cx) + assertion_checks(resp, genomic_del1_cx, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_del1_lse) + assertion_checks(resp, genomic_del1_lse, mane_genes_exts=True) # Free text for q in ["VHL g.10191495del", "VHL g.10149811del"]: # 37 # 38 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_del1_free_text_lse) + assertion_checks(resp, genomic_del1_free_text_lse, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_del1_free_text_cn) + assertion_checks(resp, genomic_del1_free_text_cn, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_del1_free_text_lse) + assertion_checks(resp, genomic_del1_free_text_lse, mane_genes_exts=True) # Invalid invalid_queries = [ @@ -1239,68 +1239,68 @@ async def test_genomic_del2( """Test that genomic deletion works correctly.""" q = "NC_000003.12:g.10146595_10146613del" # 38 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_del2_lse) + assertion_checks(resp, genomic_del2_lse, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_del2_38_cn) + assertion_checks(resp, genomic_del2_38_cn, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE, copy_change=models.CopyChange.EFO_0030069, ) - assertion_checks(resp, genomic_del2_cx) + assertion_checks(resp, genomic_del2_cx, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_del2_lse) + assertion_checks(resp, genomic_del2_lse, mane_genes_exts=True) q = "NC_000003.11:g.10188279_10188297del" # 37 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_del2_lse) + assertion_checks(resp, genomic_del2_lse, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_del2_38_cn) + assertion_checks(resp, genomic_del2_38_cn, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE, copy_change=models.CopyChange.EFO_0030069, ) - assertion_checks(resp, genomic_del2_cx) + assertion_checks(resp, genomic_del2_cx, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_del2_lse) + assertion_checks(resp, genomic_del2_lse, mane_genes_exts=True) # Free text for q in ["VHL g.10188279_10188297del", "VHL g.10146595_10146613del"]: # 37 # 38 resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_del2_free_text_default) + assertion_checks(resp, genomic_del2_free_text_default, mane_genes_exts=True) resp = await test_handler.normalize( q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 ) - assertion_checks(resp, genomic_del2_free_text_cnv) + assertion_checks(resp, genomic_del2_free_text_cnv, mane_genes_exts=True) resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) - assertion_checks(resp, genomic_del2_free_text_default) + assertion_checks(resp, genomic_del2_free_text_default, mane_genes_exts=True) # Check that del > 100 bps returns LSE q = "NC_000023.11:g.33211290_33211490del" resp = await test_handler.normalize(q) - assertion_checks(resp, genomic_del2_lse2) + assertion_checks(resp, genomic_del2_lse2, mane_genes_exts=True) # gnomad vcf q = "3-10146594-AATGTTGACGGACAGCCTAT-A" resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) - assertion_checks(resp, genomic_del2_lse) + assertion_checks(resp, genomic_del2_lse, mane_genes_exts=True) q = "3-10188278-AATGTTGACGGACAGCCTAT-A" resp = await test_handler.normalize(q) - assertion_checks(resp, genomic_del2_lse) + assertion_checks(resp, genomic_del2_lse, mane_genes_exts=True) # Invalid invalid_queries = [ diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 4e0f65a9..43a951ee 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -588,13 +588,13 @@ async def test_cdna_and_genomic_substitution( assertion_checks(resp, braf_v600e_nucleotide) resp = await test_handler.normalize("NC_000007.13:g.140453136A>T") - assertion_checks(resp, braf_v600e_genomic_sub) + assertion_checks(resp, braf_v600e_genomic_sub, mane_genes_exts=True) resp = await test_handler.normalize("7-140453136-A-T") # 37 - assertion_checks(resp, braf_v600e_genomic_sub) + assertion_checks(resp, braf_v600e_genomic_sub, mane_genes_exts=True) resp = await test_handler.normalize("7-140753336-A-T") # 38 - assertion_checks(resp, braf_v600e_genomic_sub) + assertion_checks(resp, braf_v600e_genomic_sub, mane_genes_exts=True) resp = await test_handler.normalize("BRAF V600E (g.140453136A>T)") assertion_checks(resp, braf_v600e_nucleotide) @@ -604,7 +604,7 @@ async def test_cdna_and_genomic_substitution( # More than 1 gene (EGFR and EGFR-AS1) resp = await test_handler.normalize("NC_000007.13:g.55249071C>T") - assertion_checks(resp, genomic_sub_grch38) + assertion_checks(resp, genomic_sub_grch38, mane_genes_exts=True) resp = await test_handler.normalize("EGFR g.55249071C>T") assertion_checks(resp, genomic_substitution) @@ -612,7 +612,7 @@ async def test_cdna_and_genomic_substitution( # MNV genomic substitution (CA009580) q = "5-112175770-GGAA-AGAA" resp = await test_handler.normalize(q) - assertion_checks(resp, gnomad_vcf_genomic_sub_mnv) + assertion_checks(resp, gnomad_vcf_genomic_sub_mnv, mane_genes_exts=True) @pytest.mark.asyncio() @@ -637,20 +637,17 @@ async def test_genomic_reference_agree( ): """Test that genomic reference agree normalizes correctly.""" resp = await test_handler.normalize("NC_000007.13:g.140453136=") - assertion_checks( - resp, - grch38_braf_genom_reference_agree, - ) + assertion_checks(resp, grch38_braf_genom_reference_agree, mane_genes_exts=True) resp = await test_handler.normalize("7-140453136-A-A") - assertion_checks(resp, grch38_braf_genom_reference_agree) + assertion_checks(resp, grch38_braf_genom_reference_agree, mane_genes_exts=True) resp = await test_handler.normalize("7-140753336-A-A") - assertion_checks(resp, grch38_braf_genom_reference_agree) + assertion_checks(resp, grch38_braf_genom_reference_agree, mane_genes_exts=True) q = "7-140753336-ACT-ACT" resp = await test_handler.normalize(q) - assertion_checks(resp, grch38_braf_genom_reference_agree) + assertion_checks(resp, grch38_braf_genom_reference_agree, mane_genes_exts=True) resp = await test_handler.normalize("BRAF g.140453136=") assertion_checks(resp, cdna_reference_agree) @@ -685,14 +682,14 @@ async def test_genomic_delins( ): """Test that Genomic DelIns normalizes correctly.""" resp = await test_handler.normalize("NC_000007.13:g.140453135_140453136delinsAT") - assertion_checks(resp, grch38_genomic_delins1) + assertion_checks(resp, grch38_genomic_delins1, mane_genes_exts=True) resp = await test_handler.normalize("NC_000003.12:g.10149938delinsAA") - assertion_checks(resp, grch38_genomic_delins2) + assertion_checks(resp, grch38_genomic_delins2, mane_genes_exts=True) q = "3-10149938-C-AA" resp = await test_handler.normalize(q) - assertion_checks(resp, grch38_genomic_delins2) + assertion_checks(resp, grch38_genomic_delins2, mane_genes_exts=True) q = "BRAF g.140453135_140453136delinsAT" resp = await test_handler.normalize(q) @@ -702,11 +699,11 @@ async def test_genomic_delins( # allele?hgvsOrDescriptor=NM_000249.3%3Ac.489_498delinsGAGGCTTT q = "3-37050340-AAAAGCTTTA-GAGGCTTT" resp = await test_handler.normalize(q) - assertion_checks(resp, gnomad_vcf_genomic_delins1) + assertion_checks(resp, gnomad_vcf_genomic_delins1, mane_genes_exts=True) q = "16-68846036-AG-TGAGTTT" resp = await test_handler.normalize(q) - assertion_checks(resp, gnomad_vcf_genomic_delins2) + assertion_checks(resp, gnomad_vcf_genomic_delins2, mane_genes_exts=True) # NC_000023.10:g.70350063_70350064delinsAGGCAGCGCATAAAGCGCATTCTCCG # NC_000023.10:g.70350063_70350064insGGCAGCGCATAAAGCGCATTCTCC @@ -714,33 +711,33 @@ async def test_genomic_delins( # allele?hgvsOrDescriptor=NC_000023.11%3Ag.71130213_71130214insGGCAGCGCATAAAGCGCATTCTCC noqa: E501 q = "X-70350063-AG-AGGCAGCGCATAAAGCGCATTCTCCG" resp = await test_handler.normalize(q) - assertion_checks(resp, gnomad_vcf_genomic_delins3) + assertion_checks(resp, gnomad_vcf_genomic_delins3, mane_genes_exts=True) # CA523275412 q = "1-55509715-AC-A" resp = await test_handler.normalize(q) - assertion_checks(resp, gnomad_vcf_genomic_delins4) + assertion_checks(resp, gnomad_vcf_genomic_delins4, mane_genes_exts=True) # CA497925643 q = "17-7578455-CGCGG-CGCG" resp = await test_handler.normalize(q) - assertion_checks(resp, gnomad_vcf_genomic_delins5) + assertion_checks(resp, gnomad_vcf_genomic_delins5, mane_genes_exts=True) q = "3-10146594-AATGTTGACGGACAGCCTAT-A" resp = await test_handler.normalize(q) - assertion_checks(resp, genomic_del2_lse) + assertion_checks(resp, genomic_del2_lse, mane_genes_exts=True) q = "3-10188278-AATGTTGACGGACAGCCTAT-A" resp = await test_handler.normalize(q) - assertion_checks(resp, genomic_del2_lse) + assertion_checks(resp, genomic_del2_lse, mane_genes_exts=True) q = "3-10149810-CT-C" # 38 resp = await test_handler.normalize(q) - assertion_checks(resp, genomic_del1_lse) + assertion_checks(resp, genomic_del1_lse, mane_genes_exts=True) # gnomad should always return lse even if provided other hgvs dup del mode option resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_COUNT) - assertion_checks(resp, genomic_del1_lse) + assertion_checks(resp, genomic_del1_lse, mane_genes_exts=True) @pytest.mark.asyncio() @@ -806,7 +803,7 @@ async def test_genomic_deletion(test_handler, genomic_deletion): # CA915940709 q = "NC_000003.12:g.10146527_10146528del" resp1 = await test_handler.normalize(q) - assertion_checks(resp1, genomic_deletion) + assertion_checks(resp1, genomic_deletion, mane_genes_exts=True) resp2 = await test_handler.normalize("NC_000003.12:g.10146527_10146528delCT") assert resp2.variation.id == resp1.variation.id @@ -853,14 +850,14 @@ async def test_genomic_insertion( resp = await test_handler.normalize( "NC_000017.10:g.37880993_37880994insGCTTACGTGATG" ) - assertion_checks(resp, grch38_genomic_insertion_variation) + assertion_checks(resp, grch38_genomic_insertion_variation, mane_genes_exts=True) resp = await test_handler.normalize("ERBB2 g.37880993_37880994insGCTTACGTGATG") assertion_checks(resp, genomic_insertion) q = "17-37880993-G-GGCTTACGTGATG" resp = await test_handler.normalize(q) - assertion_checks(resp, grch38_genomic_insertion_variation) + assertion_checks(resp, grch38_genomic_insertion_variation, mane_genes_exts=True) @pytest.mark.asyncio()