diff --git a/Pipfile b/Pipfile index 0815201f..0197868e 100644 --- a/Pipfile +++ b/Pipfile @@ -14,8 +14,9 @@ hgvs = "*" pydantic = "==1.*" fastapi = "*" uvicorn = "*" -gene-normalizer = "==0.1.39" +gene-normalizer = "==0.1.*" "ga4gh.vrs" = "*" +"ga4gh.vrsatile.pydantic" = "==0.0.*" [dev-packages] cool_seq_tool = {editable = true, path = "."} diff --git a/cool_seq_tool/data_sources/feature_overlap.py b/cool_seq_tool/data_sources/feature_overlap.py index 90781087..b5171636 100644 --- a/cool_seq_tool/data_sources/feature_overlap.py +++ b/cool_seq_tool/data_sources/feature_overlap.py @@ -4,10 +4,12 @@ from typing import Dict, Optional import pandas as pd +from ga4gh.core import ga4gh_identify +from ga4gh.vrs import models from cool_seq_tool.data_sources import SeqRepoAccess from cool_seq_tool.paths import MANE_REFSEQ_GFF_PATH -from cool_seq_tool.schemas import ResidueMode +from cool_seq_tool.schemas import Assembly, ResidueMode class FeatureOverlapError(Exception): @@ -36,7 +38,8 @@ def _load_mane_refseq_gff_data(self) -> pd.core.frame.DataFrame: :return: DataFrame containing MANE RefSeq GFF data for CDS. Columsn include `type`, `chromosome` (chromosome without 'chr' prefix), `cds_start`, - `cds_stop`, `info_name` (name of record), and `gene` + `cds_stop`, `info_name` (name of record), and `gene`. `cds_start` and + `cds_stop` use inter-residue coordinates. """ df = pd.read_csv( self.mane_refseq_gff_path, @@ -69,10 +72,11 @@ def _load_mane_refseq_gff_data(self) -> pd.core.frame.DataFrame: df["cds_start"] = df["cds_start"].astype(int) df["cds_stop"] = df["cds_stop"].astype(int) + # Convert to inter-residue coordinates + df["cds_start"] -= 1 + # Only retain certain columns - df = df[ - ["type", "chromosome", "cds_start", "cds_stop", "info_name", "gene"] - ] + df = df[["type", "chromosome", "cds_start", "cds_stop", "info_name", "gene"]] return df @@ -84,7 +88,7 @@ def _get_chr_from_alt_ac(self, identifier: str) -> str: :return: Chromosome. 1..22, X, Y. No 'chr' prefix. 
""" aliases, error_msg = self.seqrepo_access.translate_identifier( - identifier, "GRCh38" + identifier, Assembly.GRCH38.value ) if error_msg: @@ -92,10 +96,10 @@ def _get_chr_from_alt_ac(self, identifier: str) -> str: if not aliases: raise FeatureOverlapError( - f"Unable to find GRCh38 aliases for: {identifier}" + f"Unable to find {Assembly.GRCH38.value} aliases for: {identifier}" ) - chr_pattern = r"^GRCh38:(?P<chromosome>X|Y|([1-9]|1[0-9]|2[0-2]))$" + chr_pattern = rf"^{Assembly.GRCH38.value}:(?P<chromosome>X|Y|([1-9]|1[0-9]|2[0-2]))$" # noqa: E501 for a in aliases: chr_match = re.match(chr_pattern, a) if chr_match: @@ -103,13 +107,13 @@ def _get_chr_from_alt_ac(self, identifier: str) -> str: if not chr_match: raise FeatureOverlapError( - f"Unable to find GRCh38 chromosome for: {identifier}" + f"Unable to find {Assembly.GRCH38.value} chromosome for: {identifier}" ) chr_groupdict = chr_match.groupdict() return chr_groupdict["chromosome"] - def get_grch38_cds_overlap( + def get_grch38_mane_cds_overlap( self, start: int, end: int, @@ -117,43 +121,49 @@ def get_grch38_cds_overlap( identifier: Optional[str] = None, residue_mode: ResidueMode = ResidueMode.RESIDUE, ) -> Optional[Dict]: - """Get feature overlap for GRCh38 genomic data + """Given GRCh38 genomic data, find the overlapping MANE features (gene and cds) :param start: GRCh38 start position :param end: GRCh38 end position :param chromosome: Chromosome. 1..22, X, or Y. If not provided, must provide `identifier`. If both `chromosome` and `identifier` are provided, `chromosome` will be used. - :param identifier: Genomic identifier on GRCh38 assembly. If not provided, - must identifier `chromosome`. If both `chromosome` and `identifier` are - provided, `chromosome` will be used. + :param identifier: Genomic identifier on GRCh38 assembly. If not provided, must + provide `chromosome`. If both `chromosome` and `identifier` are provided, + `chromosome` will be used. 
:param residue_mode: Residue mode for `start` and `end` - :raise FeatureOverlapError: If missing required fields - :return: Feature overlap dictionary where the key is the gene name and the value - is the list of CDS overlap (cds_start, cds_stop, overlap_start, - overlap_stop). Will return residue coordinates. + :raise FeatureOverlapError: If missing required fields or unable to find + associated ga4gh identifier + :return: MANE feature (gene/cds) overlap data represented as a dict + { + gene: { + 'cds': VRS Sequence Location + 'overlap': VRS Sequence Location + } + } """ + ga4gh_seq_id = None if chromosome: if not re.match(r"^X|Y|([1-9]|1[0-9]|2[0-2])$", chromosome): raise FeatureOverlapError("`chromosome` must be 1, ..., 22, X, or Y") else: if identifier: chromosome = self._get_chr_from_alt_ac(identifier) + if identifier.startswith("ga4gh:SQ."): + ga4gh_seq_id = identifier else: raise FeatureOverlapError( "Must provide either `chromosome` or `identifier`" ) - # GFF is 1-based, so we need to convert inter-residue to residue - # RESIDUE | | 1 | | 2 | | 3 | | - # INTER_RESIDUE | 0 | | 1 | | 2 | | 3 | - if residue_mode == ResidueMode.INTER_RESIDUE: - if start != end: - start += 1 - else: - end += 1 + # Convert residue to inter-residue + if residue_mode == ResidueMode.RESIDUE: + if start == end: + start -= 1 + + start -= 1 - # Get feature dataframe + # Get feature dataframe (df uses inter-residue) feature_df = self.df[ (self.df["chromosome"] == chromosome) & (self.df["cds_start"] <= end) # noqa: W503 @@ -171,10 +181,57 @@ def get_grch38_cds_overlap( lambda x: end if end <= x else x ) - return ( - feature_df.groupby(["gene"])[ - ["info_name", "cds_start", "cds_stop", "overlap_start", "overlap_stop"] - ] - .apply(lambda x: x.set_index("info_name").to_dict(orient="records")) - .to_dict() - ) + # Get ga4gh identifier for chromosome + if not ga4gh_seq_id: + grch38_chr = f"{Assembly.GRCH38.value}:{chromosome}" + ga4gh_aliases, error_msg = 
self.seqrepo_access.translate_identifier( + grch38_chr, "ga4gh" + ) + + # Errors should never happen but catching just in case + if error_msg: + raise FeatureOverlapError(str(error_msg)) + elif not ga4gh_aliases: + raise FeatureOverlapError( + f"Unable to find ga4gh identifier for: {grch38_chr}" + ) + + ga4gh_seq_id = ga4gh_aliases[0] + + def _get_seq_loc(start_pos: int, stop_pos: int, ga4gh_sequence_id: str) -> Dict: + """Get VRS Sequence Location represented as a dict + + :param start_pos: Start position + :param stop_pos: Stop position + :param ga4gh_sequence_id: ga4gh sequence identifier + :return: VRS Sequence Location represented as dictionary with the ga4gh ID + included + """ + _sl = models.SequenceLocation( + sequence_id=ga4gh_sequence_id, + interval=models.SequenceInterval( + start=models.Number(value=start_pos), + end=models.Number(value=stop_pos), + ), + ) + _sl._id = ga4gh_identify(_sl) + return _sl.as_dict() + + resp = {} + for gene, group in feature_df.groupby("gene"): + _gene_overlap_data = [] + + for cds_row in group.itertuples(): + _gene_overlap_data.append( + { + "cds": _get_seq_loc( + cds_row.cds_start, cds_row.cds_stop, ga4gh_seq_id + ), + "overlap": _get_seq_loc( + cds_row.overlap_start, cds_row.overlap_stop, ga4gh_seq_id + ), + } + ) + resp[gene] = _gene_overlap_data + + return resp diff --git a/setup.cfg b/setup.cfg index aae3eeba..1fe24254 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,8 +23,9 @@ install_requires = pydantic ==1.* uvicorn fastapi - gene-normalizer ==0.1.39 + gene-normalizer ==0.1.* ga4gh.vrs + ga4gh.vrsatile.pydantic ==0.0.* [options.package_data] cool_seq_tool = diff --git a/tests/unit/test_feature_overlap.py b/tests/unit/test_feature_overlap.py index b2a7b888..b979311b 100644 --- a/tests/unit/test_feature_overlap.py +++ b/tests/unit/test_feature_overlap.py @@ -79,24 +79,40 @@ def test_get_chr_from_alt_ac(test_feature_overlap): def test_get_grch38_cds_overlap(test_feature_overlap): - """Test that get_grch38_cds_overlap works 
correctly""" + """Test that get_grch38_mane_cds_overlap works correctly""" # Variant fully contains exon (negative strand) - resp = test_feature_overlap.get_grch38_cds_overlap( + resp = test_feature_overlap.get_grch38_mane_cds_overlap( 140726490, 140726520, identifier="ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" ) assert resp == { "BRAF": [ { - "cds_start": 140726494, - "cds_stop": 140726516, - "overlap_start": 140726494, - "overlap_stop": 140726516, + "cds": { + "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 140726493, "type": "Number"}, + "end": {"value": 140726516, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 140726493, "type": "Number"}, + "end": {"value": 140726516, "type": "Number"}, + }, + }, } ] } # Using inter-residue (start == stop) - resp = test_feature_overlap.get_grch38_cds_overlap( + resp = test_feature_overlap.get_grch38_mane_cds_overlap( 140726500, 140726500, identifier="ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", @@ -105,157 +121,335 @@ def test_get_grch38_cds_overlap(test_feature_overlap): assert resp == { "BRAF": [ { - "cds_start": 140726494, - "cds_stop": 140726516, - "overlap_start": 140726500, - "overlap_stop": 140726501, + "cds": { + "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 140726493, "type": "Number"}, + "end": {"value": 140726516, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.EqiyoLjrKnKg5F56bjRlBFBUihSkgX5w", + "type": "SequenceLocation", + "sequence_id": 
"ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 140726500, "type": "Number"}, + "end": {"value": 140726500, "type": "Number"}, + }, + }, } ] } # Variant is fully contained within exon (positive strand) - resp = test_feature_overlap.get_grch38_cds_overlap( + resp = test_feature_overlap.get_grch38_mane_cds_overlap( 55019308, 55019341, chromosome="7" ) assert resp == { "EGFR": [ { - "cds_start": 55019278, - "cds_stop": 55019365, - "overlap_start": 55019308, - "overlap_stop": 55019341, + "cds": { + "_id": "ga4gh:VSL.fLukxPP69_vJU-1EYdNgM2waELFsJ0gI", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 55019277, "type": "Number"}, + "end": {"value": 55019365, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.7CpeGak8icEAK4Eobs3ChFo9jqKdSidh", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 55019307, "type": "Number"}, + "end": {"value": 55019341, "type": "Number"}, + }, + }, } ] } # Variant partially overlaps with exon, from the exon's start side (negative strand) - resp = test_feature_overlap.get_grch38_cds_overlap( + resp = test_feature_overlap.get_grch38_mane_cds_overlap( 140726503, 140726520, chromosome="7" ) assert resp == { "BRAF": [ { - "cds_start": 140726494, - "cds_stop": 140726516, - "overlap_start": 140726503, - "overlap_stop": 140726516, + "cds": { + "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 140726493, "type": "Number"}, + "end": {"value": 140726516, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.l48V673TzeyfKeLfm3JUN-VTdEPqv80p", + "type": "SequenceLocation", + "sequence_id": 
"ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 140726502, "type": "Number"}, + "end": {"value": 140726516, "type": "Number"}, + }, + }, } ] } # Variant partially overlaps with exon, from the exon's stop side (negative strand) - resp = test_feature_overlap.get_grch38_cds_overlap( + resp = test_feature_overlap.get_grch38_mane_cds_overlap( 140726490, 140726505, identifier="NC_000007.14" ) assert resp == { "BRAF": [ { - "cds_start": 140726494, - "cds_stop": 140726516, - "overlap_start": 140726494, - "overlap_stop": 140726505, + "cds": { + "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 140726493, "type": "Number"}, + "end": {"value": 140726516, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.QWkbljVU4ijJQsDS3vTVVfslmuuGbtEd", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"value": 140726493, "type": "Number"}, + "end": {"value": 140726505, "type": "Number"}, + }, + }, } ] } # Variant overlaps with multiple exons (positive strand) - resp = test_feature_overlap.get_grch38_cds_overlap( + resp = test_feature_overlap.get_grch38_mane_cds_overlap( 21522390, 21523491, chromosome="Y" ) assert resp == { "RBMY1B": [ { - "cds_start": 21522383, - "cds_stop": 21522493, - "overlap_start": 21522390, - "overlap_stop": 21522493, + "cds": { + "_id": "ga4gh:VSL.b3uUu78bDJGPE2XnzWB4BHaIhP2PZRS1", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "type": "SequenceInterval", + "start": {"value": 21522382, "type": "Number"}, + "end": {"value": 21522493, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.-NoMsiuXihe5cwF2ANgmFMDlji3k3qwC", + "type": "SequenceLocation", + 
"sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "type": "SequenceInterval", + "start": {"value": 21522389, "type": "Number"}, + "end": {"value": 21522493, "type": "Number"}, + }, + }, }, { - "cds_start": 21522935, - "cds_stop": 21523045, - "overlap_start": 21522935, - "overlap_stop": 21523045, + "cds": { + "_id": "ga4gh:VSL.07koDLghr5iKDUK2WtK4R-MFonLIrAAa", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "type": "SequenceInterval", + "start": {"value": 21522934, "type": "Number"}, + "end": {"value": 21523045, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.07koDLghr5iKDUK2WtK4R-MFonLIrAAa", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "type": "SequenceInterval", + "start": {"value": 21522934, "type": "Number"}, + "end": {"value": 21523045, "type": "Number"}, + }, + }, }, { - "cds_start": 21523480, - "cds_stop": 21523590, - "overlap_start": 21523480, - "overlap_stop": 21523491, + "cds": { + "_id": "ga4gh:VSL.QtREnzEQbynxC-nvQLf9hm_3b3kPjBNI", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "type": "SequenceInterval", + "start": {"value": 21523479, "type": "Number"}, + "end": {"value": 21523590, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.rdFImJ11G9vMjIeu_lMoeeBtDWuw5ssk", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "type": "SequenceInterval", + "start": {"value": 21523479, "type": "Number"}, + "end": {"value": 21523491, "type": "Number"}, + }, + }, }, ] } # Variant overlaps with multiple exons (negative strand) - resp = test_feature_overlap.get_grch38_cds_overlap( + resp = test_feature_overlap.get_grch38_mane_cds_overlap( 154779177, 154781317, chromosome="X" ) assert resp == { "MPP1": [ { - "cds_start": 154781239, - "cds_stop": 154781313, - 
"overlap_start": 154781239, - "overlap_stop": 154781313, + "cds": { + "_id": "ga4gh:VSL.xwoKqCpJnxAMMx0BNDrsG-T05fFs3vzJ", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + "interval": { + "type": "SequenceInterval", + "start": {"value": 154781238, "type": "Number"}, + "end": {"value": 154781313, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.xwoKqCpJnxAMMx0BNDrsG-T05fFs3vzJ", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + "interval": { + "type": "SequenceInterval", + "start": {"value": 154781238, "type": "Number"}, + "end": {"value": 154781313, "type": "Number"}, + }, + }, }, { - "cds_start": 154779177, - "cds_stop": 154779353, - "overlap_start": 154779177, - "overlap_stop": 154779353, + "cds": { + "_id": "ga4gh:VSL.KfGMgFOaQRFXpG8X3mK75XbIEFMH6Sg9", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + "interval": { + "type": "SequenceInterval", + "start": {"value": 154779176, "type": "Number"}, + "end": {"value": 154779353, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.KfGMgFOaQRFXpG8X3mK75XbIEFMH6Sg9", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + "interval": { + "type": "SequenceInterval", + "start": {"value": 154779176, "type": "Number"}, + "end": {"value": 154779353, "type": "Number"}, + }, + }, }, ] } # Variant overlap with cds in multiple genes and alt chromosome accession # chr19_KI270930v1_alt with exact start/stop CDS - resp = test_feature_overlap.get_grch38_cds_overlap(135329, 135381, chromosome="19") + resp = test_feature_overlap.get_grch38_mane_cds_overlap( + 135329, 135381, chromosome="19" + ) expected = { "KIR2DL5B": [ { - "cds_start": 135329, - "cds_stop": 135381, - "overlap_start": 135329, - "overlap_stop": 135381, + "cds": { + "_id": "ga4gh:VSL.SflvFED1mwAVOHbdM4FE2phzNPRSAi_J", + "type": "SequenceLocation", + "sequence_id": 
"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", + "interval": { + "type": "SequenceInterval", + "start": {"value": 135328, "type": "Number"}, + "end": {"value": 135381, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.SflvFED1mwAVOHbdM4FE2phzNPRSAi_J", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", + "interval": { + "type": "SequenceInterval", + "start": {"value": 135328, "type": "Number"}, + "end": {"value": 135381, "type": "Number"}, + }, + }, } ], "FCGBP": [ { - "cds_start": 135264, - "cds_stop": 135807, - "overlap_start": 135329, - "overlap_stop": 135381, + "cds": { + "_id": "ga4gh:VSL.6SSrOjGBl-txA6rGNXVfMFvsnNksxG4K", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", + "interval": { + "type": "SequenceInterval", + "start": {"value": 135263, "type": "Number"}, + "end": {"value": 135807, "type": "Number"}, + }, + }, + "overlap": { + "_id": "ga4gh:VSL.SflvFED1mwAVOHbdM4FE2phzNPRSAi_J", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", + "interval": { + "type": "SequenceInterval", + "start": {"value": 135328, "type": "Number"}, + "end": {"value": 135381, "type": "Number"}, + }, + }, } ], } assert resp == expected # Using inter-residue (start != stop) - resp = test_feature_overlap.get_grch38_cds_overlap( + resp = test_feature_overlap.get_grch38_mane_cds_overlap( 135328, 135381, chromosome="19", residue_mode=ResidueMode.INTER_RESIDUE ) assert resp == expected # No overlap found - resp = test_feature_overlap.get_grch38_cds_overlap(1, 2, chromosome="19") + resp = test_feature_overlap.get_grch38_mane_cds_overlap(1, 2, chromosome="19") assert resp is None # Testing invalid # chromosome does not match regex pattern with pytest.raises(FeatureOverlapError) as e: - test_feature_overlap.get_grch38_cds_overlap( + test_feature_overlap.get_grch38_mane_cds_overlap( 154779177, 154781317, chromosome="chrX" ) assert str(e.value) == 
"`chromosome` must be 1, ..., 22, X, or Y" # identifier is GRCh37 with pytest.raises(FeatureOverlapError) as e: - test_feature_overlap.get_grch38_cds_overlap( + test_feature_overlap.get_grch38_mane_cds_overlap( 154779177, 154781317, identifier="NC_000023.10" ) assert str(e.value) == "Unable to find GRCh38 aliases for: NC_000023.10" # no identifier or chromosome provided with pytest.raises(FeatureOverlapError) as e: - test_feature_overlap.get_grch38_cds_overlap(154779177, 154781317) + test_feature_overlap.get_grch38_mane_cds_overlap(154779177, 154781317) assert str(e.value) == "Must provide either `chromosome` or `identifier`"