diff --git a/Pipfile b/Pipfile index f6b798af..05f48d94 100644 --- a/Pipfile +++ b/Pipfile @@ -14,7 +14,6 @@ hgvs = "*" pydantic = "*" fastapi = "*" uvicorn = "*" -gene-normalizer = ">=0.1.40.dev1, != 0.2.0, != 0.2.1, != 0.2.2, != 0.2.3, != 0.2.4, != 0.2.5, != 0.2.6, != 0.2.7, != 0.2.8, != 0.2.9, != 0.2.10" "ga4gh.vrs" = "*" [dev-packages] diff --git a/cool_seq_tool/app.py b/cool_seq_tool/app.py index 28e7b734..fe7fabb0 100644 --- a/cool_seq_tool/app.py +++ b/cool_seq_tool/app.py @@ -4,8 +4,6 @@ import logging from biocommons.seqrepo import SeqRepo -from gene.query import QueryHandler as GeneQueryHandler -from gene.database import create_db from cool_seq_tool.mappers import ( MANETranscript, AlignmentMapper, ExonGenomicCoordsMapper @@ -29,7 +27,7 @@ def __init__( transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH, lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH, mane_data_path: Path = MANE_SUMMARY_PATH, - db_url: str = UTA_DB_URL, gene_query_handler: Optional[GeneQueryHandler] = None, + db_url: str = UTA_DB_URL, sr: Optional[SeqRepo] = None ) -> None: """Initialize CoolSeqTool class @@ -39,9 +37,6 @@ def __init__( :param mane_data_path: Path to RefSeq MANE summary data :param db_url: PostgreSQL connection URL Format: `driver://user:password@host/database/schema` - :param gene_query_handler: Gene normalizer query handler instance. If this is - provided, will use a current instance. If this is not provided, will create - a new instance. :param sr: SeqRepo instance. If this is not provided, will create a new instance """ if not sr: @@ -53,13 +48,10 @@ def __init__( self.mane_transcript_mappings = MANETranscriptMappings( mane_data_path=mane_data_path) self.uta_db = UTADatabase(db_url=db_url) - if not gene_query_handler: - gene_query_handler = GeneQueryHandler(create_db()) - self.gene_query_handler = gene_query_handler self.alignment_mapper = AlignmentMapper( self.seqrepo_access, self.transcript_mappings, self.uta_db) self.mane_transcript = MANETranscript( self.seqrepo_access, self.transcript_mappings, - self.mane_transcript_mappings, self.uta_db, self.gene_query_handler) + self.mane_transcript_mappings, self.uta_db) self.exon_genomic_coords_mapper = ExonGenomicCoordsMapper(self.uta_db, self.mane_transcript) diff --git a/cool_seq_tool/mappers/mane_transcript.py b/cool_seq_tool/mappers/mane_transcript.py index f7a00ad0..04520027 100644 --- a/cool_seq_tool/mappers/mane_transcript.py +++ b/cool_seq_tool/mappers/mane_transcript.py @@ -12,7 +12,6 @@ from typing import Optional, Set, Tuple, Dict, List, Union import pandas as pd -from gene.query import QueryHandler as GeneQueryHandler from cool_seq_tool.schemas import ( AnnotationLayer, Assembly, ResidueMode, TranscriptPriorityLabel @@ -27,20 +26,13 @@ logger = logging.getLogger(__name__) -class MANETranscriptError(Exception): - """Custom exception for MANETranscript class""" - - pass - - class MANETranscript: """Class for retrieving MANE transcripts.""" def __init__(self, seqrepo_access: SeqRepoAccess, transcript_mappings: TranscriptMappings, mane_transcript_mappings: MANETranscriptMappings, - uta_db: UTADatabase, - gene_query_handler: GeneQueryHandler) -> None: + uta_db: UTADatabase) -> None: """Initialize the MANETranscript class. :param seqrepo_access: Access to seqrepo queries @@ -49,13 +41,11 @@ def __init__(self, seqrepo_access: SeqRepoAccess, :param mane_transcript_mappings: Access to MANE Transcript accession mapping data :param uta_db: UTADatabase instance to give access to query UTA database - :param gene_query_handler: Access to Gene Normalizer """ self.seqrepo_access = seqrepo_access self.transcript_mappings = transcript_mappings self.mane_transcript_mappings = mane_transcript_mappings self.uta_db = uta_db - self.gene_query_handler = gene_query_handler @staticmethod def _get_reading_frame(pos: int) -> int: @@ -869,126 +859,3 @@ async def g_to_mane_c( refseq_c_ac=current_mane_data["RefSeq_nuc"], ensembl_c_ac=current_mane_data["Ensembl_nuc"], alt_ac=grch38["ac"] if grch38 else None) - - # Will be added once Chromosome Locations are added back to VRS 2.0-alpha - # def _get_hgnc_data(self, gene: str) -> Dict: - # """Return HGNC data for a given gene - - # :param gene: Gene query - # :return: HGNC data - # """ - # hgnc_data = {} - # gene_resp = self.gene_query_handler.normalize_unmerged(gene) - # hgnc_matches = gene_resp.source_matches.get(SourceName.HGNC) - # if hgnc_matches and hgnc_matches.records: - # hgnc_data = hgnc_matches.records[0].dict() - # else: - # logger.warning(f"Unable to get HGNC symbol for {gene}") - # return hgnc_data - - # async def get_mapped_mane_data( - # self, gene: str, assembly: Assembly, genomic_position: int, - # residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE - # ) -> Optional[MappedManeData]: - # """Get MANE data for gene, assembly, and position. If GRCh37 assembly is given, # noqa: E501 - # will return mapped MANE data. - - # :param str gene: Gene symbol or identifier - # :param Assembly assembly: Assembly for the provided genomic position - # :param int genomic_position: Position on the genomic reference sequence to find # noqa: E501 - # MANE data for - # :param ResidueMode residue_mode: Starting residue mode for `start_pos` - # and `end_pos`. Will always return coordinates in inter-residue - # :return: Mapped MANE or Longest Compatible Remaining data if found/compatible. - # MANETranscriptError will be raised if unable to get required data for - # retrieving mapped MANE data. - # """ - # hgnc_gene_data = self._get_hgnc_data(gene) - # if not hgnc_gene_data: - # raise MANETranscriptError(f"Unable to get HGNC data for gene: {gene}") - - # gene = hgnc_gene_data["symbol"] - - # mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene) - # if not mane_data: - # raise MANETranscriptError(f"Unable to get MANE data for gene: {gene}") - - # mane_data_len = len(mane_data) - - # alt_ac = None - # if hgnc_gene_data["locations"]: - # chr = hgnc_gene_data["locations"][0].get("chr") or "" - # alt_acs, _ = self.seqrepo_access.translate_identifier( - # f"{assembly.value}:{chr}", "refseq" - # ) - # if alt_acs: - # alt_ac = alt_acs[0].split(":")[1] - # else: - # raise MANETranscriptError(f"Unable to translate identifier for: " - # f"{assembly}:{chr}") - # else: - # raise MANETranscriptError("Unable to get HGNC gene location data") - - # inter_residue_pos, _ = get_inter_residue_pos(genomic_position, residue_mode) - # g_pos = inter_residue_pos[0] - - # mane_transcripts = set() - # for i in range(mane_data_len): - # index = mane_data_len - i - 1 - # current_mane_data = mane_data[index] - # mane_transcripts |= set((current_mane_data["RefSeq_nuc"], - # current_mane_data["Ensembl_nuc"])) - # mane_c_ac = current_mane_data["RefSeq_nuc"] - - # ac_query = mane_c_ac.split(".")[0] - # tx_exon_aln_v_data = await self.uta_db.get_tx_exon_aln_v_data( - # ac_query, g_pos, g_pos, alt_ac, False, True) - - # if not tx_exon_aln_v_data: - # continue - # else: - # len_of_aligned_data = len(tx_exon_aln_v_data) - # if len_of_aligned_data == 1: - # tx_exon_aln_v_data = tx_exon_aln_v_data[0] - # else: - # logger.debug(f"Found {len_of_aligned_data} records for aligned " - # f"mapped MANE data for {ac_query}, {g_pos}, {alt_ac}") # noqa: E501 - - # # Try checking for MANE match - # filter_data = list(filter(lambda x: x[1] == mane_c_ac, - # tx_exon_aln_v_data)) - # if filter_data: - # tx_exon_aln_v_data = filter_data[0] - # else: - # # Try checking for older versions of MANE - # filter_data = list(filter(lambda x: x[1].startswith( - # mane_c_ac.split(".")[0]), tx_exon_aln_v_data)) - # if filter_data: - # filter_data.sort(key=lambda x: x[1], reverse=True) - # tx_exon_aln_v_data = filter_data[0] - # return MappedManeData( - # gene=gene, - # refseq=current_mane_data["RefSeq_nuc"], - # ensembl=current_mane_data["Ensembl_nuc"], - # strand="-" if tx_exon_aln_v_data[7] == -1 else "+", - # status="_".join(current_mane_data["MANE_status"].split()).lower(), - # alt_ac=alt_ac, - # assembly=assembly.value - # ) - - # lcr_data = await self.get_longest_compatible_transcript( - # gene, g_pos, g_pos, AnnotationLayer.GENOMIC, - # residue_mode=ResidueMode.INTER_RESIDUE, mane_transcripts=mane_transcripts, - # alt_ac=alt_ac) - # if lcr_data: - # return MappedManeData( - # gene=gene, - # refseq=lcr_data["refseq"], - # ensembl=lcr_data["ensembl"], - # strand=lcr_data["strand"], - # status=lcr_data["status"], - # alt_ac=alt_ac, - # assembly=assembly.value - # ) - - # return None diff --git a/cool_seq_tool/routers/mane.py b/cool_seq_tool/routers/mane.py index 7149b8fd..ed1ef6f6 100644 --- a/cool_seq_tool/routers/mane.py +++ b/cool_seq_tool/routers/mane.py @@ -76,53 +76,3 @@ async def get_mane_data( warnings=warnings, service_meta=cool_seq_tool.service_meta() ) - - -# @router.get( -# "/get_mapped_mane_data", -# summary="Retrieve MANE Transcript mapped to a given assembly", -# response_description=RESP_DESCR, -# description="Return mapped MANE Transcript data to a given assembly", -# response_model=MappedManeDataService, -# tags=[Tags.MANE_TRANSCRIPT] -# ) -# async def get_mapped_mane_data( -# gene: str = Query(..., description="HGNC Symbol or Identifier"), -# assembly: Assembly = Query(..., description="Genomic assembly to use"), -# genomic_position: int = Query(..., description="Genomic position associated to the given gene and assembly"), # noqa: E501 -# residue_mode: ResidueMode = Query(ResidueMode.INTER_RESIDUE, -# description="Residue mode for `genomic_position`") # noqa: E501 -# ) -> MappedManeDataService: -# """Get MANE data for gene, assembly, and position. If GRCh37 assembly is given, -# will return mapped MANE data. - -# :param str gene: HGNC symbol or identifier -# :param Assembly assembly: Assembly for the provided genomic position -# :param int genomic_position: Position on the genomic reference sequence to find -# MANE data for -# :param ResidueMode residue_mode: Starting residue mode for `start_pos` -# and `end_pos`. Will always return coordinates in inter-residue -# :return: Mapped MANE or Longest Compatible Remaining data -# """ -# warnings: List = list() -# mapped_mane_data = None -# try: -# mapped_mane_data = await cool_seq_tool.mane_transcript.get_mapped_mane_data( -# gene, assembly, genomic_position, residue_mode) -# if not mapped_mane_data: -# warnings.append(f"Unable to find mapped data for gene {gene} at position " -# f"{genomic_position} ({residue_mode} coordinates) on " -# f"assembly {assembly}") -# except MANETranscriptError as e: -# e = str(e) -# logger.exception(e) -# warnings.append(e) -# except Exception as e: -# logger.exception(f"get_mapped_mane_data unhandled exception {e}") -# warnings.append(UNHANDLED_EXCEPTION_MSG) - -# return MappedManeDataService( -# mapped_mane_data=mapped_mane_data, -# warnings=warnings, -# service_meta=cool_seq_tool.service_meta() -# ) diff --git a/setup.cfg b/setup.cfg index e36a3e4c..279b0142 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,6 @@ install_requires = pydantic uvicorn fastapi - gene-normalizer >=0.1.40.dev1, != 0.2.0, != 0.2.1, != 0.2.2, != 0.2.3, != 0.2.4, != 0.2.5, != 0.2.6, != 0.2.7, != 0.2.8, != 0.2.9, != 0.2.10 ga4gh.vrs [options.package_data] diff --git a/tests/conftest.py b/tests/conftest.py index 80f76b9a..3b0d815a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,9 +42,3 @@ def test_transcript_mappings(test_cool_seq_tool): def test_mane_transcript_mappings(test_cool_seq_tool): """Create MANE Transcript Mappings test fixture""" return test_cool_seq_tool.mane_transcript_mappings - - -@pytest.fixture(scope="session") -def test_gene_query_handler(test_cool_seq_tool): - """Create Gene Query Handler test fixture""" - return test_cool_seq_tool.gene_query_handler diff --git a/tests/mappers/test_mane_transcript.py b/tests/mappers/test_mane_transcript.py index 460c5a22..78263b19 100644 --- a/tests/mappers/test_mane_transcript.py +++ b/tests/mappers/test_mane_transcript.py @@ -5,9 +5,8 @@ from mock import patch import pandas as pd -from cool_seq_tool.mappers.mane_transcript import MANETranscriptError from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess -from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode +from cool_seq_tool.schemas import AnnotationLayer @pytest.fixture(scope="module") @@ -565,70 +564,6 @@ async def test_g_to_mane_c(test_mane_transcript, egfr_l858r_mane_c, } -@pytest.mark.skipif(True, reason="chromosome locations not supported in 2.0-alpha") -@pytest.mark.asyncio -async def test_get_mapped_mane_data(test_mane_transcript): - """Test that get_mapped_mane_data works correctly""" - resp = await test_mane_transcript.get_mapped_mane_data( - "braf", Assembly.GRCH38, 140785808, ResidueMode.INTER_RESIDUE) - assert resp.model_dump() == { - "gene": "BRAF", - "refseq": "NM_001374258.1", - "ensembl": "ENST00000644969.2", - "strand": "-", - "status": "mane_plus_clinical", - "alt_ac": "NC_000007.14", - "assembly": "GRCh38" - } - - resp = await test_mane_transcript.get_mapped_mane_data( - "Braf", Assembly.GRCH37, 140485608, ResidueMode.INTER_RESIDUE) - assert resp.model_dump() == { - "gene": "BRAF", - "refseq": "NM_001374258.1", - "ensembl": "ENST00000644969.2", - "strand": "-", - "status": "mane_plus_clinical", - "alt_ac": "NC_000007.13", - "assembly": "GRCh37" - } - - resp = await test_mane_transcript.get_mapped_mane_data( - "BRAF", Assembly.GRCH38, 140783157, ResidueMode.INTER_RESIDUE) - assert resp.model_dump() == { - "gene": "BRAF", - "refseq": "NM_004333.6", - "ensembl": "ENST00000646891.2", - "strand": "-", - "status": "mane_select", - "alt_ac": "NC_000007.14", - "assembly": "GRCh38" - } - - resp = await test_mane_transcript.get_mapped_mane_data( - "BRAF", Assembly.GRCH37, 140482958, ResidueMode.RESIDUE) - assert resp.model_dump() == { - "gene": "BRAF", - "refseq": "NM_004333.6", - "ensembl": "ENST00000646891.2", - "strand": "-", - "status": "mane_select", - "alt_ac": "NC_000007.13", - "assembly": "GRCh37" - } - - # Invalid coord given assembly, so no result should be found - resp = await test_mane_transcript.get_mapped_mane_data( - "BRAF", Assembly.GRCH38, 140482957, ResidueMode.INTER_RESIDUE) - assert resp is None - - # Invalid gene - with pytest.raises(MANETranscriptError) as e: - await test_mane_transcript.get_mapped_mane_data( - "dummy", Assembly.GRCH37, 140482958, ResidueMode.RESIDUE) - assert str(e.value) == "Unable to get HGNC data for gene: dummy" - - @pytest.mark.asyncio async def test_valid(test_mane_transcript): """Test that valid queries do not raise any exceptions"""