Skip to content

Commit

Permalink
Remove gene-normalizer dependency and get_mapped_mane_data
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Oct 6, 2023
1 parent 1894458 commit 036fdc1
Show file tree
Hide file tree
Showing 7 changed files with 4 additions and 268 deletions.
1 change: 0 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ hgvs = "*"
pydantic = "*"
fastapi = "*"
uvicorn = "*"
gene-normalizer = ">=0.1.40.dev1, != 0.2.0, != 0.2.1, != 0.2.2, != 0.2.3, != 0.2.4, != 0.2.5, != 0.2.6, != 0.2.7, != 0.2.8, != 0.2.9, != 0.2.10"
"ga4gh.vrs" = "*"

[dev-packages]
Expand Down
12 changes: 2 additions & 10 deletions cool_seq_tool/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import logging

from biocommons.seqrepo import SeqRepo
from gene.query import QueryHandler as GeneQueryHandler
from gene.database import create_db

from cool_seq_tool.mappers import (
MANETranscript, AlignmentMapper, ExonGenomicCoordsMapper
Expand All @@ -29,7 +27,7 @@ def __init__(
transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH,
lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH,
mane_data_path: Path = MANE_SUMMARY_PATH,
db_url: str = UTA_DB_URL, gene_query_handler: Optional[GeneQueryHandler] = None,
db_url: str = UTA_DB_URL,
sr: Optional[SeqRepo] = None
) -> None:
"""Initialize CoolSeqTool class
Expand All @@ -39,9 +37,6 @@ def __init__(
:param mane_data_path: Path to RefSeq MANE summary data
:param db_url: PostgreSQL connection URL
Format: `driver://user:password@host/database/schema`
:param gene_query_handler: Gene normalizer query handler instance. If this is
provided, will use a current instance. If this is not provided, will create
a new instance.
:param sr: SeqRepo instance. If this is not provided, will create a new instance
"""
if not sr:
Expand All @@ -53,13 +48,10 @@ def __init__(
self.mane_transcript_mappings = MANETranscriptMappings(
mane_data_path=mane_data_path)
self.uta_db = UTADatabase(db_url=db_url)
if not gene_query_handler:
gene_query_handler = GeneQueryHandler(create_db())
self.gene_query_handler = gene_query_handler
self.alignment_mapper = AlignmentMapper(
self.seqrepo_access, self.transcript_mappings, self.uta_db)
self.mane_transcript = MANETranscript(
self.seqrepo_access, self.transcript_mappings,
self.mane_transcript_mappings, self.uta_db, self.gene_query_handler)
self.mane_transcript_mappings, self.uta_db)
self.exon_genomic_coords_mapper = ExonGenomicCoordsMapper(self.uta_db,
self.mane_transcript)
135 changes: 1 addition & 134 deletions cool_seq_tool/mappers/mane_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from typing import Optional, Set, Tuple, Dict, List, Union

import pandas as pd
from gene.query import QueryHandler as GeneQueryHandler

from cool_seq_tool.schemas import (
AnnotationLayer, Assembly, ResidueMode, TranscriptPriorityLabel
Expand All @@ -27,20 +26,13 @@
logger = logging.getLogger(__name__)


class MANETranscriptError(Exception):
    """Custom exception for the MANETranscript class.

    Carries no extra state; the message passed to the constructor is available
    through ``str(exc)`` like any other ``Exception``.
    """


class MANETranscript:
"""Class for retrieving MANE transcripts."""

def __init__(self, seqrepo_access: SeqRepoAccess,
transcript_mappings: TranscriptMappings,
mane_transcript_mappings: MANETranscriptMappings,
uta_db: UTADatabase,
gene_query_handler: GeneQueryHandler) -> None:
uta_db: UTADatabase) -> None:
"""Initialize the MANETranscript class.
:param seqrepo_access: Access to seqrepo queries
Expand All @@ -49,13 +41,11 @@ def __init__(self, seqrepo_access: SeqRepoAccess,
:param mane_transcript_mappings: Access to MANE Transcript accession mapping
data
:param uta_db: UTADatabase instance to give access to query UTA database
:param gene_query_handler: Access to Gene Normalizer
"""
self.seqrepo_access = seqrepo_access
self.transcript_mappings = transcript_mappings
self.mane_transcript_mappings = mane_transcript_mappings
self.uta_db = uta_db
self.gene_query_handler = gene_query_handler

@staticmethod
def _get_reading_frame(pos: int) -> int:
Expand Down Expand Up @@ -869,126 +859,3 @@ async def g_to_mane_c(
refseq_c_ac=current_mane_data["RefSeq_nuc"],
ensembl_c_ac=current_mane_data["Ensembl_nuc"],
alt_ac=grch38["ac"] if grch38 else None)

# Will be added once Chromosome Locations are added back to VRS 2.0-alpha
# def _get_hgnc_data(self, gene: str) -> Dict:
# """Return HGNC data for a given gene

# :param gene: Gene query
# :return: HGNC data
# """
# hgnc_data = {}
# gene_resp = self.gene_query_handler.normalize_unmerged(gene)
# hgnc_matches = gene_resp.source_matches.get(SourceName.HGNC)
# if hgnc_matches and hgnc_matches.records:
# hgnc_data = hgnc_matches.records[0].dict()
# else:
# logger.warning(f"Unable to get HGNC symbol for {gene}")
# return hgnc_data

# async def get_mapped_mane_data(
# self, gene: str, assembly: Assembly, genomic_position: int,
# residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE
# ) -> Optional[MappedManeData]:
# """Get MANE data for gene, assembly, and position. If GRCh37 assembly is given, # noqa: E501
# will return mapped MANE data.

# :param str gene: Gene symbol or identifier
# :param Assembly assembly: Assembly for the provided genomic position
# :param int genomic_position: Position on the genomic reference sequence to find # noqa: E501
# MANE data for
# :param ResidueMode residue_mode: Starting residue mode for `start_pos`
# and `end_pos`. Will always return coordinates in inter-residue
# :return: Mapped MANE or Longest Compatible Remaining data if found/compatible.
# MANETranscriptError will be raised if unable to get required data for
# retrieving mapped MANE data.
# """
# hgnc_gene_data = self._get_hgnc_data(gene)
# if not hgnc_gene_data:
# raise MANETranscriptError(f"Unable to get HGNC data for gene: {gene}")

# gene = hgnc_gene_data["symbol"]

# mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene)
# if not mane_data:
# raise MANETranscriptError(f"Unable to get MANE data for gene: {gene}")

# mane_data_len = len(mane_data)

# alt_ac = None
# if hgnc_gene_data["locations"]:
# chr = hgnc_gene_data["locations"][0].get("chr") or ""
# alt_acs, _ = self.seqrepo_access.translate_identifier(
# f"{assembly.value}:{chr}", "refseq"
# )
# if alt_acs:
# alt_ac = alt_acs[0].split(":")[1]
# else:
# raise MANETranscriptError(f"Unable to translate identifier for: "
# f"{assembly}:{chr}")
# else:
# raise MANETranscriptError("Unable to get HGNC gene location data")

# inter_residue_pos, _ = get_inter_residue_pos(genomic_position, residue_mode)
# g_pos = inter_residue_pos[0]

# mane_transcripts = set()
# for i in range(mane_data_len):
# index = mane_data_len - i - 1
# current_mane_data = mane_data[index]
# mane_transcripts |= set((current_mane_data["RefSeq_nuc"],
# current_mane_data["Ensembl_nuc"]))
# mane_c_ac = current_mane_data["RefSeq_nuc"]

# ac_query = mane_c_ac.split(".")[0]
# tx_exon_aln_v_data = await self.uta_db.get_tx_exon_aln_v_data(
# ac_query, g_pos, g_pos, alt_ac, False, True)

# if not tx_exon_aln_v_data:
# continue
# else:
# len_of_aligned_data = len(tx_exon_aln_v_data)
# if len_of_aligned_data == 1:
# tx_exon_aln_v_data = tx_exon_aln_v_data[0]
# else:
# logger.debug(f"Found {len_of_aligned_data} records for aligned "
# f"mapped MANE data for {ac_query}, {g_pos}, {alt_ac}") # noqa: E501

# # Try checking for MANE match
# filter_data = list(filter(lambda x: x[1] == mane_c_ac,
# tx_exon_aln_v_data))
# if filter_data:
# tx_exon_aln_v_data = filter_data[0]
# else:
# # Try checking for older versions of MANE
# filter_data = list(filter(lambda x: x[1].startswith(
# mane_c_ac.split(".")[0]), tx_exon_aln_v_data))
# if filter_data:
# filter_data.sort(key=lambda x: x[1], reverse=True)
# tx_exon_aln_v_data = filter_data[0]
# return MappedManeData(
# gene=gene,
# refseq=current_mane_data["RefSeq_nuc"],
# ensembl=current_mane_data["Ensembl_nuc"],
# strand="-" if tx_exon_aln_v_data[7] == -1 else "+",
# status="_".join(current_mane_data["MANE_status"].split()).lower(),
# alt_ac=alt_ac,
# assembly=assembly.value
# )

# lcr_data = await self.get_longest_compatible_transcript(
# gene, g_pos, g_pos, AnnotationLayer.GENOMIC,
# residue_mode=ResidueMode.INTER_RESIDUE, mane_transcripts=mane_transcripts,
# alt_ac=alt_ac)
# if lcr_data:
# return MappedManeData(
# gene=gene,
# refseq=lcr_data["refseq"],
# ensembl=lcr_data["ensembl"],
# strand=lcr_data["strand"],
# status=lcr_data["status"],
# alt_ac=alt_ac,
# assembly=assembly.value
# )

# return None
50 changes: 0 additions & 50 deletions cool_seq_tool/routers/mane.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,53 +76,3 @@ async def get_mane_data(
warnings=warnings,
service_meta=cool_seq_tool.service_meta()
)


# @router.get(
# "/get_mapped_mane_data",
# summary="Retrieve MANE Transcript mapped to a given assembly",
# response_description=RESP_DESCR,
# description="Return mapped MANE Transcript data to a given assembly",
# response_model=MappedManeDataService,
# tags=[Tags.MANE_TRANSCRIPT]
# )
# async def get_mapped_mane_data(
# gene: str = Query(..., description="HGNC Symbol or Identifier"),
# assembly: Assembly = Query(..., description="Genomic assembly to use"),
# genomic_position: int = Query(..., description="Genomic position associated with the given gene and assembly"), # noqa: E501
# residue_mode: ResidueMode = Query(ResidueMode.INTER_RESIDUE,
# description="Residue mode for `genomic_position`") # noqa: E501
# ) -> MappedManeDataService:
# """Get MANE data for gene, assembly, and position. If GRCh37 assembly is given,
# will return mapped MANE data.

# :param str gene: HGNC symbol or identifier
# :param Assembly assembly: Assembly for the provided genomic position
# :param int genomic_position: Position on the genomic reference sequence to find
# MANE data for
# :param ResidueMode residue_mode: Starting residue mode for `start_pos`
# and `end_pos`. Will always return coordinates in inter-residue
# :return: Mapped MANE or Longest Compatible Remaining data
# """
# warnings: List = list()
# mapped_mane_data = None
# try:
# mapped_mane_data = await cool_seq_tool.mane_transcript.get_mapped_mane_data(
# gene, assembly, genomic_position, residue_mode)
# if not mapped_mane_data:
# warnings.append(f"Unable to find mapped data for gene {gene} at position "
# f"{genomic_position} ({residue_mode} coordinates) on "
# f"assembly {assembly}")
# except MANETranscriptError as e:
# e = str(e)
# logger.exception(e)
# warnings.append(e)
# except Exception as e:
# logger.exception(f"get_mapped_mane_data unhandled exception {e}")
# warnings.append(UNHANDLED_EXCEPTION_MSG)

# return MappedManeDataService(
# mapped_mane_data=mapped_mane_data,
# warnings=warnings,
# service_meta=cool_seq_tool.service_meta()
# )
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ install_requires =
pydantic
uvicorn
fastapi
gene-normalizer >=0.1.40.dev1, != 0.2.0, != 0.2.1, != 0.2.2, != 0.2.3, != 0.2.4, != 0.2.5, != 0.2.6, != 0.2.7, != 0.2.8, != 0.2.9, != 0.2.10
ga4gh.vrs

[options.package_data]
Expand Down
6 changes: 0 additions & 6 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,3 @@ def test_transcript_mappings(test_cool_seq_tool):
def test_mane_transcript_mappings(test_cool_seq_tool):
"""Create MANE Transcript Mappings test fixture"""
return test_cool_seq_tool.mane_transcript_mappings


@pytest.fixture(scope="session")
def test_gene_query_handler(test_cool_seq_tool):
    """Create Gene Query Handler test fixture

    :param test_cool_seq_tool: fixture object exposing a ``gene_query_handler``
        attribute
    :return: ``test_cool_seq_tool.gene_query_handler``
    """
    return test_cool_seq_tool.gene_query_handler
67 changes: 1 addition & 66 deletions tests/mappers/test_mane_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
from mock import patch
import pandas as pd

from cool_seq_tool.mappers.mane_transcript import MANETranscriptError
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode
from cool_seq_tool.schemas import AnnotationLayer


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -565,70 +564,6 @@ async def test_g_to_mane_c(test_mane_transcript, egfr_l858r_mane_c,
}


@pytest.mark.skipif(True, reason="chromosome locations not supported in 2.0-alpha")
@pytest.mark.asyncio
async def test_get_mapped_mane_data(test_mane_transcript):
    """Test that get_mapped_mane_data works correctly

    Currently skipped unconditionally (see the ``skipif`` reason above).

    :param test_mane_transcript: fixture providing the object whose
        ``get_mapped_mane_data`` coroutine is under test
    """
    resp = await test_mane_transcript.get_mapped_mane_data(
        "braf", Assembly.GRCH38, 140785808, ResidueMode.INTER_RESIDUE)
    assert resp.model_dump() == {
        "gene": "BRAF",
        "refseq": "NM_001374258.1",
        "ensembl": "ENST00000644969.2",
        "strand": "-",
        "status": "mane_plus_clinical",
        "alt_ac": "NC_000007.14",
        "assembly": "GRCh38"
    }

    resp = await test_mane_transcript.get_mapped_mane_data(
        "Braf", Assembly.GRCH37, 140485608, ResidueMode.INTER_RESIDUE)
    assert resp.model_dump() == {
        "gene": "BRAF",
        "refseq": "NM_001374258.1",
        "ensembl": "ENST00000644969.2",
        "strand": "-",
        "status": "mane_plus_clinical",
        "alt_ac": "NC_000007.13",
        "assembly": "GRCh37"
    }

    resp = await test_mane_transcript.get_mapped_mane_data(
        "BRAF", Assembly.GRCH38, 140783157, ResidueMode.INTER_RESIDUE)
    assert resp.model_dump() == {
        "gene": "BRAF",
        "refseq": "NM_004333.6",
        "ensembl": "ENST00000646891.2",
        "strand": "-",
        "status": "mane_select",
        "alt_ac": "NC_000007.14",
        "assembly": "GRCh38"
    }

    resp = await test_mane_transcript.get_mapped_mane_data(
        "BRAF", Assembly.GRCH37, 140482958, ResidueMode.RESIDUE)
    assert resp.model_dump() == {
        "gene": "BRAF",
        "refseq": "NM_004333.6",
        "ensembl": "ENST00000646891.2",
        "strand": "-",
        "status": "mane_select",
        "alt_ac": "NC_000007.13",
        "assembly": "GRCh37"
    }

    # Invalid coord given assembly, so no result should be found
    resp = await test_mane_transcript.get_mapped_mane_data(
        "BRAF", Assembly.GRCH38, 140482957, ResidueMode.INTER_RESIDUE)
    assert resp is None

    # Invalid gene
    with pytest.raises(MANETranscriptError) as e:
        await test_mane_transcript.get_mapped_mane_data(
            "dummy", Assembly.GRCH37, 140482958, ResidueMode.RESIDUE)
    # Checked after the ``with`` block: inside it, this line would never run
    # once the awaited call raises.
    assert str(e.value) == "Unable to get HGNC data for gene: dummy"


@pytest.mark.asyncio
async def test_valid(test_mane_transcript):
"""Test that valid queries do not raise any exceptions"""
Expand Down

0 comments on commit 036fdc1

Please sign in to comment.