Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor!: get_gene_mane_data sorted by desc MANE_Status #213

Merged
merged 1 commit into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions cool_seq_tool/mappers/mane_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,6 @@ async def get_mane_transcript(
mane_data = self.mane_transcript_mappings.get_gene_mane_data(g["gene"])
if not mane_data:
return None
mane_data_len = len(mane_data)

# Transcript Priority (Must pass validation checks):
# 1. MANE Select
Expand All @@ -746,9 +745,7 @@ async def get_mane_transcript(
# a. If there is a tie, choose the first-published transcript among
# those transcripts meeting criterion
mane_transcripts = set()
for i in range(mane_data_len):
index = mane_data_len - i - 1
current_mane_data = mane_data[index]
for current_mane_data in mane_data:
mane_transcripts |= set(
(current_mane_data["RefSeq_nuc"], current_mane_data["Ensembl_nuc"])
)
Expand Down Expand Up @@ -957,11 +954,8 @@ async def g_to_mane_c(
mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene)
if not mane_data:
return None
mane_data_len = len(mane_data)

for i in range(mane_data_len):
index = mane_data_len - i - 1
current_mane_data = mane_data[index]
for current_mane_data in mane_data:
mane_c_ac = current_mane_data["RefSeq_nuc"]

# Liftover to GRCh38
Expand Down
8 changes: 4 additions & 4 deletions cool_seq_tool/sources/mane_transcript_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ def _load_mane_transcript_data(self) -> pl.DataFrame:
def get_gene_mane_data(self, gene_symbol: str) -> Optional[List[Dict]]:
"""Return MANE Transcript data for a gene.
:param str gene_symbol: HGNC Gene Symbol
:return: MANE Transcript data (Transcript accessions,
gene, and location information)
:return: List of MANE Transcript data (transcript accessions,
gene, and location information), sorted with MANE Select first, followed by
MANE Plus Clinical
"""
data = self.df.filter(pl.col("symbol") == gene_symbol.upper())

Expand All @@ -40,8 +41,7 @@ def get_gene_mane_data(self, gene_symbol: str) -> Optional[List[Dict]]:
)
return None

# Ordering: MANE Plus Clinical (If it exists), MANE Select
data = data.sort(by="MANE_status", descending=False)
data = data.sort(by="MANE_status", descending=True)
return data.to_dicts()

def get_mane_from_transcripts(self, transcripts: List[str]) -> List[Dict]:
Expand Down
16 changes: 8 additions & 8 deletions tests/sources/test_mane_transcript_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,23 +97,23 @@ def test_get_gene_mane_data(
# MANE Select
actual = test_mane_transcript_mappings.get_gene_mane_data("BRAF")
assert len(actual) == 2
assert actual[0] == braf_plus_clinical
assert actual[1] == braf_select
assert actual[0] == braf_select
assert actual[1] == braf_plus_clinical

actual = test_mane_transcript_mappings.get_gene_mane_data("braf")
assert len(actual) == 2
assert actual[0] == braf_plus_clinical
assert actual[1] == braf_select
assert actual[0] == braf_select
assert actual[1] == braf_plus_clinical

# MANE Select and MANE Plus Clinical
actual = test_mane_transcript_mappings.get_gene_mane_data("ERCC6")
assert len(actual) == 2
assert actual[0] == ercc6_plus_clinical
assert actual[1] == ercc6_select
assert actual[0] == ercc6_select
assert actual[1] == ercc6_plus_clinical

actual = test_mane_transcript_mappings.get_gene_mane_data("ercc6")
assert actual[0] == ercc6_plus_clinical
assert actual[1] == ercc6_select
assert actual[0] == ercc6_select
assert actual[1] == ercc6_plus_clinical

# No Matches
actual = test_mane_transcript_mappings.get_gene_mane_data("BRAFF")
Expand Down