From bb14ea7e148d8b2ee8fb2a3d20865a4e791212e3 Mon Sep 17 00:00:00 2001 From: Jeremy Arbesfeld Date: Thu, 14 Nov 2024 13:08:31 -0500 Subject: [PATCH] Get index of tx_ac column --- src/cool_seq_tool/mappers/exon_genomic_coords.py | 8 +++++--- src/cool_seq_tool/mappers/mane_transcript.py | 8 ++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index 14ed514..53c9bad 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -966,9 +966,11 @@ async def _select_optimal_transcript( AND alt_ac = '{genomic_ac}' """ # noqa: S608 results = await self.uta_db.execute_query(query) - schema = ["tx_ac", "alt_ac", "hgnc"] - transcripts = [(r["tx_ac"], r["alt_ac"], r["hgnc"]) for r in results] - transcripts = pl.DataFrame(data=transcripts, schema=schema, orient="row") + schema = ["tx_ac"] + transcripts = [(r["tx_ac"]) for r in results] + transcripts = pl.DataFrame( + data=transcripts, schema=schema, orient="row" + ).unique() result = self.mane_transcript.get_prioritized_transcripts_from_gene( transcripts ) diff --git a/src/cool_seq_tool/mappers/mane_transcript.py b/src/cool_seq_tool/mappers/mane_transcript.py index 4f6e3df..c7058f0 100644 --- a/src/cool_seq_tool/mappers/mane_transcript.py +++ b/src/cool_seq_tool/mappers/mane_transcript.py @@ -651,7 +651,8 @@ def get_prioritized_transcripts_from_gene(self, df: pl.DataFrame) -> list: most recent version of a transcript associated with an assembly will be kept """ copy_df = df.clone() - copy_df = copy_df.drop("alt_ac").unique() + if "alt_ac" in copy_df.columns: + copy_df = copy_df.drop("alt_ac").unique() copy_df = copy_df.with_columns( [ pl.col("tx_ac") @@ -670,9 +671,12 @@ def get_prioritized_transcripts_from_gene(self, df: pl.DataFrame) -> list: ) copy_df = copy_df.unique(["ac_no_version_as_int"], keep="first") + tx_ac_index = copy_df.columns.index("tx_ac") copy_df = copy_df.with_columns( copy_df.map_rows( - lambda x: len(self.seqrepo_access.get_reference_sequence(x[1])[0]) + lambda x: len( + self.seqrepo_access.get_reference_sequence(x[tx_ac_index])[0] + ) ) .to_series() .alias("len_of_tx")