diff --git a/cool_seq_tool/app.py b/cool_seq_tool/app.py index 7d5fb3f4..d0aa4f1e 100644 --- a/cool_seq_tool/app.py +++ b/cool_seq_tool/app.py @@ -496,13 +496,13 @@ async def _set_genomic_data(self, params: Dict, strand: int, f"{params['chr']}" chromosome_number, assembly = descr - liftover_data = self.uta_db.get_liftover( + liftover_pos = self.uta_db.get_liftover( chromosome_number, params["pos"], Assembly.GRCH38) - if liftover_data is None: + if liftover_pos is None: return f"Position {params['pos']} does not exist on " \ f"chromosome {chromosome_number}" - params["pos"] = liftover_data[1] + params["pos"] = liftover_pos params["chr"] = grch38_ac tx_exons = await self._structure_exons(params["transcript"], alt_ac=grch38_ac) diff --git a/cool_seq_tool/data_sources/mane_transcript.py b/cool_seq_tool/data_sources/mane_transcript.py index b4d7f292..3bb39ae8 100644 --- a/cool_seq_tool/data_sources/mane_transcript.py +++ b/cool_seq_tool/data_sources/mane_transcript.py @@ -723,7 +723,7 @@ async def g_to_grch38(self, ac: str, start_pos: int, if liftover_start_i is None: return None else: - start_pos = liftover_start_i[1] + start_pos = liftover_start_i if not is_same_pos: liftover_end_i = self.uta_db.get_liftover(chromosome, end_pos, @@ -731,7 +731,7 @@ async def g_to_grch38(self, ac: str, start_pos: int, if liftover_end_i is None: return None else: - end_pos = liftover_end_i[1] + end_pos = liftover_end_i else: end_pos = start_pos diff --git a/cool_seq_tool/data_sources/uta_database.py b/cool_seq_tool/data_sources/uta_database.py index 7d2396ed..fdb52465 100644 --- a/cool_seq_tool/data_sources/uta_database.py +++ b/cool_seq_tool/data_sources/uta_database.py @@ -975,20 +975,18 @@ async def liftover_to_38(self, genomic_tx_data: Dict) -> None: nc_acs = await self.execute_query(query) genomic_tx_data["alt_ac"] = nc_acs[0][0] - def get_liftover(self, chromosome: str, pos: int, - liftover_to_assembly: Assembly) -> Optional[Tuple]: - """Get new genome assembly data for a position on a chromosome. - - :param str chromosome: The chromosome number. Must be prefixed with `chr` - :param int pos: Position on the chromosome - :param Assembly liftover_to_assembly: Assembly to liftover to - :return: [Target chromosome, target position, target strand, - conversion_chain_score] for assembly + def get_liftover( + self, chromosome: str, pos: int, liftover_to_assembly: Assembly + ) -> Optional[int]: + """Get new genome assembly position for a given position on a chromosome. Does + not validate if position exists on lifted over accession, this must be done + separately. + + :param chromosome: The chromosome number. Must be prefixed with `chr` + :param pos: Position on the chromosome + :param liftover_to_assembly: Assembly to liftover to + :return: Target position for assembly """ - if not chromosome.startswith("chr"): - logger.warning("`chromosome` must be prefixed with chr") - return None - if liftover_to_assembly == Assembly.GRCH38: liftover = self.liftover_37_to_38.convert_coordinate(chromosome, pos) elif liftover_to_assembly == Assembly.GRCH37: @@ -997,11 +995,24 @@ def get_liftover(self, chromosome: str, pos: int, logger.warning(f"{liftover_to_assembly} assembly not supported") liftover = None - if liftover is None or len(liftover) == 0: - logger.warning(f"{pos} does not exist on {chromosome}") - return None + if liftover is None: + # If chromosome is completely unknown to the LiftOver, None is returned + # from pyliftover + logger.warning(f"Unknown chromosome: {chromosome}") else: - return liftover[0] + len_liftover = len(liftover) + if len_liftover == 1: + liftover = liftover[0][1] + elif len_liftover == 0: + liftover = pos + else: + logger.warning( + f"Multiple liftover results found for {chromosome} {pos}: " + f"{liftover}. Selecting first liftover" + ) + liftover = liftover[0][1] + + return liftover def _set_liftover(self, genomic_tx_data: Dict, key: str, chromosome: str, liftover_to_assembly: Assembly) -> None: @@ -1027,7 +1038,7 @@ def _set_liftover(self, genomic_tx_data: Dict, key: str, chromosome: str, f"{genomic_tx_data[key][1]} on {chromosome}") return None - genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1] + genomic_tx_data[key] = liftover_start_i, liftover_end_i async def p_to_c_ac(self, p_ac: str) -> List[str]: """Return c. accession from p. accession. diff --git a/cool_seq_tool/version.py b/cool_seq_tool/version.py index 850c30ac..44de9d69 100644 --- a/cool_seq_tool/version.py +++ b/cool_seq_tool/version.py @@ -1 +1 @@ -__version__ = "0.1.14-dev0" +__version__ = "0.1.14-dev1" diff --git a/tests/unit/test_uta_database.py b/tests/unit/test_uta_database.py index 42a99a0a..d228c791 100644 --- a/tests/unit/test_uta_database.py +++ b/tests/unit/test_uta_database.py @@ -278,10 +278,11 @@ async def test_liftover_to_38(test_db, genomic_tx_data): def test_get_liftover(test_db): """Test that get_liftover works correctly.""" resp = test_db.get_liftover("chr7", 140453136, "GRCh38") - assert resp == ("chr7", 140753336, "+", 14633688187) + assert resp == 140753336 + # pos out of index, this method does not validate resp = test_db.get_liftover("chr17", 140453136, "GRCh38") - assert resp is None + assert resp == 140453136 # not prefixed w chr resp = test_db.get_liftover("7", 140453136, "GRCh38")