Skip to content

Commit

Permalink
fix: pyliftover.convert_coordinate handling
Browse files Browse the repository at this point in the history
- pyliftover's `convert_coordinate` method only returns None when the chromosome is unknown
  • Loading branch information
korikuzma committed Aug 17, 2023
1 parent b5201d3 commit f1d4b33
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 26 deletions.
6 changes: 3 additions & 3 deletions cool_seq_tool/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,13 +496,13 @@ async def _set_genomic_data(self, params: Dict, strand: int,
f"{params['chr']}"

chromosome_number, assembly = descr
liftover_data = self.uta_db.get_liftover(
liftover_pos = self.uta_db.get_liftover(
chromosome_number, params["pos"], Assembly.GRCH38)
if liftover_data is None:
if liftover_pos is None:
return f"Position {params['pos']} does not exist on " \
f"chromosome {chromosome_number}"

params["pos"] = liftover_data[1]
params["pos"] = liftover_pos
params["chr"] = grch38_ac

tx_exons = await self._structure_exons(params["transcript"], alt_ac=grch38_ac)
Expand Down
4 changes: 2 additions & 2 deletions cool_seq_tool/data_sources/mane_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,15 +723,15 @@ async def g_to_grch38(self, ac: str, start_pos: int,
if liftover_start_i is None:
return None
else:
start_pos = liftover_start_i[1]
start_pos = liftover_start_i

if not is_same_pos:
liftover_end_i = self.uta_db.get_liftover(chromosome, end_pos,
Assembly.GRCH38)
if liftover_end_i is None:
return None
else:
end_pos = liftover_end_i[1]
end_pos = liftover_end_i
else:
end_pos = start_pos

Expand Down
47 changes: 29 additions & 18 deletions cool_seq_tool/data_sources/uta_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,20 +975,18 @@ async def liftover_to_38(self, genomic_tx_data: Dict) -> None:
nc_acs = await self.execute_query(query)
genomic_tx_data["alt_ac"] = nc_acs[0][0]

def get_liftover(self, chromosome: str, pos: int,
liftover_to_assembly: Assembly) -> Optional[Tuple]:
"""Get new genome assembly data for a position on a chromosome.
:param str chromosome: The chromosome number. Must be prefixed with `chr`
:param int pos: Position on the chromosome
:param Assembly liftover_to_assembly: Assembly to liftover to
:return: [Target chromosome, target position, target strand,
conversion_chain_score] for assembly
def get_liftover(
self, chromosome: str, pos: int, liftover_to_assembly: Assembly
) -> Optional[int]:
"""Get new genome assembly position for a given position on a chromosome. Does
not validate if position exists on lifted over accession, this must be done
separately.
:param chromosome: The chromosome number. Must be prefixed with `chr`
:param pos: Position on the chromosome
:param liftover_to_assembly: Assembly to liftover to
:return: Target position for assembly
"""
if not chromosome.startswith("chr"):
logger.warning("`chromosome` must be prefixed with chr")
return None

if liftover_to_assembly == Assembly.GRCH38:
liftover = self.liftover_37_to_38.convert_coordinate(chromosome, pos)
elif liftover_to_assembly == Assembly.GRCH37:
Expand All @@ -997,11 +995,24 @@ def get_liftover(self, chromosome: str, pos: int,
logger.warning(f"{liftover_to_assembly} assembly not supported")
liftover = None

if liftover is None or len(liftover) == 0:
logger.warning(f"{pos} does not exist on {chromosome}")
return None
if liftover is None:
# If chromosome is completely unknown to the LiftOver, None is returned
# from pyliftover
logger.warning(f"Unknown chromosome: {chromosome}")
else:
return liftover[0]
len_liftover = len(liftover)
if len_liftover == 1:
liftover = liftover[0][1]
elif len_liftover == 0:
liftover = pos
else:
logger.warning(
f"Multiple liftover results found for {chromosome} {pos}: "
f"{liftover}. Selecting first liftover"
)
liftover = liftover[0][1]

return liftover

def _set_liftover(self, genomic_tx_data: Dict, key: str, chromosome: str,
liftover_to_assembly: Assembly) -> None:
Expand All @@ -1027,7 +1038,7 @@ def _set_liftover(self, genomic_tx_data: Dict, key: str, chromosome: str,
f"{genomic_tx_data[key][1]} on {chromosome}")
return None

genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1]
genomic_tx_data[key] = liftover_start_i, liftover_end_i

async def p_to_c_ac(self, p_ac: str) -> List[str]:
"""Return c. accession from p. accession.
Expand Down
2 changes: 1 addition & 1 deletion cool_seq_tool/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.14-dev0"
__version__ = "0.1.14-dev1"
5 changes: 3 additions & 2 deletions tests/unit/test_uta_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,11 @@ async def test_liftover_to_38(test_db, genomic_tx_data):
def test_get_liftover(test_db):
"""Test that get_liftover works correctly."""
resp = test_db.get_liftover("chr7", 140453136, "GRCh38")
assert resp == ("chr7", 140753336, "+", 14633688187)
assert resp == 140753336

# pos out of index, this method does not validate
resp = test_db.get_liftover("chr17", 140453136, "GRCh38")
assert resp is None
assert resp == 140453136

# not prefixed w chr
resp = test_db.get_liftover("7", 140453136, "GRCh38")
Expand Down

0 comments on commit f1d4b33

Please sign in to comment.