Skip to content

Commit

Permalink
Merge branch 'main' into issue-151
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Oct 16, 2023
2 parents 78c4fbb + cf06d99 commit c3c430d
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 71 deletions.
56 changes: 41 additions & 15 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -1,24 +1,50 @@
name: Upload Python Package
# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
name: Publish Python distribution to PyPI

on:
release:
types: [created]

jobs:
deploy:
build:
name: Build distribution
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- name: Install dependencies
run: |
python3 -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI }}
run: |
python3 setup.py sdist bdist_wheel
twine upload dist/*
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.x"
- name: Install pypa/build
run: >-
python3 -m
pip install
build
--user
- name: Build a binary wheel and a source tarball
run: python3 -m build
- name: Store the distribution packages
uses: actions/upload-artifact@v3
with:
name: python-package-distributions
path: dist/
publish-to-pypi:
name: >-
Publish Python distribution to PyPI
needs:
- build
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/cool-seq-tool
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing
steps:
- name: Download all the dists
uses: actions/download-artifact@v3
with:
name: python-package-distributions
path: dist/
- name: Publish distribution to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
26 changes: 16 additions & 10 deletions cool_seq_tool/mappers/mane_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
AnnotationLayer,
Assembly,
ResidueMode,
TranscriptPriorityLabel,
TranscriptPriority,
)
from cool_seq_tool.sources import (
MANETranscriptMappings,
Expand Down Expand Up @@ -203,7 +203,7 @@ def _get_c_data(
cds_start_end: Tuple[int, int],
c_pos_change: Tuple[int, int],
strand: str,
status: TranscriptPriorityLabel,
status: TranscriptPriority,
refseq_c_ac: str,
ensembl_c_ac: Optional[str] = None,
alt_ac: Optional[str] = None,
Expand All @@ -216,7 +216,7 @@ def _get_c_data(
:param Tuple[int, int] c_pos_change: Start and end positions
for change on c. coordinate
:param str strand: Strand
:param TranscriptPriorityLabel status: Status of transcript
:param TranscriptPriority status: Status of transcript
:param str refseq_c_ac: Refseq transcript
:param Optional[str] ensembl_c_ac: Ensembl transcript
:param Optional[str] alt_ac: Genomic accession
Expand Down Expand Up @@ -263,14 +263,16 @@ def _get_mane_p(mane_data: Dict, mane_c_pos_range: Tuple[int, int]) -> Dict:
math.floor(mane_c_pos_range[1] / 3),
),
strand=mane_data["chr_strand"],
status="_".join(mane_data["MANE_status"].split()).lower(),
status=TranscriptPriority(
"_".join(mane_data["MANE_status"].split()).lower()
),
)

async def _g_to_c(
self,
g: Dict,
refseq_c_ac: str,
status: TranscriptPriorityLabel,
status: TranscriptPriority,
ensembl_c_ac: Optional[str] = None,
alt_ac: Optional[str] = None,
found_result: bool = False,
Expand All @@ -279,7 +281,7 @@ async def _g_to_c(
:param Dict g: Genomic data
:param str refseq_c_ac: Refseq transcript accession
:param TranscriptPriorityLabel status: Status of transcript
:param TranscriptPriority status: Status of transcript
:param Optional[str] ensembl_c_ac: Ensembl transcript accession
:param Optional[str] alt_ac: Genomic accession
:param bool found_result: `True` if found result, so do not need to query
Expand Down Expand Up @@ -605,7 +607,7 @@ async def get_longest_compatible_transcript(
lcr_c_data = await self._g_to_c(
g=g,
refseq_c_ac=tx_ac,
status=TranscriptPriorityLabel.LongestCompatibleRemaining.value,
status=TranscriptPriority.LONGEST_COMPATIBLE_REMAINING,
found_result=found_tx_exon_aln_v_result,
)

Expand Down Expand Up @@ -768,7 +770,9 @@ async def get_mane_transcript(
mane = await self._g_to_c(
g=g,
refseq_c_ac=current_mane_data["RefSeq_nuc"],
status="_".join(current_mane_data["MANE_status"].split()).lower(),
status=TranscriptPriority(
"_".join(current_mane_data["MANE_status"].split()).lower()
),
ensembl_c_ac=current_mane_data["Ensembl_nuc"],
)
if not mane:
Expand Down Expand Up @@ -957,7 +961,7 @@ async def g_to_mane_c(
coding_end_site=None,
pos=grch38["pos"],
strand=None,
status="GRCh38",
status=TranscriptPriority.GRCH38,
alt_ac=grch38["ac"],
)

Expand Down Expand Up @@ -1015,7 +1019,9 @@ async def g_to_mane_c(
cds_start_end=(coding_start_site, coding_end_site),
c_pos_change=mane_c_pos_change,
strand=current_mane_data["chr_strand"],
status="_".join(current_mane_data["MANE_status"].split()).lower(),
status=TranscriptPriority(
"_".join(current_mane_data["MANE_status"].split()).lower()
),
refseq_c_ac=current_mane_data["RefSeq_nuc"],
ensembl_c_ac=current_mane_data["Ensembl_nuc"],
alt_ac=grch38["ac"] if grch38 else None,
Expand Down
21 changes: 11 additions & 10 deletions cool_seq_tool/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ class Assembly(str, Enum):
GRCH38 = "GRCh38"


class TranscriptPriorityLabel(str, Enum):
class TranscriptPriority(str, Enum):
"""Create Enum for Transcript Priority labels"""

MANESelect = "mane_select"
MANEPlusClinical = "mane_plus_clinical"
LongestCompatibleRemaining = "longest_compatible_remaining"
MANE_SELECT = "mane_select"
MANE_PLUS_CLINICAL = "mane_plus_clinical"
LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
GRCH38 = "grch38"


class ResidueMode(str, Enum):
Expand Down Expand Up @@ -308,7 +309,7 @@ class MappedManeData(BaseModel):
refseq: StrictStr
ensembl: Optional[StrictStr] = None
strand: Strand
status: TranscriptPriorityLabel
status: TranscriptPriority
alt_ac: StrictStr
assembly: Assembly

Expand All @@ -319,7 +320,7 @@ class MappedManeData(BaseModel):
"refseq": "NM_001374258.1",
"ensembl": "ENST00000644969.2",
"strand": "-",
"status": "mane_plus_clinical",
"status": TranscriptPriority.MANE_PLUS_CLINICAL,
"alt_ac": "NC_000007.13",
"assembly": "GRCh37",
}
Expand All @@ -342,7 +343,7 @@ class MappedManeDataService(BaseModelForbidExtra):
"refseq": "NM_001374258.1",
"ensembl": "ENST00000644969.2",
"strand": "-",
"status": "mane_plus_clinical",
"status": TranscriptPriority.MANE_PLUS_CLINICAL,
"alt_ac": "NC_000007.13",
"assembly": "GRCh37",
},
Expand All @@ -366,7 +367,7 @@ class ManeData(BaseModel):
ensembl: Optional[StrictStr] = None
pos: Tuple[int, int]
strand: Strand
status: TranscriptPriorityLabel
status: TranscriptPriority

model_config = ConfigDict(
json_schema_extra={
Expand All @@ -376,7 +377,7 @@ class ManeData(BaseModel):
"ensembl": "ENSP00000493543.1",
"pos": (598, 598),
"strand": "-",
"status": "mane_select",
"status": TranscriptPriority.MANE_SELECT,
}
}
)
Expand All @@ -398,7 +399,7 @@ class ManeDataService(BaseModelForbidExtra):
"ensembl": "ENSP00000493543.1",
"pos": (598, 598),
"strand": "-",
"status": "mane_select",
"status": TranscriptPriority.MANE_SELECT,
},
"warnings": [],
"service_meta": {
Expand Down
38 changes: 22 additions & 16 deletions cool_seq_tool/sources/uta_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,42 +865,48 @@ async def get_gene_from_ac(

async def get_transcripts_from_gene(
self,
start_pos: int,
end_pos: int,
start_pos: Optional[int] = None,
end_pos: Optional[int] = None,
gene: Optional[str] = None,
use_tx_pos: bool = True,
alt_ac: Optional[str] = None,
) -> pl.DataFrame:
"""Get transcripts associated to a gene.
:param start_pos: Start position of the change. The position filter is only
applied when both `start_pos` and `end_pos` are provided; if either is omitted,
all transcripts associated with the gene and/or accession will be returned
:param end_pos: End position of the change. The position filter is only
applied when both `start_pos` and `end_pos` are provided; if either is omitted,
all transcripts associated with the gene and/or accession will be returned
:param gene: HGNC gene symbol
:param use_tx_pos: `True` if querying on transcript position. This means
`start_pos` and `end_pos` are c. coordinate positions. `False` if querying
on genomic position. This means `start_pos` and `end_pos` are g. coordinate
positions
:param alt_ac: Genomic accession
:param alt_ac: Genomic accession. If not provided, must provide `gene`
:return: Data Frame containing transcripts associated with a gene.
Transcripts are ordered by most recent NC accession, then by
descending transcript length.
descending transcript length
"""
schema = ["pro_ac", "tx_ac", "alt_ac", "cds_start_i"]
if not gene and not alt_ac:
return pl.DataFrame([], schema=schema)

if use_tx_pos:
pos_cond = f"""
AND {start_pos} + T.cds_start_i
BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i
AND {end_pos} + T.cds_start_i
BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i
"""
else:
pos_cond = f"""
AND {start_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i
AND {end_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i
"""
pos_cond = ""
if start_pos is not None and end_pos is not None:
if use_tx_pos:
pos_cond = f"""
AND {start_pos} + T.cds_start_i
BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i
AND {end_pos} + T.cds_start_i
BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i
"""
else:
pos_cond = f"""
AND {start_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i
AND {end_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i
"""

order_by_cond = """
ORDER BY SUBSTR(ALIGN.alt_ac, 0, position('.' in ALIGN.alt_ac)),
Expand Down
Loading

0 comments on commit c3c430d

Please sign in to comment.