diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index dfbfecfd..4256cfaa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,24 +1,50 @@ -name: Upload Python Package +# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ +name: Publish Python distribution to PyPI on: release: types: [created] jobs: - deploy: + build: + name: Build distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - - name: Install dependencies - run: | - python3 -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI }} - run: | - python3 setup.py sdist bdist_wheel - twine upload dist/* + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v3 + with: + name: python-package-distributions + path: dist/ + publish-to-pypi: + name: >- + Publish Python distribution to PyPI + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/cool-seq-tool + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/cool_seq_tool/mappers/mane_transcript.py b/cool_seq_tool/mappers/mane_transcript.py index 39d4054a..d059e3b3 100644 --- a/cool_seq_tool/mappers/mane_transcript.py +++ b/cool_seq_tool/mappers/mane_transcript.py @@ -18,7 +18,7 @@ AnnotationLayer, Assembly, ResidueMode, - TranscriptPriorityLabel, + TranscriptPriority, ) from cool_seq_tool.sources import ( MANETranscriptMappings, @@ -203,7 +203,7 @@ def _get_c_data( cds_start_end: Tuple[int, int], c_pos_change: Tuple[int, int], strand: str, - status: TranscriptPriorityLabel, + status: TranscriptPriority, refseq_c_ac: str, ensembl_c_ac: Optional[str] = None, alt_ac: Optional[str] = None, @@ -216,7 +216,7 @@ def _get_c_data( :param Tuple[int, int] c_pos_change: Start and end positions for change on c. coordinate :param str strand: Strand - :param TranscriptPriorityLabel status: Status of transcript + :param TranscriptPriority status: Status of transcript :param str refseq_c_ac: Refseq transcript :param Optional[str] ensembl_c_ac: Ensembl transcript :param Optional[str] alt_ac: Genomic accession @@ -263,14 +263,16 @@ def _get_mane_p(mane_data: Dict, mane_c_pos_range: Tuple[int, int]) -> Dict: math.floor(mane_c_pos_range[1] / 3), ), strand=mane_data["chr_strand"], - status="_".join(mane_data["MANE_status"].split()).lower(), + status=TranscriptPriority( + "_".join(mane_data["MANE_status"].split()).lower() + ), ) async def _g_to_c( self, g: Dict, refseq_c_ac: str, - status: TranscriptPriorityLabel, + status: TranscriptPriority, ensembl_c_ac: Optional[str] = None, alt_ac: Optional[str] = None, found_result: bool = False, @@ -279,7 +281,7 @@ async def _g_to_c( :param Dict g: Genomic data :param str refseq_c_ac: Refseq transcript accession - :param TranscriptPriorityLabel status: Status of transcript + :param TranscriptPriority status: Status of transcript :param Optional[str] ensembl_c_ac: Ensembl transcript accession :param Optional[str] alt_ac: Genomic accession :param bool found_result: `True` if found result, so do not need to query @@ -605,7 +607,7 @@ async def get_longest_compatible_transcript( lcr_c_data = await self._g_to_c( g=g, refseq_c_ac=tx_ac, - status=TranscriptPriorityLabel.LongestCompatibleRemaining.value, + status=TranscriptPriority.LONGEST_COMPATIBLE_REMAINING, found_result=found_tx_exon_aln_v_result, ) @@ -768,7 +770,9 @@ async def get_mane_transcript( mane = await self._g_to_c( g=g, refseq_c_ac=current_mane_data["RefSeq_nuc"], - status="_".join(current_mane_data["MANE_status"].split()).lower(), + status=TranscriptPriority( + "_".join(current_mane_data["MANE_status"].split()).lower() + ), ensembl_c_ac=current_mane_data["Ensembl_nuc"], ) if not mane: @@ -957,7 +961,7 @@ async def g_to_mane_c( coding_end_site=None, pos=grch38["pos"], strand=None, - status="GRCh38", + status=TranscriptPriority.GRCH38, alt_ac=grch38["ac"], ) @@ -1015,7 +1019,9 @@ async def g_to_mane_c( cds_start_end=(coding_start_site, coding_end_site), c_pos_change=mane_c_pos_change, strand=current_mane_data["chr_strand"], - status="_".join(current_mane_data["MANE_status"].split()).lower(), + status=TranscriptPriority( + "_".join(current_mane_data["MANE_status"].split()).lower() + ), refseq_c_ac=current_mane_data["RefSeq_nuc"], ensembl_c_ac=current_mane_data["Ensembl_nuc"], alt_ac=grch38["ac"] if grch38 else None, diff --git a/cool_seq_tool/schemas.py b/cool_seq_tool/schemas.py index 69dbf539..c06edb85 100644 --- a/cool_seq_tool/schemas.py +++ b/cool_seq_tool/schemas.py @@ -38,12 +38,13 @@ class Assembly(str, Enum): GRCH38 = "GRCh38" -class TranscriptPriorityLabel(str, Enum): +class TranscriptPriority(str, Enum): """Create Enum for Transcript Priority labels""" - MANESelect = "mane_select" - MANEPlusClinical = "mane_plus_clinical" - LongestCompatibleRemaining = "longest_compatible_remaining" + MANE_SELECT = "mane_select" + MANE_PLUS_CLINICAL = "mane_plus_clinical" + LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining" + GRCH38 = "grch38" class ResidueMode(str, Enum): @@ -308,7 +309,7 @@ class MappedManeData(BaseModel): refseq: StrictStr ensembl: Optional[StrictStr] = None strand: Strand - status: TranscriptPriorityLabel + status: TranscriptPriority alt_ac: StrictStr assembly: Assembly @@ -319,7 +320,7 @@ class MappedManeData(BaseModel): "refseq": "NM_001374258.1", "ensembl": "ENST00000644969.2", "strand": "-", - "status": "mane_plus_clinical", + "status": TranscriptPriority.MANE_PLUS_CLINICAL, "alt_ac": "NC_000007.13", "assembly": "GRCh37", } @@ -342,7 +343,7 @@ class MappedManeDataService(BaseModelForbidExtra): "refseq": "NM_001374258.1", "ensembl": "ENST00000644969.2", "strand": "-", - "status": "mane_plus_clinical", + "status": TranscriptPriority.MANE_PLUS_CLINICAL, "alt_ac": "NC_000007.13", "assembly": "GRCh37", }, @@ -366,7 +367,7 @@ class ManeData(BaseModel): ensembl: Optional[StrictStr] = None pos: Tuple[int, int] strand: Strand - status: TranscriptPriorityLabel + status: TranscriptPriority model_config = ConfigDict( json_schema_extra={ @@ -376,7 +377,7 @@ class ManeData(BaseModel): "ensembl": "ENSP00000493543.1", "pos": (598, 598), "strand": "-", - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, } } ) @@ -398,7 +399,7 @@ class ManeDataService(BaseModelForbidExtra): "ensembl": "ENSP00000493543.1", "pos": (598, 598), "strand": "-", - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, }, "warnings": [], "service_meta": { diff --git a/cool_seq_tool/sources/uta_database.py b/cool_seq_tool/sources/uta_database.py index 4c880377..53011351 100644 --- a/cool_seq_tool/sources/uta_database.py +++ b/cool_seq_tool/sources/uta_database.py @@ -865,8 +865,8 @@ async def get_gene_from_ac( async def get_transcripts_from_gene( self, - start_pos: int, - end_pos: int, + start_pos: Optional[int] = None, + end_pos: Optional[int] = None, gene: Optional[str] = None, use_tx_pos: bool = True, alt_ac: Optional[str] = None, @@ -874,33 +874,39 @@ async def get_transcripts_from_gene( """Get transcripts associated to a gene. :param start_pos: Start position change + If not provided and `end_pos` not provided, all transcripts associated with + the gene and/or accession will be returned :param end_pos: End position change + If not provided and `start_pos` not provided, all transcripts associated + with the gene and/or accession will be returned :param gene: HGNC gene symbol :param use_tx_pos: `True` if querying on transcript position. This means `start_pos` and `end_pos` are c. coordinate positions. `False` if querying on genomic position. This means `start_pos` and `end_pos` are g. coordinate positions - :param alt_ac: Genomic accession + :param alt_ac: Genomic accession. If not provided, must provide `gene` :return: Data Frame containing transcripts associated with a gene. Transcripts are ordered by most recent NC accession, then by - descending transcript length. + descending transcript length """ schema = ["pro_ac", "tx_ac", "alt_ac", "cds_start_i"] if not gene and not alt_ac: return pl.DataFrame([], schema=schema) - if use_tx_pos: - pos_cond = f""" - AND {start_pos} + T.cds_start_i - BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i - AND {end_pos} + T.cds_start_i - BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i - """ - else: - pos_cond = f""" - AND {start_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i - AND {end_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i - """ + pos_cond = "" + if start_pos is not None and end_pos is not None: + if use_tx_pos: + pos_cond = f""" + AND {start_pos} + T.cds_start_i + BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i + AND {end_pos} + T.cds_start_i + BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i + """ + else: + pos_cond = f""" + AND {start_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i + AND {end_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i + """ order_by_cond = """ ORDER BY SUBSTR(ALIGN.alt_ac, 0, position('.' in ALIGN.alt_ac)), diff --git a/tests/mappers/test_mane_transcript.py b/tests/mappers/test_mane_transcript.py index 2764ff8c..457e7d04 100644 --- a/tests/mappers/test_mane_transcript.py +++ b/tests/mappers/test_mane_transcript.py @@ -6,7 +6,7 @@ from mock import patch from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess -from cool_seq_tool.schemas import AnnotationLayer, ResidueMode +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode, TranscriptPriority @pytest.fixture(scope="module") @@ -62,7 +62,7 @@ def braf_v600e_mane_p(): "refseq": "NP_004324.2", "ensembl": "ENSP00000493543.1", "pos": (599, 599), - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, "strand": "-", "gene": "BRAF", } @@ -75,7 +75,7 @@ def egfr_l858r_mane_p(): "refseq": "NP_005219.2", "ensembl": "ENSP00000275493.2", "pos": (857, 857), - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, "strand": "+", "gene": "EGFR", } @@ -89,7 +89,7 @@ def braf_v600e_mane_c(): "refseq": "NM_004333.6", "ensembl": "ENST00000646891.2", "pos": (1798, 1798), - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, "strand": "-", "coding_start_site": 226, "coding_end_site": 2527, @@ -105,7 +105,7 @@ def egfr_l858r_mane_c(): "refseq": "NM_005228.5", "ensembl": "ENST00000275493.7", "pos": (2572, 2572), - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, "strand": "+", "coding_start_site": 261, "coding_end_site": 3894, @@ -124,7 +124,7 @@ def grch38(): "coding_end_site": None, "pos": (55191821, 55191821), "strand": None, - "status": "GRCh38", + "status": TranscriptPriority.GRCH38, "alt_ac": "NC_000007.14", } @@ -140,7 +140,7 @@ def mybpc3_s236g(): "refseq": "NP_000247.2", "ensembl": "ENSP00000442795.1", "pos": (235, 235), - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, "strand": "-", "gene": "MYBPC3", } @@ -360,7 +360,7 @@ async def test_p_to_mane_p(test_mane_transcript, braf_v600e_mane_p, egfr_l858r_m "ensembl": "ENSP00000366997.4", "pos": (62, 62), "strand": "-", - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, } @@ -450,7 +450,7 @@ async def test_c_to_mane_c(test_mane_transcript, braf_v600e_mane_c, egfr_l858r_m "refseq": "NM_004448.4", "ensembl": "ENST00000269571.10", "pos": (2263, 2277), - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, "strand": "+", "coding_start_site": 175, "coding_end_site": 3943, @@ -506,7 +506,7 @@ async def test_get_longest_compatible_transcript(test_mane_transcript): "ensembl": None, "pos": (599, 599), "strand": "-", - "status": "longest_compatible_remaining", + "status": TranscriptPriority.LONGEST_COMPATIBLE_REMAINING, } resp = await test_mane_transcript.get_longest_compatible_transcript( 599, @@ -533,7 +533,7 @@ async def test_get_longest_compatible_transcript(test_mane_transcript): "ensembl": None, "pos": (1798, 1798), "strand": "-", - "status": "longest_compatible_remaining", + "status": TranscriptPriority.LONGEST_COMPATIBLE_REMAINING, } resp = await test_mane_transcript.get_longest_compatible_transcript( 1799, @@ -567,7 +567,7 @@ async def test_get_longest_compatible_transcript(test_mane_transcript): "ensembl": None, "pos": (1807, 1807), "strand": "-", - "status": "longest_compatible_remaining", + "status": TranscriptPriority.LONGEST_COMPATIBLE_REMAINING, } # CA1139661942 has no other RefSeq accessions @@ -595,7 +595,7 @@ async def test_get_longest_compatible_transcript(test_mane_transcript): "ensembl": None, "pos": (2103, 2120), "strand": "+", - "status": "longest_compatible_remaining", + "status": TranscriptPriority.LONGEST_COMPATIBLE_REMAINING, } # protein @@ -612,7 +612,7 @@ async def test_get_longest_compatible_transcript(test_mane_transcript): "ensembl": None, "pos": (701, 706), "strand": "+", - "status": "longest_compatible_remaining", + "status": TranscriptPriority.LONGEST_COMPATIBLE_REMAINING, } resp = await test_mane_transcript.get_longest_compatible_transcript( @@ -628,7 +628,7 @@ async def test_get_longest_compatible_transcript(test_mane_transcript): "ensembl": None, "pos": (239, 258), "strand": "-", - "status": "longest_compatible_remaining", + "status": TranscriptPriority.LONGEST_COMPATIBLE_REMAINING, } @@ -721,7 +721,7 @@ async def test_g_to_mane_c( "refseq": "NM_004985.5", "ensembl": "ENST00000311936.8", "pos": (34, 34), - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, "strand": "-", "coding_start_site": 190, "coding_end_site": 757, @@ -736,7 +736,7 @@ async def test_g_to_mane_c( "refseq": "NM_005228.5", "ensembl": "ENST00000275493.7", "pos": (2368, 2368), - "status": "mane_select", + "status": TranscriptPriority.MANE_SELECT, "strand": "+", "coding_start_site": 261, "coding_end_site": 3894, diff --git a/tests/sources/test_uta_database.py b/tests/sources/test_uta_database.py index 985496d0..a6819f4e 100644 --- a/tests/sources/test_uta_database.py +++ b/tests/sources/test_uta_database.py @@ -281,11 +281,35 @@ async def test_get_gene_from_ac(test_db): @pytest.mark.asyncio async def test_get_transcripts_from_gene(test_db): - """Test that get_trasncripts_from_gene works correctly.""" - resp = await test_db.get_transcripts_from_gene(2145, 2145, gene="BRAF") + """Test that get_transcripts_from_gene works correctly.""" + resp = await test_db.get_transcripts_from_gene( + start_pos=2145, end_pos=2145, gene="BRAF" + ) + assert len(resp) == 32 + + # using no start/end pos + resp = await test_db.get_transcripts_from_gene(gene="BRAF") assert len(resp) == 32 - resp = await test_db.get_transcripts_from_gene(140453136, 140453136, gene="BRAF") + # using 0 start/end pos + resp = await test_db.get_transcripts_from_gene(gene="BRAF", start_pos=0, end_pos=0) + assert len(resp) == 32 + + # using 0 genomic start/end pos + resp = await test_db.get_transcripts_from_gene( + gene="BRAF", start_pos=0, end_pos=0, use_tx_pos=False + ) + assert len(resp) == 0 + + # using gene with genomic pos + resp = await test_db.get_transcripts_from_gene( + gene="BRAF", start_pos=140753336, end_pos=140753336, use_tx_pos=False + ) + assert len(resp) == 16 + + resp = await test_db.get_transcripts_from_gene( + gene="BRAF", start_pos=140453136, end_pos=140453136 + ) assert len(resp) == 0