Merge branch 'main' into issue-151

GenomicMedLab · Oct 16, 2023 · c3c430d · c3c430d
2 parents 78c4fbb + cf06d99
commit c3c430d
Show file tree

Hide file tree

Showing 6 changed files with 134 additions and 71 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -1,24 +1,50 @@
-name: Upload Python Package
+# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
+name: Publish Python distribution to PyPI
 
 on:
   release:
     types: [created]
 
 jobs:
-  deploy:
+  build:
+    name: Build distribution
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
-    - name: Install dependencies
-      run: |
-        python3 -m pip install --upgrade pip
-        pip install setuptools wheel twine
-    - name: Build and publish
-      env:
-        TWINE_USERNAME: __token__
-        TWINE_PASSWORD: ${{ secrets.PYPI }}
-      run: |
-        python3 setup.py sdist bdist_wheel
-        twine upload dist/*
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.x"
+      - name: Install pypa/build
+        run: >-
+          python3 -m
+          pip install
+          build
+          --user
+      - name: Build a binary wheel and a source tarball
+        run: python3 -m build
+      - name: Store the distribution packages
+        uses: actions/upload-artifact@v3
+        with:
+          name: python-package-distributions
+          path: dist/
+  publish-to-pypi:
+    name: >-
+      Publish Python distribution to PyPI
+    needs:
+      - build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/cool-seq-tool
+    permissions:
+      id-token: write # IMPORTANT: mandatory for trusted publishing
+    steps:
+      - name: Download all the dists
+        uses: actions/download-artifact@v3
+        with:
+          name: python-package-distributions
+          path: dist/
+      - name: Publish distribution to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/cool_seq_tool/mappers/mane_transcript.py b/cool_seq_tool/mappers/mane_transcript.py
@@ -18,7 +18,7 @@
     AnnotationLayer,
     Assembly,
     ResidueMode,
-    TranscriptPriorityLabel,
+    TranscriptPriority,
 )
 from cool_seq_tool.sources import (
     MANETranscriptMappings,
@@ -203,7 +203,7 @@ def _get_c_data(
         cds_start_end: Tuple[int, int],
         c_pos_change: Tuple[int, int],
         strand: str,
-        status: TranscriptPriorityLabel,
+        status: TranscriptPriority,
         refseq_c_ac: str,
         ensembl_c_ac: Optional[str] = None,
         alt_ac: Optional[str] = None,
@@ -216,7 +216,7 @@ def _get_c_data(
         :param Tuple[int, int] c_pos_change: Start and end positions
             for change on c. coordinate
         :param str strand: Strand
-        :param TranscriptPriorityLabel status: Status of transcript
+        :param TranscriptPriority status: Status of transcript
         :param str refseq_c_ac: Refseq transcript
         :param Optional[str] ensembl_c_ac: Ensembl transcript
         :param Optional[str] alt_ac: Genomic accession
@@ -263,14 +263,16 @@ def _get_mane_p(mane_data: Dict, mane_c_pos_range: Tuple[int, int]) -> Dict:
                 math.floor(mane_c_pos_range[1] / 3),
             ),
             strand=mane_data["chr_strand"],
-            status="_".join(mane_data["MANE_status"].split()).lower(),
+            status=TranscriptPriority(
+                "_".join(mane_data["MANE_status"].split()).lower()
+            ),
         )
 
     async def _g_to_c(
         self,
         g: Dict,
         refseq_c_ac: str,
-        status: TranscriptPriorityLabel,
+        status: TranscriptPriority,
         ensembl_c_ac: Optional[str] = None,
         alt_ac: Optional[str] = None,
         found_result: bool = False,
@@ -279,7 +281,7 @@ async def _g_to_c(
 
         :param Dict g: Genomic data
         :param str refseq_c_ac: Refseq transcript accession
-        :param TranscriptPriorityLabel status: Status of transcript
+        :param TranscriptPriority status: Status of transcript
         :param Optional[str] ensembl_c_ac: Ensembl transcript accession
         :param Optional[str] alt_ac: Genomic accession
         :param bool found_result: `True` if found result, so do not need to query
@@ -605,7 +607,7 @@ async def get_longest_compatible_transcript(
             lcr_c_data = await self._g_to_c(
                 g=g,
                 refseq_c_ac=tx_ac,
-                status=TranscriptPriorityLabel.LongestCompatibleRemaining.value,
+                status=TranscriptPriority.LONGEST_COMPATIBLE_REMAINING,
                 found_result=found_tx_exon_aln_v_result,
             )
 
@@ -768,7 +770,9 @@ async def get_mane_transcript(
                 mane = await self._g_to_c(
                     g=g,
                     refseq_c_ac=current_mane_data["RefSeq_nuc"],
-                    status="_".join(current_mane_data["MANE_status"].split()).lower(),
+                    status=TranscriptPriority(
+                        "_".join(current_mane_data["MANE_status"].split()).lower()
+                    ),
                     ensembl_c_ac=current_mane_data["Ensembl_nuc"],
                 )
                 if not mane:
@@ -957,7 +961,7 @@ async def g_to_mane_c(
                 coding_end_site=None,
                 pos=grch38["pos"],
                 strand=None,
-                status="GRCh38",
+                status=TranscriptPriority.GRCH38,
                 alt_ac=grch38["ac"],
             )
 
@@ -1015,7 +1019,9 @@ async def g_to_mane_c(
                 cds_start_end=(coding_start_site, coding_end_site),
                 c_pos_change=mane_c_pos_change,
                 strand=current_mane_data["chr_strand"],
-                status="_".join(current_mane_data["MANE_status"].split()).lower(),
+                status=TranscriptPriority(
+                    "_".join(current_mane_data["MANE_status"].split()).lower()
+                ),
                 refseq_c_ac=current_mane_data["RefSeq_nuc"],
                 ensembl_c_ac=current_mane_data["Ensembl_nuc"],
                 alt_ac=grch38["ac"] if grch38 else None,

diff --git a/cool_seq_tool/schemas.py b/cool_seq_tool/schemas.py
@@ -38,12 +38,13 @@ class Assembly(str, Enum):
     GRCH38 = "GRCh38"
 
 
-class TranscriptPriorityLabel(str, Enum):
+class TranscriptPriority(str, Enum):
     """Create Enum for Transcript Priority labels"""
 
-    MANESelect = "mane_select"
-    MANEPlusClinical = "mane_plus_clinical"
-    LongestCompatibleRemaining = "longest_compatible_remaining"
+    MANE_SELECT = "mane_select"
+    MANE_PLUS_CLINICAL = "mane_plus_clinical"
+    LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
+    GRCH38 = "grch38"
 
 
 class ResidueMode(str, Enum):
@@ -308,7 +309,7 @@ class MappedManeData(BaseModel):
     refseq: StrictStr
     ensembl: Optional[StrictStr] = None
     strand: Strand
-    status: TranscriptPriorityLabel
+    status: TranscriptPriority
     alt_ac: StrictStr
     assembly: Assembly
 
@@ -319,7 +320,7 @@ class MappedManeData(BaseModel):
                 "refseq": "NM_001374258.1",
                 "ensembl": "ENST00000644969.2",
                 "strand": "-",
-                "status": "mane_plus_clinical",
+                "status": TranscriptPriority.MANE_PLUS_CLINICAL,
                 "alt_ac": "NC_000007.13",
                 "assembly": "GRCh37",
             }
@@ -342,7 +343,7 @@ class MappedManeDataService(BaseModelForbidExtra):
                     "refseq": "NM_001374258.1",
                     "ensembl": "ENST00000644969.2",
                     "strand": "-",
-                    "status": "mane_plus_clinical",
+                    "status": TranscriptPriority.MANE_PLUS_CLINICAL,
                     "alt_ac": "NC_000007.13",
                     "assembly": "GRCh37",
                 },
@@ -366,7 +367,7 @@ class ManeData(BaseModel):
     ensembl: Optional[StrictStr] = None
     pos: Tuple[int, int]
     strand: Strand
-    status: TranscriptPriorityLabel
+    status: TranscriptPriority
 
     model_config = ConfigDict(
         json_schema_extra={
@@ -376,7 +377,7 @@ class ManeData(BaseModel):
                 "ensembl": "ENSP00000493543.1",
                 "pos": (598, 598),
                 "strand": "-",
-                "status": "mane_select",
+                "status": TranscriptPriority.MANE_SELECT,
             }
         }
     )
@@ -398,7 +399,7 @@ class ManeDataService(BaseModelForbidExtra):
                     "ensembl": "ENSP00000493543.1",
                     "pos": (598, 598),
                     "strand": "-",
-                    "status": "mane_select",
+                    "status": TranscriptPriority.MANE_SELECT,
                 },
                 "warnings": [],
                 "service_meta": {

diff --git a/cool_seq_tool/sources/uta_database.py b/cool_seq_tool/sources/uta_database.py
@@ -865,42 +865,48 @@ async def get_gene_from_ac(
 
     async def get_transcripts_from_gene(
         self,
-        start_pos: int,
-        end_pos: int,
+        start_pos: Optional[int] = None,
+        end_pos: Optional[int] = None,
         gene: Optional[str] = None,
         use_tx_pos: bool = True,
         alt_ac: Optional[str] = None,
     ) -> pl.DataFrame:
         """Get transcripts associated to a gene.
 
         :param start_pos: Start position change
+            If not provided and `end_pos` not provided, all transcripts associated with
+            the gene and/or accession will be returned
         :param end_pos: End position change
+            If not provided and `start_pos` not provided, all transcripts associated
+            with the gene and/or accession will be returned
         :param gene: HGNC gene symbol
         :param use_tx_pos: `True` if querying on transcript position. This means
             `start_pos` and `end_pos` are c. coordinate positions. `False` if querying
             on genomic position. This means `start_pos` and `end_pos` are g. coordinate
             positions
-        :param alt_ac: Genomic accession
+        :param alt_ac: Genomic accession. If not provided, must provide `gene`
         :return: Data Frame containing transcripts associated with a gene.
             Transcripts are ordered by most recent NC accession, then by
-            descending transcript length.
+            descending transcript length
         """
         schema = ["pro_ac", "tx_ac", "alt_ac", "cds_start_i"]
         if not gene and not alt_ac:
             return pl.DataFrame([], schema=schema)
 
-        if use_tx_pos:
-            pos_cond = f"""
-                AND {start_pos} + T.cds_start_i
-                    BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i
-                AND {end_pos} + T.cds_start_i
-                    BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i
-                """
-        else:
-            pos_cond = f"""
-                AND {start_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i
-                AND {end_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i
-                """
+        pos_cond = ""
+        if start_pos is not None and end_pos is not None:
+            if use_tx_pos:
+                pos_cond = f"""
+                    AND {start_pos} + T.cds_start_i
+                        BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i
+                    AND {end_pos} + T.cds_start_i
+                        BETWEEN ALIGN.tx_start_i AND ALIGN.tx_end_i
+                    """
+            else:
+                pos_cond = f"""
+                    AND {start_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i
+                    AND {end_pos} BETWEEN ALIGN.alt_start_i AND ALIGN.alt_end_i
+                    """
 
         order_by_cond = """
         ORDER BY SUBSTR(ALIGN.alt_ac, 0, position('.' in ALIGN.alt_ac)),