GenomicMedLab · korikuzma · Oct 13, 2023 · Aug 24, 2023 · Sep 7, 2023 · Sep 7, 2023
diff --git a/.flake8 b/.flake8
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -0,0 +1,32 @@
+name: checks  # CI checks: dependency install across a Python matrix, plus lint
+on: [push, pull_request]  # run for every push and every pull request
+jobs:
+  deps:  # verifies the Pipfile dependencies install on each Python version in the matrix
+    name: deps py${{ matrix.python-version }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # keep the other matrix jobs running when one version fails
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]  # quoted so 3.10 is not read as the float 3.1
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python3 -m pip install pipenv
+          pipenv install --dev
+  lint:  # formatting and lint checks; runs once, outside the Python-version matrix
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: black
+        uses: psf/black@stable  # no `with:` inputs given, so the action's defaults apply
+
+      - name: ruff
+        uses: chartboost/ruff-action@v1  # no `with:` inputs given, so the action's defaults apply
diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml
diff --git a/.gitignore b/.gitignore
@@ -129,7 +129,6 @@ dmypy.json
 .pyre/
 
 Pipfile.lock
-pyproject.toml
 
 # Data files
 cool_seq_tool/data/seqrepo/

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,10 +1,21 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
--   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v1.4.0
-    hooks:
-    - id: flake8
-      additional_dependencies: [flake8-docstrings, flake8-quotes, flake8-annotations, flake8-import-order]
-    - id: check-added-large-files
-    - id: detect-private-key
+- repo: https://github.com/pre-commit/pre-commit-hooks  # general-purpose file hygiene hooks
+  rev: v1.4.0
+  hooks:
+  - id: check-added-large-files
+  - id: detect-private-key
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+- repo: https://github.com/psf/black  # code formatter
+  rev: 23.9.1
+  hooks:
+    - id: black
+      language_version: python3.11  # interpreter version pre-commit uses to run this hook
+- repo: https://github.com/astral-sh/ruff-pre-commit  # linter (takes over from the removed flake8 hook)
+  # Ruff version.
+  rev: v0.0.291
+  hooks:
+    - id: ruff
+      args: [ --fix, --exit-non-zero-on-fix ]  # auto-fix what it can, but still fail so the fixes get re-staged
diff --git a/Pipfile b/Pipfile
@@ -8,7 +8,7 @@ aiofiles = "*"
 asyncpg = "*"
 boto3 = "*"
 pyliftover = "*"
-pandas = "*"
+polars = "*"
 hgvs = "*"
 "biocommons.seqrepo" = "*"
 pydantic = "*"
@@ -20,14 +20,11 @@ uvicorn = "*"
 cool_seq_tool = {editable = true, path = "."}
 pytest = "*"
 pre-commit = "*"
-flake8 = "*"
-flake8-docstrings = "*"
-flake8-annotations = "*"
-flake8-quotes = "*"
-flake8-import-order = "*"
 pytest-cov = "*"
 pytest-asyncio = "==0.18.3"
 ipython = "*"
 ipykernel = "*"
 psycopg2-binary = "*"
 mock = "*"
+ruff = "*"
+black = "*"
diff --git a/README.md b/README.md
@@ -128,7 +128,8 @@ uvicorn cool_seq_tool.api:app --reload
 Next, view the FastAPI on your local machine: http://127.0.0.1:8000/cool_seq_tool
 
 ## Init coding style tests
-Code style is managed by [flake8](https://github.com/PyCQA/flake8) and checked prior to commit.
+
+Code style is managed by [Ruff](https://github.com/astral-sh/ruff) and [Black](https://github.com/psf/black), and should be checked prior to commit.
 
 We use [pre-commit](https://pre-commit.com/#usage) to run conformance tests.
 

diff --git a/cool_seq_tool/__init__.py b/cool_seq_tool/__init__.py
@@ -1,12 +1,12 @@
 """The cool_seq_tool package"""
-from pathlib import Path
 import logging
+from pathlib import Path
 
 APP_ROOT = Path(__file__).resolve().parents[0]
 
 logging.basicConfig(
     filename="cool_seq_tool.log",
-    format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s"
+    format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s",
 )
 logger = logging.getLogger("cool_seq_tool")
 logger.setLevel(logging.DEBUG)

diff --git a/cool_seq_tool/api.py b/cool_seq_tool/api.py
@@ -4,15 +4,13 @@
 from fastapi import FastAPI
 from fastapi.openapi.utils import get_openapi
 
-
-from cool_seq_tool.routers import default, mane, mappings, SERVICE_NAME
+from cool_seq_tool.routers import SERVICE_NAME, default, mane, mappings
 from cool_seq_tool.version import __version__
 
-
 app = FastAPI(
     docs_url=f"/{SERVICE_NAME}",
     openapi_url=f"/{SERVICE_NAME}/openapi.json",
-    swagger_ui_parameters={"tryItOutEnabled": True}
+    swagger_ui_parameters={"tryItOutEnabled": True},
 )
 
 
@@ -29,13 +27,13 @@ def custom_openapi() -> Dict:
         title="The GenomicMedLab Cool Seq Tool",
         version=__version__,
         description="Common Operations On Lots-of Sequences Tool.",
-        routes=app.routes
+        routes=app.routes,
     )
 
     openapi_schema["info"]["contact"] = {
         "name": "Alex H. Wagner",
         "email": "Alex.Wagner@nationwidechildrens.org",
-        "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab"  # noqa: E501
+        "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab",  # noqa: E501
     }
     app.openapi_schema = openapi_schema
     return app.openapi_schema

diff --git a/cool_seq_tool/app.py b/cool_seq_tool/app.py
@@ -1,20 +1,25 @@
 """Module for initializing data sources."""
-from typing import Optional
-from pathlib import Path
 import logging
+from pathlib import Path
+from typing import Optional
 
 from biocommons.seqrepo import SeqRepo
 
+from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
 from cool_seq_tool.mappers import (
-    MANETranscript, AlignmentMapper, ExonGenomicCoordsMapper
+    AlignmentMapper,
+    ExonGenomicCoordsMapper,
+    MANETranscript,
+)
+from cool_seq_tool.paths import (
+    LRG_REFSEQGENE_PATH,
+    MANE_SUMMARY_PATH,
+    SEQREPO_ROOT_DIR,
+    TRANSCRIPT_MAPPINGS_PATH,
 )
-from cool_seq_tool.sources.uta_database import UTA_DB_URL, UTADatabase
 from cool_seq_tool.sources.mane_transcript_mappings import MANETranscriptMappings
 from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
-from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
-from cool_seq_tool.paths import LRG_REFSEQGENE_PATH, MANE_SUMMARY_PATH, \
-    SEQREPO_ROOT_DIR, TRANSCRIPT_MAPPINGS_PATH
-
+from cool_seq_tool.sources.uta_database import UTA_DB_URL, UTADatabase
 
 logger = logging.getLogger(__name__)
 
@@ -28,7 +33,7 @@ def __init__(
         lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH,
         mane_data_path: Path = MANE_SUMMARY_PATH,
         db_url: str = UTA_DB_URL,
-        sr: Optional[SeqRepo] = None
+        sr: Optional[SeqRepo] = None,
     ) -> None:
         """Initialize CoolSeqTool class
 
@@ -44,14 +49,21 @@ def __init__(
         self.seqrepo_access = SeqRepoAccess(sr)
         self.transcript_mappings = TranscriptMappings(
             transcript_file_path=transcript_file_path,
-            lrg_refseqgene_path=lrg_refseqgene_path)
+            lrg_refseqgene_path=lrg_refseqgene_path,
+        )
         self.mane_transcript_mappings = MANETranscriptMappings(
-            mane_data_path=mane_data_path)
+            mane_data_path=mane_data_path
+        )
         self.uta_db = UTADatabase(db_url=db_url)
         self.alignment_mapper = AlignmentMapper(
-            self.seqrepo_access, self.transcript_mappings, self.uta_db)
+            self.seqrepo_access, self.transcript_mappings, self.uta_db
+        )
         self.mane_transcript = MANETranscript(
-            self.seqrepo_access, self.transcript_mappings,
-            self.mane_transcript_mappings, self.uta_db)
-        self.ex_g_coords_mapper = ExonGenomicCoordsMapper(self.uta_db,
-                                                          self.mane_transcript)
+            self.seqrepo_access,
+            self.transcript_mappings,
+            self.mane_transcript_mappings,
+            self.uta_db,
+        )
+        self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
+            self.uta_db, self.mane_transcript
+        )
diff --git a/cool_seq_tool/data/data_downloads.py b/cool_seq_tool/data/data_downloads.py
@@ -1,11 +1,11 @@
 """Module for handling downloadable data files."""
-from ftplib import FTP
+import datetime
+import gzip
 import logging
+import shutil
+from ftplib import FTP
 from os import remove
-import gzip
 from pathlib import Path
-import shutil
-import datetime
 
 from dateutil import parser
 
@@ -33,13 +33,11 @@ def get_mane_summary(self) -> Path:
             ftp.login()
             ftp.cwd("/refseq/MANE/MANE_human/current")
             files = ftp.nlst()
-            mane_summary_file = \
-                [f for f in files if f.endswith(".summary.txt.gz")]
+            mane_summary_file = [f for f in files if f.endswith(".summary.txt.gz")]
             if not mane_summary_file:
                 raise Exception("Unable to download MANE summary data")
             mane_summary_file = mane_summary_file[0]
-            self._mane_summary_path = \
-                self._data_dir / mane_summary_file[:-3]
+            self._mane_summary_path = self._data_dir / mane_summary_file[:-3]
             mane_data_path = self._data_dir / mane_summary_file
             if not self._mane_summary_path.exists():
                 logger.info("Downloading MANE summary file from NCBI.")
@@ -65,8 +63,7 @@ def get_lrg_refseq_gene_data(self) -> Path:
             ftp_file_path = f"{ftp_dir_path}{lrg_refseqgene_file}"
             timestamp = ftp.voidcmd(f"MDTM {ftp_file_path}")[4:].strip()
             date = str(parser.parse(timestamp)).split()[0]
-            version = datetime.datetime.strptime(date,
-                                                 "%Y-%m-%d").strftime("%Y%m%d")
+            version = datetime.datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")
             fn_versioned = f"{lrg_refseqgene_file}_{version}"
             lrg_refseqgene_path = self._data_dir / lrg_refseqgene_file
             self._lrg_refseqgene_path = self._data_dir / fn_versioned