Skip to content

Commit

Permalink
style: add additional ruff checks (#260)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Jan 30, 2024
1 parent e0354b7 commit b6cb68e
Show file tree
Hide file tree
Showing 19 changed files with 601 additions and 596 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
run: python3 -m pip install ".[dev]"

- name: Check style
run: python3 -m ruff check . && ruff format --check .
run: python3 -m ruff check . && python3 -m ruff format --check .

docs:
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.9
rev: v0.1.14
hooks:
- id: ruff-format
- id: ruff
Expand Down
62 changes: 51 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ dependencies = [
dynamic = ["version"]

[project.optional-dependencies]
dev = ["pre-commit", "ipython", "ipykernel", "psycopg2-binary", "ruff"]
dev = ["pre-commit", "ipython", "ipykernel", "psycopg2-binary", "ruff>=0.1.14"]
tests = ["pytest", "pytest-cov", "pytest-asyncio==0.18.3", "mock"]
docs = [
"sphinx==6.1.3",
Expand Down Expand Up @@ -87,14 +87,49 @@ branch = true
[tool.ruff]
src = ["src"]
exclude = ["docs/source/conf.py"]
# pycodestyle (E, W)
# Pyflakes (F)
# flake8-annotations (ANN)
# pydocstyle (D)
# pep8-naming (N)
# isort (I)
select = ["E", "W", "F", "ANN", "D", "N", "I"]
fixable = ["I", "F401"]
select = [
"F", # https://docs.astral.sh/ruff/rules/#pyflakes-f
"E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w
"I", # https://docs.astral.sh/ruff/rules/#isort-i
"N", # https://docs.astral.sh/ruff/rules/#pep8-naming-n
"D", # https://docs.astral.sh/ruff/rules/#pydocstyle-d
"UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up
"ANN", # https://docs.astral.sh/ruff/rules/#flake8-annotations-ann
"ASYNC", # https://docs.astral.sh/ruff/rules/#flake8-async-async
"S", # https://docs.astral.sh/ruff/rules/#flake8-bandit-s
"B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
"A", # https://docs.astral.sh/ruff/rules/#flake8-builtins-a
"C4", # https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4
"DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
    "T10",  # https://docs.astral.sh/ruff/rules/#flake8-debugger-t10
"EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em
"G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
"PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
"PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt
"Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q
"RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse
"RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
"PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
"PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
]
fixable = [
"I",
"F401",
"D",
"UP",
"ANN",
"B",
"C4",
"G",
"PIE",
"PT",
"RSE",
"SIM",
"RUF"
]

# ANN101 - missing-type-self
# ANN003 - missing-type-kwargs
Expand All @@ -110,12 +145,14 @@ fixable = ["I", "F401"]
# E117 - over-indented*
# E501 - line-too-long*
# W191 - tab-indentation*
# S321 - suspicious-ftp-lib-usage
# *ignored for compatibility with formatter
ignore = [
"ANN101", "ANN003",
"D203", "D205", "D206", "D213", "D300", "D400", "D415",
"E111", "E114", "E117", "E501",
"W191"
"W191",
"S321",
]

[tool.ruff.per-file-ignores]
Expand All @@ -125,6 +162,9 @@ ignore = [
# N805 - invalid-first-argument-name-for-method
# F821 - undefined-name
# F401 - unused-import
"tests/*" = ["ANN001", "ANN2", "ANN102"]
"tests/*" = ["ANN001", "ANN2", "ANN102", "S101"]
"*__init__.py" = ["F401"]
"src/cool_seq_tool/schemas.py" = ["ANN201", "N805", "ANN001"]

[tool.ruff.lint.flake8-bugbear]
extend-immutable-calls = ["fastapi.Query"]
2 changes: 1 addition & 1 deletion src/cool_seq_tool/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Module for data"""
from .data_downloads import DataDownload # noqa: F401
from .data_downloads import DataDownload
32 changes: 19 additions & 13 deletions src/cool_seq_tool/data/data_downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import shutil
from ftplib import FTP
from os import remove
from pathlib import Path

from dateutil import parser
Expand Down Expand Up @@ -38,18 +37,20 @@ def get_mane_summary(self) -> Path:
files = ftp.nlst()
mane_summary_file = [f for f in files if f.endswith(".summary.txt.gz")]
if not mane_summary_file:
raise Exception("Unable to download MANE summary data")
msg = "Unable to download MANE summary data"
raise Exception(msg)
mane_summary_file = mane_summary_file[0]
self._mane_summary_path = self._data_dir / mane_summary_file[:-3]
mane_data_path = self._data_dir / mane_summary_file
if not self._mane_summary_path.exists():
logger.info("Downloading MANE summary file from NCBI.")
with open(mane_data_path, "wb") as fp:
with mane_data_path.open("wb") as fp:
ftp.retrbinary(f"RETR {mane_summary_file}", fp.write)
with gzip.open(mane_data_path, "rb") as f_in:
with open(self._mane_summary_path, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
remove(mane_data_path)
with gzip.open(
mane_data_path, "rb"
) as f_in, self._mane_summary_path.open("wb") as f_out:
shutil.copyfileobj(f_in, f_out)
mane_data_path.unlink()
logger.info("MANE summary file download complete.")
return self._mane_summary_path

Expand All @@ -66,18 +67,23 @@ def get_lrg_refseq_gene_data(self) -> Path:
ftp_file_path = f"{ftp_dir_path}{lrg_refseqgene_file}"
timestamp = ftp.voidcmd(f"MDTM {ftp_file_path}")[4:].strip()
date = str(parser.parse(timestamp)).split()[0]
version = datetime.datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")
version = (
datetime.datetime.strptime(date, "%Y-%m-%d")
.astimezone(tz=datetime.timezone.utc)
.strftime("%Y%m%d")
)
fn_versioned = f"{lrg_refseqgene_file}_{version}"
lrg_refseqgene_path = self._data_dir / lrg_refseqgene_file
self._lrg_refseqgene_path = self._data_dir / fn_versioned
if not self._lrg_refseqgene_path.exists():
logger.info("Downloading LRG RefSeq data from NCBI.")
ftp.cwd(ftp_dir_path)
with open(lrg_refseqgene_path, "wb") as fp:
with lrg_refseqgene_path.open("wb") as fp:
ftp.retrbinary(f"RETR {lrg_refseqgene_file}", fp.write)
with open(lrg_refseqgene_path, "rb") as f_in:
with open(self._lrg_refseqgene_path, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
remove(lrg_refseqgene_path)
with lrg_refseqgene_path.open(
"rb"
) as f_in, self._lrg_refseqgene_path.open("wb") as f_out:
shutil.copyfileobj(f_in, f_out)
lrg_refseqgene_path.unlink()
logger.info("LRG RefSeq data download complete.")
return self._lrg_refseqgene_path
6 changes: 2 additions & 4 deletions src/cool_seq_tool/handlers/seqrepo_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,7 @@ def chromosome_to_acs(
acs.append(ac.split("refseq:")[-1])
if acs:
return acs, None
else:
return None, f"{chromosome} is not a valid chromosome"
return None, f"{chromosome} is not a valid chromosome"

def ac_to_chromosome(self, ac: str) -> Tuple[Optional[str], Optional[str]]:
"""Get chromosome for accession.
Expand All @@ -172,8 +171,7 @@ def ac_to_chromosome(self, ac: str) -> Tuple[Optional[str], Optional[str]]:
)[0]
if aliases is None:
return None, f"Unable to get chromosome for {ac}"
else:
return aliases, None
return aliases, None

def get_fasta_file(self, sequence_id: str, outfile_path: Path) -> None:
"""Retrieve FASTA file containing sequence for requested sequence ID.
Expand Down
51 changes: 25 additions & 26 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ async def transcript_to_genomic_coordinates(
# Ensure valid inputs
if not transcript:
return self._return_warnings(resp, "Must provide `transcript`")
else:
transcript = transcript.strip()
transcript = transcript.strip()

exon_start_exists, exon_end_exists = False, False
if exon_start is not None:
Expand All @@ -130,12 +129,11 @@ async def transcript_to_genomic_coordinates(
return self._return_warnings(
resp, "Must provide either `exon_start` or `exon_end`"
)
elif exon_start_exists and exon_end_exists:
if exon_start > exon_end:
return self._return_warnings(
resp,
f"Start exon {exon_start} is greater than end exon {exon_end}",
)
if exon_start_exists and exon_end_exists and (exon_start > exon_end):
return self._return_warnings(
resp,
f"Start exon {exon_start} is greater than end exon {exon_end}",
)

# Get all exons and associated start/end coordinates for transcript
tx_exons, warning = await self.uta_db.get_tx_exons(transcript)
Expand Down Expand Up @@ -266,7 +264,7 @@ async def genomic_to_transcript_exon_coordinates(
if start is None and end is None:
return self._return_warnings(resp, "Must provide either `start` or `end`")

params = {key: None for key in GenomicData.model_fields.keys()}
params = {key: None for key in GenomicData.model_fields}
if gene is not None:
gene = gene.upper().strip()

Expand Down Expand Up @@ -312,13 +310,12 @@ async def genomic_to_transcript_exon_coordinates(

for field in ["transcript", "gene", "chr", "strand"]:
if start_data:
if end_data:
if start_data[field] != end_data[field]:
msg = (
f"Start `{field}`, {start_data[field]}, does "
f"not match End `{field}`, {end_data[field]}"
)
return self._return_warnings(resp, msg)
if end_data and (start_data[field] != end_data[field]):
msg = (
f"Start `{field}`, {start_data[field]}, does "
f"not match End `{field}`, {end_data[field]}"
)
return self._return_warnings(resp, msg)
params[field] = start_data[field]
else:
params[field] = end_data[field]
Expand Down Expand Up @@ -440,7 +437,10 @@ async def _get_alt_ac_start_and_end(
else:
error = "Strand does not match"
logger.warning(
f"{error}: {alt_ac_data['start'][i]} != {alt_ac_data['end'][i]}"
"%s: %s != %s",
error,
alt_ac_data["start"][i],
alt_ac_data["end"][i],
)
return None, error
return tuple(alt_ac_data_values), None
Expand Down Expand Up @@ -482,7 +482,7 @@ async def _genomic_to_transcript_exon_coordinate(
resp, "Must provide either `gene` or `transcript`"
)

params = {key: None for key in TranscriptExonData.model_fields.keys()}
params = {key: None for key in TranscriptExonData.model_fields}

if alt_ac:
# Check if valid accession is given
Expand Down Expand Up @@ -550,7 +550,7 @@ def _get_gene_and_alt_ac(
len_alt_acs = len(alt_acs)
if len_alt_acs > 1:
return None, f"Found more than one accessions: {alt_acs}"
elif len_alt_acs == 0:
if len_alt_acs == 0:
return None, "No genomic accessions found"
alt_ac = next(iter(alt_acs))

Expand All @@ -565,13 +565,12 @@ def _get_gene_and_alt_ac(
elif len_genes == 0:
return None, "No genes found"

if input_gene is not None:
if output_gene != input_gene.upper():
return (
None,
f"Input gene, {input_gene}, does not match "
f"expected output gene, {output_gene}",
)
if input_gene is not None and output_gene != input_gene.upper():
return (
None,
f"Input gene, {input_gene}, does not match "
f"expected output gene, {output_gene}",
)

gene = output_gene if output_gene else input_gene
return (gene, alt_ac), None
Expand Down
Loading

0 comments on commit b6cb68e

Please sign in to comment.