Skip to content

Commit

Permalink
r0.6.0
Browse files Browse the repository at this point in the history
feat: add evidence filters (#57)
Merge pull request #58 from qbic-pipelines/dev
  • Loading branch information
HomoPolyethylen authored Oct 10, 2024
2 parents 7e42283 + 8ab9990 commit 309e365
Show file tree
Hide file tree
Showing 9 changed files with 323 additions and 103 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/lint-code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@ jobs:
- uses: actions/checkout@v3

- uses: actions/setup-node@v3
with:
node-version: ">=20.11.0"

- name: Install editorconfig-checker
run: npm install -g editorconfig-checker

# Run editor config check only on files not covered by a linter
- name: Run ECLint check
run: editorconfig-checker -exclude README.md $(git ls-files | grep -v 'test\|.py\|md\|json\|yml\|yaml\|html\|css\|Makefile\|.obo')
run: editorconfig-checker -exclude $(git ls-files | grep -v 'test\|.py\|md\|json\|yml\|yaml\|html\|css\|Makefile\|.obo')

Prettier:
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ ENV/
*.zip
metadata.txt
cgi_results/*
**/unittest_out

# Development tests
dev_tests/
Expand Down
16 changes: 16 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
Changelog
============

0.6.0 - Kepler-452b Goldilocks (2024-10-10)
---------------------------------------------

**Added**

* option `--filter_evidence` to filter CIViC evidence items by type, direction, status, level and significance

**Fixed**

**Dependencies**

* updated `black` to version 24.3.0 due to moderate vulnerability CVE-2024-21503
* updated `requests` to version 2.31.0 due to moderate vulnerabilities CVE-2023-32681 and CVE-2024-35195

**Deprecated**

0.5.5 - Sulfur Io (2024-09-09)
---------------------------------------------

Expand Down
6 changes: 5 additions & 1 deletion docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ To filter, the following fields are required in the VEP info column:

If ``filter_vep`` is set, the filtered and removed variants are given out as results in the ``vcf_files`` directory.

Using ``filter_evidence`` allows filtering the CIViC evidence items by type, direction, status, level and significance. The option takes a ``key=value`` pair and can be given multiple times.

A typical command for a CIViC query:

.. code-block:: bash
Expand All @@ -229,7 +231,9 @@ A typical command for a CIViC query:
-v input_file.vcf,tsv,gtf \
-o outdir \
-g ref_genome [GRCh37, GRCh38, NCBI36] \
--filter_vep
--filter_vep \
--filter_evidence type=Predictive \
--filter_evidence status=accepted
The command above generates the following result files using `CIViCpy <https://docs.civicpy.org/>`_.

Expand Down
101 changes: 81 additions & 20 deletions querynator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import random
import shutil
from collections import defaultdict
from enum import Enum

import click
Expand Down Expand Up @@ -266,6 +267,62 @@ def get_unique_querynator_dir(querynator_output):
return querynator_output


def validate_evidence_filters(evidence_filters):
    """Validate the evidence filters, given as ``key=value`` strings.

    Keys and values are compared case-insensitively (via ``str.casefold``)
    against the table of supported filters below. Empty entries are ignored.

    :param evidence_filters: a single ``key=value`` string or a list/tuple of them
    :type evidence_filters: str, list, tuple
    :return: None
    :rtype: None
    :raises click.UsageError: if an entry contains no ``=``, uses a key that is not
        a supported filter, or uses a value that is not allowed for its key
    """

    VALID_FILTERS = {
        "type": ["Predictive", "Diagnostic", "Prognostic", "Predisposing", "Oncogenic", "Functional"],
        "significance": ["SensitivityResponse", "Adverse Response", "Reduced Sensitivity", "N/A"],
        "direction": ["Supports", "Does Not Support"],
        "level": ["A", "B", "C", "D", "E", "F"],
        "rating": [1, 2, 3, 4, 5],
        "status": ["Accepted", "Rejected", "Submitted"],
    }
    # apply casefold to all keys and values to be case insensitive
    VALID_FILTERS = {k.casefold(): [str(v).casefold() for v in values] for k, values in VALID_FILTERS.items()}

    # a single filter may be passed bare; wrap it so the loop below is uniform
    if not isinstance(evidence_filters, (list, tuple)):
        evidence_filters = [evidence_filters]
    non_empty_filters = [f.casefold() for f in evidence_filters if f]

    for evidence_filter in non_empty_filters:
        # Split on the FIRST "=" only. Unpacking a plain split("=") raised an
        # uncaught ValueError for input such as "type=a=b"; with partition the
        # surplus text ends up in the value and is rejected by the check below.
        key, separator, value = evidence_filter.partition("=")
        if not separator:
            raise click.UsageError(
                f"Invalid evidence filter '{evidence_filter}'. Please provide a key-value pair separated by '='"
            )

        if key not in VALID_FILTERS:
            raise click.UsageError(
                f"Unsupported or invalid evidence filter '{key}'. Please provide one of {VALID_FILTERS.keys()}"
            )
        if value not in VALID_FILTERS[key]:
            raise click.UsageError(
                f"Unsupported or invalid value '{value}' for evidence filter '{key}'. Please provide one of {VALID_FILTERS[key]}"
            )


def parse_filters(filters) -> dict:
    """Parse ``key=value`` strings into a dict of key:[list of grouped values].

    Keys and values are case-folded, so repeated keys that differ only in case
    are grouped under one entry. Empty entries (``""`` or ``None``) are skipped.
    Each entry is split on its first ``=`` only.

    :param filters: key-value pairs as strings
    :type filters: str, list, tuple
    :return: dict of key:[list of grouped values]; a ``defaultdict(list)``, so
        looking up a key that was never given yields an empty list
    :rtype: dict
    :raises ValueError: if a non-empty entry contains no ``=``
    """
    # a single filter may be passed bare; wrap it so the loop below is uniform
    if not isinstance(filters, (list, tuple)):
        filters = [filters]

    parsed_filters = defaultdict(list)
    for entry in filters:
        if not entry:
            # nothing to parse for empty or missing entries
            continue
        key, separator, value = entry.partition("=")
        if not separator:
            raise ValueError(f"Invalid filter '{entry}'. Expected a key-value pair separated by '='")
        # casefold rather than lower, to normalise the same way the validation step does
        parsed_filters[key.casefold()].append(value.casefold())

    return parsed_filters


def run_querynator():
print("\n __ ")
print(" ____ ___ _____ _______ ______ ____ _/ /_____ _____")
Expand Down Expand Up @@ -434,27 +491,31 @@ def query_api_cgi(mutations, cnas, translocations, cancer, genome, token, email,
show_default=True,
default=False,
)
def query_api_civic(vcf, outdir, genome, cancer, filter_vep):
try:
result_dir = get_unique_querynator_dir(f"{outdir}")
dirname, basename = os.path.split(result_dir)
if filter_vep:
in_vcf_header, candidate_variants, removed_variants = filter_vcf_by_vep(vcf, logger)
# create result directories
os.makedirs(f"{result_dir}/vcf_files")
write_vcf(in_vcf_header, removed_variants, f"{result_dir}/vcf_files/{basename}.removed_variants.vcf")
write_vcf(in_vcf_header, candidate_variants, f"{result_dir}/vcf_files/{basename}.filtered_variants.vcf")

logger.info("Query the Clinical Interpretations of Variants In Cancer (CIViC)")
# run analysis
query_civic(candidate_variants, result_dir, logger, vcf, genome, cancer, filter_vep)

else:
logger.info("Query the Clinical Interpretations of Variants In Cancer (CIViC)")
query_civic(vcf, result_dir, logger, vcf, genome, filter_vep)
@click.option(
"-e",
"--filter_evidence",
help="Key-Value pairs to filter the evidence items. Example: 'type=Predictive'",
multiple=True,
)
def query_api_civic(vcf, outdir, genome, cancer, filter_vep, filter_evidence):
validate_evidence_filters(filter_evidence)
evidence_filters = parse_filters(filter_evidence)
result_dir = get_unique_querynator_dir(f"{outdir}")
dirname, basename = os.path.split(result_dir)
if filter_vep:
in_vcf_header, candidate_variants, removed_variants = filter_vcf_by_vep(vcf, logger)
# create result directories
os.makedirs(f"{result_dir}/vcf_files")
write_vcf(in_vcf_header, removed_variants, f"{result_dir}/vcf_files/{basename}.removed_variants.vcf")
write_vcf(in_vcf_header, candidate_variants, f"{result_dir}/vcf_files/{basename}.filtered_variants.vcf")

logger.info("Query the Clinical Interpretations of Variants In Cancer (CIViC)")
# run analysis
query_civic(candidate_variants, result_dir, logger, vcf, genome, cancer, filter_vep, evidence_filters)

except FileNotFoundError:
print("The provided file cannot be found. Please try another path.")
else:
logger.info("Query the Clinical Interpretations of Variants In Cancer (CIViC)")
query_civic(vcf, result_dir, logger, vcf, genome, cancer, filter_vep, evidence_filters)


# querynator create report
Expand Down
Loading

0 comments on commit 309e365

Please sign in to comment.