Skip to content

Commit

Permalink
fix: fixed check for user bowtie index (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
DavideBrex authored Jan 24, 2024
1 parent e6af464 commit 4609a1c
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 13 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ To obtain the Snakemake workflow, you can:
1. Create a new github repository using this workflow [as a template](https://help.github.com/en/articles/creating-a-repository-from-a-template).
2. [Clone](https://help.github.com/en/articles/cloning-a-repository) the newly created repository to your local system, in the folder where you want to perform the data analysis.

- Download the source code as zip file from this page (code button)
- Download the source code as zip file from the latest [version](https://github.com/DavideBrex/SpikeFlow/releases).


The usage of this workflow is also described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog/?usage=DavideBrex%2FSpikeFlow).
Expand Down Expand Up @@ -221,7 +221,7 @@ First, the singularity container will be pulled from DockerHub and then the work

To execute the pipeline on a HPC cluster, please follow [these guidelines](https://snakemake.readthedocs.io/en/stable/tutorial/additional_features.html#cluster-execution).

If you are using **Snakemake version $\ge$ 8**, the comman line arguments have [different names](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#containerization-of-conda-based-workflows). In this case, run the workflow with:
If you are using **Snakemake version $\ge$ 8**, the command line arguments have [different names](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#containerization-of-conda-based-workflows). In this case, run the workflow with:

```bash
snakemake --cores --software-deployment-method conda apptainer
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/callPeaks.smk
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ rule macs2_callNarrowPeak:
+ " --gsize "
+ str(config["params"]["deeptools"]["effective_genome_length"])
+ " --pvalue "
+ config["params"]["peakCalling"]["macs2"]["pvalue"]
+ str(config["params"]["peakCalling"]["macs2"]["pvalue"])
+ " --keep-dup all",
benchmark:
"{}results/.benchmarks/{{sample}}.macs2.benchmark.txt".format(outdir)
Expand Down
52 changes: 42 additions & 10 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,40 @@ wildcard_constraints:

# -------------------- Sample sheet Sanity checks function ---------------#
def perform_checks(input_df):
def check_index_files(folder_path, prefix):
    """Verify that a complete set of Bowtie ``.ebwt`` index files is present.

    Args:
        folder_path: Directory expected to contain the index files. An empty
            string (what ``os.path.dirname`` returns for a bare index prefix)
            is interpreted as the current working directory.
        prefix: Basename shared by the index files, i.e. the part that
            precedes ``.1.ebwt``.

    Raises:
        FileNotFoundError: If ``folder_path`` is not an existing directory, or
            if one or more of the six expected index files is absent. In the
            latter case the message lists the missing file names.
    """
    # Suffixes of the six files that make up the index being checked.
    expected_suffixes = (
        ".1.ebwt",
        ".2.ebwt",
        ".3.ebwt",
        ".4.ebwt",
        ".rev.1.ebwt",
        ".rev.2.ebwt",
    )

    # dirname() of a bare prefix is ""; os.path.exists("") is False, so a
    # valid index sitting in the working directory would be rejected.
    search_dir = folder_path or os.curdir

    # isdir() rather than exists(): a regular file at this path would make
    # os.listdir() fail with an unrelated NotADirectoryError.
    if not os.path.isdir(search_dir):
        raise FileNotFoundError(
            "The genome index folder {} does not exist. \nPlease check that the folder is present and contains the indexing files".format(
                folder_path
            )
        )

    # Build the directory listing once; set membership keeps each lookup O(1).
    present = set(os.listdir(search_dir))
    missing_files = [
        "{}{}".format(prefix, suffix)
        for suffix in expected_suffixes
        if "{}{}".format(prefix, suffix) not in present
    ]

    # Name the absent files so the user can tell a wrong prefix from a
    # partially copied index.
    if missing_files:
        raise FileNotFoundError(
            "It appears that the genome index folder you provided is missing one/more indexing files: {}."
            "\nPlease check that the index prefix is correct and the index files are present in {}".format(
                ", ".join(missing_files), search_dir
            )
        )

# config file header
header = [
"sample",
"replicate",
Expand Down Expand Up @@ -203,18 +237,16 @@ def perform_checks(input_df):

# 6. in case an index is provided for the ref genome (different than ""), check whether it actually exists
if config["resources"]["ref"]["index"] != "":
if not os.path.exists(os.path.dirname(config["resources"]["ref"]["index"])):
raise FileNotFoundError(
"The provided path to the reference genome index does not exist. \nPlease check that the folder is present and contains the indexing files"
)
check_index_files(
os.path.dirname(config["resources"]["ref"]["index"]),
os.path.basename(config["resources"]["ref"]["index"]),
)
# same for spike
if config["resources"]["ref_spike"]["index_spike"] != "":
if not os.path.exists(
os.path.dirname(config["resources"]["ref_spike"]["index_spike"])
):
raise FileNotFoundError(
"The provided path to the spike genome index does not exist. \nPlease check that the folder is present and contains the indexing files"
)
check_index_files(
os.path.dirname(config["resources"]["ref_spike"]["index_spike"]),
os.path.basename(config["resources"]["ref_spike"]["index_spike"]),
)
# 7. check if the chromosome sizes file exists and if the blacklist file exists
if not os.path.exists(config["params"]["peakCalling"]["chrom_sizes"]):
raise FileNotFoundError(
Expand Down

0 comments on commit 4609a1c

Please sign in to comment.