Skip to content

Commit

Permalink
revert to old download ref behavior because of issue w/ ruleorder and…
Browse files Browse the repository at this point in the history
… checkpoints.
  • Loading branch information
cademirch committed Apr 3, 2024
1 parent 956bad2 commit 5adbcd9
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 25 deletions.
1 change: 0 additions & 1 deletion profiles/default/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ default-resources:
# Control number of threads each rule will use.
set-threads:
# Reference Genome Processing. Does NOT use more than 1 thread.
copy_reference: 1
download_reference: 1
index_reference: 1
# Interval Generation. Does NOT use more than 1 thread.
Expand Down
7 changes: 3 additions & 4 deletions profiles/slurm/config.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
executor: slurm
use-conda: True
jobs: 100 # Have up to N jobs submitted at any given time
latency-wait: 20 # Wait N seconds for output files due to latency
retries: 3 # Retry jobs N times.
latency-wait: 100 # Wait N seconds for output files due to latency
retries: 0 # Retry jobs N times.

# These resources will be applied to all rules. Can be overridden on a per-rule basis below.
default-resources:
mem_mb: attempt * 2000
mem_mb_reduced: (attempt * 2000) * 0.9 # Mem allocated to java for GATK rules (tries to prevent OOM errors)
slurm_partition: ""
slurm_account: # Same as sbatch -A. Not all clusters use this.
runtime: 10 # In minutes
runtime: 30 # In minutes


# Control number of threads each rule will use.
set-threads:
# Reference Genome Processing. Does NOT use more than 1 thread.
copy_reference: 1
download_reference: 1
index_reference: 1
# Interval Generation. Does NOT use more than 1 thread.
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def get_ref(wildcards):
# if no user-specified refPath, return an empty input list so that download_reference downloads the genome from NCBI instead of copying a local file.
logger.info(f"refPath specified in sample sheet header, but no path provided for refGenome '{wildcards.refGenome}'\n" +
f"Will try to download '{wildcards.refGenome}' from NCBI. If this is a genome accession, you can ignore this warning.")
return "Need to Download"
return []

def sentieon_combine_gvcf_cmd_line(wc):
gvcfs = sentieon_combine_gvcf_input(wc)["gvcfs"]
Expand Down
46 changes: 27 additions & 19 deletions workflow/rules/reference.smk
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
ruleorder: copy_reference > download_reference > index_reference
localrules: copy_reference, download_reference
ruleorder: download_reference > index_reference
# localrules: copy_reference, download_reference

rule copy_reference:
"""Copies user-specified reference genome path to results dir to maintain refGenome wildcard"""
input:
ref = get_ref
output:
ref = "results/{refGenome}/data/genome/{refGenome}.fna"
log:
"logs/{refGenome}/copy_ref/log.txt"
shell:
#probably don't need to unzip but might as well.
"""
gunzip -c {input.ref} 2> {log} > {output.ref} || cp {input.ref} {output.ref} &> {log}
"""
# This does not work with SLURM as of 4/3/24. See here for more info: https://github.com/snakemake/snakemake-executor-plugin-slurm/issues/60
# rule copy_reference:
# """Copies user-specified reference genome path to results dir to maintain refGenome wildcard"""
# input:
# ref = get_ref
# output:
# ref = "results/{refGenome}/data/genome/{refGenome}.fna"
# log:
# "logs/{refGenome}/copy_ref/log.txt"
# shell:
# #probably don't need to unzip but might as well.
# """
# gunzip -c {input.ref} 2> {log} > {output.ref} || cp {input.ref} {output.ref} &> {log}
# """

rule download_reference:
input:
ref = get_ref
output:
ref = "results/{refGenome}/data/genome/{refGenome}.fna"
params:
Expand All @@ -29,10 +32,15 @@ rule download_reference:
"benchmarks/{refGenome}/download_ref/benchmark.txt"
shell:
"""
mkdir -p {params.outdir} &> {log}
datasets download genome accession --exclude-gff3 --exclude-protein --exclude-rna --filename {params.dataset} {wildcards.refGenome} &>> {log} \
&& 7z x {params.dataset} -aoa -o{params.outdir} &>> {log} \
&& cat {params.outdir}/ncbi_dataset/data/{wildcards.refGenome}/*.fna > {output.ref} 2>> {log}
if [ -z "{input.ref}" ] # check if this is empty
then
mkdir -p {params.outdir}
datasets download genome accession --exclude-gff3 --exclude-protein --exclude-rna --filename {params.dataset} {wildcards.refGenome} \
&& 7z x {params.dataset} -aoa -o{params.outdir} \
&& cat {params.outdir}/ncbi_dataset/data/{wildcards.refGenome}/*.fna > {output.ref}
else
gunzip -c {input.ref} 2> {log} > {output.ref} || cp {input.ref} {output.ref} &> {log}
fi
"""

rule index_reference:
Expand Down

0 comments on commit 5adbcd9

Please sign in to comment.