Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow call caching of reference creation #7

Open
wants to merge 14 commits into
base: develop
Choose a base branch
from
Open
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ that users understand how the changes affect the new version.

version 0.1.0-dev
-----------------
- Update gatk4, tabix and rtg-tools
- Allow call caching of reference creation
- Switch to miniwdl for testing
- Add documentation for the pipeline.
- The pipeline output now contains summary tables for the SNPs and indels for
each sample, as well as an HTML report which plots the precision and
Expand Down
40 changes: 21 additions & 19 deletions clinical-validation.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,22 @@ workflow ClinicalValidation {
File? regions
File? fallbackBaselineVcf
Map[String, String] dockerImages = {
"gatk4": "quay.io/biocontainers/gatk4:4.1.2.0--1",
"gatk4": "quay.io/biocontainers/gatk4:4.5.0.0--py36hdfd78af_0",
"vt": "quay.io/biocontainers/vt:0.57721--hdf88d34_2",
"tabix": "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0",
"rtg-tools": "quay.io/biocontainers/rtg-tools:3.10.1--0",
"tabix": "quay.io/biocontainers/tabix:1.11--hdfd78af_0",
"rtg-tools": "quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0",
"plotly": "lumc/plotly:4.10.0"
}
Boolean allRecords = false
}

call rtg.Format as formatReference {
input:
inputFiles = [referenceFasta],
outputPath = "reference.sdf",
dockerImage = dockerImages["rtg-tools"]
}

scatter (unit in validationUnit) {
# This is needed for the summary report
String sampleName = unit.outputPrefix
Expand Down Expand Up @@ -157,21 +164,14 @@ workflow ClinicalValidation {
dockerImage = dockerImages["gatk4"]
}

call rtg.Format as formatReference {
input:
inputFiles = [referenceFasta],
outputPath = unit.outputPrefix + "/reference.sdf",
dockerImage = dockerImages["rtg-tools"]
}

call rtg.VcfEval as evalSNPs {
input:
baseline = selectSNPsBaseline.outputVcf,
baselineIndex = selectSNPsBaseline.outputVcfIndex,
calls = selectSNPsCall.outputVcf,
callsIndex = selectSNPsCall.outputVcfIndex,
outputDir = unit.outputPrefix + "/evalSNPs/",
template = formatReference.sdf,
referenceFiles = formatReference.referenceFiles,
allRecords = allRecords,
bedRegions = regions,
sample = unit.sampleNameVcf,
Expand All @@ -185,7 +185,7 @@ workflow ClinicalValidation {
calls = selectIndelsCall.outputVcf,
callsIndex = selectIndelsCall.outputVcfIndex,
outputDir = unit.outputPrefix + "/evalIndels/",
template = formatReference.sdf,
referenceFiles = formatReference.referenceFiles,
allRecords = allRecords,
bedRegions = regions,
sample = unit.sampleNameVcf,
Expand All @@ -208,19 +208,21 @@ workflow ClinicalValidation {
output {
Array[File] indelStats = flatten(evalIndels.allStats)
Array[File] SNPStats = flatten(evalSNPs.allStats)
Array[File] indelVcf = selectIndelsCall.outputVcf
Array[File] indelVcfIndex = selectIndelsCall.outputVcfIndex
Array[File] SNPVcf = selectSNPsCall.outputVcf
Array[File] SNPVcfIndex = selectSNPsCall.outputVcfIndex

Array[File] normalizedVcf = indexNormalizedCall.compressed
Array[File] normalizedVcfIndex = indexNormalizedCall.index

Array[File] normalizedBaselineVcf = indexBaselineVcf.compressed
Array[File] normalizedBaselineVcfIndex = indexBaselineVcf.index
Array[File] BaselineIndelVcf = selectIndelsBaseline.outputVcf
Array[File] BaselineIndelVcfIndex = selectIndelsBaseline.outputVcfIndex
Array[File] BaselineSNPVcf = selectSNPsBaseline.outputVcf
Array[File] BaselineSNPVcfIndex = selectSNPsBaseline.outputVcfIndex

Array[File] normalizedVcf = indexNormalizedCall.compressed
Array[File] normalizedVcfIndex = indexNormalizedCall.index
Array[File] indelVcf = selectIndelsCall.outputVcf
Array[File] indelVcfIndex = selectIndelsCall.outputVcfIndex
Array[File] SNPVcf = selectSNPsCall.outputVcf
Array[File] SNPVcfIndex = selectSNPsCall.outputVcfIndex

File? indelTSV = parseSummary.IndelTSV
File? snpTSV = parseSummary.SnpTSV
File? htmlGraph = parseSummary.HtmlGraph
Expand Down
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
addopts = --git-aware --symlink
5 changes: 2 additions & 3 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
# For more information on how to set up conda with bioconda channel see:
# http://bioconda.github.io/#install-conda
# this file can be installed with "conda install --file requirements-test.txt"
# TODO: Remove cromwell version requirement once bug in cromwell is fixed: https://github.com/broadinstitute/cromwell/pull/5437
cromwell<=48
pytest-workflow=1.4
cromwell
pytest-workflow
miniwdl
wdl-aid
2 changes: 1 addition & 1 deletion tasks
7 changes: 7 additions & 0 deletions tests/miniwdl.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[file_io]
allow_any_input=true
copy_input_files_for=["MultiQC"]
use_relative_output_paths=true

[task_runtime]
as_user = true
66 changes: 33 additions & 33 deletions tests/test_clinical_validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
tags:
- integration
command: >-
cromwell run -o tests/cromwell_options.json
miniwdl run --cfg tests/miniwdl.cfg -d test-output/.
-i tests/integration/malesample.json clinical-validation.wdl
files:
- path: test-output/male-sample-output/calledIndels.vcf.gz
- path: test-output/male-sample-output/calledSnps.vcf.gz
- path: test-output/male-sample-output/evalIndels/summary.txt
- path: test-output/male-sample-output/evalSNPs/summary.txt
- path: test-output/out/male-sample-output/calledIndels.vcf.gz
- path: test-output/out/male-sample-output/calledSnps.vcf.gz
- path: test-output/out/male-sample-output/evalIndels/summary.txt
- path: test-output/out/male-sample-output/evalSNPs/summary.txt
stdout:
must_not_contain:
- "--all-records"
Expand All @@ -17,65 +17,65 @@
tags:
- integration
command: >-
cromwell run -o tests/cromwell_options.json
miniwdl run --cfg tests/miniwdl.cfg -d test-output/. --debug
-i tests/integration/multisample.json clinical-validation.wdl
files:
- path: test-output/sample1/calledIndels.vcf.gz
- path: test-output/sample1/calledSnps.vcf.gz
- path: test-output/sample1/evalIndels/summary.txt
- path: test-output/sample1/evalSNPs/summary.txt
- path: test-output/sample2/calledIndels.vcf.gz
- path: test-output/sample2/calledSnps.vcf.gz
- path: test-output/sample2/evalIndels/summary.txt
- path: test-output/sample2/evalSNPs/summary.txt
- path: test-output/summary.html
- path: test-output/out/sample1/calledIndels.vcf.gz
- path: test-output/out/sample1/calledSnps.vcf.gz
- path: test-output/out/sample1/evalIndels/summary.txt
- path: test-output/out/sample1/evalSNPs/summary.txt
- path: test-output/out/sample2/calledIndels.vcf.gz
- path: test-output/out/sample2/calledSnps.vcf.gz
- path: test-output/out/sample2/evalIndels/summary.txt
- path: test-output/out/sample2/evalSNPs/summary.txt
- path: test-output/out/summary.html
contains:
- "sample1"
- "sample2"
- path: test-output/indel_summary.tsv
- path: test-output/out/indel_summary.tsv
contains:
- "sample1\tNone"
- "sample2\tNone"
must_not_contain:
- "sample1\t0"
- "sample2\t0"

- path: test-output/snp_summary.tsv
- path: test-output/out/snp_summary.tsv
contains:
- "sample1"
- "sample2"
stdout:
stderr:
contains:
- "--all-records"

- name: no-samplenameVcf
tags:
- integration
command: >-
cromwell run -o tests/cromwell_options.json
miniwdl run --cfg tests/miniwdl.cfg -d test-output/.
-i tests/integration/no-samplenameVcf.json clinical-validation.wdl
files:
- path: test-output/sample-output/calledIndels.vcf.gz
- path: test-output/sample-output/calledSnps.vcf.gz
- path: test-output/sample-output/evalIndels/summary.txt
- path: test-output/sample-output/evalSNPs/summary.txt
- path: test-output/out/sample-output/calledIndels.vcf.gz
- path: test-output/out/sample-output/calledSnps.vcf.gz
- path: test-output/out/sample-output/evalIndels/summary.txt
- path: test-output/out/sample-output/evalSNPs/summary.txt

- name: two-samples-fallback
tags:
- integration
command: >-
cromwell run -o tests/cromwell_options.json
miniwdl run --cfg tests/miniwdl.cfg -d test-output/. --debug
-i tests/integration/multisample_fallback.json clinical-validation.wdl
files:
- path: test-output/sample1/calledIndels.vcf.gz
- path: test-output/sample1/calledSnps.vcf.gz
- path: test-output/sample1/evalIndels/summary.txt
- path: test-output/sample1/evalSNPs/summary.txt
- path: test-output/sample2/calledIndels.vcf.gz
- path: test-output/sample2/calledSnps.vcf.gz
- path: test-output/sample2/evalIndels/summary.txt
- path: test-output/sample2/evalSNPs/summary.txt
stdout:
- path: test-output/out/sample1/calledIndels.vcf.gz
- path: test-output/out/sample1/calledSnps.vcf.gz
- path: test-output/out/sample1/evalIndels/summary.txt
- path: test-output/out/sample1/evalSNPs/summary.txt
- path: test-output/out/sample2/calledIndels.vcf.gz
- path: test-output/out/sample2/calledSnps.vcf.gz
- path: test-output/out/sample2/evalIndels/summary.txt
- path: test-output/out/sample2/evalSNPs/summary.txt
stderr:
contains:
- "--all-records"

Loading