biowdl · rhpvorderman · Aug 26, 2024 · Aug 26, 2024 · Aug 26, 2024 · Aug 26, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,9 @@ that users understand how the changes affect the new version.
 
 version 0.1.0-dev
 -----------------
+- Update the gatk4, tabix and rtg-tools docker images.
+- Allow call caching of reference creation by running it once per workflow.
+- Switch from cromwell to miniwdl for running the integration tests.
 - Add documentation for the pipeline.
 - The pipeline output now contains summary tables for the SNPs and indels for
   each sample, as well as an html report which plots the precision and

diff --git a/clinical-validation.wdl b/clinical-validation.wdl
@@ -42,15 +42,22 @@ workflow ClinicalValidation {
         File? regions
         File? fallbackBaselineVcf
         Map[String, String] dockerImages = {
-            "gatk4": "quay.io/biocontainers/gatk4:4.1.2.0--1",
+            "gatk4": "quay.io/biocontainers/gatk4:4.5.0.0--py36hdfd78af_0",
             "vt": "quay.io/biocontainers/vt:0.57721--hdf88d34_2",
-            "tabix": "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0",
-            "rtg-tools": "quay.io/biocontainers/rtg-tools:3.10.1--0",
+            "tabix": "quay.io/biocontainers/tabix:1.11--hdfd78af_0",
+            "rtg-tools": "quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0",
             "plotly": "lumc/plotly:4.10.0"
         }
         Boolean allRecords = false
     }
 
+    call rtg.Format as formatReference {
+        input:
+            inputFiles = [referenceFasta],
+            outputPath = "reference.sdf",
+            dockerImage = dockerImages["rtg-tools"]
+    }
+
     scatter (unit in validationUnit) {
         # This is needed for the summary report
         String sampleName = unit.outputPrefix
@@ -157,21 +164,14 @@ workflow ClinicalValidation {
                 dockerImage = dockerImages["gatk4"]
         }
 
-        call rtg.Format as formatReference {
-            input:
-                inputFiles = [referenceFasta],
-                outputPath = unit.outputPrefix + "/reference.sdf",
-                dockerImage = dockerImages["rtg-tools"]
-        }
-
         call rtg.VcfEval as evalSNPs {
             input:
                 baseline = selectSNPsBaseline.outputVcf,
                 baselineIndex = selectSNPsBaseline.outputVcfIndex,
                 calls = selectSNPsCall.outputVcf,
                 callsIndex = selectSNPsCall.outputVcfIndex,
                 outputDir = unit.outputPrefix + "/evalSNPs/",
-                template = formatReference.sdf,
+                referenceFiles = formatReference.referenceFiles,
                 allRecords = allRecords,
                 bedRegions = regions,
                 sample = unit.sampleNameVcf,
@@ -185,7 +185,7 @@ workflow ClinicalValidation {
                 calls = selectIndelsCall.outputVcf,
                 callsIndex = selectIndelsCall.outputVcfIndex,
                 outputDir = unit.outputPrefix + "/evalIndels/",
-                template = formatReference.sdf,
+                referenceFiles = formatReference.referenceFiles,
                 allRecords = allRecords,
                 bedRegions = regions,
                 sample = unit.sampleNameVcf,
@@ -208,19 +208,21 @@ workflow ClinicalValidation {
     output {
         Array[File] indelStats = flatten(evalIndels.allStats)
         Array[File] SNPStats = flatten(evalSNPs.allStats)
-        Array[File] indelVcf = selectIndelsCall.outputVcf
-        Array[File] indelVcfIndex = selectIndelsCall.outputVcfIndex
-        Array[File] SNPVcf = selectSNPsCall.outputVcf
-        Array[File] SNPVcfIndex = selectSNPsCall.outputVcfIndex
-
-        Array[File] normalizedVcf = indexNormalizedCall.compressed
-        Array[File] normalizedVcfIndex = indexNormalizedCall.index
 
+        Array[File] normalizedBaselineVcf = indexBaselineVcf.compressed
+        Array[File] normalizedBaselineVcfIndex = indexBaselineVcf.index
         Array[File] BaselineIndelVcf = selectIndelsBaseline.outputVcf
         Array[File] BaselineIndelVcfIndex = selectIndelsBaseline.outputVcfIndex
         Array[File] BaselineSNPVcf = selectSNPsBaseline.outputVcf
         Array[File] BaselineSNPVcfIndex = selectSNPsBaseline.outputVcfIndex
 
+        Array[File] normalizedVcf = indexNormalizedCall.compressed
+        Array[File] normalizedVcfIndex = indexNormalizedCall.index
+        Array[File] indelVcf = selectIndelsCall.outputVcf
+        Array[File] indelVcfIndex = selectIndelsCall.outputVcfIndex
+        Array[File] SNPVcf = selectSNPsCall.outputVcf
+        Array[File] SNPVcfIndex = selectSNPsCall.outputVcfIndex
+
         File? indelTSV = parseSummary.IndelTSV
         File? snpTSV = parseSummary.SnpTSV
         File? htmlGraph = parseSummary.HtmlGraph

diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = --git-aware --symlink
diff --git a/requirements-test.txt b/requirements-test.txt
@@ -3,8 +3,7 @@
 # For more information on how to set up conda with bioconda channel see:
 # http://bioconda.github.io/#install-conda
 # this file can be installed with "conda install --file requirements-test.txt"
-# TODO: Remove cromwell version requirement once bug in cromwell is fixed: https://github.com/broadinstitute/cromwell/pull/5437
-cromwell<=48
-pytest-workflow=1.4
+cromwell
+pytest-workflow
 miniwdl
 wdl-aid
diff --git a/tasks b/tasks
diff --git a/tests/miniwdl.cfg b/tests/miniwdl.cfg
@@ -0,0 +1,7 @@
+[file_io]
+allow_any_input=true
+copy_input_files_for=["MultiQC"]
+use_relative_output_paths=true
+
+[task_runtime]
+as_user = true
diff --git a/tests/test_clinical_validation.yml b/tests/test_clinical_validation.yml
@@ -2,13 +2,13 @@
   tags:
     - integration
   command: >-
-    cromwell run -o tests/cromwell_options.json
+    miniwdl run --cfg tests/miniwdl.cfg -d test-output/.
     -i tests/integration/malesample.json clinical-validation.wdl
   files:
-    - path: test-output/male-sample-output/calledIndels.vcf.gz
-    - path: test-output/male-sample-output/calledSnps.vcf.gz
-    - path: test-output/male-sample-output/evalIndels/summary.txt
-    - path: test-output/male-sample-output/evalSNPs/summary.txt
+    - path: test-output/out/male-sample-output/calledIndels.vcf.gz
+    - path: test-output/out/male-sample-output/calledSnps.vcf.gz
+    - path: test-output/out/male-sample-output/evalIndels/summary.txt
+    - path: test-output/out/male-sample-output/evalSNPs/summary.txt
   stdout:
     must_not_contain:
       - "--all-records"
@@ -17,65 +17,65 @@
   tags:
     - integration
   command: >-
-    cromwell run -o tests/cromwell_options.json
+    miniwdl run --cfg tests/miniwdl.cfg -d test-output/. --debug
     -i tests/integration/multisample.json clinical-validation.wdl
   files:
-    - path: test-output/sample1/calledIndels.vcf.gz
-    - path: test-output/sample1/calledSnps.vcf.gz
-    - path: test-output/sample1/evalIndels/summary.txt
-    - path: test-output/sample1/evalSNPs/summary.txt
-    - path: test-output/sample2/calledIndels.vcf.gz
-    - path: test-output/sample2/calledSnps.vcf.gz
-    - path: test-output/sample2/evalIndels/summary.txt
-    - path: test-output/sample2/evalSNPs/summary.txt
-    - path: test-output/summary.html
+    - path: test-output/out/sample1/calledIndels.vcf.gz
+    - path: test-output/out/sample1/calledSnps.vcf.gz
+    - path: test-output/out/sample1/evalIndels/summary.txt
+    - path: test-output/out/sample1/evalSNPs/summary.txt
+    - path: test-output/out/sample2/calledIndels.vcf.gz
+    - path: test-output/out/sample2/calledSnps.vcf.gz
+    - path: test-output/out/sample2/evalIndels/summary.txt
+    - path: test-output/out/sample2/evalSNPs/summary.txt
+    - path: test-output/out/summary.html
       contains:
         - "sample1"
         - "sample2"
-    - path: test-output/indel_summary.tsv
+    - path: test-output/out/indel_summary.tsv
       contains:
         - "sample1\tNone"
         - "sample2\tNone"
       must_not_contain:
         - "sample1\t0"
         - "sample2\t0"
 
-    - path: test-output/snp_summary.tsv
+    - path: test-output/out/snp_summary.tsv
       contains:
         - "sample1"
         - "sample2"
-  stdout:
+  stderr:
     contains:
       - "--all-records"
 
 - name: no-samplenameVcf
   tags:
     - integration
   command: >-
-    cromwell run -o tests/cromwell_options.json
+    miniwdl run --cfg tests/miniwdl.cfg -d test-output/.
     -i tests/integration/no-samplenameVcf.json clinical-validation.wdl
   files:
-    - path: test-output/sample-output/calledIndels.vcf.gz
-    - path: test-output/sample-output/calledSnps.vcf.gz
-    - path: test-output/sample-output/evalIndels/summary.txt
-    - path: test-output/sample-output/evalSNPs/summary.txt
+    - path: test-output/out/sample-output/calledIndels.vcf.gz
+    - path: test-output/out/sample-output/calledSnps.vcf.gz
+    - path: test-output/out/sample-output/evalIndels/summary.txt
+    - path: test-output/out/sample-output/evalSNPs/summary.txt
 
 - name: two-samples-fallback
   tags:
     - integration
   command: >-
-    cromwell run -o tests/cromwell_options.json
+    miniwdl run --cfg tests/miniwdl.cfg -d test-output/. --debug
     -i tests/integration/multisample_fallback.json clinical-validation.wdl
   files:
-    - path: test-output/sample1/calledIndels.vcf.gz
-    - path: test-output/sample1/calledSnps.vcf.gz
-    - path: test-output/sample1/evalIndels/summary.txt
-    - path: test-output/sample1/evalSNPs/summary.txt
-    - path: test-output/sample2/calledIndels.vcf.gz
-    - path: test-output/sample2/calledSnps.vcf.gz
-    - path: test-output/sample2/evalIndels/summary.txt
-    - path: test-output/sample2/evalSNPs/summary.txt
-  stdout:
+    - path: test-output/out/sample1/calledIndels.vcf.gz
+    - path: test-output/out/sample1/calledSnps.vcf.gz
+    - path: test-output/out/sample1/evalIndels/summary.txt
+    - path: test-output/out/sample1/evalSNPs/summary.txt
+    - path: test-output/out/sample2/calledIndels.vcf.gz
+    - path: test-output/out/sample2/calledSnps.vcf.gz
+    - path: test-output/out/sample2/evalIndels/summary.txt
+    - path: test-output/out/sample2/evalSNPs/summary.txt
+  stderr:
     contains:
       - "--all-records"