From becf7f6cbc5a531d5f0cba96d6fa1accc2062173 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Wed, 9 Aug 2023 10:05:39 -0400 Subject: [PATCH] Update CNV and M2 to GATK official workflows --- .dockstore.yml | 4 +- .../GATK4_CNV/GATK4_CNV.terra-inputs.json | 1 - .../GATK4_CNVSomaticPairWorkflow.inputs.json | 119 ++ ...V.wdl => GATK4_CNVSomaticPairWorkflow.wdl} | 955 ++++----- GATK_CNV_Mutect2/GATK4_CNV/README.md | 33 +- .../mutect2-gatk4/mutect2-gatk4.inputs.json | 71 +- .../mutect2-gatk4/mutect2-gatk4.wdl | 1867 +++++++---------- 7 files changed, 1401 insertions(+), 1649 deletions(-) delete mode 100644 GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.terra-inputs.json create mode 100644 GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.inputs.json rename GATK_CNV_Mutect2/GATK4_CNV/{GATK4_CNV.wdl => GATK4_CNVSomaticPairWorkflow.wdl} (57%) diff --git a/.dockstore.yml b/.dockstore.yml index 4f8eaf9..5e78565 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -19,9 +19,9 @@ workflows: - /CollectSamError/CollectSamErrorMetrics.inputs.json - name: GATK4_CNV subclass: WDL - primaryDescriptorPath: /GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.wdl + primaryDescriptorPath: /GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.wdl testParameterFiles: - - /GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.terra-inputs.json + - /GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.inputs.json - name: mutect2-gatk4 subclass: WDL primaryDescriptorPath: /GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.wdl diff --git a/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.terra-inputs.json b/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.terra-inputs.json deleted file mode 100644 index 19e3813..0000000 --- a/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.terra-inputs.json +++ /dev/null @@ -1 +0,0 @@ -{"CNVSomaticPairWorkflow.bin_length":"${0}","CNVSomaticPairWorkflow.blacklist_intervals":"${workspace.blacklist_intervals}","CNVSomaticPairWorkflow.calling_copy_ratio_z_score_threshold":"${3.05}","CNVSomaticPairWorkflow.common_sites":"${workspace.common_sites}","CNVSomaticPairWorkflow.gatk_docker":"us.gcr.io/broad-gatk/gatk:4.1.2.0","CNVSomaticPairWorkflow.intervals":"${workspace.CNV_intervals}","CNVSomaticPairWorkflow.is_run_oncotator":"${true}","CNVSomaticPairWorkflow.normal_bam":"${this.control_sample_bam}","CNVSomaticPairWorkflow.normal_bam_idx":"${this.control_sample_bam_index}","CNVSomaticPairWorkflow.oncotator_docker":"broadinstitute/oncotator:1.9.5.0-eval-gatk-protected","CNVSomaticPairWorkflow.read_count_pon":"${}","CNVSomaticPairWorkflow.ref_fasta":"${workspace.ref_fasta}","CNVSomaticPairWorkflow.ref_fasta_dict":"${workspace.ref_dict}","CNVSomaticPairWorkflow.ref_fasta_fai":"${workspace.ref_fasta_index}","CNVSomaticPairWorkflow.tumor_bam":"${this.case_sample_bam}","CNVSomaticPairWorkflow.tumor_bam_idx":"${this.case_sample_bam_index}"} \ No newline at end of file diff --git a/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.inputs.json b/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.inputs.json new file mode 100644 index 0000000..865d2b8 --- /dev/null +++ b/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.inputs.json @@ -0,0 +1,119 @@ +{ + "CNVSomaticPairWorkflow.preemptible_attempts": "Int? (optional)", + "CNVSomaticPairWorkflow.oncotator_docker": "String? (optional)", + "CNVSomaticPairWorkflow.mem_gb_for_call_copy_ratio_segments": "Int? (optional)", + "CNVSomaticPairWorkflow.num_smoothing_iterations_per_fit": "Int? (optional)", + "CNVSomaticPairWorkflow.ModelSegmentsNormal.output_dir": "String? 
(optional)", + "CNVSomaticPairWorkflow.mem_gb_for_funcotator": "Int? (optional)", + "CNVSomaticPairWorkflow.PlotModeledSegmentsTumor.output_dir": "String? (optional)", + "CNVSomaticPairWorkflow.calling_copy_ratio_z_score_threshold": "Float? (optional)", + "CNVSomaticPairWorkflow.minor_allele_fraction_prior_alpha": "Float? (optional)", + "CNVSomaticPairWorkflow.ModelSegmentsTumor.output_dir": "String? (optional)", + "CNVSomaticPairWorkflow.funcotator_ref_version": "String? (optional)", + "CNVSomaticPairWorkflow.gatk_docker": "String", + "CNVSomaticPairWorkflow.num_changepoints_penalty_factor": "Float? (optional)", + "CNVSomaticPairWorkflow.common_sites": "File", + "CNVSomaticPairWorkflow.tumor_bam_idx": "File", + "CNVSomaticPairWorkflow.PlotModeledSegmentsNormal.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.mem_gb_for_oncotator": "Int? (optional)", + "CNVSomaticPairWorkflow.neutral_segment_copy_ratio_upper_bound": "Float? (optional)", + "CNVSomaticPairWorkflow.PlotModeledSegmentsTumor.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.CollectCountsNormal.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.minimum_base_quality": "String? (optional)", + "CNVSomaticPairWorkflow.mem_gb_for_denoise_read_counts": "Int? (optional)", + "CNVSomaticPairWorkflow.min_total_allele_count_normal": "Int? (optional)", + "CNVSomaticPairWorkflow.PlotModeledSegmentsNormal.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.genotyping_base_error_rate": "Float? (optional)", + "CNVSomaticPairWorkflow.DenoiseReadCountsNormal.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosNormal.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.funcotator_cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.funcotator_is_removing_untared_datasources": "Boolean? (optional)", + "CNVSomaticPairWorkflow.emergency_extra_disk": "Int? (optional)", + "CNVSomaticPairWorkflow.ModelSegmentsNormal.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.CallCopyRatioSegmentsTumor.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.ModelSegmentsNormal.normal_allelic_counts": "File? (optional)", + "CNVSomaticPairWorkflow.PlotModeledSegmentsNormal.output_dir": "String? (optional)", + "CNVSomaticPairWorkflow.funcotator_excluded_fields": "Array[String]? (optional)", + "CNVSomaticPairWorkflow.point_size_copy_ratio": "Float? (optional)", + "CNVSomaticPairWorkflow.ref_fasta_fai": "File", + "CNVSomaticPairWorkflow.CollectCountsTumor.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.kernel_approximation_dimension": "Int? (optional)", + "CNVSomaticPairWorkflow.outlier_neutral_segment_copy_ratio_z_score_threshold": "Float? (optional)", + "CNVSomaticPairWorkflow.funcotator_annotation_overrides": "Array[String]? (optional)", + "CNVSomaticPairWorkflow.kernel_variance_copy_ratio": "Float? (optional)", + "CNVSomaticPairWorkflow.CallCopyRatioSegmentsTumor.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.funcotator_disk_space_gb": "Int? (optional)", + "CNVSomaticPairWorkflow.additional_args_for_oncotator": "String? (optional)", + "CNVSomaticPairWorkflow.funcotator_transcript_selection_list": "File? (optional)", + "CNVSomaticPairWorkflow.DenoiseReadCountsTumor.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.mem_gb_for_model_segments": "Int? (optional)", + "CNVSomaticPairWorkflow.mem_gb_for_plotting": "Int? 
(optional)", + "CNVSomaticPairWorkflow.min_total_allele_count": "Int? (optional)", + "CNVSomaticPairWorkflow.point_size_allele_fraction": "Float? (optional)", + "CNVSomaticPairWorkflow.ref_fasta": "File", + "CNVSomaticPairWorkflow.ModelSegmentsNormal.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.num_burn_in_allele_fraction": "Int? (optional)", + "CNVSomaticPairWorkflow.smoothing_threshold_allele_fraction": "Float? (optional)", + "CNVSomaticPairWorkflow.ModelSegmentsTumor.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.mem_gb_for_collect_counts": "Int? (optional)", + "CNVSomaticPairWorkflow.ref_fasta_dict": "File", + "CNVSomaticPairWorkflow.collect_counts_format": "String? (optional)", + "CNVSomaticPairWorkflow.minimum_contig_length": "Int? (optional)", + "CNVSomaticPairWorkflow.maximum_copy_ratio": "String? (optional)", + "CNVSomaticPairWorkflow.is_run_funcotator": "Boolean? (optional)", + "CNVSomaticPairWorkflow.smoothing_threshold_copy_ratio": "Float? (optional)", + "CNVSomaticPairWorkflow.ModelSegmentsNormal.min_total_allele_count_normal": "Int? (optional)", + "CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosNormal.output_dir": "String? (optional)", + "CNVSomaticPairWorkflow.tumor_bam": "File", + "CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosTumor.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.normal_bam_idx": "File? (optional)", + "CNVSomaticPairWorkflow.CollectAllelicCountsNormal.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.neutral_segment_copy_ratio_lower_bound": "Float? (optional)", + "CNVSomaticPairWorkflow.num_samples_allele_fraction": "Int? (optional)", + "CNVSomaticPairWorkflow.funcotator_annotation_defaults": "Array[String]? (optional)", + "CNVSomaticPairWorkflow.max_num_segments_per_chromosome": "Int? (optional)", + "CNVSomaticPairWorkflow.blacklist_intervals": "File? (optional)", + "CNVSomaticPairWorkflow.CallCopyRatioSegmentsNormal.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.kernel_scaling_allele_fraction": "Float? (optional)", + "CNVSomaticPairWorkflow.CollectCountsNormal.disabled_read_filters": "Array[String]? (optional)", + "CNVSomaticPairWorkflow.funcotator_use_ssd": "Boolean? (optional)", + "CNVSomaticPairWorkflow.PlotModeledSegmentsTumor.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosTumor.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.mem_gb_for_preprocess_intervals": "Int? (optional)", + "CNVSomaticPairWorkflow.genotyping_homozygous_log_ratio_threshold": "Float? (optional)", + "CNVSomaticPairWorkflow.CollectAllelicCountsNormal.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.max_num_smoothing_iterations": "Int? (optional)", + "CNVSomaticPairWorkflow.PreprocessIntervals.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosTumor.output_dir": "String? (optional)", + "CNVSomaticPairWorkflow.funcotator_transcript_selection_mode": "String? (optional)", + "CNVSomaticPairWorkflow.is_run_oncotator": "Boolean? (optional)", + "CNVSomaticPairWorkflow.padding": "Int? (optional)", + "CNVSomaticPairWorkflow.PreprocessIntervals.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.mem_gb_for_collect_allelic_counts": "Int? (optional)", + "CNVSomaticPairWorkflow.normal_bam": "File? (optional)", + "CNVSomaticPairWorkflow.gcs_project_for_requester_pays": "String? (optional)", + "CNVSomaticPairWorkflow.CollectCountsNormal.cpu": "Int? 
(optional)", + "CNVSomaticPairWorkflow.CollectAllelicCountsTumor.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.num_burn_in_copy_ratio": "Int? (optional)", + "CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosNormal.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.DenoiseReadCountsNormal.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.CollectAllelicCountsTumor.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.CNVFuncotateSegmentsWorkflow.interval_list": "File? (optional)", + "CNVSomaticPairWorkflow.intervals": "File", + "CNVSomaticPairWorkflow.funcotator_data_sources_tar_gz": "File? (optional)", + "CNVSomaticPairWorkflow.gatk4_jar_override": "File? (optional)", + "CNVSomaticPairWorkflow.CollectCountsTumor.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.CallCopyRatioSegmentsNormal.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.DenoiseReadCountsTumor.cpu": "Int? (optional)", + "CNVSomaticPairWorkflow.number_of_eigensamples": "Int? (optional)", + "CNVSomaticPairWorkflow.CollectCountsTumor.disabled_read_filters": "Array[String]? (optional)", + "CNVSomaticPairWorkflow.window_sizes": "Array[Int]+? (optional, default = [8, 16, 32, 64, 128, 256])", + "CNVSomaticPairWorkflow.num_samples_copy_ratio": "Int? (optional)", + "CNVSomaticPairWorkflow.read_count_pon": "File", + "CNVSomaticPairWorkflow.ModelSegmentsTumor.use_ssd": "Boolean (optional, default = false)", + "CNVSomaticPairWorkflow.boot_disk_space_gb_for_oncotator": "Int? (optional)", + "CNVSomaticPairWorkflow.bin_length": "Int? (optional)", + "CNVSomaticPairWorkflow.additional_args_for_funcotator": "String? (optional)", + "CNVSomaticPairWorkflow.kernel_variance_allele_fraction": "Float? (optional)" +} + diff --git a/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.wdl b/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.wdl similarity index 57% rename from GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.wdl rename to GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.wdl index bf91289..9ee2aa9 100644 --- a/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNV.wdl +++ b/GATK_CNV_Mutect2/GATK4_CNV/GATK4_CNVSomaticPairWorkflow.wdl @@ -1,106 +1,178 @@ +# Workflow for running the GATK CNV pipeline on a matched pair. Supports both WGS and WES. +# +# Notes: +# +# - The intervals argument is required for both WGS and WES workflows and accepts formats compatible with the +# GATK -L argument (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists). +# These intervals will be padded on both sides by the amount specified by padding (default 250) +# and split into bins of length specified by bin_length (default 1000; specify 0 to skip binning, +# e.g., for WES). For WGS, the intervals should simply cover the autosomal chromosomes (sex chromosomes may be +# included, but care should be taken to 1) avoid creating panels of mixed sex, and 2) denoise case samples only +# with panels containing only individuals of the same sex as the case samples). +# +# - Intervals can be blacklisted from coverage collection and all downstream steps by using the blacklist_intervals +# argument, which accepts formats compatible with the GATK -XL argument +# (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists). +# This may be useful for excluding centromeric regions, etc. from analysis. Alternatively, these regions may +# be manually filtered from the final callset. 
+# +# A reasonable blacklist for excluded intervals (-XL) can be found at: +# hg19: gs://gatk-best-practices/somatic-b37/CNV_and_centromere_blacklist.hg19.list +# hg38: gs://gatk-best-practices/somatic-hg38/CNV_and_centromere_blacklist.hg38liftover.list (untested) +# +# - The sites file (common_sites) should be a Picard or GATK-style interval list. This is a list of sites +# of known variation at which allelic counts will be collected for use in modeling minor-allele fractions. +# +# - If you opt to run FuncotateSegments (i.e. set `is_run_funcotator` to `true`), then please also ensure that you have +# the correct value for `funcotator_ref_version`. Treat `funcotator_ref_version` as required if +# `is_run_funcotator` is `true`. Valid values for `funcotator_ref_version` are `hg38` and `hg19`. +# The latter includes GRCh37. +# +# +# - Example invocation: +# +# java -jar cromwell.jar run cnv_somatic_pair_workflow.wdl -i my_parameters.json +# +############# + +version 1.0 + +import "https://raw.githubusercontent.com/broadinstitute/gatk/4.2.0.0/scripts/cnv_wdl/cnv_common_tasks.wdl" as CNVTasks +import "https://raw.githubusercontent.com/broadinstitute/gatk/4.2.0.0/scripts/cnv_wdl/somatic/cnv_somatic_oncotator_workflow.wdl" as CNVOncotator +import "https://raw.githubusercontent.com/broadinstitute/gatk/4.2.0.0/scripts/cnv_wdl/somatic/cnv_somatic_funcotate_seg_workflow.wdl" as CNVFuncotateSegments + workflow CNVSomaticPairWorkflow { - ################################## - #### required basic arguments #### - ################################## - File common_sites - File intervals - File? blacklist_intervals - File tumor_bam - File tumor_bam_idx - File? normal_bam - File? normal_bam_idx - File? read_count_pon - File ref_fasta_dict - File ref_fasta_fai - File ref_fasta - String gatk_docker - - ################################## - #### optional basic arguments #### - ################################## - # For running oncotator - Boolean? is_run_oncotator - File? gatk4_jar_override - Int? preemptible_attempts - # Use as a last resort to increase the disk given to every task in case of ill behaving data - Int? emergency_extra_disk - - #################################################### - #### optional arguments for PreprocessIntervals #### - #################################################### - Int? padding - Int? bin_length - Int? mem_gb_for_preprocess_intervals - - ############################################## - #### optional arguments for CollectCounts #### - ############################################## - String? collect_counts_format - Int? mem_gb_for_collect_counts - - ##################################################### - #### optional arguments for CollectAllelicCounts #### - ##################################################### - String? minimum_base_quality - Int? mem_gb_for_collect_allelic_counts - - ################################################## - #### optional arguments for DenoiseReadCounts #### - ################################################## - Int? number_of_eigensamples - Int? mem_gb_for_denoise_read_counts - - ############################################## - #### optional arguments for ModelSegments #### - ############################################## - Int? max_num_segments_per_chromosome - Int? min_total_allele_count - Int? min_total_allele_count_normal - Float? genotyping_homozygous_log_ratio_threshold - Float? genotyping_base_error_rate - Float? kernel_variance_copy_ratio - Float? kernel_variance_allele_fraction - Float? kernel_scaling_allele_fraction - Int? 
kernel_approximation_dimension - Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256] - Float? num_changepoints_penalty_factor - Float? minor_allele_fraction_prior_alpha - Int? num_samples_copy_ratio - Int? num_burn_in_copy_ratio - Int? num_samples_allele_fraction - Int? num_burn_in_allele_fraction - Float? smoothing_threshold_copy_ratio - Float? smoothing_threshold_allele_fraction - Int? max_num_smoothing_iterations - Int? num_smoothing_iterations_per_fit - Int? mem_gb_for_model_segments - - ###################################################### - #### optional arguments for CallCopyRatioSegments #### - ###################################################### - Float? neutral_segment_copy_ratio_lower_bound - Float? neutral_segment_copy_ratio_upper_bound - Float? outlier_neutral_segment_copy_ratio_z_score_threshold - Float? calling_copy_ratio_z_score_threshold - Int? mem_gb_for_call_copy_ratio_segments - - ######################################### - #### optional arguments for plotting #### - ######################################### - Int? minimum_contig_length - Int? mem_gb_for_plotting - - ########################################## - #### optional arguments for Oncotator #### - ########################################## - String? additional_args_for_oncotator - String? oncotator_docker - Int? mem_gb_for_oncotator - Int? boot_disk_space_gb_for_oncotator + input { + ################################## + #### required basic arguments #### + ################################## + File common_sites + File intervals + File? blacklist_intervals + File tumor_bam + File tumor_bam_idx + File? normal_bam + File? normal_bam_idx + File read_count_pon + File ref_fasta_dict + File ref_fasta_fai + File ref_fasta + String gatk_docker + + ################################## + #### optional basic arguments #### + ################################## + # For running oncotator + Boolean? is_run_oncotator + # For running funcotator + Boolean? is_run_funcotator + + File? gatk4_jar_override + Int? preemptible_attempts + # Use as a last resort to increase the disk given to every task in case of ill behaving data + Int? emergency_extra_disk + + # Required if BAM/CRAM is in a requester pays bucket + String? gcs_project_for_requester_pays + + #################################################### + #### optional arguments for PreprocessIntervals #### + #################################################### + Int? padding + Int? bin_length + Int? mem_gb_for_preprocess_intervals + + ############################################## + #### optional arguments for CollectCounts #### + ############################################## + String? collect_counts_format + Int? mem_gb_for_collect_counts + + ##################################################### + #### optional arguments for CollectAllelicCounts #### + ##################################################### + String? minimum_base_quality + Int? mem_gb_for_collect_allelic_counts + + ################################################## + #### optional arguments for DenoiseReadCounts #### + ################################################## + Int? number_of_eigensamples + Int? mem_gb_for_denoise_read_counts + + ############################################## + #### optional arguments for ModelSegments #### + ############################################## + Int? max_num_segments_per_chromosome + Int? min_total_allele_count + Int? min_total_allele_count_normal + Float? genotyping_homozygous_log_ratio_threshold + Float? genotyping_base_error_rate + Float? 
kernel_variance_copy_ratio + Float? kernel_variance_allele_fraction + Float? kernel_scaling_allele_fraction + Int? kernel_approximation_dimension + Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256] + Float? num_changepoints_penalty_factor + Float? minor_allele_fraction_prior_alpha + Int? num_samples_copy_ratio + Int? num_burn_in_copy_ratio + Int? num_samples_allele_fraction + Int? num_burn_in_allele_fraction + Float? smoothing_threshold_copy_ratio + Float? smoothing_threshold_allele_fraction + Int? max_num_smoothing_iterations + Int? num_smoothing_iterations_per_fit + Int? mem_gb_for_model_segments + + ###################################################### + #### optional arguments for CallCopyRatioSegments #### + ###################################################### + Float? neutral_segment_copy_ratio_lower_bound + Float? neutral_segment_copy_ratio_upper_bound + Float? outlier_neutral_segment_copy_ratio_z_score_threshold + Float? calling_copy_ratio_z_score_threshold + Int? mem_gb_for_call_copy_ratio_segments + + ######################################### + #### optional arguments for plotting #### + ######################################### + Int? minimum_contig_length + # If maximum_copy_ratio = Infinity, the maximum copy ratio will be automatically determined + String? maximum_copy_ratio + Float? point_size_copy_ratio + Float? point_size_allele_fraction + Int? mem_gb_for_plotting + + ########################################## + #### optional arguments for Oncotator #### + ########################################## + String? additional_args_for_oncotator + String? oncotator_docker + Int? mem_gb_for_oncotator + Int? boot_disk_space_gb_for_oncotator + + ################################################## + #### optional arguments for FuncotateSegments #### + ################################################## + String? additional_args_for_funcotator + String? funcotator_ref_version + Int? mem_gb_for_funcotator + File? funcotator_transcript_selection_list + File? funcotator_data_sources_tar_gz + String? funcotator_transcript_selection_mode + Array[String]? funcotator_annotation_defaults + Array[String]? funcotator_annotation_overrides + Array[String]? funcotator_excluded_fields + Boolean? funcotator_is_removing_untared_datasources + Int? funcotator_disk_space_gb + Boolean? funcotator_use_ssd + Int? 
funcotator_cpu + } Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_fasta_dict, "GB") + size(ref_fasta_fai, "GB")) - Int read_count_pon_size = if defined(read_count_pon) then ceil(size(read_count_pon, "GB")) else 0 + Int read_count_pon_size = ceil(size(read_count_pon, "GB")) Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bam_idx, "GB")) Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bam_idx, "GB")) else 0 @@ -112,7 +184,7 @@ workflow CNVSomaticPairWorkflow { File final_normal_bam_idx = select_first([normal_bam_idx, "null"]) Int preprocess_intervals_disk = ref_size + disk_pad - call PreprocessIntervals { + call CNVTasks.PreprocessIntervals { input: intervals = intervals, blacklist_intervals = blacklist_intervals, @@ -129,7 +201,7 @@ workflow CNVSomaticPairWorkflow { } Int collect_counts_tumor_disk = tumor_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad - call CollectCounts as CollectCountsTumor { + call CNVTasks.CollectCounts as CollectCountsTumor { input: intervals = PreprocessIntervals.preprocessed_intervals, bam = tumor_bam, @@ -138,15 +210,17 @@ workflow CNVSomaticPairWorkflow { ref_fasta_fai = ref_fasta_fai, ref_fasta_dict = ref_fasta_dict, format = collect_counts_format, + enable_indexing = false, gatk4_jar_override = gatk4_jar_override, gatk_docker = gatk_docker, mem_gb = mem_gb_for_collect_counts, disk_space_gb = collect_counts_tumor_disk, - preemptible_attempts = preemptible_attempts + preemptible_attempts = preemptible_attempts, + gcs_project_for_requester_pays = gcs_project_for_requester_pays } Int collect_allelic_counts_tumor_disk = tumor_bam_size + ref_size + disk_pad - call CollectAllelicCounts as CollectAllelicCountsTumor { + call CNVTasks.CollectAllelicCounts as CollectAllelicCountsTumor { input: common_sites = common_sites, bam = tumor_bam, @@ -159,7 +233,8 @@ workflow CNVSomaticPairWorkflow { gatk_docker = gatk_docker, mem_gb = mem_gb_for_collect_allelic_counts, disk_space_gb = collect_allelic_counts_tumor_disk, - preemptible_attempts = preemptible_attempts + preemptible_attempts = preemptible_attempts, + gcs_project_for_requester_pays = gcs_project_for_requester_pays } Int denoise_read_counts_tumor_disk = read_count_pon_size + ceil(size(CollectCountsTumor.counts, "GB")) + disk_pad @@ -236,6 +311,8 @@ workflow CNVSomaticPairWorkflow { denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios, ref_fasta_dict = ref_fasta_dict, minimum_contig_length = minimum_contig_length, + maximum_copy_ratio = maximum_copy_ratio, + point_size_copy_ratio = point_size_copy_ratio, gatk4_jar_override = gatk4_jar_override, gatk_docker = gatk_docker, mem_gb = mem_gb_for_plotting, @@ -251,6 +328,9 @@ workflow CNVSomaticPairWorkflow { modeled_segments = ModelSegmentsTumor.modeled_segments, ref_fasta_dict = ref_fasta_dict, minimum_contig_length = minimum_contig_length, + maximum_copy_ratio = maximum_copy_ratio, + point_size_copy_ratio = point_size_copy_ratio, + point_size_allele_fraction = point_size_allele_fraction, gatk4_jar_override = gatk4_jar_override, gatk_docker = gatk_docker, mem_gb = mem_gb_for_plotting, @@ -260,7 +340,7 @@ workflow CNVSomaticPairWorkflow { Int collect_counts_normal_disk = normal_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad if (defined(normal_bam)) { - call CollectCounts as CollectCountsNormal { + call CNVTasks.CollectCounts as CollectCountsNormal { input: intervals = PreprocessIntervals.preprocessed_intervals, bam = 
final_normal_bam, @@ -269,15 +349,17 @@ workflow CNVSomaticPairWorkflow { ref_fasta_fai = ref_fasta_fai, ref_fasta_dict = ref_fasta_dict, format = collect_counts_format, + enable_indexing = false, gatk4_jar_override = gatk4_jar_override, gatk_docker = gatk_docker, mem_gb = mem_gb_for_collect_counts, disk_space_gb = collect_counts_normal_disk, - preemptible_attempts = preemptible_attempts + preemptible_attempts = preemptible_attempts, + gcs_project_for_requester_pays = gcs_project_for_requester_pays } Int collect_allelic_counts_normal_disk = normal_bam_size + ref_size + disk_pad - call CollectAllelicCounts as CollectAllelicCountsNormal { + call CNVTasks.CollectAllelicCounts as CollectAllelicCountsNormal { input: common_sites = common_sites, bam = final_normal_bam, @@ -290,7 +372,8 @@ workflow CNVSomaticPairWorkflow { gatk_docker = gatk_docker, mem_gb = mem_gb_for_collect_allelic_counts, disk_space_gb = collect_allelic_counts_normal_disk, - preemptible_attempts = preemptible_attempts + preemptible_attempts = preemptible_attempts, + gcs_project_for_requester_pays = gcs_project_for_requester_pays } Int denoise_read_counts_normal_disk = read_count_pon_size + ceil(size(CollectCountsNormal.counts, "GB")) + disk_pad @@ -364,6 +447,8 @@ workflow CNVSomaticPairWorkflow { denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios, ref_fasta_dict = ref_fasta_dict, minimum_contig_length = minimum_contig_length, + maximum_copy_ratio = maximum_copy_ratio, + point_size_copy_ratio = point_size_copy_ratio, gatk4_jar_override = gatk4_jar_override, gatk_docker = gatk_docker, mem_gb = mem_gb_for_plotting, @@ -379,6 +464,9 @@ workflow CNVSomaticPairWorkflow { modeled_segments = ModelSegmentsNormal.modeled_segments, ref_fasta_dict = ref_fasta_dict, minimum_contig_length = minimum_contig_length, + maximum_copy_ratio = maximum_copy_ratio, + point_size_copy_ratio = point_size_copy_ratio, + point_size_allele_fraction = point_size_allele_fraction, gatk4_jar_override = gatk4_jar_override, gatk_docker = gatk_docker, mem_gb = mem_gb_for_plotting, @@ -388,16 +476,41 @@ workflow CNVSomaticPairWorkflow { } if (select_first([is_run_oncotator, false])) { - call OncotateSegments { + call CNVOncotator.CNVOncotatorWorkflow as CNVOncotatorWorkflow { input: called_file = CallCopyRatioSegmentsTumor.called_copy_ratio_segments, additional_args = additional_args_for_oncotator, oncotator_docker = oncotator_docker, - mem_gb = mem_gb_for_oncotator, - boot_disk_space_gb = boot_disk_space_gb_for_oncotator, + mem_gb_for_oncotator = mem_gb_for_oncotator, + boot_disk_space_gb_for_oncotator = boot_disk_space_gb_for_oncotator, preemptible_attempts = preemptible_attempts } } + if (select_first([is_run_funcotator, false])) { + call CNVFuncotateSegments.CNVFuncotateSegmentsWorkflow as CNVFuncotateSegmentsWorkflow { + input: + input_seg_file = CallCopyRatioSegmentsTumor.called_copy_ratio_segments, + funcotator_ref_version = select_first([funcotator_ref_version, "hg19"]), + extra_args = additional_args_for_funcotator, + ref_fasta = ref_fasta, + ref_fasta_fai = ref_fasta_fai, + ref_fasta_dict = ref_fasta_dict, + transcript_selection_list = funcotator_transcript_selection_list, + funcotator_data_sources_tar_gz = funcotator_data_sources_tar_gz, + gatk4_jar_override = gatk4_jar_override, + gatk_docker = gatk_docker, + mem_gb = mem_gb_for_funcotator, + preemptible_attempts = preemptible_attempts, + transcript_selection_mode = funcotator_transcript_selection_mode, + annotation_defaults = funcotator_annotation_defaults, + annotation_overrides = 
funcotator_annotation_overrides, + funcotator_excluded_fields = funcotator_excluded_fields, + is_removing_untared_datasources = funcotator_is_removing_untared_datasources, + disk_space_gb = funcotator_disk_space_gb, + use_ssd = funcotator_use_ssd, + cpu = funcotator_cpu + } + } output { File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals @@ -421,11 +534,7 @@ workflow CNVSomaticPairWorkflow { File allele_fraction_parameters_tumor = ModelSegmentsTumor.allele_fraction_parameters File called_copy_ratio_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_segments File called_copy_ratio_legacy_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments - Int total_segments = CallCopyRatioSegmentsTumor.total_segments - Int amplification = CallCopyRatioSegmentsTumor.amplification - Int deletion = CallCopyRatioSegmentsTumor.deletion File denoised_copy_ratios_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_plot - File denoised_copy_ratios_lim_4_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_lim_4_plot File standardized_MAD_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD Float standardized_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD_value File denoised_MAD_tumor = PlotDenoisedCopyRatiosTumor.denoised_MAD @@ -456,7 +565,6 @@ workflow CNVSomaticPairWorkflow { File? called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments File? called_copy_ratio_legacy_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot - File? denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD Float? standardized_MAD_value_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD_value File? denoised_MAD_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD @@ -467,204 +575,47 @@ workflow CNVSomaticPairWorkflow { Float? scaled_delta_MAD_value_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD_value File? modeled_segments_plot_normal = PlotModeledSegmentsNormal.modeled_segments_plot - File oncotated_called_file_tumor = select_first([OncotateSegments.oncotated_called_file, "null"]) - File oncotated_called_gene_list_file_tumor = select_first([OncotateSegments.oncotated_called_gene_list_file, "null"]) - } -} - - -task PreprocessIntervals { - File? intervals - File? blacklist_intervals - File ref_fasta - File ref_fasta_fai - File ref_fasta_dict - Int? padding - Int? bin_length - File? gatk4_jar_override - - # Runtime parameters - String gatk_docker - Int? mem_gb - Int? disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? 
preemptible_attempts - - Int machine_mem_mb = select_first([mem_gb, 2]) * 1000 - Int command_mem_mb = machine_mem_mb - 500 - - # Determine output filename - String filename = select_first([intervals, "wgs"]) - String base_filename = basename(filename, ".interval_list") - - command <<< - set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - gatk --java-options "-Xmx${command_mem_mb}m" PreprocessIntervals \ - ${"-L " + intervals} \ - ${"-XL " + blacklist_intervals} \ - --sequence-dictionary ${ref_fasta_dict} \ - --reference ${ref_fasta} \ - --padding ${default="250" padding} \ - --bin-length ${default="1000" bin_length} \ - --interval-merging-rule OVERLAPPING_ONLY \ - --output ${base_filename}.preprocessed.interval_list - >>> - - runtime { - docker: "${gatk_docker}" - memory: machine_mem_mb + " MB" - disks: "local-disk " + select_first([disk_space_gb, 40]) + if use_ssd then " SSD" else " HDD" - cpu: select_first([cpu, 1]) - preemptible: select_first([preemptible_attempts, 5]) - } - - output { - File preprocessed_intervals = "${base_filename}.preprocessed.interval_list" - } -} - - -task CollectCounts { - File intervals - File bam - File bam_idx - File ref_fasta - File ref_fasta_fai - File ref_fasta_dict - String? format - File? gatk4_jar_override - - # Runtime parameters - String gatk_docker - Int? mem_gb - Int? disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? preemptible_attempts - - Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 - Int command_mem_mb = machine_mem_mb - 1000 - - # Sample name is derived from the bam filename - String base_filename = basename(bam, ".bam") - String counts_filename = if !defined(format) then "${base_filename}.counts.hdf5" else "${base_filename}.counts.tsv" - - command <<< - set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - gatk --java-options "-Xmx${command_mem_mb}m" CollectReadCounts \ - -L ${intervals} \ - --input ${bam} \ - --reference ${ref_fasta} \ - --format ${default="HDF5" format} \ - --interval-merging-rule OVERLAPPING_ONLY \ - --output ${counts_filename} - >>> - - runtime { - docker: "${gatk_docker}" - memory: machine_mem_mb + " MB" - disks: "local-disk " + select_first([disk_space_gb, ceil(size(bam, "GB")) + 50]) + if use_ssd then " SSD" else " HDD" - cpu: select_first([cpu, 1]) - preemptible: select_first([preemptible_attempts, 5]) - } - - output { - String entity_id = base_filename - File counts = counts_filename - } -} - - -task CollectAllelicCounts { - File common_sites - File bam - File bam_idx - File ref_fasta - File ref_fasta_fai - File ref_fasta_dict - Int? minimum_base_quality - File? gatk4_jar_override - - # Runtime parameters - String gatk_docker - Int? mem_gb - Int? disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? 
preemptible_attempts - - Int machine_mem_mb = select_first([mem_gb, 13]) * 1000 - Int command_mem_mb = machine_mem_mb - 1000 - - # Sample name is derived from the bam filename - String base_filename = basename(bam, ".bam") - - String allelic_counts_filename = "${base_filename}.allelicCounts.tsv" - - command <<< - set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - gatk --java-options "-Xmx${command_mem_mb}m" CollectAllelicCounts \ - -L ${common_sites} \ - --input ${bam} \ - --reference ${ref_fasta} \ - --minimum-base-quality ${default="20" minimum_base_quality} \ - --output ${allelic_counts_filename} - >>> - - runtime { - docker: "${gatk_docker}" - memory: machine_mem_mb + " MB" - disks: "local-disk " + select_first([disk_space_gb, ceil(size(bam, "GB")) + 50]) + if use_ssd then " SSD" else " HDD" - cpu: select_first([cpu, 1]) - preemptible: select_first([preemptible_attempts, 5]) - } - - output { - String entity_id = base_filename - File allelic_counts = allelic_counts_filename + File oncotated_called_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_file, "null"]) + File oncotated_called_gene_list_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_gene_list_file, "null"]) + File funcotated_called_file_tumor = select_first([CNVFuncotateSegmentsWorkflow.funcotated_seg_simple_tsv, "null"]) + File funcotated_called_gene_list_file_tumor = select_first([CNVFuncotateSegmentsWorkflow.funcotated_gene_list_tsv, "null"]) } } - task DenoiseReadCounts { - String entity_id - File read_counts - File? read_count_pon - Int? number_of_eigensamples #use all eigensamples in panel by default - File? gatk4_jar_override - - # Runtime parameters - String gatk_docker - Int? mem_gb - Int? disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? preemptible_attempts + input { + String entity_id + File read_counts + File read_count_pon + Int? number_of_eigensamples #use all eigensamples in panel by default + File? gatk4_jar_override + + # Runtime parameters + String gatk_docker + Int? mem_gb + Int? disk_space_gb + Boolean use_ssd = false + Int? cpu + Int? 
preemptible_attempts + } Int machine_mem_mb = select_first([mem_gb, 13]) * 1000 Int command_mem_mb = machine_mem_mb - 1000 command <<< set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - gatk --java-options "-Xmx${command_mem_mb}m" DenoiseReadCounts \ - --input ${read_counts} \ - ${"--count-panel-of-normals " + read_count_pon} \ - ${"--number-of-eigensamples " + number_of_eigensamples} \ - --standardized-copy-ratios ${entity_id}.standardizedCR.tsv \ - --denoised-copy-ratios ${entity_id}.denoisedCR.tsv + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} + + gatk --java-options "-Xmx~{command_mem_mb}m" DenoiseReadCounts \ + --input ~{read_counts} \ + --count-panel-of-normals ~{read_count_pon} \ + ~{"--number-of-eigensamples " + number_of_eigensamples} \ + --standardized-copy-ratios ~{entity_id}.standardizedCR.tsv \ + --denoised-copy-ratios ~{entity_id}.denoisedCR.tsv >>> runtime { - docker: "${gatk_docker}" + docker: "~{gatk_docker}" memory: machine_mem_mb + " MB" disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" cpu: select_first([cpu, 1]) @@ -672,46 +623,48 @@ task DenoiseReadCounts { } output { - File standardized_copy_ratios = "${entity_id}.standardizedCR.tsv" - File denoised_copy_ratios = "${entity_id}.denoisedCR.tsv" + File standardized_copy_ratios = "~{entity_id}.standardizedCR.tsv" + File denoised_copy_ratios = "~{entity_id}.denoisedCR.tsv" } } task ModelSegments { - String entity_id - File denoised_copy_ratios - File allelic_counts - File? normal_allelic_counts - Int? max_num_segments_per_chromosome - Int? min_total_allele_count - Int? min_total_allele_count_normal - Float? genotyping_homozygous_log_ratio_threshold - Float? genotyping_base_error_rate - Float? kernel_variance_copy_ratio - Float? kernel_variance_allele_fraction - Float? kernel_scaling_allele_fraction - Int? kernel_approximation_dimension - Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256] - Float? num_changepoints_penalty_factor - Float? minor_allele_fraction_prior_alpha - Int? num_samples_copy_ratio - Int? num_burn_in_copy_ratio - Int? num_samples_allele_fraction - Int? num_burn_in_allele_fraction - Float? smoothing_threshold_copy_ratio - Float? smoothing_threshold_allele_fraction - Int? max_num_smoothing_iterations - Int? num_smoothing_iterations_per_fit - String? output_dir - File? gatk4_jar_override - - # Runtime parameters - String gatk_docker - Int? mem_gb - Int? disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? preemptible_attempts + input { + String entity_id + File denoised_copy_ratios + File allelic_counts + File? normal_allelic_counts + Int? max_num_segments_per_chromosome + Int? min_total_allele_count + Int? min_total_allele_count_normal + Float? genotyping_homozygous_log_ratio_threshold + Float? genotyping_base_error_rate + Float? kernel_variance_copy_ratio + Float? kernel_variance_allele_fraction + Float? kernel_scaling_allele_fraction + Int? kernel_approximation_dimension + Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256] + Float? num_changepoints_penalty_factor + Float? minor_allele_fraction_prior_alpha + Int? num_samples_copy_ratio + Int? num_burn_in_copy_ratio + Int? num_samples_allele_fraction + Int? num_burn_in_allele_fraction + Float? smoothing_threshold_copy_ratio + Float? smoothing_threshold_allele_fraction + Int? max_num_smoothing_iterations + Int? num_smoothing_iterations_per_fit + String? output_dir + File? gatk4_jar_override + + # Runtime parameters + String gatk_docker + Int? mem_gb + Int? 
disk_space_gb + Boolean use_ssd = false + Int? cpu + Int? preemptible_attempts + } Int machine_mem_mb = select_first([mem_gb, 13]) * 1000 # ModelSegments seems to need at least 3GB of overhead to run @@ -727,42 +680,42 @@ task ModelSegments { command <<< set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - gatk --java-options "-Xmx${command_mem_mb}m" ModelSegments \ - --denoised-copy-ratios ${denoised_copy_ratios} \ - --allelic-counts ${allelic_counts} \ - ${"--normal-allelic-counts " + normal_allelic_counts} \ - --minimum-total-allele-count-case ${min_total_allele_count_} \ - --minimum-total-allele-count-normal ${default="30" min_total_allele_count_normal} \ - --genotyping-homozygous-log-ratio-threshold ${default="-10.0" genotyping_homozygous_log_ratio_threshold} \ - --genotyping-base-error-rate ${default="0.05" genotyping_base_error_rate} \ - --maximum-number-of-segments-per-chromosome ${default="1000" max_num_segments_per_chromosome} \ - --kernel-variance-copy-ratio ${default="0.0" kernel_variance_copy_ratio} \ - --kernel-variance-allele-fraction ${default="0.025" kernel_variance_allele_fraction} \ - --kernel-scaling-allele-fraction ${default="1.0" kernel_scaling_allele_fraction} \ - --kernel-approximation-dimension ${default="100" kernel_approximation_dimension} \ - --window-size ${sep=" --window-size " window_sizes} \ - --number-of-changepoints-penalty-factor ${default="1.0" num_changepoints_penalty_factor} \ - --minor-allele-fraction-prior-alpha ${default="25.0" minor_allele_fraction_prior_alpha} \ - --number-of-samples-copy-ratio ${default="100" num_samples_copy_ratio} \ - --number-of-burn-in-samples-copy-ratio ${default="50" num_burn_in_copy_ratio} \ - --number-of-samples-allele-fraction ${default="100" num_samples_allele_fraction} \ - --number-of-burn-in-samples-allele-fraction ${default="50" num_burn_in_allele_fraction} \ - --smoothing-credible-interval-threshold-copy-ratio ${default="2.0" smoothing_threshold_copy_ratio} \ - --smoothing-credible-interval-threshold-allele-fraction ${default="2.0" smoothing_threshold_allele_fraction} \ - --maximum-number-of-smoothing-iterations ${default="10" max_num_smoothing_iterations} \ - --number-of-smoothing-iterations-per-fit ${default="0" num_smoothing_iterations_per_fit} \ - --output ${output_dir_} \ - --output-prefix ${entity_id} + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} + + gatk --java-options "-Xmx~{command_mem_mb}m" ModelSegments \ + --denoised-copy-ratios ~{denoised_copy_ratios} \ + --allelic-counts ~{allelic_counts} \ + ~{"--normal-allelic-counts " + normal_allelic_counts} \ + --minimum-total-allele-count-case ~{min_total_allele_count_} \ + --minimum-total-allele-count-normal ~{default="30" min_total_allele_count_normal} \ + --genotyping-homozygous-log-ratio-threshold ~{default="-10.0" genotyping_homozygous_log_ratio_threshold} \ + --genotyping-base-error-rate ~{default="0.05" genotyping_base_error_rate} \ + --maximum-number-of-segments-per-chromosome ~{default="1000" max_num_segments_per_chromosome} \ + --kernel-variance-copy-ratio ~{default="0.0" kernel_variance_copy_ratio} \ + --kernel-variance-allele-fraction ~{default="0.025" kernel_variance_allele_fraction} \ + --kernel-scaling-allele-fraction ~{default="1.0" kernel_scaling_allele_fraction} \ + --kernel-approximation-dimension ~{default="100" kernel_approximation_dimension} \ + --window-size ~{sep=" --window-size " window_sizes} \ + --number-of-changepoints-penalty-factor ~{default="1.0" num_changepoints_penalty_factor} 
\ + --minor-allele-fraction-prior-alpha ~{default="25.0" minor_allele_fraction_prior_alpha} \ + --number-of-samples-copy-ratio ~{default="100" num_samples_copy_ratio} \ + --number-of-burn-in-samples-copy-ratio ~{default="50" num_burn_in_copy_ratio} \ + --number-of-samples-allele-fraction ~{default="100" num_samples_allele_fraction} \ + --number-of-burn-in-samples-allele-fraction ~{default="50" num_burn_in_allele_fraction} \ + --smoothing-credible-interval-threshold-copy-ratio ~{default="2.0" smoothing_threshold_copy_ratio} \ + --smoothing-credible-interval-threshold-allele-fraction ~{default="2.0" smoothing_threshold_allele_fraction} \ + --maximum-number-of-smoothing-iterations ~{default="10" max_num_smoothing_iterations} \ + --number-of-smoothing-iterations-per-fit ~{default="0" num_smoothing_iterations_per_fit} \ + --output ~{output_dir_} \ + --output-prefix ~{entity_id} # We need to create the file even if the above command doesn't so we have something to delocalize # If no file is created by the above task then it will copy out an empty file - touch ${output_dir_}/${entity_id}.hets.normal.tsv + touch ~{output_dir_}/~{entity_id}.hets.normal.tsv >>> runtime { - docker: "${gatk_docker}" + docker: "~{gatk_docker}" memory: machine_mem_mb + " MB" disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" cpu: select_first([cpu, 1]) @@ -770,59 +723,57 @@ task ModelSegments { } output { - File het_allelic_counts = "${output_dir_}/${entity_id}.hets.tsv" - File normal_het_allelic_counts = "${output_dir_}/${entity_id}.hets.normal.tsv" - File copy_ratio_only_segments = "${output_dir_}/${entity_id}.cr.seg" - File copy_ratio_legacy_segments = "${output_dir_}/${entity_id}.cr.igv.seg" - File allele_fraction_legacy_segments = "${output_dir_}/${entity_id}.af.igv.seg" - File modeled_segments_begin = "${output_dir_}/${entity_id}.modelBegin.seg" - File copy_ratio_parameters_begin = "${output_dir_}/${entity_id}.modelBegin.cr.param" - File allele_fraction_parameters_begin = "${output_dir_}/${entity_id}.modelBegin.af.param" - File modeled_segments = "${output_dir_}/${entity_id}.modelFinal.seg" - File copy_ratio_parameters = "${output_dir_}/${entity_id}.modelFinal.cr.param" - File allele_fraction_parameters = "${output_dir_}/${entity_id}.modelFinal.af.param" + File het_allelic_counts = "~{output_dir_}/~{entity_id}.hets.tsv" + File normal_het_allelic_counts = "~{output_dir_}/~{entity_id}.hets.normal.tsv" + File copy_ratio_only_segments = "~{output_dir_}/~{entity_id}.cr.seg" + File copy_ratio_legacy_segments = "~{output_dir_}/~{entity_id}.cr.igv.seg" + File allele_fraction_legacy_segments = "~{output_dir_}/~{entity_id}.af.igv.seg" + File modeled_segments_begin = "~{output_dir_}/~{entity_id}.modelBegin.seg" + File copy_ratio_parameters_begin = "~{output_dir_}/~{entity_id}.modelBegin.cr.param" + File allele_fraction_parameters_begin = "~{output_dir_}/~{entity_id}.modelBegin.af.param" + File modeled_segments = "~{output_dir_}/~{entity_id}.modelFinal.seg" + File copy_ratio_parameters = "~{output_dir_}/~{entity_id}.modelFinal.cr.param" + File allele_fraction_parameters = "~{output_dir_}/~{entity_id}.modelFinal.af.param" } } task CallCopyRatioSegments { - String entity_id - File copy_ratio_segments - Float? neutral_segment_copy_ratio_lower_bound - Float? neutral_segment_copy_ratio_upper_bound - Float? outlier_neutral_segment_copy_ratio_z_score_threshold - Float? calling_copy_ratio_z_score_threshold - File? gatk4_jar_override - - # Runtime parameters - String gatk_docker - Int? mem_gb - Int? 
disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? preemptible_attempts + input { + String entity_id + File copy_ratio_segments + Float? neutral_segment_copy_ratio_lower_bound + Float? neutral_segment_copy_ratio_upper_bound + Float? outlier_neutral_segment_copy_ratio_z_score_threshold + Float? calling_copy_ratio_z_score_threshold + File? gatk4_jar_override + + # Runtime parameters + String gatk_docker + Int? mem_gb + Int? disk_space_gb + Boolean use_ssd = false + Int? cpu + Int? preemptible_attempts + } Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 Int command_mem_mb = machine_mem_mb - 1000 command <<< set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - gatk --java-options "-Xmx${command_mem_mb}m" CallCopyRatioSegments \ - --input ${copy_ratio_segments} \ - --neutral-segment-copy-ratio-lower-bound ${default="0.9" neutral_segment_copy_ratio_lower_bound} \ - --neutral-segment-copy-ratio-upper-bound ${default="1.1" neutral_segment_copy_ratio_upper_bound} \ - --outlier-neutral-segment-copy-ratio-z-score-threshold ${default="2.0" outlier_neutral_segment_copy_ratio_z_score_threshold} \ - --calling-copy-ratio-z-score-threshold ${default="2.0" calling_copy_ratio_z_score_threshold} \ - --output ${entity_id}.called.seg - - grep -v ^@ "${entity_id}.called.seg" | grep -v ^CONTIG | wc -l > total_segs.txt - grep -v ^@ "${entity_id}.called.seg" | grep -v ^CONTIG | awk -F "\t" 'BEGIN{sum=0}{if($6=="+"){sum+=1}}END{print sum}' > amp_segs.txt - grep -v ^@ "${entity_id}.called.seg" | grep -v ^CONTIG | awk -F "\t" 'BEGIN{sum=0}{if($6=="-"){sum+=1}}END{print sum}' > del_segs.txt + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} + + gatk --java-options "-Xmx~{command_mem_mb}m" CallCopyRatioSegments \ + --input ~{copy_ratio_segments} \ + --neutral-segment-copy-ratio-lower-bound ~{default="0.9" neutral_segment_copy_ratio_lower_bound} \ + --neutral-segment-copy-ratio-upper-bound ~{default="1.1" neutral_segment_copy_ratio_upper_bound} \ + --outlier-neutral-segment-copy-ratio-z-score-threshold ~{default="2.0" outlier_neutral_segment_copy_ratio_z_score_threshold} \ + --calling-copy-ratio-z-score-threshold ~{default="2.0" calling_copy_ratio_z_score_threshold} \ + --output ~{entity_id}.called.seg >>> runtime { - docker: "${gatk_docker}" + docker: "~{gatk_docker}" memory: machine_mem_mb + " MB" disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" cpu: select_first([cpu, 1]) @@ -830,30 +781,31 @@ task CallCopyRatioSegments { } output { - File called_copy_ratio_segments = "${entity_id}.called.seg" - File called_copy_ratio_legacy_segments = "${entity_id}.called.igv.seg" - Int total_segments = read_int("total_segs.txt") - Int amplification = read_int("amp_segs.txt") - Int deletion = read_int("del_segs.txt") + File called_copy_ratio_segments = "~{entity_id}.called.seg" + File called_copy_ratio_legacy_segments = "~{entity_id}.called.igv.seg" } } task PlotDenoisedCopyRatios { - String entity_id - File standardized_copy_ratios - File denoised_copy_ratios - File ref_fasta_dict - Int? minimum_contig_length - String? output_dir - File? gatk4_jar_override - - # Runtime parameters - String gatk_docker - Int? mem_gb - Int? disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? preemptible_attempts + input { + String entity_id + File standardized_copy_ratios + File denoised_copy_ratios + File ref_fasta_dict + Int? minimum_contig_length + String? maximum_copy_ratio + Float? point_size_copy_ratio + String? output_dir + File? 
gatk4_jar_override + + # Runtime parameters + String gatk_docker + Int? mem_gb + Int? disk_space_gb + Boolean use_ssd = false + Int? cpu + Int? preemptible_attempts + } Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 Int command_mem_mb = machine_mem_mb - 1000 @@ -863,19 +815,21 @@ task PlotDenoisedCopyRatios { command <<< set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - gatk --java-options "-Xmx${command_mem_mb}m" PlotDenoisedCopyRatios \ - --standardized-copy-ratios ${standardized_copy_ratios} \ - --denoised-copy-ratios ${denoised_copy_ratios} \ - --sequence-dictionary ${ref_fasta_dict} \ - --minimum-contig-length ${default="1000000" minimum_contig_length} \ - --output ${output_dir_} \ - --output-prefix ${entity_id} + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} + + gatk --java-options "-Xmx~{command_mem_mb}m" PlotDenoisedCopyRatios \ + --standardized-copy-ratios ~{standardized_copy_ratios} \ + --denoised-copy-ratios ~{denoised_copy_ratios} \ + --sequence-dictionary ~{ref_fasta_dict} \ + --minimum-contig-length ~{default="1000000" minimum_contig_length} \ + --maximum-copy-ratio ~{default="4.0" maximum_copy_ratio} \ + --point-size-copy-ratio ~{default="0.2" point_size_copy_ratio} \ + --output ~{output_dir_} \ + --output-prefix ~{entity_id} >>> runtime { - docker: "${gatk_docker}" + docker: "~{gatk_docker}" memory: machine_mem_mb + " MB" disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" cpu: select_first([cpu, 1]) @@ -883,36 +837,40 @@ task PlotDenoisedCopyRatios { } output { - File denoised_copy_ratios_plot = "${output_dir_}/${entity_id}.denoised.png" - File denoised_copy_ratios_lim_4_plot = "${output_dir_}/${entity_id}.denoisedLimit4.png" - File standardized_MAD = "${output_dir_}/${entity_id}.standardizedMAD.txt" + File denoised_copy_ratios_plot = "~{output_dir_}/~{entity_id}.denoised.png" + File standardized_MAD = "~{output_dir_}/~{entity_id}.standardizedMAD.txt" Float standardized_MAD_value = read_float(standardized_MAD) - File denoised_MAD = "${output_dir_}/${entity_id}.denoisedMAD.txt" + File denoised_MAD = "~{output_dir_}/~{entity_id}.denoisedMAD.txt" Float denoised_MAD_value = read_float(denoised_MAD) - File delta_MAD = "${output_dir_}/${entity_id}.deltaMAD.txt" + File delta_MAD = "~{output_dir_}/~{entity_id}.deltaMAD.txt" Float delta_MAD_value = read_float(delta_MAD) - File scaled_delta_MAD = "${output_dir_}/${entity_id}.scaledDeltaMAD.txt" + File scaled_delta_MAD = "~{output_dir_}/~{entity_id}.scaledDeltaMAD.txt" Float scaled_delta_MAD_value = read_float(scaled_delta_MAD) } } task PlotModeledSegments { - String entity_id - File denoised_copy_ratios - File het_allelic_counts - File modeled_segments - File ref_fasta_dict - Int? minimum_contig_length - String? output_dir - File? gatk4_jar_override - - # Runtime parameters - String gatk_docker - Int? mem_gb - Int? disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? preemptible_attempts + input { + String entity_id + File denoised_copy_ratios + File het_allelic_counts + File modeled_segments + File ref_fasta_dict + Int? minimum_contig_length + String? maximum_copy_ratio + Float? point_size_copy_ratio + Float? point_size_allele_fraction + String? output_dir + File? gatk4_jar_override + + # Runtime parameters + String gatk_docker + Int? mem_gb + Int? disk_space_gb + Boolean use_ssd = false + Int? cpu + Int? 
preemptible_attempts + } Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 Int command_mem_mb = machine_mem_mb - 1000 @@ -922,20 +880,23 @@ task PlotModeledSegments { command <<< set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - gatk --java-options "-Xmx${command_mem_mb}m" PlotModeledSegments \ - --denoised-copy-ratios ${denoised_copy_ratios} \ - --allelic-counts ${het_allelic_counts} \ - --segments ${modeled_segments} \ - --sequence-dictionary ${ref_fasta_dict} \ - --minimum-contig-length ${default="1000000" minimum_contig_length} \ - --output ${output_dir_} \ - --output-prefix ${entity_id} + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} + + gatk --java-options "-Xmx~{command_mem_mb}m" PlotModeledSegments \ + --denoised-copy-ratios ~{denoised_copy_ratios} \ + --allelic-counts ~{het_allelic_counts} \ + --segments ~{modeled_segments} \ + --sequence-dictionary ~{ref_fasta_dict} \ + --minimum-contig-length ~{default="1000000" minimum_contig_length} \ + --maximum-copy-ratio ~{default="4.0" maximum_copy_ratio} \ + --point-size-copy-ratio ~{default="0.2" point_size_copy_ratio} \ + --point-size-allele-fraction ~{default="0.4" point_size_allele_fraction} \ + --output ~{output_dir_} \ + --output-prefix ~{entity_id} >>> runtime { - docker: "${gatk_docker}" + docker: "~{gatk_docker}" memory: machine_mem_mb + " MB" disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" cpu: select_first([cpu, 1]) @@ -943,58 +904,6 @@ task PlotModeledSegments { } output { - File modeled_segments_plot = "${output_dir_}/${entity_id}.modeled.png" - } -} - - -task OncotateSegments { - File called_file - String? additional_args - - # Runtime parameters - String? oncotator_docker - Int? mem_gb - Int? disk_space_gb - Int? boot_disk_space_gb - Boolean use_ssd = false - Int? cpu - Int? preemptible_attempts - - Int machine_mem_mb = select_first([mem_gb, 3]) * 1000 - - String basename_called_file = basename(called_file) - - command <<< - set -e - - # Get rid of the sequence dictionary at the top of the file - egrep -v "^\@" ${called_file} > ${basename_called_file}.seq_dict_removed.seg - - echo "Starting the simple_tsv..." - - /root/oncotator_venv/bin/oncotator --db-dir /root/onco_dbdir/ -c /root/tx_exact_uniprot_matches.AKT1_CRLF2_FGFR1.txt \ - -u file:///root/onco_cache/ -r -v ${basename_called_file}.seq_dict_removed.seg ${basename_called_file}.per_segment.oncotated.txt hg19 \ - -i SEG_FILE -o SIMPLE_TSV ${default="" additional_args} - - echo "Starting the gene list..." 
- - /root/oncotator_venv/bin/oncotator --db-dir /root/onco_dbdir/ -c /root/tx_exact_uniprot_matches.AKT1_CRLF2_FGFR1.txt \ - -u file:///root/onco_cache/ -r -v ${basename_called_file}.seq_dict_removed.seg ${basename_called_file}.gene_list.txt hg19 \ - -i SEG_FILE -o GENE_LIST ${default="" additional_args} - >>> - - runtime { - docker: select_first([oncotator_docker, "broadinstitute/oncotator:1.9.5.0-eval-gatk-protected"]) - memory: machine_mem_mb + " MB" - disks: "local-disk " + select_first([disk_space_gb, 50]) + if use_ssd then " SSD" else " HDD" - cpu: select_first([cpu, 1]) - preemptible: select_first([preemptible_attempts, 2]) - bootDiskSizeGb: select_first([boot_disk_space_gb, 20]) - } - - output { - File oncotated_called_file = "${basename_called_file}.per_segment.oncotated.txt" - File oncotated_called_gene_list_file = "${basename_called_file}.gene_list.txt" + File modeled_segments_plot = "~{output_dir_}/~{entity_id}.modeled.png" } } diff --git a/GATK_CNV_Mutect2/GATK4_CNV/README.md b/GATK_CNV_Mutect2/GATK4_CNV/README.md index 62ca277..0ff0d75 100644 --- a/GATK_CNV_Mutect2/GATK4_CNV/README.md +++ b/GATK_CNV_Mutect2/GATK4_CNV/README.md @@ -5,35 +5,4 @@ This WDL is originally from [CNV_Somatic_Panel_Workflow](https://github.com/gatk is modified to output the number of segments, amplifications, and deletions in a Terra workspace table. -## Notes: -* The intervals argument is required for both WGS and WES workflows and accepts formats compatible with the - GATK -L argument (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists). - These intervals will be padded on both sides by the amount specified by padding (default 250) - and split into bins of length specified by bin_length (default 1000; specify 0 to skip binning, - e.g., for WES). For WGS, the intervals should simply cover the autosomal chromosomes (sex chromosomes may be - included, but care should be taken to 1) avoid creating panels of mixed sex, and 2) denoise case samples only - with panels containing only individuals of the same sex as the case samples). - -* Intervals can be blacklisted from coverage collection and all downstream steps by using the blacklist_intervals - argument, which accepts formats compatible with the GATK -XL argument - (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists). - This may be useful for excluding centromeric regions, etc. from analysis. Alternatively, these regions may - be manually filtered from the final callset. - -* A reasonable blacklist for excluded intervals (-XL) can be found at: - hg19: gs://gatk-best-practices/somatic-b37/CNV_and_centromere_blacklist.hg19.list - hg38: gs://gatk-best-practices/somatic-hg38/CNV_and_centromere_blacklist.hg38liftover.list (untested) - -* The sites file (common_sites) should be a Picard or GATK-style interval list. This is a list of sites - of known variation at which allelic counts will be collected for use in modeling minor-allele fractions. - -## Example invocation -```angular2html -java -jar cromwell.jar run GATK_CNV.wdl -i GATK4_CNV.local-inputs.json -``` - -**GATK4_CNV.local-inputs.json only provide a general structure of the required input to `GATK4_CNV.wdl`. It is the user’s responsibility to correctly set the reference and resource input variables.** - -When execute this workflow on Terra, please use the `GATK4_CNV.terra-inputs.json`. 
- - +**UNDER CONSTRUCTION** \ No newline at end of file diff --git a/GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.inputs.json b/GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.inputs.json index 6d4d4e1..998984e 100644 --- a/GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.inputs.json +++ b/GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.inputs.json @@ -1 +1,70 @@ -{"Mutect2.CalculateContamination.mem":"${}","Mutect2.CallableLoci.cpu":"${}","Mutect2.CallableLoci.mem":"${}","Mutect2.CallableLoci.normal_coverage":"${8}","Mutect2.CallableLoci.tumor_coverage":"${14}","Mutect2.CollectSequencingArtifactMetrics.cpu":"${}","Mutect2.CollectSequencingArtifactMetrics.mem":"${}","Mutect2.Filter.cpu":"${}","Mutect2.Filter.mem":"${}","Mutect2.FilterByOrientationBias.cpu":"${}","Mutect2.FilterByOrientationBias.mem":"${}","Mutect2.Funcotate.cpu":"${}","Mutect2.Funcotate.disk_space_gb":"${}","Mutect2.Funcotate.mem":"${}","Mutect2.Funcotate.preemptible_attempts":"${}","Mutect2.HaplotypeCaller.cpu":"${}","Mutect2.HaplotypeCaller.mem":"${}","Mutect2.LegoPlot.mem":"${}","Mutect2.M2.cpu":"${}","Mutect2.M2.mem":"${15}","Mutect2.MergeBamOuts.cpu":"${}","Mutect2.MergeBamOuts.mem":"${}","Mutect2.MergeBamOuts.preemptible_attempts":"${3}","Mutect2.MergeGermlineBamOuts.cpu":"${}","Mutect2.MergeGermlineBamOuts.mem":"${}","Mutect2.MergeGermlineBamOuts.preemptible_attempts":"${3}","Mutect2.MergeGermlineVCFs.cpu":"${}","Mutect2.MergeGermlineVCFs.mem":"${}","Mutect2.MergeVCFs.cpu":"${}","Mutect2.MergeVCFs.mem":"${}","Mutect2.MutationalBurden.mem":"${}","Mutect2.SplitIntervals.cpu":"${}","Mutect2.SplitIntervals.mem":"${}","Mutect2.annotation_defaults":"${}","Mutect2.annotation_overrides":"${}","Mutect2.artifact_modes":"${[\"G/T\", \"C/T\"]}","Mutect2.basic_bash_docker":"${}","Mutect2.compress_vcfs":"${}","Mutect2.context_script_override":"gs://gptag/somatic/script/kmer-freq.py","Mutect2.data_sources_tar_gz":"${}","Mutect2.default_config_file":"gs://gatk-best-practices/somatic-b37/onco_config.txt","Mutect2.emergency_extra_disk":"${100}","Mutect2.filter_oncotator_maf":"True","Mutect2.gatk3_override":"${}","Mutect2.gatk_docker":"us.gcr.io/broad-gatk/gatk:4.0.4.0","Mutect2.gatk_override":"${}","Mutect2.germline_contamination":"${}","Mutect2.germline_max_alt_alleles":"${3}","Mutect2.gnomad":"${workspace.gnomad}","Mutect2.gnomad_index":"${workspace.gnomad_index}","Mutect2.haplotypecaller_extra_args":"${}","Mutect2.intervals":"${workspace.intervals}","Mutect2.is_calling_whole_genome":"False","Mutect2.large_input_to_output_multiplier":"${}","Mutect2.lego_plot_script_override":"gs://gptag/somatic/script/lego-plot.py","Mutect2.lego_render_script_override":"gs://gptag/somatic/script/lego-report.py","Mutect2.m2_extra_args":"${}","Mutect2.m2_extra_filtering_args":"${}","Mutect2.make_bamout":"True","Mutect2.mb_script_override":"gs://gptag/somatic/script/mutburden.py","Mutect2.normal_bai":"${this.control_sample.crai_or_bai_path}","Mutect2.normal_bam":"${this.control_sample.cram_or_bam_path}","Mutect2.onco_ds_local_db_dir":"${}","Mutect2.onco_ds_tar_gz":"${workspace.onco_ds_tar_gz}","Mutect2.oncotate_m2.cpu":"${}","Mutect2.oncotate_m2.mem":"${}","Mutect2.oncotate_m2.oncotator_exe":"${}","Mutect2.oncotator_docker":"broadinstitute/oncotator:1.9.8.0","Mutect2.oncotator_extra_args":"${}","Mutect2.pon":"${workspace.m2_pon}","Mutect2.pon_index":"${workspace.m2_pon_index}","Mutect2.preemptible_attempts":"${3}","Mutect2.ref_dict":"${workspace.ref_dict}","Mutect2.ref_fai":"${workspace.ref_fasta_index}","Mutect2.ref_fasta":"${workspace.ref_fasta}","Mutect2.refe
rence_version":"${}","Mutect2.run_funcotator":"${}","Mutect2.run_oncotator":"True","Mutect2.run_orientation_bias_filter":"True","Mutect2.scatter_count":"${50}","Mutect2.sequence_source":"${}","Mutect2.sequencing_center":"${}","Mutect2.small_input_to_output_multiplier":"${}","Mutect2.split_intervals_extra_args":"${}","Mutect2.tag_docker":"us.gcr.io/tag-team-160914/tag-tools:0.0.4","Mutect2.transcript_selection_list":"${}","Mutect2.transcript_selection_mode":"${}","Mutect2.tumor_bai":"${this.case_sample.crai_or_bai_path}","Mutect2.tumor_bam":"${this.case_sample.cram_or_bam_path}","Mutect2.tumor_sequencing_artifact_metrics":"${}","Mutect2.variants_for_contamination":"${workspace.variants_for_contamination}","Mutect2.variants_for_contamination_index":"${workspace.variants_for_contamination_index}"} \ No newline at end of file +{ + "Mutect2.funco_annotation_overrides": "Array[String]? (optional)", + "Mutect2.funco_reference_version": "String? (optional)", + "Mutect2.realignment_index_bundle": "File? (optional)", + "Mutect2.m2_extra_filtering_args": "String? (optional)", + "Mutect2.run_orientation_bias_mixture_model_filter": "Boolean? (optional)", + "Mutect2.funco_annotation_defaults": "Array[String]? (optional)", + "Mutect2.split_intervals_extra_args": "String? (optional)", + "Mutect2.realignment_extra_args": "String? (optional)", + "Mutect2.funco_default_output_format": "String (optional, default = \"MAF\")", + "Mutect2.small_task_cpu": "Int (optional, default = 2)", + "Mutect2.sequencing_center": "String? (optional)", + "Mutect2.NormalCramToBam.mem": "Int? (optional)", + "Mutect2.ref_fai": "File", + "Mutect2.variants_for_contamination": "File? (optional)", + "Mutect2.pon": "File? (optional)", + "Mutect2.CalculateContamination.intervals": "String? (optional)", + "Mutect2.ref_fasta": "File", + "Mutect2.large_input_to_output_multiplier": "Float (optional, default = 2.25)", + "Mutect2.Funcotate.interval_list": "File? (optional)", + "Mutect2.gatk_override": "File? (optional)", + "Mutect2.small_task_disk": "Int (optional, default = 100)", + "Mutect2.filter_alignment_artifacts_mem": "Int (optional, default = 9000)", + "Mutect2.normal_reads_index": "File? (optional)", + "Mutect2.compress_vcfs": "Boolean? (optional)", + "Mutect2.intervals": "File? (optional)", + "Mutect2.getpileupsummaries_extra_args": "String? (optional)", + "Mutect2.funco_data_sources_tar_gz": "File? (optional)", + "Mutect2.funco_transcript_selection_list": "File? (optional)", + "Mutect2.funcotator_excluded_fields": "Array[String]? (optional)", + "Mutect2.gga_vcf": "File? (optional)", + "Mutect2.run_funcotator": "Boolean? (optional)", + "Mutect2.Funcotate.default_disk_space_gb": "Int (optional, default = 100)", + "Mutect2.gga_vcf_idx": "File? (optional)", + "Mutect2.normal_reads": "File? (optional)", + "Mutect2.emergency_extra_disk": "Int? (optional)", + "Mutect2.preemptible": "Int? (optional)", + "Mutect2.funco_output_format": "String? (optional)", + "Mutect2.funco_transcript_selection_mode": "String? (optional)", + "Mutect2.variants_for_contamination_idx": "File? (optional)", + "Mutect2.TumorCramToBam.mem": "Int? (optional)", + "Mutect2.sequence_source": "String? (optional)", + "Mutect2.learn_read_orientation_mem": "Int (optional, default = 8000)", + "Mutect2.boot_disk_size": "Int (optional, default = 12)", + "Mutect2.max_retries": "Int? (optional)", + "Mutect2.M2.mem": "Int? (optional)", + "Mutect2.gatk_docker": "String", + "Mutect2.gnomad": "File? (optional)", + "Mutect2.M2.cpu": "Int? 
(optional)", + "Mutect2.ref_dict": "File", + "Mutect2.pon_idx": "File? (optional)", + "Mutect2.filter_funcotations": "Boolean? (optional)", + "Mutect2.make_bamout": "Boolean? (optional)", + "Mutect2.M2.use_ssd": "Boolean (optional, default = false)", + "Mutect2.funco_filter_funcotations": "Boolean? (optional)", + "Mutect2.tumor_reads_index": "File", + "Mutect2.cram_to_bam_multiplier": "Float (optional, default = 6.0)", + "Mutect2.Funcotate.default_ram_mb": "Int (optional, default = 3000)", + "Mutect2.small_input_to_output_multiplier": "Float (optional, default = 2.0)", + "Mutect2.funco_compress": "Boolean? (optional)", + "Mutect2.funco_use_gnomad_AF": "Boolean? (optional)", + "Mutect2.m2_extra_args": "String? (optional)", + "Mutect2.small_task_mem": "Int (optional, default = 4)", + "Mutect2.gnomad_idx": "File? (optional)", + "Mutect2.funcotator_extra_args": "String? (optional)", + "Mutect2.scatter_count": "Int", + "Mutect2.tumor_reads": "File", + "Mutect2.basic_bash_docker": "String (optional, default = \"ubuntu:16.04\")" +} + diff --git a/GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.wdl b/GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.wdl index 3b112c7..92ef1d2 100644 --- a/GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.wdl +++ b/GATK_CNV_Mutect2/mutect2-gatk4/mutect2-gatk4.wdl @@ -1,3 +1,5 @@ +version 1.0 + ## Copyright Broad Institute, 2017 ## ## This WDL workflow runs GATK4 Mutect 2 on a single tumor-normal pair or on a single tumor sample, @@ -9,9 +11,9 @@ ## Description of inputs: ## ## ** Runtime ** -## gatk_docker, oncotator_docker: docker images to use for GATK 4 Mutect2 and for Oncotator -## tag_docker: docker images for TAG's add-on analyses -## preemptible_attempts: how many preemptions to tolerate before switching to a non-preemptible machine (on Google) +## gatk_docker: docker image to use for GATK 4 Mutect2 +## preemptible: how many preemptions to tolerate before switching to a non-preemptible machine (on Google) +## max_retries: how many times to retry failed tasks -- very important on the cloud when there are transient errors ## gatk_override: (optional) local file or Google bucket path to a GATK 4 java jar file to be used instead of the GATK 4 jar ## in the docker image. This must be supplied when running in an environment that does not support docker ## (e.g. SGE cluster on a Broad on-prem VM) @@ -19,14 +21,9 @@ ## ** Workflow options ** ## intervals: genomic intervals (will be used for scatter) ## scatter_count: number of parallel jobs to generate when scattering over intervals -## artifact_modes: types of artifacts to consider in the orientation bias filter (optional) ## m2_extra_args, m2_extra_filtering_args: additional arguments for Mutect2 calling and filtering (optional) ## split_intervals_extra_args: additional arguments for splitting intervals before scattering (optional) -## run_orientation_bias_filter: if true, run the orientation bias filter post-processing step (optional, false by default) -## run_oncotator: if true, annotate the M2 VCFs using oncotator (to produce a TCGA MAF). Important: This requires a -## docker image and should not be run in environments where docker is unavailable (e.g. SGE cluster on -## a Broad on-prem VM). Access to docker hub is also required, since the task downloads a public docker image. -## (optional, false by default) +## run_orientation_bias_mixture_model_filter: (optional) if true, filter orientation bias sites with the read orientation artifact mixture model. 
##
## ** Primary inputs **
## ref_fasta, ref_fai, ref_dict: reference genome, index, and dictionary
@@ -34,27 +31,33 @@
## normal_bam, normal_bam_index: BAM and index for the normal sample
##
## ** Primary resources ** (optional but strongly recommended)
-## pon, pon_index: optional panel of normals in VCF format containing probable technical artifacts (false positves)
-## gnomad, gnomad_index: optional database of known germline variants (see http://gnomad.broadinstitute.org/downloads)
-## variants_for_contamination, variants_for_contamination_index: VCF of common variants with allele frequencies for calculating contamination
+## pon, pon_idx: optional panel of normals (and its index) in VCF format containing probable technical artifacts (false positives)
+## gnomad, gnomad_idx: optional database of known germline variants (and its index) (see http://gnomad.broadinstitute.org/downloads)
+## variants_for_contamination, variants_for_contamination_idx: VCF of common variants (and its index) with allele frequencies for calculating contamination
##
## ** Secondary resources ** (for optional tasks)
-## onco_ds_tar_gz, default_config_file: Oncotator datasources and config file
-## sequencing_center, sequence_source: metadata for Oncotator
-## filter_oncotator_maf: Whether the MAF generated by oncotator should have the filtered variants removed. Default: true
+## realignment_index_bundle: resource for FilterAlignmentArtifacts, which runs if and only if it is specified. Generated by BwaMemIndexImageCreator.
##
-## ** TAG's modification **
-## - HaplotypeCaller: task runs HaplotypeCaller on normal bam (Note: there will be no filter after HaplotypeCaller)
-## - CallableLoci: task uses GATK3 CallableLoci to compute the number of somatically callable bases
-## - MutationalBurden: task reads MAF and computes both coding and non-coding mutational burdens (# of mutations / callable bases)
-## - QcPlot: task generates lego plots
+## Funcotator parameters (see Funcotator help for more details).
+## funco_reference_version: "hg19" for hg19 or b37. "hg38" for hg38. Default: "hg19"
+## funco_output_format: "MAF" to produce a MAF file, "VCF" to produce a VCF file. Default: "MAF"
+## funco_compress: (Only valid if funco_output_format == "VCF") If true, will compress the output of Funcotator. If false, produces an uncompressed output file. Default: false
+## funco_use_gnomad_AF: If true, will include gnomAD allele frequency annotations in output by connecting to the internet to query gnomAD (this impacts performance). If false, will not annotate with gnomAD. Default: false
+## funco_transcript_selection_mode: How to select transcripts in Funcotator. ALL, CANONICAL, or BEST_EFFECT
+## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
+## funco_data_sources_tar_gz: Funcotator datasources tar gz file. Bucket location is recommended when running on the cloud.
+## funco_annotation_defaults: Default values for annotations, when values are unspecified. Specified as <annotation>:<value>. For example: "Center:Broad"
+## funco_annotation_overrides: Values for annotations, even when values are unspecified. Specified as <annotation>:<value>. For example: "Center:Broad"
+## funcotator_excluded_fields: Annotations that should not appear in the output (VCF or MAF). Specified as <field name>. For example: "ClinVar_ALLELEID"
+## funco_filter_funcotations: If true, will only annotate variants that have passed filtering (. or PASS value in the FILTER column). If false, will annotate all variants in the input file.
Default: true +## funcotator_extra_args: Any additional arguments to pass to Funcotator. Default: "" ## ## Outputs : ## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam ## file of reassembled reads if requested ## ## Cromwell version support -## - Successfully tested on v30 +## - Successfully tested on v34 ## ## LICENSING : ## This script is released under the WDL source code license (BSD-3) (see LICENSE in @@ -63,118 +66,172 @@ ## authorized to run all programs before running this script. Please see the docker ## pages at https://hub.docker.com/r/broadinstitute/* for detailed licensing information ## pertaining to the included programs. -workflow Mutect2 { - # Mutect2 inputs - File? intervals - File ref_fasta - File ref_fai - File ref_dict - File tumor_bam - File tumor_bai - File? normal_bam - File? normal_bai - File? pon - File? pon_index - Int scatter_count - File? gnomad - File? gnomad_index - File? variants_for_contamination - File? variants_for_contamination_index - Boolean? run_orientation_bias_filter - Boolean run_ob_filter = select_first([run_orientation_bias_filter, false]) - Array[String]? artifact_modes - File? tumor_sequencing_artifact_metrics - String? m2_extra_args - String? m2_extra_filtering_args - String? split_intervals_extra_args - Boolean? make_bamout - Boolean make_bamout_or_default = select_first([make_bamout, false]) - Boolean? compress_vcfs - Boolean compress = select_first([compress_vcfs, false]) - File? gga_vcf - File? gga_vcf_index - - # HaplotypeCaller inputs - Float? germline_contamination - Int? germline_max_alt_alleles - String? haplotypecaller_extra_args - - # oncotator inputs - Boolean? run_oncotator - Boolean run_oncotator_or_default = select_first([run_oncotator, false]) - File? onco_ds_tar_gz - String? onco_ds_local_db_dir - String? sequencing_center - String? sequence_source - File? default_config_file - - # funcotator inputs - Boolean? run_funcotator - Boolean run_funcotator_or_default = select_first([run_funcotator, false]) - String? reference_version - String? data_sources_tar_gz - String? transcript_selection_mode - Array[String]? transcript_selection_list - Array[String]? annotation_defaults - Array[String]? annotation_overrides +struct Runtime { + String gatk_docker File? gatk_override - File? gatk3_override - File? mb_script_override - File? context_script_override - File? lego_plot_script_override - File? lego_render_script_override - - # lego plot parameter - Boolean? is_calling_whole_genome - Boolean use_precomputed_genome = select_first([is_calling_whole_genome, false]) + Int max_retries + Int preemptible + Int cpu + Int machine_mem + Int command_mem + Int disk + Int boot_disk_size +} - # runtime - String gatk_docker - String tag_docker - String basic_bash_docker = "ubuntu:16.04" - String? oncotator_docker - String oncotator_docker_or_default = select_first([oncotator_docker, "broadinstitute/oncotator:1.9.8.0"]) - Boolean? filter_oncotator_maf - Boolean filter_oncotator_maf_or_default = select_first([filter_oncotator_maf, true]) - String? oncotator_extra_args +workflow Mutect2 { + input { + # Mutect2 inputs + File? intervals + File ref_fasta + File ref_fai + File ref_dict + File tumor_reads + File tumor_reads_index + File? normal_reads + File? normal_reads_index + File? pon + File? pon_idx + Int scatter_count + File? gnomad + File? gnomad_idx + File? variants_for_contamination + File? variants_for_contamination_idx + File? realignment_index_bundle + String? 
realignment_extra_args
+        Boolean? run_orientation_bias_mixture_model_filter
+        String? m2_extra_args
+        String? m2_extra_filtering_args
+        String? getpileupsummaries_extra_args
+        String? split_intervals_extra_args
+        Boolean? make_bamout
+        Boolean? compress_vcfs
+        File? gga_vcf
+        File? gga_vcf_idx
+
+        # Funcotator inputs
+        Boolean? run_funcotator
+        String? sequencing_center
+        String? sequence_source
+        String? funco_reference_version
+        String? funco_output_format
+        Boolean? funco_compress
+        Boolean? funco_use_gnomad_AF
+        File? funco_data_sources_tar_gz
+        String? funco_transcript_selection_mode
+        File? funco_transcript_selection_list
+        Array[String]? funco_annotation_defaults
+        Array[String]? funco_annotation_overrides
+        Array[String]? funcotator_excluded_fields
+        Boolean? funco_filter_funcotations
+        String? funcotator_extra_args
+
+        String funco_default_output_format = "MAF"
+
+        # runtime
+        String gatk_docker
+        File? gatk_override
+        String basic_bash_docker = "ubuntu:16.04"
+        Boolean? filter_funcotations
+
+        Int? preemptible
+        Int? max_retries
+        Int small_task_cpu = 2
+        Int small_task_mem = 4
+        Int small_task_disk = 100
+        Int boot_disk_size = 12
+        Int learn_read_orientation_mem = 8000
+        Int filter_alignment_artifacts_mem = 9000
+
+        # Use as a last resort to increase the disk given to every task in case of ill-behaving data
+        Int? emergency_extra_disk
+
+        # These are multipliers to multiply inputs by, to make sure we have enough disk to accommodate possible output sizes
+        # Large is for Bams/WGS vcfs
+        # Small is for metrics/other vcfs
+        Float large_input_to_output_multiplier = 2.25
+        Float small_input_to_output_multiplier = 2.0
+        Float cram_to_bam_multiplier = 6.0
+    }

-    Int? preemptible_attempts
+    Int preemptible_or_default = select_first([preemptible, 2])
+    Int max_retries_or_default = select_first([max_retries, 2])

-    # Use as a last resort to increase the disk given to every task in case of ill behaving data
-    Int?
emergency_extra_disk + Boolean compress = select_first([compress_vcfs, false]) + Boolean run_ob_filter = select_first([run_orientation_bias_mixture_model_filter, false]) + Boolean make_bamout_or_default = select_first([make_bamout, false]) + Boolean run_funcotator_or_default = select_first([run_funcotator, false]) + Boolean filter_funcotations_or_default = select_first([filter_funcotations, true]) # Disk sizes used for dynamic sizing Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_dict, "GB") + size(ref_fai, "GB")) - Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bai, "GB")) - Int gnomad_vcf_size = if defined(gnomad) then ceil(size(gnomad, "GB") + size(gnomad_index, "GB")) else 0 - Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bai, "GB")) else 0 + Int tumor_reads_size = ceil(size(tumor_reads, "GB") + size(tumor_reads_index, "GB")) + Int gnomad_vcf_size = if defined(gnomad) then ceil(size(gnomad, "GB")) else 0 + Int normal_reads_size = if defined(normal_reads) then ceil(size(normal_reads, "GB") + size(normal_reads_index, "GB")) else 0 # If no tar is provided, the task downloads one from broads ftp server - Int onco_tar_size = if defined(onco_ds_tar_gz) then ceil(size(onco_ds_tar_gz, "GB") * 3) else 100 + Int funco_tar_size = if defined(funco_data_sources_tar_gz) then ceil(size(funco_data_sources_tar_gz, "GB") * 3) else 100 Int gatk_override_size = if defined(gatk_override) then ceil(size(gatk_override, "GB")) else 0 # This is added to every task as padding, should increase if systematically you need more disk for every call Int disk_pad = 10 + gatk_override_size + select_first([emergency_extra_disk,0]) - # These are multipliers to multipler inputs by to make sure we have enough disk to accommodate for possible output sizes - # Large is for Bams/WGS vcfs - # Small is for metrics/other vcfs - Float large_input_to_output_multiplier = 2.25 - Float small_input_to_output_multiplier = 2.0 - # logic about output file names -- these are the names *without* .vcf extensions - String output_basename = if defined(normal_bam) then basename(tumor_bam, ".bam") + "_" + basename(select_first([normal_bam]), ".bam") else basename(tumor_bam, ".bam") + String output_basename = basename(basename(tumor_reads, ".bam"),".cram") #hacky way to strip either .bam or .cram String unfiltered_name = output_basename + "-unfiltered" String filtered_name = output_basename + "-filtered" String funcotated_name = output_basename + "-funcotated" - String germline_name = output_basename + "-germline" - String output_vcf_name = basename(tumor_bam, ".bam") + ".vcf" + String output_vcf_name = output_basename + ".vcf" + + Int tumor_cram_to_bam_disk = ceil(tumor_reads_size * cram_to_bam_multiplier) + Int normal_cram_to_bam_disk = ceil(normal_reads_size * cram_to_bam_multiplier) + + Runtime standard_runtime = {"gatk_docker": gatk_docker, "gatk_override": gatk_override, + "max_retries": max_retries_or_default, "preemptible": preemptible_or_default, "cpu": small_task_cpu, + "machine_mem": small_task_mem * 1000, "command_mem": small_task_mem * 1000 - 500, + "disk": small_task_disk + disk_pad, "boot_disk_size": boot_disk_size} + + if (basename(tumor_reads) != basename(tumor_reads, ".cram")) { + call CramToBam as TumorCramToBam { + input: + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, + cram = tumor_reads, + crai = tumor_reads_index, + name = output_basename, + disk_size = tumor_cram_to_bam_disk + } + } + + String normal_or_empty = 
select_first([normal_reads, ""]) + if (basename(normal_or_empty) != basename(normal_or_empty, ".cram")) { + String normal_basename = basename(basename(normal_or_empty, ".bam"),".cram") + call CramToBam as NormalCramToBam { + input: + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, + cram = normal_reads, + crai = normal_reads_index, + name = normal_basename, + disk_size = normal_cram_to_bam_disk + } + } + + File tumor_bam = select_first([TumorCramToBam.output_bam, tumor_reads]) + File tumor_bai = select_first([TumorCramToBam.output_bai, tumor_reads_index]) + File? normal_bam = if defined(normal_reads) then select_first([NormalCramToBam.output_bam, normal_reads]) else normal_reads + File? normal_bai = if defined(normal_reads) then select_first([NormalCramToBam.output_bai, normal_reads_index]) else normal_reads_index + + Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bai, "GB")) + Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bai, "GB")) else 0 - # this part needs to be changed when the wdl is designed for NIO Int m2_output_size = tumor_bam_size / scatter_count - Int germline_output_size = normal_bam_size / scatter_count + #TODO: do we need to change this disk size now that NIO is always going to happen (for the google backend only) + Int m2_per_scatter_size = (tumor_bam_size + normal_bam_size) + ref_size + gnomad_vcf_size + m2_output_size + disk_pad + call SplitIntervals { input: intervals = intervals, @@ -183,13 +240,10 @@ workflow Mutect2 { ref_dict = ref_dict, scatter_count = scatter_count, split_intervals_extra_args = split_intervals_extra_args, - gatk_override = gatk_override, - gatk_docker = gatk_docker, - preemptible_attempts = preemptible_attempts, - disk_space = ref_size + ceil(size(intervals, "GB") * small_input_to_output_multiplier) + disk_pad + runtime_params = standard_runtime } - scatter (subintervals in SplitIntervals.interval_files) { + scatter (subintervals in SplitIntervals.interval_files ) { call M2 { input: intervals = subintervals, @@ -201,73 +255,48 @@ workflow Mutect2 { normal_bam = normal_bam, normal_bai = normal_bai, pon = pon, - pon_index = pon_index, + pon_idx = pon_idx, gnomad = gnomad, - gnomad_index = gnomad_index, - gga_vcf = gga_vcf, - gga_vcf_index = gga_vcf_index, + gnomad_idx = gnomad_idx, + preemptible = preemptible, + max_retries = max_retries, m2_extra_args = m2_extra_args, + getpileupsummaries_extra_args = getpileupsummaries_extra_args, + variants_for_contamination = variants_for_contamination, + variants_for_contamination_idx = variants_for_contamination_idx, make_bamout = make_bamout_or_default, + run_ob_filter = run_ob_filter, compress = compress, + gga_vcf = gga_vcf, + gga_vcf_idx = gga_vcf_idx, gatk_override = gatk_override, gatk_docker = gatk_docker, - preemptible_attempts = preemptible_attempts, - disk_space = tumor_bam_size + normal_bam_size + ref_size + gnomad_vcf_size + m2_output_size + disk_pad - } - - ## TAG: HaplotypeCaller to call germline variants in normal sample - if(defined(normal_bam)){ - call HaplotypeCaller { - input: - intervals = subintervals, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - input_bam = select_first([normal_bam, "NO_NORMAL_GIVEN"]), - input_bai = select_first([normal_bai, "NO_NORMAL_GIVEN"]), - contamination = germline_contamination, - max_alt_alleles = germline_max_alt_alleles, - haplotypecaller_extra_args = haplotypecaller_extra_args, - make_bamout = make_bamout_or_default, - compress = compress, - gatk_override 
= gatk_override, - gatk_docker = gatk_docker, - preemptible_attempts = preemptible_attempts, - disk_space = normal_bam_size + ref_size + germline_output_size + disk_pad - } - Float germline_sub_vcf_size = size(HaplotypeCaller.germline_vcf, "GB") - Float germline_sub_bamout_size = size(HaplotypeCaller.germline_bamOut, "GB") + disk_space = m2_per_scatter_size } - - Float sub_vcf_size = size(M2.unfiltered_vcf, "GB") - Float sub_bamout_size = size(M2.output_bamOut, "GB") } - call SumFloats as SumSubVcfs { - input: - sizes = sub_vcf_size, - preemptible_attempts = preemptible_attempts + Int merged_vcf_size = ceil(size(M2.unfiltered_vcf, "GB")) + Int merged_bamout_size = ceil(size(M2.output_bamOut, "GB")) + + if (run_ob_filter) { + call LearnReadOrientationModel { + input: + f1r2_tar_gz = M2.f1r2_counts, + runtime_params = standard_runtime, + mem = learn_read_orientation_mem + } } call MergeVCFs { input: input_vcfs = M2.unfiltered_vcf, - input_vcf_indices = M2.unfiltered_vcf_index, + input_vcf_indices = M2.unfiltered_vcf_idx, output_name = unfiltered_name, compress = compress, - gatk_override = gatk_override, - gatk_docker = gatk_docker, - preemptible_attempts = preemptible_attempts, - disk_space = ceil(SumSubVcfs.total_size * large_input_to_output_multiplier) + disk_pad + runtime_params = standard_runtime } if (make_bamout_or_default) { - call SumFloats as SumSubBamouts { - input: - sizes = sub_bamout_size, - preemptible_attempts = preemptible_attempts - } - call MergeBamOuts { input: ref_fasta = ref_fasta, @@ -275,1158 +304,816 @@ workflow Mutect2 { ref_dict = ref_dict, bam_outs = M2.output_bamOut, output_vcf_name = basename(MergeVCFs.merged_vcf, ".vcf"), - gatk_override = gatk_override, - gatk_docker = gatk_docker, - disk_space = ceil(SumSubBamouts.total_size * large_input_to_output_multiplier) + disk_pad + runtime_params = standard_runtime, + disk_space = ceil(merged_bamout_size * large_input_to_output_multiplier) + disk_pad, } } - # TAG: gether results from HaplotypeCaller task - if(defined(normal_bam)){ - call SumFloats as SumGermlineSubVcfs { - input: - sizes = select_all(germline_sub_vcf_size), - preemptible_attempts = preemptible_attempts - } + call MergeStats { input: stats = M2.stats, runtime_params = standard_runtime } - call MergeVCFs as MergeGermlineVCFs { - input: - input_vcfs = select_all(HaplotypeCaller.germline_vcf), - input_vcf_indices = select_all(HaplotypeCaller.germline_vcf_index), - output_name = germline_name, - compress = compress, - gatk_override = gatk_override, - gatk_docker = gatk_docker, - preemptible_attempts = preemptible_attempts, - disk_space = ceil(SumGermlineSubVcfs.total_size * large_input_to_output_multiplier) + disk_pad - } - } - - if(defined(normal_bam) && make_bamout_or_default) { - call SumFloats as SumGermlineSubBamouts { - input: - sizes = select_all(germline_sub_bamout_size), - preemptible_attempts = preemptible_attempts - } - - call MergeBamOuts as MergeGermlineBamOuts { + if (defined(variants_for_contamination)) { + call MergePileupSummaries as MergeTumorPileups { input: - ref_fasta = ref_fasta, - ref_fai = ref_fai, + input_tables = flatten(M2.tumor_pileups), + output_name = output_basename, ref_dict = ref_dict, - bam_outs = select_all(HaplotypeCaller.germline_bamOut), - output_vcf_name = germline_name, - gatk_override = gatk_override, - gatk_docker = gatk_docker, - disk_space = ceil(SumGermlineSubBamouts.total_size * large_input_to_output_multiplier) + disk_pad + runtime_params = standard_runtime } - } - - if (run_ob_filter && 
!defined(tumor_sequencing_artifact_metrics)) { - call CollectSequencingArtifactMetrics { - input: - gatk_docker = gatk_docker, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - preemptible_attempts = preemptible_attempts, - tumor_bam = tumor_bam, - tumor_bai = tumor_bai, - gatk_override = gatk_override, - disk_space = tumor_bam_size + ref_size + disk_pad + if (defined(normal_bam)){ + call MergePileupSummaries as MergeNormalPileups { + input: + input_tables = flatten(M2.normal_pileups), + output_name = output_basename, + ref_dict = ref_dict, + runtime_params = standard_runtime + } } - } - if (defined(variants_for_contamination)) { call CalculateContamination { input: - gatk_override = gatk_override, - intervals = intervals, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - preemptible_attempts = preemptible_attempts, - gatk_docker = gatk_docker, - tumor_bam = tumor_bam, - tumor_bai = tumor_bai, - normal_bam = normal_bam, - normal_bai = normal_bai, - variants_for_contamination = variants_for_contamination, - variants_for_contamination_index = variants_for_contamination_index, - disk_space = tumor_bam_size + normal_bam_size + ceil(size(variants_for_contamination, "GB") * small_input_to_output_multiplier) + disk_pad + tumor_pileups = MergeTumorPileups.merged_table, + normal_pileups = MergeNormalPileups.merged_table, + runtime_params = standard_runtime } } call Filter { input: - gatk_override = gatk_override, - gatk_docker = gatk_docker, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, intervals = intervals, unfiltered_vcf = MergeVCFs.merged_vcf, - unfiltered_vcf_index = MergeVCFs.merged_vcf_index, + unfiltered_vcf_idx = MergeVCFs.merged_vcf_idx, output_name = filtered_name, compress = compress, - preemptible_attempts = preemptible_attempts, + mutect_stats = MergeStats.merged_stats, contamination_table = CalculateContamination.contamination_table, maf_segments = CalculateContamination.maf_segments, + artifact_priors_tar_gz = LearnReadOrientationModel.artifact_prior_table, m2_extra_filtering_args = m2_extra_filtering_args, + runtime_params = standard_runtime, disk_space = ceil(size(MergeVCFs.merged_vcf, "GB") * small_input_to_output_multiplier) + disk_pad } - if (run_ob_filter) { - # Get the metrics either from the workflow input or CollectSequencingArtifactMetrics if no workflow input is provided - File input_artifact_metrics = select_first([tumor_sequencing_artifact_metrics, CollectSequencingArtifactMetrics.pre_adapter_metrics]) - - call FilterByOrientationBias { + if (defined(realignment_index_bundle)) { + call FilterAlignmentArtifacts { input: - gatk_override = gatk_override, - input_vcf = Filter.filtered_vcf, - input_vcf_index = Filter.filtered_vcf_index, - output_name = filtered_name, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, + bam = tumor_bam, + bai = tumor_bai, + realignment_index_bundle = select_first([realignment_index_bundle]), + realignment_extra_args = realignment_extra_args, compress = compress, - gatk_docker = gatk_docker, - preemptible_attempts = preemptible_attempts, - pre_adapter_metrics = input_artifact_metrics, - artifact_modes = artifact_modes, - disk_space = ceil(size(Filter.filtered_vcf, "GB") * small_input_to_output_multiplier) + ceil(size(input_artifact_metrics, "GB")) + disk_pad - } - } - - ## TAG: calculate callable loci in tumor and normal bams - call CallableLoci { - input: - output_basename = output_basename, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - tumor_bam = tumor_bam, 
- tumor_bai = tumor_bai, - normal_bam = normal_bam, - normal_bai = normal_bai, - intervals = intervals, - tag_docker = tag_docker, - context_script_override = context_script_override, - gatk3_override = gatk3_override, - preemptible_attempts = preemptible_attempts, - disk_space = tumor_bam_size + normal_bam_size + ref_size + disk_pad - } - - File oncotate_vcf_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf]) - if (run_oncotator_or_default) { - call oncotate_m2 { - input: - output_basename = output_basename, - m2_vcf = oncotate_vcf_input, - onco_ds_tar_gz = onco_ds_tar_gz, - onco_ds_local_db_dir = onco_ds_local_db_dir, - sequencing_center = sequencing_center, - sequence_source = sequence_source, - default_config_file = default_config_file, - case_id = M2.tumor_sample[0], - control_id = M2.normal_sample[0], - filter_maf = filter_oncotator_maf_or_default, - oncotator_extra_args = oncotator_extra_args, - oncotator_docker = oncotator_docker_or_default, - preemptible_attempts = preemptible_attempts, - disk_space = ceil(size(oncotate_vcf_input, "GB") * large_input_to_output_multiplier) + onco_tar_size + disk_pad - } - - ## TAG: compute coding and non-coding mutational burdens with callable bases - call MutationalBurden { - input: - output_basename = output_basename, - input_maf = oncotate_m2.oncotated_m2_maf, - mb_script_override = mb_script_override, - tag_docker = tag_docker, - callable_bases = CallableLoci.callable_bases, - preemptible_attempts = preemptible_attempts, - disk_space = ceil(size(oncotate_m2.oncotated_m2_maf, "GB") + disk_pad) + output_name = filtered_name, + input_vcf = Filter.filtered_vcf, + input_vcf_idx = Filter.filtered_vcf_idx, + runtime_params = standard_runtime, + mem = filter_alignment_artifacts_mem } } if (run_funcotator_or_default) { - File funcotate_vcf_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf]) - File funcotate_vcf_input_index = select_first([FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index]) + File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, Filter.filtered_vcf]) + File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_idx, Filter.filtered_vcf_idx]) call Funcotate { input: - m2_vcf = funcotate_vcf_input, - m2_vcf_index = funcotate_vcf_input_index, ref_fasta = ref_fasta, ref_fai = ref_fai, ref_dict = ref_dict, - reference_version = select_first([reference_version, "NO_REFERENCE_VERSION_GIVEN"]), - output_name = funcotated_name, - compress = compress, - data_sources_tar_gz = data_sources_tar_gz, - transcript_selection_mode = transcript_selection_mode, - transcript_selection_list = transcript_selection_list, - annotation_defaults = annotation_defaults, - annotation_overrides = annotation_overrides, - gatk_docker = gatk_docker, - gatk_override = gatk_override + input_vcf = funcotate_vcf_input, + input_vcf_idx = funcotate_vcf_input_index, + reference_version = select_first([funco_reference_version, "hg19"]), + output_file_base_name = basename(funcotate_vcf_input, ".vcf") + ".annotated", + output_format = if defined(funco_output_format) then "" + funco_output_format else funco_default_output_format, + compress = if defined(funco_compress) then select_first([funco_compress]) else false, + use_gnomad = if defined(funco_use_gnomad_AF) then select_first([funco_use_gnomad_AF]) else false, + data_sources_tar_gz = funco_data_sources_tar_gz, + case_id = M2.tumor_sample[0], + control_id = M2.normal_sample[0], + sequencing_center = 
sequencing_center, + sequence_source = sequence_source, + transcript_selection_mode = funco_transcript_selection_mode, + transcript_selection_list = funco_transcript_selection_list, + annotation_defaults = funco_annotation_defaults, + annotation_overrides = funco_annotation_overrides, + funcotator_excluded_fields = funcotator_excluded_fields, + filter_funcotations = filter_funcotations_or_default, + extra_args = funcotator_extra_args, + runtime_params = standard_runtime, + disk_space = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + funco_tar_size + disk_pad } } - ## TAG: lego plots to show mutation spectrum - File input_mut = select_first([oncotate_m2.oncotated_m2_maf, oncotate_vcf_input]) - String input_mut_format = if run_oncotator_or_default then "maf" else "vcf" - call LegoPlot { - input: - input_file = input_mut, - input_file_format = input_mut_format, - output_prefix = output_basename, - is_whole_genome = use_precomputed_genome, - plotter_override = lego_plot_script_override, - renderer_override = lego_render_script_override, - callable_contexts = CallableLoci.callable_contexts, - ref_fasta = ref_fasta, - tag_docker = tag_docker, - preemptible_attempts = preemptible_attempts, - disk_space = ceil(size(input_mut, "GB") + ref_size + disk_pad) - } - output { - File unfiltered_vcf = MergeVCFs.merged_vcf - File unfiltered_vcf_index = MergeVCFs.merged_vcf_index - File filtered_vcf = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf]) - File filtered_vcf_index = select_first([FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index]) + File filtered_vcf = select_first([FilterAlignmentArtifacts.filtered_vcf, Filter.filtered_vcf]) + File filtered_vcf_idx = select_first([FilterAlignmentArtifacts.filtered_vcf_idx, Filter.filtered_vcf_idx]) + File filtering_stats = Filter.filtering_stats + File mutect_stats = MergeStats.merged_stats File? contamination_table = CalculateContamination.contamination_table - Float? contamination_fraction = CalculateContamination.fracContam - File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf - File? funcotated_vcf = Funcotate.funcotated_vcf - File? funcotated_vcf_index = Funcotate.funcotated_vcf_index - File? preadapter_detail_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics + File? funcotated_file = Funcotate.funcotated_output_file + File? funcotated_file_index = Funcotate.funcotated_output_file_index File? bamout = MergeBamOuts.merged_bam_out File? bamout_index = MergeBamOuts.merged_bam_out_index - - File? germline_vcf = MergeGermlineVCFs.merged_vcf - File? germline_vcf_index = MergeGermlineVCFs.merged_vcf_index - File? germline_bamout = MergeGermlineBamOuts.merged_bam_out - File? germline_bamout_index = MergeGermlineBamOuts.merged_bam_out_index - - String callable_bases = CallableLoci.callable_bases - File callable_regions = CallableLoci.callable_regions - File callable_contexts = CallableLoci.callable_contexts - File lego_plot = LegoPlot.lego_plot - - String? total_variants = MutationalBurden.total_variants - String? coding_variants = MutationalBurden.coding_variants - String? coding_mutations_per_mb = MutationalBurden.coding_mutations_per_mb - String? noncoding_variants = MutationalBurden.noncoding_variants - String? noncoding_mutations_per_mb = MutationalBurden.noncoding_mutations_per_mb - File? mutational_burden = MutationalBurden.mutational_burden + File? maf_segments = CalculateContamination.maf_segments + File? 
read_orientation_model_params = LearnReadOrientationModel.artifact_prior_table } } -task SplitIntervals { - # inputs - File? intervals - File ref_fasta - File ref_fai - File ref_dict - Int scatter_count - String? split_intervals_extra_args +task CramToBam { + input { + File ref_fasta + File ref_fai + File ref_dict + #cram and crai must be optional since Normal cram is optional + File? cram + File? crai + String name + Int disk_size + Int? mem + } - File? gatk_override + Int machine_mem = if defined(mem) then mem * 1000 else 6000 - # runtime - String gatk_docker - Int? mem - Int? preemptible_attempts - Int? disk_space - Int? cpu - Boolean use_ssd = false + #Calls samtools view to do the conversion + command { + #Set -e and -o says if any command I run fails in this script, make sure to return a failure + set -e + set -o pipefail - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 3500 - Int command_mem = machine_mem - 500 + samtools view -h -T ~{ref_fasta} ~{cram} | + samtools view -b -o ~{name}.bam - + samtools index -b ~{name}.bam + mv ~{name}.bam.bai ~{name}.bai + } + + runtime { + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + memory: machine_mem + " MB" + disks: "local-disk " + disk_size + " HDD" + } + + output { + File output_bam = "~{name}.bam" + File output_bai = "~{name}.bai" + } +} + +task SplitIntervals { + input { + File? intervals + File ref_fasta + File ref_fai + File ref_dict + Int scatter_count + String? split_intervals_extra_args + + # runtime + Runtime runtime_params + } command { set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} mkdir interval-files - gatk --java-options "-Xmx${command_mem}m" SplitIntervals \ - -R ${ref_fasta} \ - ${"-L " + intervals} \ - -scatter ${scatter_count} \ + gatk --java-options "-Xmx~{runtime_params.command_mem}m" SplitIntervals \ + -R ~{ref_fasta} \ + ~{"-L " + intervals} \ + -scatter ~{scatter_count} \ -O interval-files \ - ${split_intervals_extra_args} - cp interval-files/*.intervals . + ~{split_intervals_extra_args} + cp interval-files/*.interval_list . } runtime { - docker: gatk_docker - memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size + memory: runtime_params.machine_mem + " MB" + disks: "local-disk " + runtime_params.disk + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - Array[File] interval_files = glob("*.intervals") + Array[File] interval_files = glob("*.interval_list") } } task M2 { - # inputs - File? intervals - File ref_fasta - File ref_fai - File ref_dict - File tumor_bam - File tumor_bai - File? normal_bam - File? normal_bai - File? pon - File? pon_index - File? gnomad - File? gnomad_index - File? gga_vcf - File? gga_vcf_index - String? m2_extra_args - Boolean? make_bamout - Boolean compress + input { + File? intervals + File ref_fasta + File ref_fai + File ref_dict + File tumor_bam + File tumor_bai + File? normal_bam + File? normal_bai + File? pon + File? pon_idx + File? gnomad + File? gnomad_idx + String? m2_extra_args + String? getpileupsummaries_extra_args + Boolean? make_bamout + Boolean? 
run_ob_filter + Boolean compress + File? gga_vcf + File? gga_vcf_idx + File? variants_for_contamination + File? variants_for_contamination_idx + + File? gatk_override + + # runtime + String gatk_docker + Int? mem + Int? preemptible + Int? max_retries + Int? disk_space + Int? cpu + Boolean use_ssd = false + } String output_vcf = "output" + if compress then ".vcf.gz" else ".vcf" - String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" - - File? gatk_override + String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx" - # runtime - String gatk_docker - Int? mem - Int? preemptible_attempts - Int? disk_space - Int? cpu - Boolean use_ssd = false + String output_stats = output_vcf + ".stats" # Mem is in units of GB but our command and memory runtime values are in MB Int machine_mem = if defined(mem) then mem * 1000 else 3500 Int command_mem = machine_mem - 500 + parameter_meta{ + intervals: {localization_optional: true} + ref_fasta: {localization_optional: true} + ref_fai: {localization_optional: true} + ref_dict: {localization_optional: true} + tumor_bam: {localization_optional: true} + tumor_bai: {localization_optional: true} + normal_bam: {localization_optional: true} + normal_bai: {localization_optional: true} + pon: {localization_optional: true} + pon_idx: {localization_optional: true} + gnomad: {localization_optional: true} + gnomad_idx: {localization_optional: true} + gga_vcf: {localization_optional: true} + gga_vcf_idx: {localization_optional: true} + variants_for_contamination: {localization_optional: true} + variants_for_contamination_idx: {localization_optional: true} + } command <<< set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override} # We need to create these files regardless, even if they stay empty touch bamout.bam + touch f1r2.tar.gz echo "" > normal_name.txt - gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${tumor_bam} -O tumor_name.txt -encode - tumor_command_line="-I ${tumor_bam} -tumor `cat tumor_name.txt`" + gatk --java-options "-Xmx~{command_mem}m" GetSampleName -R ~{ref_fasta} -I ~{tumor_bam} -O tumor_name.txt -encode + tumor_command_line="-I ~{tumor_bam} -tumor `cat tumor_name.txt`" - if [[ -f "${normal_bam}" ]]; then - gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${normal_bam} -O normal_name.txt -encode - normal_command_line="-I ${normal_bam} -normal `cat normal_name.txt`" + if [[ ! 
-z "~{normal_bam}" ]]; then + gatk --java-options "-Xmx~{command_mem}m" GetSampleName -R ~{ref_fasta} -I ~{normal_bam} -O normal_name.txt -encode + normal_command_line="-I ~{normal_bam} -normal `cat normal_name.txt`" fi - gatk --java-options "-Xmx${command_mem}m" Mutect2 \ - -R ${ref_fasta} \ + gatk --java-options "-Xmx~{command_mem}m" Mutect2 \ + -R ~{ref_fasta} \ $tumor_command_line \ $normal_command_line \ - ${"--germline-resource " + gnomad} \ - ${"-pon " + pon} \ - ${"-L " + intervals} \ - ${"--genotyping-mode GENOTYPE_GIVEN_ALLELES --alleles " + gga_vcf} \ - -O "${output_vcf}" \ - ${true='--bam-output bamout.bam' false='' make_bamout} \ - ${m2_extra_args} - >>> - - runtime { - docker: gatk_docker - memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) - } - - output { - File unfiltered_vcf = "${output_vcf}" - File unfiltered_vcf_index = "${output_vcf_index}" - File output_bamOut = "bamout.bam" - String tumor_sample = read_string("tumor_name.txt") - String normal_sample = read_string("normal_name.txt") - } -} + ~{"--germline-resource " + gnomad} \ + ~{"-pon " + pon} \ + ~{"-L " + intervals} \ + ~{"--alleles " + gga_vcf} \ + -O "~{output_vcf}" \ + ~{true='--bam-output bamout.bam' false='' make_bamout} \ + ~{true='--f1r2-tar-gz f1r2.tar.gz' false='' run_ob_filter} \ + ~{m2_extra_args} -task HaplotypeCaller { - # input - File input_bam - File input_bai - File intervals - File ref_fasta - File ref_fai - File ref_dict - Float? contamination - Int? max_alt_alleles - String? haplotypecaller_extra_args - Boolean compress - Boolean? make_bamout - - String output_vcf = "germline-output" + if compress then ".vcf.gz" else ".vcf" - String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" + m2_exit_code=$? - File? gatk_override + ### GetPileupSummaries - # runtime - String gatk_docker - Int? mem - Int? preemptible_attempts - Int? disk_space - Int? cpu - Boolean use_ssd = false + # If the variants for contamination and the intervals for this scatter don't intersect, GetPileupSummaries + # throws an error. However, there is nothing wrong with an empty intersection for our purposes; it simply doesn't + # contribute to the merged pileup summaries that we create downstream. We implement this by with array outputs. + # If the tool errors, no table is created and the glob yields an empty array. + set +e - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 3500 - Int command_mem = machine_mem - 500 + if [[ ! -z "~{variants_for_contamination}" ]]; then + gatk --java-options "-Xmx~{command_mem}m" GetPileupSummaries -R ~{ref_fasta} -I ~{tumor_bam} ~{"--interval-set-rule INTERSECTION -L " + intervals} \ + -V ~{variants_for_contamination} -L ~{variants_for_contamination} -O tumor-pileups.table ~{getpileupsummaries_extra_args} - command <<< - set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + if [[ ! 
-z "~{normal_bam}" ]]; then + gatk --java-options "-Xmx~{command_mem}m" GetPileupSummaries -R ~{ref_fasta} -I ~{normal_bam} ~{"--interval-set-rule INTERSECTION -L " + intervals} \ + -V ~{variants_for_contamination} -L ~{variants_for_contamination} -O normal-pileups.table ~{getpileupsummaries_extra_args} + fi + fi - # We need to create these files regardless, even if they stay empty - touch germline-bamout.bam - - # Assumed the contamination in normal sample is 0 - gatk --java-options "-Xmx${command_mem}m" HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O "${output_vcf}" \ - ${true='--bam-output germline-bamout.bam' false='' make_bamout} \ - ${"-L " + intervals} \ - -contamination ${default=0 contamination} \ - --max-alternate-alleles ${default=3 max_alt_alleles} \ - ${haplotypecaller_extra_args} + # the script only fails if Mutect2 itself fails + exit $m2_exit_code >>> + runtime { docker: gatk_docker + bootDiskSizeGb: 12 memory: machine_mem + " MB" disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) + preemptible: select_first([preemptible, 10]) + maxRetries: select_first([max_retries, 0]) cpu: select_first([cpu, 1]) } + output { - File germline_vcf = "${output_vcf}" - File germline_vcf_index = "${output_vcf_index}" - File germline_bamOut = "germline-bamout.bam" + File unfiltered_vcf = "~{output_vcf}" + File unfiltered_vcf_idx = "~{output_vcf_idx}" + File output_bamOut = "bamout.bam" + String tumor_sample = read_string("tumor_name.txt") + String normal_sample = read_string("normal_name.txt") + File stats = "~{output_stats}" + File f1r2_counts = "f1r2.tar.gz" + Array[File] tumor_pileups = glob("*tumor-pileups.table") + Array[File] normal_pileups = glob("*normal-pileups.table") } } task MergeVCFs { - # inputs - Array[File] input_vcfs - Array[File] input_vcf_indices - String output_name - Boolean compress - String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf" - String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" - - File? gatk_override - - # runtime - String gatk_docker - Int? mem - Int? preemptible_attempts - Int? disk_space - Int? cpu - Boolean use_ssd = false + input { + Array[File] input_vcfs + Array[File] input_vcf_indices + String output_name + Boolean compress + Runtime runtime_params + } - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 3500 - Int command_mem = machine_mem - 1000 + String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf" + String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx" # using MergeVcfs instead of GatherVcfs so we can create indices # WARNING 2015-10-28 15:01:48 GatherVcfs Index creation not currently supported when gathering block compressed VCFs. 
command { set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} - gatk --java-options "-Xmx${command_mem}m" MergeVcfs -I ${sep=' -I ' input_vcfs} -O ${output_vcf} + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} + gatk --java-options "-Xmx~{runtime_params.command_mem}m" MergeVcfs -I ~{sep=' -I ' input_vcfs} -O ~{output_vcf} } runtime { - docker: gatk_docker - memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size + memory: runtime_params.machine_mem + " MB" + disks: "local-disk " + runtime_params.disk + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - File merged_vcf = "${output_vcf}" - File merged_vcf_index = "${output_vcf_index}" + File merged_vcf = "~{output_vcf}" + File merged_vcf_idx = "~{output_vcf_idx}" } } task MergeBamOuts { - # inputs - File ref_fasta - File ref_fai - File ref_dict - Array[File]+ bam_outs - String output_vcf_name - - File? gatk_override - - # runtime - String gatk_docker - Int? mem - Int? preemptible_attempts - Int? disk_space - Int? cpu - Boolean use_ssd = false - - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 7000 - Int command_mem = machine_mem - 1000 + input { + File ref_fasta + File ref_fai + File ref_dict + Array[File]+ bam_outs + String output_vcf_name + Runtime runtime_params + Int? disk_space #override to request more disk than default small task params + } command <<< + # This command block assumes that there is at least one file in bam_outs. 
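+        # (the Array[File]+ declaration above already enforces a non-empty array at the WDL level)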
+ # Do not call this task if len(bam_outs) == 0 set -e + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} + gatk --java-options "-Xmx~{runtime_params.command_mem}m" GatherBamFiles \ + -I ~{sep=" -I " bam_outs} -O unsorted.out.bam -R ~{ref_fasta} - # create a file list containing non-empty bams - touch bam.list - for bam in ${sep=" " bam_outs}; do - if [ -s $bam ]; then - echo $bam >> bam.list - fi - done - - if [ -s bam.list ]; then - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} - gatk --java-options "-Xmx${command_mem}m" GatherBamFiles \ - -I bam.list -O ${output_vcf_name}.unsorted.bam -R ${ref_fasta} - samtools sort ${output_vcf_name}.unsorted.bam ${output_vcf_name}.out - samtools index ${output_vcf_name}.out.bam ${output_vcf_name}.out.bam.bai - else - # if len(bam) == 0, return empty bam - touch ${output_vcf_name}.out.bam ${output_vcf_name}.out.bam.bai - fi + # We must sort because adjacent scatters may have overlapping (padded) assembly regions, hence + # overlapping bamouts + + gatk --java-options "-Xmx~{runtime_params.command_mem}m" SortSam -I unsorted.out.bam \ + -O ~{output_vcf_name}.out.bam \ + --SORT_ORDER coordinate -VALIDATION_STRINGENCY LENIENT + gatk --java-options "-Xmx~{runtime_params.command_mem}m" BuildBamIndex -I ~{output_vcf_name}.out.bam -VALIDATION_STRINGENCY LENIENT >>> runtime { - docker: gatk_docker - memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size + memory: runtime_params.machine_mem + " MB" + disks: "local-disk " + select_first([disk_space, runtime_params.disk]) + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - File merged_bam_out = "${output_vcf_name}.out.bam" - File merged_bam_out_index = "${output_vcf_name}.out.bam.bai" + File merged_bam_out = "~{output_vcf_name}.out.bam" + File merged_bam_out_index = "~{output_vcf_name}.out.bai" } } -task CollectSequencingArtifactMetrics { - # inputs - File ref_fasta - File ref_fai - File tumor_bam - File tumor_bai - - File? gatk_override - - # runtime - String gatk_docker - Int? mem - Int? preemptible_attempts - Int? disk_space - Int? 
cpu - Boolean use_ssd = false - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 7000 - Int command_mem = machine_mem - 1000 +task MergeStats { + input { + Array[File]+ stats + Runtime runtime_params + } command { set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} - gatk --java-options "-Xmx${command_mem}m" CollectSequencingArtifactMetrics \ - -I ${tumor_bam} -O "gatk" -R ${ref_fasta} -VALIDATION_STRINGENCY LENIENT + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} + + + gatk --java-options "-Xmx~{runtime_params.command_mem}m" MergeMutectStats \ + -stats ~{sep=" -stats " stats} -O merged.stats } runtime { - docker: gatk_docker - memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size + memory: runtime_params.machine_mem + " MB" + disks: "local-disk " + runtime_params.disk + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - File pre_adapter_metrics = "gatk.pre_adapter_detail_metrics" + File merged_stats = "merged.stats" } } -task CalculateContamination { - # inputs - File? intervals - File ref_fasta - File ref_fai - File ref_dict - File tumor_bam - File tumor_bai - File? normal_bam - File? normal_bai - File? variants_for_contamination - File? variants_for_contamination_index - - File? gatk_override - - # runtime - Int? preemptible_attempts - String gatk_docker - Int? disk_space - Int? mem - - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 7000 - Int command_mem = machine_mem - 500 +task MergePileupSummaries { + input { + Array[File] input_tables + String output_name + File ref_dict + Runtime runtime_params + } command { set -e + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} - - if [[ -f "${normal_bam}" ]]; then - gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -I ${normal_bam} ${"-L " + intervals} -V ${variants_for_contamination} -O normal_pileups.table - NORMAL_CMD="-matched normal_pileups.table" - fi - - gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"-L " + intervals} -V ${variants_for_contamination} -O pileups.table - gatk --java-options "-Xmx${command_mem}m" CalculateContamination -I pileups.table -O contamination.table --tumor-segmentation segments.table $NORMAL_CMD - - tail -n1 contamination.table | cut -f2 > fraction_contamination.txt + gatk --java-options "-Xmx~{runtime_params.command_mem}m" GatherPileupSummaries \ + --sequence-dictionary ~{ref_dict} \ + -I ~{sep=' -I ' input_tables} \ + -O ~{output_name}.tsv } runtime { - docker: gatk_docker - memory: command_mem + " MB" - disks: "local-disk " + select_first([disk_space, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 10]) + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size + memory: runtime_params.machine_mem + " MB" + disks: "local-disk " + runtime_params.disk + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - File 
pileups = "pileups.table" - File contamination_table = "contamination.table" - File maf_segments = "segments.table" - Float fracContam=read_float("fraction_contamination.txt") + File merged_table = "~{output_name}.tsv" } } -task Filter { - # inputs - File? intervals - File unfiltered_vcf - File unfiltered_vcf_index - String output_name - Boolean compress - String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf" - String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" - File? contamination_table - File? maf_segments - String? m2_extra_filtering_args - - File? gatk_override - - # runtime - String gatk_docker - Int? mem - Int? preemptible_attempts - Int? disk_space - Int? cpu - Boolean use_ssd = false +# Learning step of the orientation bias mixture model, which is the recommended orientation bias filter as of September 2018 +task LearnReadOrientationModel { + input { + Array[File] f1r2_tar_gz + Runtime runtime_params + Int? mem #override memory + } - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 7000 - Int command_mem = machine_mem - 500 + Int machine_mem = select_first([mem, runtime_params.machine_mem]) + Int command_mem = machine_mem - 1000 command { set -e + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} - - gatk --java-options "-Xmx${command_mem}m" FilterMutectCalls -V ${unfiltered_vcf} \ - -O ${output_vcf} \ - ${"--contamination-table " + contamination_table} \ - ${"--tumor-segmentation " + maf_segments} \ - ${m2_extra_filtering_args} + gatk --java-options "-Xmx~{command_mem}m" LearnReadOrientationModel \ + -I ~{sep=" -I " f1r2_tar_gz} \ + -O "artifact-priors.tar.gz" } runtime { - docker: gatk_docker + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) + disks: "local-disk " + runtime_params.disk + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - File filtered_vcf = "${output_vcf}" - File filtered_vcf_index = "${output_vcf_index}" + File artifact_prior_table = "artifact-priors.tar.gz" } -} - -task FilterByOrientationBias { - # input - File? gatk_override - File input_vcf - File input_vcf_index - String output_name - Boolean compress - String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf" - String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" - File pre_adapter_metrics - Array[String]? artifact_modes - # runtime - Int? preemptible_attempts - String gatk_docker - Int? disk_space - Int? mem - Int? cpu - Boolean use_ssd = false +} - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 7000 - Int command_mem = machine_mem - 500 +task CalculateContamination { + input { + String? intervals + File tumor_pileups + File? 
normal_pileups + Runtime runtime_params + } command { set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} - gatk --java-options "-Xmx${command_mem}m" FilterByOrientationBias \ - -V ${input_vcf} \ - -AM ${sep=" -AM " artifact_modes} \ - -P ${pre_adapter_metrics} \ - -O ${output_vcf} + gatk --java-options "-Xmx~{runtime_params.command_mem}m" CalculateContamination -I ~{tumor_pileups} \ + -O contamination.table --tumor-segmentation segments.table ~{"-matched " + normal_pileups} } runtime { - docker: gatk_docker - memory: command_mem + " MB" - disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size + memory: runtime_params.machine_mem + " MB" + disks: "local-disk " + runtime_params.disk + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - File filtered_vcf = "${output_vcf}" - File filtered_vcf_index = "${output_vcf_index}" + File contamination_table = "contamination.table" + File maf_segments = "segments.table" } } -task oncotate_m2 { - # inputs - File m2_vcf - File? onco_ds_tar_gz - String? onco_ds_local_db_dir - String? oncotator_exe - String? sequencing_center - String? sequence_source - File? default_config_file - String case_id - String output_basename - String? control_id - String? oncotator_extra_args - - Boolean filter_maf - String filter_maf_args = if (filter_maf) then " --collapse-filter-cols --prune-filter-cols " else "" - - # runtime - String oncotator_docker - Int? mem - Int? preemptible_attempts - Int? disk_space - Int? cpu - Boolean use_ssd = false +task Filter { + input { + File? intervals + File ref_fasta + File ref_fai + File ref_dict + File unfiltered_vcf + File unfiltered_vcf_idx + String output_name + Boolean compress + File? mutect_stats + File? artifact_priors_tar_gz + File? contamination_table + File? maf_segments + String? m2_extra_filtering_args + + Runtime runtime_params + Int? disk_space + } - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 3500 - Int command_mem = machine_mem - 500 + String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf" + String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx" - command <<< - # fail if *any* command below (not just the last) doesn't return 0, in particular if wget fails - set -e + parameter_meta{ + ref_fasta: {localization_optional: true} + ref_fai: {localization_optional: true} + ref_dict: {localization_optional: true} + } - # local db dir is a directory and has been specified - if [[ -d "${onco_ds_local_db_dir}" ]]; then - echo "Using local db-dir: ${onco_ds_local_db_dir}" - echo "THIS ONLY WORKS WITHOUT DOCKER!" - ln -s ${onco_ds_local_db_dir} onco_dbdir - elif [[ "${onco_ds_tar_gz}" == *.tar.gz ]]; then - echo "Using given tar file: ${onco_ds_tar_gz}" - mkdir onco_dbdir - tar zxvf ${onco_ds_tar_gz} -C onco_dbdir --strip-components 1 - else - echo "Downloading and installing oncotator datasources from Broad FTP site..." 
- # Download and untar the db-dir - wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/oncotator/oncotator_v1_ds_April052016.tar.gz - tar zxvf oncotator_v1_ds_April052016.tar.gz - ln -s oncotator_v1_ds_April052016 onco_dbdir - fi + command { + set -e - ${default="/root/oncotator_venv/bin/oncotator" oncotator_exe} --db-dir onco_dbdir/ -c $HOME/tx_exact_uniprot_matches.AKT1_CRLF2_FGFR1.txt \ - -v ${m2_vcf} ${output_basename}.maf.annotated hg19 -i VCF -o TCGAMAF --skip-no-alt --infer-onps --collapse-number-annotations --log_name oncotator.log \ - -a Center:${default="Unknown" sequencing_center} \ - -a source:${default="Unknown" sequence_source} \ - -a normal_barcode:${control_id} \ - -a tumor_barcode:${case_id} \ - ${"--default_config " + default_config_file} \ - ${filter_maf_args} \ - ${oncotator_extra_args} - >>> + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} + + gatk --java-options "-Xmx~{runtime_params.command_mem}m" FilterMutectCalls -V ~{unfiltered_vcf} \ + -R ~{ref_fasta} \ + -O ~{output_vcf} \ + ~{"--contamination-table " + contamination_table} \ + ~{"--tumor-segmentation " + maf_segments} \ + ~{"--ob-priors " + artifact_priors_tar_gz} \ + ~{"-stats " + mutect_stats} \ + --filtering-stats filtering.stats \ + ~{m2_extra_filtering_args} + } runtime { - docker: oncotator_docker - memory: machine_mem + " MB" - bootDiskSizeGb: 12 - disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size + memory: runtime_params.machine_mem + " MB" + disks: "local-disk " + select_first([disk_space, runtime_params.disk]) + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - File oncotated_m2_maf="${output_basename}.maf.annotated" + File filtered_vcf = "~{output_vcf}" + File filtered_vcf_idx = "~{output_vcf_idx}" + File filtering_stats = "filtering.stats" } } -# Calculates sum of a list of floats -task SumFloats { - Array[Float] sizes - - # Runtime parameters - Int? preemptible_attempts - - command <<< - python -c "print ${sep="+" sizes}" - >>> - - output { - Float total_size = read_float(stdout()) - } - - runtime { - docker: "python:2.7" - disks: "local-disk " + 10 + " HDD" - preemptible: select_first([preemptible_attempts, 10]) +task FilterAlignmentArtifacts { + input { + File ref_fasta + File ref_fai + File ref_dict + File input_vcf + File input_vcf_idx + File bam + File bai + String output_name + Boolean compress + File realignment_index_bundle + String? realignment_extra_args + Runtime runtime_params + Int mem } -} -task Funcotate { - # inputs - File ref_fasta - File ref_fai - File ref_dict - File m2_vcf - File m2_vcf_index - String reference_version - String output_name - Boolean compress String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf" - String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" - - File? data_sources_tar_gz - String? transcript_selection_mode - Array[String]? transcript_selection_list - Array[String]? annotation_defaults - Array[String]? 
annotation_overrides - - # ============== - # Process input args: - String transcript_selection_arg = if defined(transcript_selection_list) then " --transcript-list " else "" - String annotation_def_arg = if defined(annotation_defaults) then " --annotation-default " else "" - String annotation_over_arg = if defined(annotation_overrides) then " --annotation-override " else "" - # ============== - - # runtime - - String gatk_docker - File? gatk_override - Int? mem - Int? preemptible_attempts - Int? disk_space_gb - Int? cpu - - Boolean use_ssd = false - - # You may have to change the following two parameter values depending on the task requirements - Int default_ram_mb = 3000 - # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples. - Int default_disk_space_gb = 100 - - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem *1000 else default_ram_mb - Int command_mem = machine_mem - 1000 + String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx" - command <<< - set -e - export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} - - DATA_SOURCES_TAR_GZ=${data_sources_tar_gz} - if [[ ! -e $DATA_SOURCES_TAR_GZ ]] ; then - # We have to download the data sources: - echo "Data sources gzip does not exist: $DATA_SOURCES_TAR_GZ" - echo "Downloading default data sources..." - wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/funcotator/funcotator_dataSources.v1.0.20180105.tar.gz - tar -zxf funcotator_dataSources.v1.0.20180105.tar.gz - DATA_SOURCES_FOLDER=funcotator_dataSources.v1.0.20180105 - else - # Extract the tar.gz: - mkdir datasources_dir - tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1 - DATA_SOURCES_FOLDER="$PWD/datasources_dir" - fi - - gatk --java-options "-Xmx${command_mem}m" Funcotator \ - --data-sources-path $DATA_SOURCES_FOLDER \ - --ref-version ${reference_version} \ - -R ${ref_fasta} \ - -V ${m2_vcf} \ - -O ${output_vcf} \ - ${"--transcript-selection-mode " + transcript_selection_mode} \ - ${transcript_selection_arg}${default="" sep=" --transcript-list " transcript_selection_list} \ - ${annotation_def_arg}${default="" sep=" --annotation-default " annotation_defaults} \ - ${annotation_over_arg}${default="" sep=" --annotation-override " annotation_overrides} - >>> + Int machine_mem = mem + Int command_mem = machine_mem - 500 - runtime { - docker: gatk_docker - memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD" - preemptible: select_first([preemptible_attempts, 3]) - cpu: select_first([cpu, 1]) + parameter_meta{ + ref_fasta: {localization_optional: true} + ref_fai: {localization_optional: true} + ref_dict: {localization_optional: true} + input_vcf: {localization_optional: true} + input_vcf_idx: {localization_optional: true} + bam: {localization_optional: true} + bai: {localization_optional: true} } - output { - File funcotated_vcf = "${output_vcf}" - File funcotated_vcf_index = "${output_vcf_index}" - } -} - -task CallableLoci { - String output_basename - File ref_fasta - File ref_fai - File ref_dict - File tumor_bam - File tumor_bai - File? normal_bam - File? normal_bai - File? intervals - - String tag_docker - File? gatk3_override - File? context_script_override - - Int? preemptible_attempts - Int? disk_space - Int? mem - Int? cpu - - # Cutoff to judge covered bases - Int? 
tumor_coverage - Int? normal_coverage - Int tumor_cutoff = select_first([tumor_coverage,14]) - Int normal_cutoff = select_first([normal_coverage,8]) - - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 3500 - Int command_mem = machine_mem - 500 - - command <<< + command { set -e - export GATK_JAR=${default="/usr/tag/GATK36.jar" gatk3_override} - export CONTEXT_PY=${default="/usr/tag/kmer_freq.py" context_script_override} - - java "-Xmx${command_mem}m" -jar $GATK_JAR -T CallableLoci \ - -I ${tumor_bam} \ - -R ${ref_fasta} \ - --minMappingQuality 20 \ - --minBaseQuality 20 \ - --minDepth ${tumor_cutoff} \ - ${"-L " + intervals} \ - -o tumor_callable.bed \ - --summary tumor_callable.summary - - if [[ -f "${normal_bam}" ]]; then - java "-Xmx${command_mem}m" -jar $GATK_JAR -T CallableLoci \ - -I ${normal_bam} \ - -R ${ref_fasta} \ - --minMappingQuality 20 \ - --minBaseQuality 20 \ - --minDepth ${normal_cutoff} \ - ${"-L " + intervals} \ - -o normal_callable.bed \ - --summary normal_callable.summary - - bedtools intersect -a <(grep 'CALLABLE' tumor_callable.bed) \ - -b <(grep 'CALLABLE' normal_callable.bed) > ${output_basename}_callable.bed - else - grep 'CALLABLE' tumor_callable.bed > ${output_basename}_callable.bed - fi - # Tally callable bases from BED - awk 'BEGIN{sum=0}{sum+=$3-$2}END{print(sum)}' ${output_basename}_callable.bed > callable_bases.txt + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} - # Obtain callable bases in 3-base contexts - # awk command is for including flanking bases - awk 'BEGIN{OFS="\t"; FS="\t"}{$2-=1; $3+=1; print $0}' ${output_basename}_callable.bed | \ - bedtools getfasta -fi ${ref_fasta} -bed stdin | \ - python $CONTEXT_PY 3 - > ${output_basename}_context.txt - >>> + gatk --java-options "-Xmx~{command_mem}m" FilterAlignmentArtifacts \ + -R ~{ref_fasta} \ + -V ~{input_vcf} \ + -I ~{bam} \ + --bwa-mem-index-image ~{realignment_index_bundle} \ + ~{realignment_extra_args} \ + -O ~{output_vcf} + } runtime { - docker: tag_docker + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 12]) + " HDD" - preemptible: select_first([preemptible_attempts, 10]) - cpu: select_first([cpu, 1]) + disks: "local-disk " + runtime_params.disk + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } output { - String callable_bases = read_string("callable_bases.txt") - File callable_regions = "${output_basename}_callable.bed" - File callable_contexts = "${output_basename}_context.txt" + File filtered_vcf = "~{output_vcf}" + File filtered_vcf_idx = "~{output_vcf_idx}" } } -task MutationalBurden { - String output_basename - File input_maf - String callable_bases - File? mb_script_override - - # runtime - String tag_docker - Int? preemptible_attempts - Int? disk_space - Int? 
mem - - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 3500 - Int command_mem = machine_mem - 500 - - command <<< - set -e - export MB_PY=${default="/usr/tag/scripts/mutburden.py" mb_script_override} - - python $MB_PY --sample-id ${output_basename} ${callable_bases} ${input_maf} - - # Extract values for displaying in FireCloud data table - grep "^total_variants" ${output_basename}.mutational_burden.txt | cut -f2 > total_variants.txt - grep "^coding_variants" ${output_basename}.mutational_burden.txt | cut -f2 > coding_variants.txt - grep "^noncoding_variants" ${output_basename}.mutational_burden.txt | cut -f2 > noncoding_variants.txt - grep "^coding_mutations_per_Mb" ${output_basename}.mutational_burden.txt | cut -f2 > coding_mb.txt - grep "^noncoding_mutations_per_Mb" ${output_basename}.mutational_burden.txt | cut -f2 > noncoding_mb.txt - >>> - - output { - File mutational_burden="${output_basename}.mutational_burden.txt" - String total_variants = read_string("total_variants.txt") - String coding_variants = read_string("coding_variants.txt") - String noncoding_variants = read_string("noncoding_variants.txt") - String coding_mutations_per_mb = read_string("coding_mb.txt") - String noncoding_mutations_per_mb = read_string("noncoding_mb.txt") - } +task Funcotate { + input { + File ref_fasta + File ref_fai + File ref_dict + File input_vcf + File input_vcf_idx + String reference_version + String output_file_base_name + String output_format + Boolean compress + Boolean use_gnomad + # This should be updated when a new version of the data sources is released + # TODO: Make this dynamically chosen in the command. + File? data_sources_tar_gz = "gs://broad-public-datasets/funcotator/funcotator_dataSources.v1.6.20190124s.tar.gz" + String? control_id + String? case_id + String? sequencing_center + String? sequence_source + String? transcript_selection_mode + File? transcript_selection_list + Array[String]? annotation_defaults + Array[String]? annotation_overrides + Array[String]? funcotator_excluded_fields + Boolean? filter_funcotations + File? interval_list + + String? extra_args + + # ============== + Runtime runtime_params + Int? disk_space #override to request more disk than default small task params + + # You may have to change the following two parameter values depending on the task requirements + Int default_ram_mb = 3000 + # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples. 
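+      # A sizing sketch (an assumption for illustration, not the upstream formula): a caller
+      # might compute, at the workflow level, something like
+      #   Int funcotate_disk = ceil(size(input_vcf, "GB") + 4 * size(data_sources_tar_gz, "GB")) + 20
+      # and pass it in as disk_space, so the extracted data sources and outputs both fit.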
+ Int default_disk_space_gb = 100 + } + + # ============== + # Process input args: + String output_maf = output_file_base_name + ".maf" + String output_maf_index = output_maf + ".idx" + String output_vcf = output_file_base_name + if compress then ".vcf.gz" else ".vcf" + String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx" + String output_file = if output_format == "MAF" then output_maf else output_vcf + String output_file_index = if output_format == "MAF" then output_maf_index else output_vcf_idx + String transcript_selection_arg = if defined(transcript_selection_list) then " --transcript-list " else "" + String annotation_def_arg = if defined(annotation_defaults) then " --annotation-default " else "" + String annotation_over_arg = if defined(annotation_overrides) then " --annotation-override " else "" + String filter_funcotations_args = if defined(filter_funcotations) && (filter_funcotations) then " --remove-filtered-variants " else "" + String excluded_fields_args = if defined(funcotator_excluded_fields) then " --exclude-field " else "" + String interval_list_arg = if defined(interval_list) then " -L " else "" + String extra_args_arg = select_first([extra_args, ""]) + + String dollar = "$" + + parameter_meta{ + ref_fasta: {localization_optional: true} + ref_fai: {localization_optional: true} + ref_dict: {localization_optional: true} + input_vcf: {localization_optional: true} + input_vcf_idx: {localization_optional: true} + } + + command <<< + set -e + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" runtime_params.gatk_override} + + # Extract our data sources: + echo "Extracting data sources zip file..." + mkdir datasources_dir + tar zxvf ~{data_sources_tar_gz} -C datasources_dir --strip-components 1 + DATA_SOURCES_FOLDER="$PWD/datasources_dir" + + # Handle gnomAD: + if ~{use_gnomad} ; then + echo "Enabling gnomAD..." 
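+        # The loop below enables gnomAD by unpacking the tarballs pre-bundled with the data
+        # sources in place: for each known tarball it cds into $DATA_SOURCES_FOLDER, runs
+        # tar -zvxf on it, and cds back, failing loudly if either tarball is missing.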
+ for potential_gnomad_gz in gnomAD_exome.tar.gz gnomAD_genome.tar.gz ; do + if [[ -f ~{dollar}{DATA_SOURCES_FOLDER}/~{dollar}{potential_gnomad_gz} ]] ; then + cd ~{dollar}{DATA_SOURCES_FOLDER} + tar -zvxf ~{dollar}{potential_gnomad_gz} + cd - + else + echo "ERROR: Cannot find gnomAD folder: ~{dollar}{potential_gnomad_gz}" 1>&2 + false + fi + done + fi + + # Run Funcotator: + gatk --java-options "-Xmx~{runtime_params.command_mem}m" Funcotator \ + --data-sources-path $DATA_SOURCES_FOLDER \ + --ref-version ~{reference_version} \ + --output-file-format ~{output_format} \ + -R ~{ref_fasta} \ + -V ~{input_vcf} \ + -O ~{output_file} \ + ~{interval_list_arg} ~{default="" interval_list} \ + --annotation-default normal_barcode:~{default="Unknown" control_id} \ + --annotation-default tumor_barcode:~{default="Unknown" case_id} \ + --annotation-default Center:~{default="Unknown" sequencing_center} \ + --annotation-default source:~{default="Unknown" sequence_source} \ + ~{"--transcript-selection-mode " + transcript_selection_mode} \ + ~{transcript_selection_arg}~{default="" sep=" --transcript-list " transcript_selection_list} \ + ~{annotation_def_arg}~{default="" sep=" --annotation-default " annotation_defaults} \ + ~{annotation_over_arg}~{default="" sep=" --annotation-override " annotation_overrides} \ + ~{excluded_fields_args}~{default="" sep=" --exclude-field " funcotator_excluded_fields} \ + ~{filter_funcotations_args} \ + ~{extra_args_arg} + # Make sure we have a placeholder index for MAF files so this workflow doesn't fail: + if [[ "~{output_format}" == "MAF" ]] ; then + touch ~{output_maf_index} + fi + >>> runtime { - docker: tag_docker - memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 10]) + " HDD" - preemptible: select_first([preemptible_attempts, 10]) + docker: runtime_params.gatk_docker + bootDiskSizeGb: runtime_params.boot_disk_size + memory: runtime_params.machine_mem + " MB" + disks: "local-disk " + select_first([disk_space, runtime_params.disk]) + " HDD" + preemptible: runtime_params.preemptible + maxRetries: runtime_params.max_retries + cpu: runtime_params.cpu } -} - -task LegoPlot { - File input_file - String input_file_format - String output_prefix - Boolean is_whole_genome - String precomputed_option = if is_whole_genome then "--mutsig-genome" else "--mutsig-exome" - - File? plotter_override - File? renderer_override - File? ref_fasta - File? callable_contexts - - # runtime - String tag_docker - Int? disk_space - Int? mem - Int? 
preemptible_attempts - - # Mem is in units of GB but our command and memory runtime values are in MB - Int machine_mem = if defined(mem) then mem * 1000 else 3500 - Int command_mem = machine_mem - 500 - command <<< - - export PLOTTER_SRC=${default="/usr/tag/scripts/lego-plot.py" plotter_override} - export RENDERER_SRC=${default="/usr/tag/scripts/lego-report.py" renderer_override} - - # Mutation rate spectrum - python $PLOTTER_SRC --plot-title "${output_prefix}: MutSig 2CV precomputed callable regions" \ - --output-prefix precomputed_rate \ - ${precomputed_option} \ - ${"-s " + ref_fasta} \ - ${input_file_format} ${input_file} - if [[ -f "${callable_contexts}" ]]; then - python $PLOTTER_SRC --plot-title "${output_prefix}: Sample callable regions" \ - --output-prefix sample_rate \ - --user-coverage ${callable_contexts} \ - ${"-s " + ref_fasta} \ - ${input_file_format} ${input_file} - fi - - # Mutation count spectrum - python $PLOTTER_SRC --plot-title "${output_prefix}: All variants" \ - --all-variants \ - --output-prefix all_count \ - ${"-s " + ref_fasta} \ - ${input_file_format} ${input_file} - python $PLOTTER_SRC --plot-title "${output_prefix}: PASSed variants" \ - --output-prefix pass_count \ - ${"-s " + ref_fasta} \ - ${input_file_format} ${input_file} - - # MAF ONLY: Mutation count spectrum sliced by allele fraction - if [[ "${input_file_format}" == "maf" ]]; then - python $PLOTTER_SRC --plot-title "0 <= AF < 0.1" \ - --output-prefix af_0_01 \ - --af-slice 0 0.1 maf ${input_file} - python $PLOTTER_SRC --plot-title "0.1 <= AF < 0.25" \ - --output-prefix af_01_025 \ - --af-slice 0.1 0.25 maf ${input_file} - python $PLOTTER_SRC --plot-title "0.25 <= AF < 0.5" \ - --output-prefix af_025_05 \ - --af-slice 0.25 0.5 maf ${input_file} - python $PLOTTER_SRC --plot-title "0.5 <= AF < 1" \ - --output-prefix af_05_1 \ - --af-slice 0.5 1 maf ${input_file} - ALLELE_SLICE_PDF="--allele-slice af_0_01.pdf af_025_05.pdf af_01_025.pdf af_05_1.pdf" - fi - - # Summarize lego plots into slides - python $RENDERER_SRC --output-prefix ${output_prefix} \ - --mutation-rate `ls *_rate.pdf` \ - --mutation-count all_count.pdf pass_count.pdf \ - $ALLELE_SLICE_PDF - pdflatex ${output_prefix}.tex - >>> - runtime { - docker: tag_docker - memory: machine_mem + " MB" - disks: "local-disk " + select_first([disk_space, 10]) + " HDD" - preemptible: select_first([preemptible_attempts, 10]) - } - output { - File lego_plot = "${output_prefix}.pdf" - } + output { + File funcotated_output_file = "~{output_file}" + File funcotated_output_file_index = "~{output_file_index}" + } }
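+# Output-naming sketch (hypothetical base name, for illustration only): with
+# output_file_base_name = "sample_tumor", output_format = "MAF" yields sample_tumor.maf
+# plus the placeholder index sample_tumor.maf.idx, while output_format = "VCF" with
+# compress = true yields sample_tumor.vcf.gz and sample_tumor.vcf.gz.tbi.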