From 47a44afe8149f80d8a80ebee9a165da494276598 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 2 Jul 2024 15:26:39 -0400 Subject: [PATCH 01/22] first commit of CNV profiler --- CNV-Profiler/CNV-Profiler.inputs.json | 37 ++++ CNV-Profiler/CNV-Profiler.wdl | 279 ++++++++++++++++++++++++++ 2 files changed, 316 insertions(+) create mode 100644 CNV-Profiler/CNV-Profiler.inputs.json create mode 100644 CNV-Profiler/CNV-Profiler.wdl diff --git a/CNV-Profiler/CNV-Profiler.inputs.json b/CNV-Profiler/CNV-Profiler.inputs.json new file mode 100644 index 0000000..e10b888 --- /dev/null +++ b/CNV-Profiler/CNV-Profiler.inputs.json @@ -0,0 +1,37 @@ +{ + "CNV_Profiler.CramToBam.mem": "Int (optional, default = 64)", + "CNV_Profiler.SamtoolsDepth.cpu": "Int? (optional)", + "CNV_Profiler.GetPaddedCnvBed.mem_gb": "Int (optional, default = 1)", + "CNV_Profiler.SamtoolsDepth.disk_size_gb": "Int? (optional)", + "CNV_Profiler.cnvDepthProfiler.maxRetries": "Int (optional, default = 1)", + "CNV_Profiler.cramOrBamIndexFile": "File", + "CNV_Profiler.HeterozygosityCheck.maxRetries": "Int (optional, default = 1)", + "CNV_Profiler.HeterozygosityCheck.cpu": "Int (optional, default = 8)", + "CNV_Profiler.cramOrBamFile": "File", + "CNV_Profiler.HeterozygosityCheck.HG2_vcf_path": "File (optional, default = \"gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA24385/NA24385.hard-filtered.vcf.gz\")", + "CNV_Profiler.HeterozygosityCheck.mem_gb": "Int (optional, default = 64)", + "CNV_Profiler.cnvDepthProfiler.cpu": "Int (optional, default = 8)", + "CNV_Profiler.cnvDepthProfiler.preemptible": "Int (optional, default = 0)", + "CNV_Profiler.SamtoolsDepth.minBaseQuality": "Int (optional, default = 20)", + "CNV_Profiler.heterozygosityCheck": "Boolean (optional, default = false)", + "CNV_Profiler.CramToBam.cpu": "Int (optional, default = 8)", + "CNV_Profiler.HeterozygosityCheck.disk_size_gb": "Int (optional, default = 500)", + "CNV_Profiler.hardFilteredVcfFile": "File? (optional)", + "CNV_Profiler.GetPaddedCnvBed.cpu": "Int (optional, default = 1)", + "CNV_Profiler.referenceFasta": "File", + "CNV_Profiler.GetPaddedCnvBed.disk_size_gb": "Int (optional, default = 10)", + "CNV_Profiler.cnvDepthProfiler.mem_gb": "Int (optional, default = 64)", + "CNV_Profiler.cnvDepthProfiler.disk_size_gb": "Int (optional, default = 500)", + "CNV_Profiler.sampleName": "String", + "CNV_Profiler.cnvProfiler_Docker": "String (optional, default = \"us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2\")", + "CNV_Profiler.referenceDict": "File", + "CNV_Profiler.SamtoolsDepth.samtools_docker": "String (optional, default = \"euformatics/samtools:1.20\")", + "CNV_Profiler.referenceFastaIndex": "File", + "CNV_Profiler.SamtoolsDepth.minMappingQuality": "Int (optional, default = 20)", + "CNV_Profiler.HeterozygosityCheck.HG1_vcf_path": "File (optional, default = \"gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA12878/smoke.hard-filtered.vcf.gz\")", + "CNV_Profiler.SamtoolsDepth.mem_gb": "Int? (optional)", + "CNV_Profiler.CramToBam.disk_size": "Int (optional, default = 500)", + "CNV_Profiler.HeterozygosityCheck.preemptible": "Int (optional, default = 0)", + "CNV_Profiler.cnvBedFile": "File" +} + diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl new file mode 100644 index 0000000..1678f8d --- /dev/null +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -0,0 +1,279 @@ +version 1.0 + +workflow CNV_Profiler { + input{ + String sampleName + String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2" + File cramOrBamFile + File cramOrBamIndexFile + File referenceFasta + File referenceFastaIndex + File referenceDict + File cnvBedFile + Boolean heterozygosityCheck = false + File? hardFilteredVcfFile + } + if (basename(cramOrBamFile) != basename(cramOrBamFile, ".cram")) { + call CramToBam { + input: + sampleName = sampleName, + cramFile = cramOrBamFile, + cramIndexFile = cramOrBamIndexFile, + referenceFasta = referenceFasta, + referenceFastaIndex = referenceFastaIndex, + referenceDict = referenceDict + } + } + File alignedBam = select_first([cramOrBamFile, CramToBam.output_bam]) + File alignedBai = select_first([cramOrBamIndexFile, CramToBam.output_bai]) + call GetPaddedCnvBed { + input: + cnvBedFile = cnvBedFile, + cnvProfiler_Docker = cnvProfiler_Docker + } + call SamtoolsDepth { + input: + sampleName = sampleName, + alignedBam = alignedBam, + alignedBai = alignedBai, + target_bed = GetPaddedCnvBed.paddedCnvBed + + } + call cnvDepthProfiler { + input: + sampleName = sampleName, + depthProfile = SamtoolsDepth.depth_profile, + cnvBedFile = cnvBedFile, + cnvProfiler_Docker = cnvProfiler_Docker + } + if (heterozygosityCheck) { + call HeterozygosityCheck { + input: + sampleName = sampleName, + hardFilteredVcfFile = hardFilteredVcfFile, + cnvBedFile = cnvBedFile, + cnvProfiler_Docker = cnvProfiler_Docker + } + } + output { + File samtools_depth_profile = SamtoolsDepth.depth_profile + Array[File] cnv_depth_profile = cnvDepthProfiler.cnv_depth_profile + Array[File]? heterozygosity_plot = HeterozygosityCheck.heterozygosity_plot + } + meta { + description: "This workflow takes a BAM or CRAM file and a CNV bed file as input and generates a coverage profile for the CNV regions in the bed file. Optionally, it can also generate a heterozygosity plot using a hard-filtered VCF file." + author: "Yueyao Gao" + email: "tag@broadinstitute.org" + } +} + + +task CramToBam { + input { + File referenceFasta + File referenceFastaIndex + File referenceDict + #cram and crai must be optional since Normal cram is optional + File? cramFile + File? cramIndexFile + String sampleName + Int disk_size = 500 + Int mem = 64 + Int cpu = 8 + } + + Int machine_mem = if defined(mem) then mem * 1000 else 6000 + + #Calls samtools view to do the conversion + command <<< + set -e + set -o pipefail + + samtools view -h -T ~{referenceFasta} ~{cramFile} | + samtools view -b -o ~{sampleName}.bam - + samtools index -b ~{sampleName}.bam + mv ~{sampleName}.bam.bai ~{sampleName}.bai + >>> + + runtime { + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + cpu: cpu + memory: machine_mem + " MB" + disks: "local-disk " + disk_size + " SSD" + } + + output { + File output_bam = "~{sampleName}.bam" + File output_bai = "~{sampleName}.bai" + } +} + +task GetPaddedCnvBed { + input { + File cnvBedFile + String cnvProfiler_Docker + Int mem_gb = 1 + Int cpu = 1 + Int disk_size_gb = 10 + } + + command <<< + source activate env_viz + python3 <>> + runtime { + docker: cnvProfiler_Docker + cpu: cpu + memory: mem_gb + " GB" + disks: "local-disk " + disk_size_gb + " HDD" + } + output { + File paddedCnvBed = "padded_cnv.bed" + } +} + +task SamtoolsDepth { + input { + String sampleName + File alignedBam + File alignedBai + File target_bed + Int minBaseQuality = 20 + Int minMappingQuality = 20 + Int? mem_gb + Int? cpu + Int? disk_size_gb + String samtools_docker = "euformatics/samtools:1.20" + } + command <<< + # Create directories for input & output + mkdir input + mkdir output + readlink -f ~{alignedBam} > input/bam_path.txt + + # Run samtools depth + # Counting fragments instead of reads using -s option + samtools depth \ + -b ~{target_bed} \ + -f input/bam_path.txt \ + --min-BQ ~{minBaseQuality} \ + --min-MQ ~{minMappingQuality} \ + -s \ + -o output/~{sampleName}_samtools.depth + + >>> + output { + File depth_profile = "output/~{sampleName}_samtools.depth" + } + runtime { + memory: select_first([mem_gb, 7]) * 1000 + " MB" + cpu: select_first([cpu, 1]) + docker: samtools_docker + disks: "local-disk ~{disk_size_gb} SSD" + preemptible: 0 + maxRetries: 3 + } +} + +task cnvDepthProfiler{ + input { + String sampleName + String cnvProfiler_Docker + File depthProfile + File cnvBedFile + Int mem_gb = 64 + Int cpu = 8 + Int preemptible = 0 + Int disk_size_gb = 500 + Int maxRetries = 1 + } + command <<< + set -e + mkdir output + + # Run the coverage profile visualization script + conda run --no-capture-output \ + -n env_viz \ + python3 /BaseImage/CovProfileViz/scripts/CNV_Depth_Profiler.py \ + -c ~{depthProfile} \ + -b ~{cnvBedFile} \ + -n output/~{sampleName} + + >>> + output { + Array[File] cnv_depth_profile = glob("output/*png") + } + runtime { + memory: mem_gb + " GB" + cpu: cpu + docker: cnvProfiler_Docker + disks: "local-disk ~{disk_size_gb} SSD" + preemptible: preemptible + maxRetries: maxRetries + } +} + +task HeterozygosityCheck{ + input { + String sampleName + String cnvProfiler_Docker + File HG1_vcf_path = "gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA12878/smoke.hard-filtered.vcf.gz" + File HG2_vcf_path = "gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA24385/NA24385.hard-filtered.vcf.gz" + File? hardFilteredVcfFile + File cnvBedFile + Int mem_gb = 64 + Int cpu = 8 + Int preemptible = 0 + Int disk_size_gb = 500 + Int maxRetries = 1 + } + command <<< + set -e + mkdir output + + # Run the coverage profile visualization script + conda run --no-capture-output \ + -n env_viz \ + python3 /BaseImage/CovProfileViz/scripts/CNV_SNP_HET_Profiler.py \ + -v1 ~{hardFilteredVcfFile} \ + -v2 ~{HG1_vcf_path} \ + -v3 ~{HG2_vcf_path} \ + -b ~{cnvBedFile} \ + -n1 ~{sampleName} \ + -n2 HG001 \ + -n3 HG002 \ + -o output/~{sampleName} + + >>> + output { + Array[File] heterozygosity_plot = glob("output/*png") + } + runtime { + memory: mem_gb + " GB" + cpu: cpu + docker: cnvProfiler_Docker + disks: "local-disk ~{disk_size_gb} SSD" + preemptible: preemptible + maxRetries: maxRetries + } +} + + + + + + + From c6aa0f398083329373f03be74b2898d40c6e82d7 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 2 Jul 2024 15:33:05 -0400 Subject: [PATCH 02/22] added to dockstore yml --- .dockstore.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.dockstore.yml b/.dockstore.yml index 4ce4927..2f9775e 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -129,3 +129,8 @@ workflows: primaryDescriptorPath: /PECGS-QUICviz/QUICviz.wdl testParameterFiles: - /PECGS-QUICviz/QUICviz.inputs.json + - name: CNV-Profiler + subclass: WDL + primaryDescriptorPath: /CNV-Profiler/CNV-Profiler.wdl + testParameterFiles: + - /CNV-Profiler/CNV-Profiler.inputs.json From 7bf2094430eda13199cd694a20b40c6493bf6aa1 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 2 Jul 2024 17:02:56 -0400 Subject: [PATCH 03/22] updated the padded command --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 1678f8d..8dc0a22 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -121,7 +121,7 @@ task GetPaddedCnvBed { source activate env_viz python3 < Date: Tue, 2 Jul 2024 17:23:02 -0400 Subject: [PATCH 04/22] update SSD disk requirement --- CNV-Profiler/CNV-Profiler.wdl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 8dc0a22..79dad90 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -153,9 +153,10 @@ task SamtoolsDepth { File target_bed Int minBaseQuality = 20 Int minMappingQuality = 20 - Int? mem_gb - Int? cpu - Int? disk_size_gb + Int mem_gb = 32 + Int cpu = 4 + Int disk_size_gb = 500 + Boolean use_ssd = true String samtools_docker = "euformatics/samtools:1.20" } command <<< @@ -179,10 +180,10 @@ task SamtoolsDepth { File depth_profile = "output/~{sampleName}_samtools.depth" } runtime { - memory: select_first([mem_gb, 7]) * 1000 + " MB" - cpu: select_first([cpu, 1]) + memory: mem_gb * 1000 + " MB" + cpu: cpu docker: samtools_docker - disks: "local-disk ~{disk_size_gb} SSD" + disks: "local-disk " + disk_size_gb + if use_ssd then " SSD" else " HDD" preemptible: 0 maxRetries: 3 } @@ -199,6 +200,7 @@ task cnvDepthProfiler{ Int preemptible = 0 Int disk_size_gb = 500 Int maxRetries = 1 + Boolean use_ssd = true } command <<< set -e @@ -220,7 +222,7 @@ task cnvDepthProfiler{ memory: mem_gb + " GB" cpu: cpu docker: cnvProfiler_Docker - disks: "local-disk ~{disk_size_gb} SSD" + disks: "local-disk " + disk_size_gb + if use_ssd then " SSD" else " HDD" preemptible: preemptible maxRetries: maxRetries } @@ -239,6 +241,7 @@ task HeterozygosityCheck{ Int preemptible = 0 Int disk_size_gb = 500 Int maxRetries = 1 + Boolean use_ssd = true } command <<< set -e @@ -265,7 +268,7 @@ task HeterozygosityCheck{ memory: mem_gb + " GB" cpu: cpu docker: cnvProfiler_Docker - disks: "local-disk ~{disk_size_gb} SSD" + disks: "local-disk " + disk_size_gb + if use_ssd then " SSD" else " HDD" preemptible: preemptible maxRetries: maxRetries } From adaa808283605d79e562e314d61e2988961228a1 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 2 Jul 2024 22:24:28 -0400 Subject: [PATCH 05/22] bed file format bug fix --- CNV-Profiler/CNV-Profiler.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 79dad90..808bbdf 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -131,7 +131,10 @@ task GetPaddedCnvBed { with open('padded_cnv.bed', 'a') as f: for interval in padded_cnv_interval_list: - f.write(interval + '\n') + chrom = interval.split(':')[0] + start = interval.split(':')[1].split('-')[0] + end = interval.split(':')[1].split('-')[1] + f.write("{chr}\t{start}\t{end}" + '\n') CODE >>> runtime { From 8b2e677d5befc359a5bcb5bd4667174935ec02df Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 2 Jul 2024 22:44:48 -0400 Subject: [PATCH 06/22] bug fix --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 808bbdf..26f8272 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -134,7 +134,7 @@ task GetPaddedCnvBed { chrom = interval.split(':')[0] start = interval.split(':')[1].split('-')[0] end = interval.split(':')[1].split('-')[1] - f.write("{chr}\t{start}\t{end}" + '\n') + f.write(f"{chr}\t{start}\t{end}" + '\n') CODE >>> runtime { From b69663b4b16bf731c519a11e6bb4bf4b2349ed6a Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 2 Jul 2024 23:01:29 -0400 Subject: [PATCH 07/22] bug fix for padded section --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 26f8272..e3798bd 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -131,7 +131,7 @@ task GetPaddedCnvBed { with open('padded_cnv.bed', 'a') as f: for interval in padded_cnv_interval_list: - chrom = interval.split(':')[0] + chr = interval.split(':')[0] start = interval.split(':')[1].split('-')[0] end = interval.split(':')[1].split('-')[1] f.write(f"{chr}\t{start}\t{end}" + '\n') From 6fd175f33baf480ba9dd200c388e73ae9a19132a Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 2 Jul 2024 23:52:06 -0400 Subject: [PATCH 08/22] argument name bug fix --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index e3798bd..6487716 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -215,7 +215,7 @@ task cnvDepthProfiler{ python3 /BaseImage/CovProfileViz/scripts/CNV_Depth_Profiler.py \ -c ~{depthProfile} \ -b ~{cnvBedFile} \ - -n output/~{sampleName} + -o output/~{sampleName} >>> output { From 27e3155a6beb7404274763a0cf1ee777c629149c Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Mon, 22 Jul 2024 12:56:53 -0400 Subject: [PATCH 09/22] updated the depth viz --- CNV-Profiler/CNV-Profiler.wdl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 6487716..c9a5f2a 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -3,7 +3,7 @@ version 1.0 workflow CNV_Profiler { input{ String sampleName - String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2" + String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.3" File cramOrBamFile File cramOrBamIndexFile File referenceFasta @@ -198,6 +198,9 @@ task cnvDepthProfiler{ String cnvProfiler_Docker File depthProfile File cnvBedFile + File? secondBedFile + Int smooth_window = 5000 + Int intervalPadding = 0 Int mem_gb = 64 Int cpu = 8 Int preemptible = 0 @@ -215,7 +218,12 @@ task cnvDepthProfiler{ python3 /BaseImage/CovProfileViz/scripts/CNV_Depth_Profiler.py \ -c ~{depthProfile} \ -b ~{cnvBedFile} \ - -o output/~{sampleName} + -n ~{sampleName} \ + -sb ~{secondBedFile} \ + -sn DRAGEN \ + -p ~{intervalPadding} \ + -s ~{smooth_window} \ + -o output >>> output { From a64c53670e65bbcdba1534ac624663d06b8390cb Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 23 Jul 2024 08:23:49 -0400 Subject: [PATCH 10/22] adjust the padding logic --- CNV-Profiler/CNV-Profiler.wdl | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index c9a5f2a..51c701e 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -29,6 +29,7 @@ workflow CNV_Profiler { call GetPaddedCnvBed { input: cnvBedFile = cnvBedFile, + referenceDict = referenceDict, cnvProfiler_Docker = cnvProfiler_Docker } call SamtoolsDepth { @@ -111,6 +112,7 @@ task CramToBam { task GetPaddedCnvBed { input { File cnvBedFile + File referenceDict String cnvProfiler_Docker Int mem_gb = 1 Int cpu = 1 @@ -120,6 +122,21 @@ task GetPaddedCnvBed { command <<< source activate env_viz python3 < length_dict[chr]: + padding_end = length_dict[chr] + else: + padding_end = initial_padding_end + # Add the padded interval to the list + padded_cnv_interval_list.append(f'{chr}:{padding_start}-{padding_end}') + with open('padded_cnv.bed', 'a') as f: for interval in padded_cnv_interval_list: From b2e9d276b2b0d7d5d97c444aa09158bfc06f96a9 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 23 Jul 2024 08:43:26 -0400 Subject: [PATCH 11/22] fixed syntax error --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 51c701e..fe47b31 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -126,7 +126,7 @@ task GetPaddedCnvBed { # Read the reference dictionary file to get the chromosome lengths length_dict = {} - with open(~{referenceDict}, 'r') as f: + with open('~{referenceDict}', 'r') as f: for i in f.readlines(): if i.startswith('@SQ\tSN'): chrom = i.split('\t')[1].split('SN:')[1] From d1e43be1510b26787b638f393e1bec3a262fbf17 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 23 Jul 2024 13:52:27 -0400 Subject: [PATCH 12/22] docker image update with output path changes --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index fe47b31..d490b07 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -298,7 +298,7 @@ task HeterozygosityCheck{ -n1 ~{sampleName} \ -n2 HG001 \ -n3 HG002 \ - -o output/~{sampleName} + -o output >>> output { From bcf98a8a1831243d6d6243e83b999e7f5868e557 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Wed, 31 Jul 2024 15:24:05 -0400 Subject: [PATCH 13/22] update WDL according to viz tool change --- CNV-Profiler/CNV-Profiler.wdl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index d490b07..dd0f543 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -45,7 +45,8 @@ workflow CNV_Profiler { sampleName = sampleName, depthProfile = SamtoolsDepth.depth_profile, cnvBedFile = cnvBedFile, - cnvProfiler_Docker = cnvProfiler_Docker + cnvProfiler_Docker = cnvProfiler_Docker, + PaddedcnvBedFile = GetPaddedCnvBed.paddedCnvBed } if (heterozygosityCheck) { call HeterozygosityCheck { @@ -227,8 +228,7 @@ task cnvDepthProfiler{ String cnvProfiler_Docker File depthProfile File cnvBedFile - File? secondBedFile - Int smooth_window = 5000 + File PaddedcnvBedFile Int intervalPadding = 0 Int mem_gb = 64 Int cpu = 8 @@ -248,10 +248,8 @@ task cnvDepthProfiler{ -c ~{depthProfile} \ -b ~{cnvBedFile} \ -n ~{sampleName} \ - -sb ~{secondBedFile} \ - -sn DRAGEN \ + -pd ~{PaddedcnvBedFile} \ -p ~{intervalPadding} \ - -s ~{smooth_window} \ -o output >>> From 4d53ceefea76c101ec3035aaf93af81530a31d8a Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Wed, 31 Jul 2024 16:33:23 -0400 Subject: [PATCH 14/22] bug fix wrong arg name --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index dd0f543..2ba3b18 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -248,7 +248,7 @@ task cnvDepthProfiler{ -c ~{depthProfile} \ -b ~{cnvBedFile} \ -n ~{sampleName} \ - -pd ~{PaddedcnvBedFile} \ + -pb ~{PaddedcnvBedFile} \ -p ~{intervalPadding} \ -o output From 413d9d26f472fe1590438319c2a1fb18ce2c4bed Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 24 Sep 2024 20:12:25 -0400 Subject: [PATCH 15/22] acccept CNV intervals --- CNV-Profiler/CNV-Profiler.wdl | 81 ++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 2ba3b18..35ea27a 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -9,11 +9,14 @@ workflow CNV_Profiler { File referenceFasta File referenceFastaIndex File referenceDict - File cnvBedFile + File? cnvBedFile + Array[String]? cnvIntervals Boolean heterozygosityCheck = false File? hardFilteredVcfFile } - if (basename(cramOrBamFile) != basename(cramOrBamFile, ".cram")) { + Boolean isCram = basename(cramOrBamFile) != basename(cramOrBamFile, ".cram") + + if (isCram) { call CramToBam { input: sampleName = sampleName, @@ -24,8 +27,27 @@ workflow CNV_Profiler { referenceDict = referenceDict } } + File alignedBam = select_first([cramOrBamFile, CramToBam.output_bam]) File alignedBai = select_first([cramOrBamIndexFile, CramToBam.output_bai]) + + call ValidateCnvInputs { + input: + cnvBedFile = cnvBedFile, + cnvIntervals = cnvIntervals, + cnvProfiler_Docker = cnvProfiler_Docker + } + + if (defined(cnvIntervals)) { + call CreateBedFromIntervals { + input: + cnvIntervals = cnvIntervals, + cnvProfiler_Docker = cnvProfiler_Docker + } + } + + File cnvBedFile = select_first([cnvBedFile, CreateBedFromIntervals.output_bed]) + call GetPaddedCnvBed { input: cnvBedFile = cnvBedFile, @@ -110,6 +132,61 @@ task CramToBam { } } +task ValidateCnvInputs { + input { + File? cnvBedFile + Array[String]? cnvIntervals + String cnvProfiler_Docker + Int mem_gb = 1 + Int cpu = 1 + Int disk_size_gb = 10 + } + command <<< + if [[ -n "${cnvBedFile}" && -n "${cnvIntervals}" ]]; then + echo "Both CNV bed file and CNV intervals were provided. Please provide only one." 1>&2 + exit 1 + elif [[ -z "${cnvBedFile}" && -z "${cnvIntervals}" ]]; then + echo "Neither CNV bed file nor CNV intervals were provided. Please provide one." 1>&2 + exit 1 + else + echo "Input validation passed." + fi + >>> + runtime { + docker: cnvProfiler_Docker + cpu: cpu + memory: mem_gb + " GB" + disks: "local-disk " + disk_size_gb + " HDD" + } + output { + String cnv_input_validation = read_string(stdout()) + } +} + +task CreateBedFromIntervals { + input { + Array[String] cnvIntervals + String cnvProfiler_Docker + Int mem_gb = 1 + Int cpu = 1 + Int disk_size_gb = 10 + } + command <<< + for interval in ~{join(cnvIntervals, " ")}; do + echo $interval | tr ':' '\t' | tr '-' '\t' >> cnv_intervals.bed + done + >>> + runtime { + docker: cnvProfiler_Docker + cpu: cpu + memory: mem_gb + " GB" + disks: "local-disk " + disk_size_gb + " HDD" + } + output { + File output_bed = "cnv_intervals.bed" + } +} + task GetPaddedCnvBed { input { File cnvBedFile From ba11915d05b0be6eba494e024c1ce8f9d074f1d3 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 24 Sep 2024 20:37:01 -0400 Subject: [PATCH 16/22] change the create BED script from bash to python --- CNV-Profiler/CNV-Profiler.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 35ea27a..219446e 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -165,16 +165,22 @@ task ValidateCnvInputs { task CreateBedFromIntervals { input { - Array[String] cnvIntervals + Array[String]? cnvIntervals String cnvProfiler_Docker Int mem_gb = 1 Int cpu = 1 Int disk_size_gb = 10 } command <<< - for interval in ~{join(cnvIntervals, " ")}; do - echo $interval | tr ':' '\t' | tr '-' '\t' >> cnv_intervals.bed - done + source activate env_viz + python3 <>> runtime { docker: cnvProfiler_Docker From e42d5691939fa9dbb52f2e1aaf38f1ac3a18272d Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 24 Sep 2024 21:03:26 -0400 Subject: [PATCH 17/22] updated create bed from interval task --- CNV-Profiler/CNV-Profiler.wdl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 219446e..94d557d 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -172,10 +172,21 @@ task CreateBedFromIntervals { Int disk_size_gb = 10 } command <<< + # Write the CNV intervals to a file + cnvIntervals=(~{sep=" " cnvIntervals}) + for interval in "${cnvIntervals[@]}"; do + echo $interval >> cnv_intervals.txt + done + + # Create a bed file from the CNV intervals source activate env_viz python3 < Date: Tue, 24 Sep 2024 21:36:42 -0400 Subject: [PATCH 18/22] updated default docker tag --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 94d557d..7ea3706 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -3,7 +3,7 @@ version 1.0 workflow CNV_Profiler { input{ String sampleName - String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.3" + String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.4" File cramOrBamFile File cramOrBamIndexFile File referenceFasta From a2d2ed568a11ec0d420da9b51a5939c68d7f06c1 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 24 Sep 2024 22:10:53 -0400 Subject: [PATCH 19/22] update default task runtime config --- CNV-Profiler/CNV-Profiler.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 7ea3706..0fdcfda 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -280,8 +280,8 @@ task SamtoolsDepth { File target_bed Int minBaseQuality = 20 Int minMappingQuality = 20 - Int mem_gb = 32 - Int cpu = 4 + Int mem_gb = 64 + Int cpu = 8 Int disk_size_gb = 500 Boolean use_ssd = true String samtools_docker = "euformatics/samtools:1.20" From 91e5d8ffb1a4d66a094514377f5e7f7f7d3a1b6a Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 24 Sep 2024 22:32:52 -0400 Subject: [PATCH 20/22] removed CNV input validation task --- CNV-Profiler/CNV-Profiler.wdl | 38 ----------------------------------- 1 file changed, 38 deletions(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 0fdcfda..b65b0c4 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -31,13 +31,6 @@ workflow CNV_Profiler { File alignedBam = select_first([cramOrBamFile, CramToBam.output_bam]) File alignedBai = select_first([cramOrBamIndexFile, CramToBam.output_bai]) - call ValidateCnvInputs { - input: - cnvBedFile = cnvBedFile, - cnvIntervals = cnvIntervals, - cnvProfiler_Docker = cnvProfiler_Docker - } - if (defined(cnvIntervals)) { call CreateBedFromIntervals { input: @@ -132,37 +125,6 @@ task CramToBam { } } -task ValidateCnvInputs { - input { - File? cnvBedFile - Array[String]? cnvIntervals - String cnvProfiler_Docker - Int mem_gb = 1 - Int cpu = 1 - Int disk_size_gb = 10 - } - command <<< - if [[ -n "${cnvBedFile}" && -n "${cnvIntervals}" ]]; then - echo "Both CNV bed file and CNV intervals were provided. Please provide only one." 1>&2 - exit 1 - elif [[ -z "${cnvBedFile}" && -z "${cnvIntervals}" ]]; then - echo "Neither CNV bed file nor CNV intervals were provided. Please provide one." 1>&2 - exit 1 - else - echo "Input validation passed." - fi - >>> - runtime { - docker: cnvProfiler_Docker - cpu: cpu - memory: mem_gb + " GB" - disks: "local-disk " + disk_size_gb + " HDD" - } - output { - String cnv_input_validation = read_string(stdout()) - } -} - task CreateBedFromIntervals { input { Array[String]? cnvIntervals From ab7052421cb8a29e186592ae287f4e4643c2c4ab Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Tue, 24 Sep 2024 22:38:37 -0400 Subject: [PATCH 21/22] Remove empty line from createBed Output --- CNV-Profiler/CNV-Profiler.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index b65b0c4..91d82a2 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -196,6 +196,7 @@ task GetPaddedCnvBed { # Padding is 2 times the length of the CNV unless it goes beyond the chromosome length padded_cnv_interval_list = [] with open("~{cnvBedFile}", 'r') as f: + f = [line for line in f if line.strip()] for line in f: chr = line.strip().split('\t')[0] start = line.strip().split('\t')[1] From 061c114df3ee53edfb958007a7e222974a3ff404 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Wed, 25 Sep 2024 08:59:41 -0400 Subject: [PATCH 22/22] updated the docker image to public repo --- CNV-Profiler/CNV-Profiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl index 91d82a2..4ccaef5 100644 --- a/CNV-Profiler/CNV-Profiler.wdl +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -3,7 +3,7 @@ version 1.0 workflow CNV_Profiler { input{ String sampleName - String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.4" + String cnvProfiler_Docker = "us.gcr.io/tag-public/covprofileviz:0.0.4" File cramOrBamFile File cramOrBamIndexFile File referenceFasta