From 7b095bb16cd132718d90188247a402ac6f9ca18f Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sat, 15 Jul 2023 17:10:19 -0700 Subject: [PATCH 01/22] compress QC readcount file --- module/somaticsniper-processes.nf | 27 ++++++++++++++++++++++++++- module/somaticsniper.nf | 3 ++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index 8a743fc7..7aba30c3 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -206,9 +206,10 @@ process create_ReadCountPosition_SomaticSniper { // Recommend to use the same mapping quality -q setting as SomaticSniper process generate_ReadCount_bam_readcount { container params.docker_image_bam_readcount - publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", + publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", mode: "copy", pattern: "*.readcount" + enabled: params.save_intermediate_files publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", @@ -268,6 +269,30 @@ process filter_FalsePositive_SomaticSniper { """ } +// After running fpfilter.pl above, readcount_file can now be compressed +process compress_readcount_SomaticSniper { + container params.docker_image_somaticsniper + publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", + mode: "copy", + pattern: "*.readcount.gz", + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } + + input: + path readcount_file + + output: + path "*.readcount.gz" + path ".command.*" + + """ + set -euo pipefail + gzip $readcount_file + """ + } + // To obtain the "high confidence" set based on further filtering of the somatic score and mapping quality process call_HighConfidenceSNV_SomaticSniper { container params.docker_image_somaticsniper 
diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index 7d7e5fff..50395e31 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -1,4 +1,4 @@ -include { call_sSNV_SomaticSniper; convert_BAM2Pileup_SAMtools; create_IndelCandidate_SAMtools; apply_NormalIndelFilter_SomaticSniper; apply_TumorIndelFilter_SomaticSniper; create_ReadCountPosition_SomaticSniper; generate_ReadCount_bam_readcount; filter_FalsePositive_SomaticSniper; call_HighConfidenceSNV_SomaticSniper } from './somaticsniper-processes' +include { call_sSNV_SomaticSniper; convert_BAM2Pileup_SAMtools; create_IndelCandidate_SAMtools; apply_NormalIndelFilter_SomaticSniper; apply_TumorIndelFilter_SomaticSniper; create_ReadCountPosition_SomaticSniper; generate_ReadCount_bam_readcount; filter_FalsePositive_SomaticSniper; compress_readcount_SomaticSniper; call_HighConfidenceSNV_SomaticSniper } from './somaticsniper-processes' include { rename_samples_BCFtools; generate_sha512sum } from './common' include { compress_index_VCF as compress_index_VCF_hc } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ @@ -50,6 +50,7 @@ workflow somaticsniper { create_ReadCountPosition_SomaticSniper(apply_TumorIndelFilter_SomaticSniper.out.vcf_tumor) generate_ReadCount_bam_readcount(params.reference,create_ReadCountPosition_SomaticSniper.out.snp_positions, tumor_bam, tumor_index) filter_FalsePositive_SomaticSniper(apply_TumorIndelFilter_SomaticSniper.out.vcf_tumor, generate_ReadCount_bam_readcount.out.readcount) + compress_readcount_SomaticSniper(generate_ReadCount_bam_readcount.out.readcount) call_HighConfidenceSNV_SomaticSniper(filter_FalsePositive_SomaticSniper.out.fp_pass) // rename_samples_BCFtools needs bgzipped input compress_index_VCF_hc(call_HighConfidenceSNV_SomaticSniper.out.hc From d1a2055faa85d9bf64f68aa1064fcf540a5d664c Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sat, 15 Jul 2023 17:24:40 -0700 Subject: [PATCH 02/22] typos --- 
module/somaticsniper-processes.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index 7aba30c3..dadd227f 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -208,7 +208,7 @@ process generate_ReadCount_bam_readcount { container params.docker_image_bam_readcount publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", mode: "copy", - pattern: "*.readcount" + pattern: "*.readcount", enabled: params.save_intermediate_files publishDir path: "${params.workflow_log_output_dir}", mode: "copy", @@ -269,12 +269,12 @@ process filter_FalsePositive_SomaticSniper { """ } -// After running fpfilter.pl above, readcount_file can now be compressed +// After running fpfilter.pl above, readcount file can now be compressed process compress_readcount_SomaticSniper { container params.docker_image_somaticsniper publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", mode: "copy", - pattern: "*.readcount.gz", + pattern: "*.readcount.gz" publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", From d597a37423304e15d820ca5b05c1b8bdd48960dd Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 17 Jul 2023 10:46:15 -0700 Subject: [PATCH 03/22] gzip --stdout to avoid circular links --- module/somaticsniper-processes.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index dadd227f..558d96d3 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -271,7 +271,7 @@ process filter_FalsePositive_SomaticSniper { // After running fpfilter.pl above, readcount file can now be compressed process compress_readcount_SomaticSniper { - container params.docker_image_somaticsniper + container params.docker_image_bam_readcount publishDir path: 
"${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", mode: "copy", pattern: "*.readcount.gz" @@ -289,7 +289,7 @@ process compress_readcount_SomaticSniper { """ set -euo pipefail - gzip $readcount_file + gzip --stdout $readcount_file > ${readcount_file}.gz """ } From bdaf8d07778220e9c9a2b9238ac7cbfd37f08ac7 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 11 Aug 2023 16:10:53 -0700 Subject: [PATCH 04/22] add compress_readcount process --- module/somaticsniper-processes.nf | 26 +++++++++++++++++++++++++- module/somaticsniper.nf | 3 ++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index 8a743fc7..6a8bb4a3 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -206,7 +206,7 @@ process create_ReadCountPosition_SomaticSniper { // Recommend to use the same mapping quality -q setting as SomaticSniper process generate_ReadCount_bam_readcount { container params.docker_image_bam_readcount - publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", + publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", mode: "copy", pattern: "*.readcount" publishDir path: "${params.workflow_log_output_dir}", @@ -298,3 +298,27 @@ process call_HighConfidenceSNV_SomaticSniper { --out-file "${params.output_filename}_hc.vcf" """ } + + process compress_readcount_bam_readcount { + container params.docker_image_bam_readcount + publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", + mode: "copy", + pattern: "*readcount.gz" + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + + input: + path readcount + + output: + path "*readcount.gz" + path ".command.*" + + script: + """ + set -euo pipefail + gzip --stdout ${readcount} > ${readcount}.gz + """ 
+ } diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index 7d7e5fff..4a62b023 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -1,4 +1,4 @@ -include { call_sSNV_SomaticSniper; convert_BAM2Pileup_SAMtools; create_IndelCandidate_SAMtools; apply_NormalIndelFilter_SomaticSniper; apply_TumorIndelFilter_SomaticSniper; create_ReadCountPosition_SomaticSniper; generate_ReadCount_bam_readcount; filter_FalsePositive_SomaticSniper; call_HighConfidenceSNV_SomaticSniper } from './somaticsniper-processes' +include { call_sSNV_SomaticSniper; convert_BAM2Pileup_SAMtools; create_IndelCandidate_SAMtools; apply_NormalIndelFilter_SomaticSniper; apply_TumorIndelFilter_SomaticSniper; create_ReadCountPosition_SomaticSniper; generate_ReadCount_bam_readcount; filter_FalsePositive_SomaticSniper; call_HighConfidenceSNV_SomaticSniper; compress_readcount_bam_readcount } from './somaticsniper-processes' include { rename_samples_BCFtools; generate_sha512sum } from './common' include { compress_index_VCF as compress_index_VCF_hc } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ @@ -51,6 +51,7 @@ workflow somaticsniper { generate_ReadCount_bam_readcount(params.reference,create_ReadCountPosition_SomaticSniper.out.snp_positions, tumor_bam, tumor_index) filter_FalsePositive_SomaticSniper(apply_TumorIndelFilter_SomaticSniper.out.vcf_tumor, generate_ReadCount_bam_readcount.out.readcount) call_HighConfidenceSNV_SomaticSniper(filter_FalsePositive_SomaticSniper.out.fp_pass) + compress_readcount_bam_readcount(generate_ReadCount_bam_readcount.out.readcount) // rename_samples_BCFtools needs bgzipped input compress_index_VCF_hc(call_HighConfidenceSNV_SomaticSniper.out.hc .map{ it -> ['SNV', it] }) From a3e0acd30a048bdfbeace552ab8543809f126ba2 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 15 Aug 2023 12:16:09 -0700 Subject: [PATCH 05/22] change to bzip2 in progress --- config/default.config | 2 ++ 
module/somaticsniper-processes.nf | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/config/default.config b/config/default.config index a052eb54..94c09643 100644 --- a/config/default.config +++ b/config/default.config @@ -19,6 +19,7 @@ params { GATK_version = "4.4.0.0" somaticsniper_version = "1.0.5.0" bam_readcount_version = "0.8.0" + blarchive_version = "dev" strelka2_version = "2.9.10" manta_version = "1.6.0" MuSE_version = "2.0.2" @@ -30,6 +31,7 @@ params { docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}" docker_image_somaticsniper = "${-> params.docker_container_registry}/somaticsniper:${params.somaticsniper_version}" docker_image_bam_readcount = "${-> params.docker_container_registry}/bam-readcount:${params.bam_readcount_version}" + docker_image_bam_blarchive = "${-> params.docker_container_registry}/blarchive:${params.blarchive_version}" docker_image_strelka2 = "${-> params.docker_container_registry}/strelka2:${params.strelka2_version}" docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}" docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}" diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index ce079bd4..f8788cc4 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -6,6 +6,7 @@ log.info """\ Docker Images: - docker_image_somaticsniper: ${params.docker_image_somaticsniper} - docker_image_bam_readcount: ${params.docker_image_bam_readcount} +- docker_image_blarchive: ${params.docker_image_blarchive} """ @@ -300,11 +301,11 @@ process call_HighConfidenceSNV_SomaticSniper { """ } - process compress_readcount_bam_readcount { - container params.docker_image_bam_readcount + process compress_readcount_blarchive { + container params.docker_image_blarchive publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", mode: "copy", - pattern: "*readcount.gz" + 
pattern: "*readcount.bz2" publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", @@ -314,12 +315,12 @@ process call_HighConfidenceSNV_SomaticSniper { path readcount output: - path "*readcount.gz" + path "*readcount.bz2" path ".command.*" script: """ set -euo pipefail - gzip --stdout ${readcount} > ${readcount}.gz + blarchive compress_files --input ${readcount} """ } From 1276e98d6c9ca2b5cb760fc639deda39f43ec447 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 15 Aug 2023 18:53:22 -0700 Subject: [PATCH 06/22] gzip -> bzip2 still in progress --- module/somaticsniper.nf | 4 ++-- test/config/a_mini-somaticsniper.config | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index 4a62b023..dd60e323 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -1,4 +1,4 @@ -include { call_sSNV_SomaticSniper; convert_BAM2Pileup_SAMtools; create_IndelCandidate_SAMtools; apply_NormalIndelFilter_SomaticSniper; apply_TumorIndelFilter_SomaticSniper; create_ReadCountPosition_SomaticSniper; generate_ReadCount_bam_readcount; filter_FalsePositive_SomaticSniper; call_HighConfidenceSNV_SomaticSniper; compress_readcount_bam_readcount } from './somaticsniper-processes' +include { call_sSNV_SomaticSniper; convert_BAM2Pileup_SAMtools; create_IndelCandidate_SAMtools; apply_NormalIndelFilter_SomaticSniper; apply_TumorIndelFilter_SomaticSniper; create_ReadCountPosition_SomaticSniper; generate_ReadCount_bam_readcount; filter_FalsePositive_SomaticSniper; call_HighConfidenceSNV_SomaticSniper; compress_readcount_blarchive } from './somaticsniper-processes' include { rename_samples_BCFtools; generate_sha512sum } from './common' include { compress_index_VCF as compress_index_VCF_hc } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ @@ -51,7 +51,7 @@ workflow somaticsniper { 
generate_ReadCount_bam_readcount(params.reference,create_ReadCountPosition_SomaticSniper.out.snp_positions, tumor_bam, tumor_index) filter_FalsePositive_SomaticSniper(apply_TumorIndelFilter_SomaticSniper.out.vcf_tumor, generate_ReadCount_bam_readcount.out.readcount) call_HighConfidenceSNV_SomaticSniper(filter_FalsePositive_SomaticSniper.out.fp_pass) - compress_readcount_bam_readcount(generate_ReadCount_bam_readcount.out.readcount) + compress_readcount_blarchive(generate_ReadCount_bam_readcount.out.readcount) // rename_samples_BCFtools needs bgzipped input compress_index_VCF_hc(call_HighConfidenceSNV_SomaticSniper.out.hc .map{ it -> ['SNV', it] }) diff --git a/test/config/a_mini-somaticsniper.config b/test/config/a_mini-somaticsniper.config index b8db8cdd..73f4c877 100644 --- a/test/config/a_mini-somaticsniper.config +++ b/test/config/a_mini-somaticsniper.config @@ -23,6 +23,21 @@ params { // module options bgzip_extra_args = '' tabix_extra_args = '' + + // mutect2 options + split_intervals_extra_args = '' + mutect2_extra_args = '' + filter_mutect_calls_extra_args = '' + gatk_command_mem_diff = 500.MB + scatter_count = 12 + germline_resource_gnomad_vcf = '/hot/ref/tool-specific-input/GATK/GRCh38/af-only-gnomad.hg38.vcf.gz' + + // MuSE options + dbSNP = '/hot/ref/database/dbSNP-155/original/GRCh38/GCF_000001405.39.gz' + + // Intersect options + ncbi_build = 'GRCh38' + vcf2maf_extra_args = '' } methods.setup() From caecb5959cfe4fe83af18e8885fed3ff20dabdd7 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 16 Aug 2023 10:05:22 -0700 Subject: [PATCH 07/22] fix blarchive docker typo --- config/default.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/default.config b/config/default.config index 94c09643..6d6909d8 100644 --- a/config/default.config +++ b/config/default.config @@ -31,7 +31,7 @@ params { docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}" docker_image_somaticsniper = "${-> 
params.docker_container_registry}/somaticsniper:${params.somaticsniper_version}" docker_image_bam_readcount = "${-> params.docker_container_registry}/bam-readcount:${params.bam_readcount_version}" - docker_image_bam_blarchive = "${-> params.docker_container_registry}/blarchive:${params.blarchive_version}" + docker_image_blarchive = "${-> params.docker_container_registry}/blarchive:${params.blarchive_version}" docker_image_strelka2 = "${-> params.docker_container_registry}/strelka2:${params.strelka2_version}" docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}" docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}" From 4f3afaf223bf5bceabc8648869fedfc623de3864 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 16 Aug 2023 12:10:44 -0700 Subject: [PATCH 08/22] dereference file for bzip2 --- module/somaticsniper-processes.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index f8788cc4..e5a27afa 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -321,6 +321,8 @@ process call_HighConfidenceSNV_SomaticSniper { script: """ set -euo pipefail - blarchive compress_files --input ${readcount} + dereferenced_readcount=\$(readlink -f ${readcount}) + blarchive compress_files --input \$dereferenced_readcount --log ${params.work_dir} + ln -s \$dereferenced_readcount.bz2 ${readcount}.bz2 """ } From 481e185d60b84b625c11ad4f3ba03e8e1591be59 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 16 Aug 2023 12:16:08 -0700 Subject: [PATCH 09/22] maf gzip to bzip2 --- module/intersect-processes.nf | 11 +++++++---- module/intersect.nf | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 99a8245a..d6794962 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -5,6
+5,7 @@ log.info """\ Docker Images: - docker_image_BCFtools: ${params.docker_image_BCFtools} - docker_image_r_VennDiagram: ${params.docker_image_r_VennDiagram} +- docker_image_blarchive: ${params.docker_image_blarchive} ==================================== """ process intersect_VCFs_BCFtools { @@ -162,10 +163,10 @@ process convert_VCF_vcf2maf { } process compress_MAF_vcf2maf { - container params.docker_image_vcf2maf + container params.docker_image_blarchive publishDir path: "${params.workflow_output_dir}/output", mode: "copy", - pattern: "*.gz" + pattern: "*.bz2" publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", @@ -175,12 +176,14 @@ process compress_MAF_vcf2maf { path maf output: - path "*.gz", emit: concat_maf_gz + path "*.bz2", emit: concat_maf_bz2 path ".command.*" script: """ set -euo pipefail - gzip --stdout ${maf} > ${maf}.gz + dereferenced_readcount=\$(readlink -f ${maf}) + blarchive compress_files --input \$dereferenced_maf --log ${params.work_dir} + ln -s \$dereferenced_maf.bz2 ${maf}.bz2 """ } diff --git a/module/intersect.nf b/module/intersect.nf index 4aed0f42..a4e5dd14 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -65,7 +65,7 @@ workflow intersect { .mix(compress_index_VCF.out.index_out .map{ it -> ["intersect-${it[0]}-index", it[2]] } ) - .mix(compress_MAF_vcf2maf.out.concat_maf_gz + .mix(compress_MAF_vcf2maf.out.concat_maf_bz2 .map{ it -> ["intersect-${file(it).getName().split('_')[0]}-maf", it]} ) generate_sha512sum(file_for_sha512) From b1cd3cd7d8fbf0061a7fd219314ac969344dfd1f Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 16 Aug 2023 14:08:43 -0700 Subject: [PATCH 10/22] finish changing maf compression to bzip2 --- module/intersect-processes.nf | 6 +++--- module/intersect.nf | 12 ++++++------ module/somaticsniper-processes.nf | 2 +- module/somaticsniper.nf | 7 +++++-- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/module/intersect-processes.nf 
b/module/intersect-processes.nf index d6794962..25b0dd6c 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -162,7 +162,7 @@ process convert_VCF_vcf2maf { """ } -process compress_MAF_vcf2maf { +process compress_MAF_blarchive { container params.docker_image_blarchive publishDir path: "${params.workflow_output_dir}/output", mode: "copy", @@ -182,8 +182,8 @@ process compress_MAF_vcf2maf { script: """ set -euo pipefail - dereferenced_readcount=\$(readlink -f ${maf}) + dereferenced_maf=\$(readlink -f ${maf}) blarchive compress_files --input \$dereferenced_maf --log ${params.work_dir} - ln -s \$dereferenced_maf.bz2 ${maf}.bz2 + ln -s \${dereferenced_maf}.bz2 ${maf}.bz2 """ } diff --git a/module/intersect.nf b/module/intersect.nf index a4e5dd14..5db4ffac 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -1,5 +1,5 @@ include { generate_sha512sum } from './common' -include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf; compress_MAF_vcf2maf } from './intersect-processes.nf' +include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf; compress_MAF_blarchive } from './intersect-processes.nf' include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ output_dir: params.workflow_output_dir, @@ -51,7 +51,7 @@ workflow intersect { compress_index_VCF(concat_VCFs_BCFtools.out.concat_vcf .map{ it -> ['SNV', it]} ) - compress_MAF_vcf2maf(convert_VCF_vcf2maf.out.concat_maf) + compress_MAF_blarchive(convert_VCF_vcf2maf.out.concat_maf) file_for_sha512 = intersect_VCFs_BCFtools.out.consensus_vcf .flatten() .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-vcf", it]} @@ -60,13 +60,13 @@ workflow intersect { .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]} ) .mix(compress_index_VCF.out.index_out - .map{ it -> ["intersect-${it[0]}-vcf", it[1]] } + .map{ it -> 
["concat-${it[0]}-vcf", it[1]] } ) .mix(compress_index_VCF.out.index_out - .map{ it -> ["intersect-${it[0]}-index", it[2]] } + .map{ it -> ["concat-${it[0]}-index", it[2]] } ) - .mix(compress_MAF_vcf2maf.out.concat_maf_bz2 - .map{ it -> ["intersect-${file(it).getName().split('_')[0]}-maf", it]} + .mix(compress_MAF_blarchive.out.concat_maf_bz2 + .map{ it -> ["concat-SNV-MAF", it]} ) generate_sha512sum(file_for_sha512) } diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index e5a27afa..a5fc6de9 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -315,7 +315,7 @@ process call_HighConfidenceSNV_SomaticSniper { path readcount output: - path "*readcount.bz2" + path "*readcount.bz2", emit: readcount_bz2 path ".command.*" script: diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index dd60e323..e001bac8 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -58,8 +58,11 @@ workflow somaticsniper { rename_samples_BCFtools(normal_id, tumor_id, compress_index_VCF_hc.out.index_out .map{ it -> [it[0], it[1]] }) compress_index_VCF_fix(rename_samples_BCFtools.out.fix_vcf) - file_for_sha512 = compress_index_VCF_fix.out.index_out.map{ it -> ["somaticsniper-${it[0]}-vcf", it[1]] } - .mix(compress_index_VCF_fix.out.index_out.map{ it -> ["somaticsniper-${it[0]}-index", it[2]] }) + file_for_sha512 = compress_index_VCF_fix.out.index_out + .map{ it -> ["${it[0]}-vcf", it[1]] } + .mix(compress_index_VCF_fix.out.index_out + .map{ it -> ["${it[0]}-index", it[2]] } + ) generate_sha512sum(file_for_sha512) emit: vcf = compress_index_VCF_fix.out.index_out.map{ it -> ["${it[1]}"] } From 30463a52f14f1d1c781451d9286fb12f8ef07d64 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 16 Aug 2023 14:31:04 -0700 Subject: [PATCH 11/22] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a192ac60..15000f27 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Add compression of `SomaticSniper` `bam-readcount` QC output - Add `ncbi_build` parameter - Add conversion of concatenated VCF to MAF - Add concatenation of consensus variants to one VCF @@ -18,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `split_VCF_bcftools` to `Mutect2` workflow, separating SNVs, MNVs and Indels ### Changed +- Change compression of intersect MAF file to bzip2 - Update `README.md` - Use `set_env` from `pipeline-Nextflow-config` - Update resource allocation to include new processes From 368c305e1fa6efdc7a192d793ef589c85c0340f7 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 18 Aug 2023 12:26:04 -0700 Subject: [PATCH 12/22] move bzip2 process to common in progress --- main.nf | 14 +++++++------- module/common.nf | 26 ++++++++++++++++++++++++++ module/intersect-processes.nf | 26 -------------------------- module/intersect.nf | 13 ++++++++----- module/somaticsniper-processes.nf | 26 -------------------------- module/somaticsniper.nf | 10 ++++++++-- 6 files changed, 49 insertions(+), 66 deletions(-) diff --git a/main.nf b/main.nf index 3ce44f91..b264b02c 100755 --- a/main.nf +++ b/main.nf @@ -234,12 +234,12 @@ workflow { .mix(muse_idx_ch)) .collect() - intersect( - tool_vcfs, - tool_indices, - script_dir_ch, - run_GetSampleName_Mutect2_normal.out.name_ch, - run_GetSampleName_Mutect2_tumor.out.name_ch - ) +// intersect( +// tool_vcfs, +// tool_indices, +// script_dir_ch, +// run_GetSampleName_Mutect2_normal.out.name_ch, +// run_GetSampleName_Mutect2_tumor.out.name_ch +// ) } } diff --git a/module/common.nf b/module/common.nf index 1af598fa..7d6ab605 100644 --- a/module/common.nf +++ b/module/common.nf @@ -63,3 +63,29 @@ process rename_samples_BCFtools { bcftools reheader -s ${params.output_filename}_samples.txt --output 
${params.output_filename}_${var_type}.vcf.gz ${vcf} """ } + +process compress_file_blarchive { + container params.docker_image_blarchive + publishDir path: "${params.workflow_output_dir}/output", + mode: "copy", + pattern: "*.bz2" + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.split(':')[-1]}-${file_type}/log${file(it).getName()}" } + + input: + tuple val(file_type), path(file_to_compress) + + output: + tuple val(file_type), path("*.bz2"), emit: file_bz2 + path ".command.*" + + script: + """ + set -euo pipefail + dereferenced_file=\$(readlink -f ${file_to_compress}) + blarchive compress_files --input \$dereferenced_file --log ${params.work_dir} + ln -s \${dereferenced_file}.bz2 ${file_to_compress}.bz2 + """ + } diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 2a4cbfcb..cbf99b78 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -161,29 +161,3 @@ process convert_VCF_vcf2maf { ${params.vcf2maf_extra_args} """ } - -process compress_MAF_blarchive { - container params.docker_image_blarchive - publishDir path: "${params.workflow_output_dir}/output", - mode: "copy", - pattern: "*.bz2" - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } - - input: - path maf - - output: - path "*.bz2", emit: concat_maf_bz2 - path ".command.*" - - script: - """ - set -euo pipefail - dereferenced_maf=\$(readlink -f ${maf}) - blarchive compress_files --input \$dereferenced_maf --log ${params.work_dir} - ln -s \${dereferenced_maf}.bz2 ${maf}.bz2 - """ - } diff --git a/module/intersect.nf b/module/intersect.nf index 1db101d6..95c63d1a 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -1,5 +1,6 @@ +//include { compress_file_blarchive; generate_sha512sum } from './common' include { generate_sha512sum } from './common' -include { 
intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf; compress_MAF_blarchive } from './intersect-processes.nf' +include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf } from './intersect-processes.nf' include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ output_dir: params.workflow_output_dir, @@ -51,7 +52,9 @@ workflow intersect { compress_index_VCF(concat_VCFs_BCFtools.out.concat_vcf .map{ it -> ['SNV', it]} ) - compress_MAF_blarchive(convert_VCF_vcf2maf.out.concat_maf) +// compress_file_blarchive(convert_VCF_vcf2maf.out.concat_maf +// .map{ it -> ['MAF', it]} +// ) file_for_sha512 = intersect_VCFs_BCFtools.out.consensus_vcf .flatten() .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-vcf", it]} @@ -65,8 +68,8 @@ workflow intersect { .mix(compress_index_VCF.out.index_out .map{ it -> ["concat-${it[0]}-index", it[2]] } ) - .mix(compress_MAF_blarchive.out.concat_maf_bz2 - .map{ it -> ["concat-SNV-MAF", it]} - ) +// .mix(compress_file_blarchive.out.concat_maf_bz2 +// .map{ it -> ["concat-${it[0]}", it[1]]} +// ) generate_sha512sum(file_for_sha512) } diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index a5fc6de9..a338083f 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -300,29 +300,3 @@ process call_HighConfidenceSNV_SomaticSniper { --out-file "${params.output_filename}_hc.vcf" """ } - - process compress_readcount_blarchive { - container params.docker_image_blarchive - publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", - mode: "copy", - pattern: "*readcount.bz2" - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } - - input: - path readcount - - output: - path "*readcount.bz2", 
emit: readcount_bz2 - path ".command.*" - - script: - """ - set -euo pipefail - dereferenced_readcount=\$(readlink -f ${readcount}) - blarchive compress_files --input \$dereferenced_readcount --log ${params.work_dir} - ln -s \$dereferenced_readcount.bz2 ${readcount}.bz2 - """ - } diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index e001bac8..b585ec75 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -1,4 +1,4 @@ -include { call_sSNV_SomaticSniper; convert_BAM2Pileup_SAMtools; create_IndelCandidate_SAMtools; apply_NormalIndelFilter_SomaticSniper; apply_TumorIndelFilter_SomaticSniper; create_ReadCountPosition_SomaticSniper; generate_ReadCount_bam_readcount; filter_FalsePositive_SomaticSniper; call_HighConfidenceSNV_SomaticSniper; compress_readcount_blarchive } from './somaticsniper-processes' +include { call_sSNV_SomaticSniper; convert_BAM2Pileup_SAMtools; create_IndelCandidate_SAMtools; apply_NormalIndelFilter_SomaticSniper; apply_TumorIndelFilter_SomaticSniper; create_ReadCountPosition_SomaticSniper; generate_ReadCount_bam_readcount; filter_FalsePositive_SomaticSniper; call_HighConfidenceSNV_SomaticSniper } from './somaticsniper-processes' include { rename_samples_BCFtools; generate_sha512sum } from './common' include { compress_index_VCF as compress_index_VCF_hc } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ @@ -15,6 +15,11 @@ include { compress_index_VCF as compress_index_VCF_fix } from '../external/pipel bgzip_extra_args: params.bgzip_extra_args, tabix_extra_args: params.tabix_extra_args ]) +include { compress_file_blarchive} from './common' addParams( + options: [ + output_dir: "${params.workflow_output_dir}/QC", + blarchive_extra_args: params.blarchive_extra_args + ]) workflow somaticsniper { take: @@ -51,7 +56,8 @@ workflow somaticsniper { generate_ReadCount_bam_readcount(params.reference,create_ReadCountPosition_SomaticSniper.out.snp_positions, tumor_bam, 
tumor_index) filter_FalsePositive_SomaticSniper(apply_TumorIndelFilter_SomaticSniper.out.vcf_tumor, generate_ReadCount_bam_readcount.out.readcount) call_HighConfidenceSNV_SomaticSniper(filter_FalsePositive_SomaticSniper.out.fp_pass) - compress_readcount_blarchive(generate_ReadCount_bam_readcount.out.readcount) + compress_file_blarchive(generate_ReadCount_bam_readcount.out.readcount + .map{ it -> ['readcount', it] }) // rename_samples_BCFtools needs bgzipped input compress_index_VCF_hc(call_HighConfidenceSNV_SomaticSniper.out.hc .map{ it -> ['SNV', it] }) From f559ae4f7dbb7a49e5e91baef5eb5f1f1f8617ee Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sat, 19 Aug 2023 12:40:07 -0700 Subject: [PATCH 13/22] move bzip2 process to common still in progress --- module/common.nf | 5 +++-- module/somaticsniper.nf | 7 ++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/module/common.nf b/module/common.nf index 7d6ab605..a7e3e19a 100644 --- a/module/common.nf +++ b/module/common.nf @@ -66,7 +66,7 @@ process rename_samples_BCFtools { process compress_file_blarchive { container params.docker_image_blarchive - publishDir path: "${params.workflow_output_dir}/output", + publishDir path: params.blarchive_publishDir, mode: "copy", pattern: "*.bz2" publishDir path: "${params.workflow_log_output_dir}", @@ -85,7 +85,8 @@ process compress_file_blarchive { """ set -euo pipefail dereferenced_file=\$(readlink -f ${file_to_compress}) - blarchive compress_files --input \$dereferenced_file --log ${params.work_dir} + blarchive compress_files --input \$dereferenced_file \ + --log ${params.work_dir} \ ln -s \${dereferenced_file}.bz2 ${file_to_compress}.bz2 """ } diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index b585ec75..d927972d 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -15,11 +15,8 @@ include { compress_index_VCF as compress_index_VCF_fix } from '../external/pipel bgzip_extra_args: params.bgzip_extra_args, tabix_extra_args: 
params.tabix_extra_args ]) -include { compress_file_blarchive} from './common' addParams( - options: [ - output_dir: "${params.workflow_output_dir}/QC", - blarchive_extra_args: params.blarchive_extra_args - ]) +include { compress_file_blarchive} from './common' params( + params: [blarchive_publishDir : "${params.workflow_output_dir}/QC"]) workflow somaticsniper { take: From 6ac0ef19de1d5bfc164c3b1637b26e9e34f665b2 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sat, 19 Aug 2023 14:17:04 -0700 Subject: [PATCH 14/22] in progress --- module/common.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/module/common.nf b/module/common.nf index a7e3e19a..019b08e1 100644 --- a/module/common.nf +++ b/module/common.nf @@ -5,6 +5,7 @@ log.info """\ Docker Images: - docker_image_BCFtools: ${params.docker_image_BCFtools} - docker_image_validate_params: ${params.docker_image_validate_params} +- docker_image_blarchive: ${params.docker_image_blarchive} """ process generate_sha512sum { From fe6c3ad4240443ed016fb12812bf291b5512803e Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sun, 20 Aug 2023 10:07:59 -0700 Subject: [PATCH 15/22] bzip2 to common complete, pipeline may be hanging upon completion --- main.nf | 14 +++++++------- module/common.nf | 4 ++-- module/intersect-processes.nf | 4 +++- module/intersect.nf | 14 ++++++++------ module/somaticsniper-processes.nf | 2 -- module/somaticsniper.nf | 5 +++-- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/main.nf b/main.nf index b264b02c..3ce44f91 100755 --- a/main.nf +++ b/main.nf @@ -234,12 +234,12 @@ workflow { .mix(muse_idx_ch)) .collect() -// intersect( -// tool_vcfs, -// tool_indices, -// script_dir_ch, -// run_GetSampleName_Mutect2_normal.out.name_ch, -// run_GetSampleName_Mutect2_tumor.out.name_ch -// ) + intersect( + tool_vcfs, + tool_indices, + script_dir_ch, + run_GetSampleName_Mutect2_normal.out.name_ch, + run_GetSampleName_Mutect2_tumor.out.name_ch + ) } } diff --git a/module/common.nf 
b/module/common.nf index 019b08e1..2c381611 100644 --- a/module/common.nf +++ b/module/common.nf @@ -79,7 +79,7 @@ process compress_file_blarchive { tuple val(file_type), path(file_to_compress) output: - tuple val(file_type), path("*.bz2"), emit: file_bz2 + tuple val(file_type), path("*.bz2") path ".command.*" script: @@ -87,7 +87,7 @@ process compress_file_blarchive { set -euo pipefail dereferenced_file=\$(readlink -f ${file_to_compress}) blarchive compress_files --input \$dereferenced_file \ - --log ${params.work_dir} \ + --log ${params.work_dir} ln -s \${dereferenced_file}.bz2 ${file_to_compress}.bz2 """ } diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index cbf99b78..3823c193 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -5,7 +5,9 @@ log.info """\ Docker Images: - docker_image_BCFtools: ${params.docker_image_BCFtools} - docker_image_r_VennDiagram: ${params.docker_image_r_VennDiagram} -- docker_image_blarchive: ${params.docker_image_blarchive} +Intersect Options: +- ncbi_build: ${params.ncbi_build} +- vcf2maf_extra_args: ${params.vcf2maf_extra_args} ==================================== """ process intersect_VCFs_BCFtools { diff --git a/module/intersect.nf b/module/intersect.nf index 95c63d1a..d0e71440 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -1,5 +1,7 @@ -//include { compress_file_blarchive; generate_sha512sum } from './common' include { generate_sha512sum } from './common' +include { compress_file_blarchive} from './common' addParams( + blarchive_publishDir : "${params.workflow_output_dir}/output" + ) include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf } from './intersect-processes.nf' include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ @@ -52,10 +54,10 @@ workflow intersect { compress_index_VCF(concat_VCFs_BCFtools.out.concat_vcf .map{ it -> ['SNV', 
it]} ) -// compress_file_blarchive(convert_VCF_vcf2maf.out.concat_maf -// .map{ it -> ['MAF', it]} -// ) - file_for_sha512 = intersect_VCFs_BCFtools.out.consensus_vcf + compress_file_blarchive(convert_VCF_vcf2maf.out.concat_maf + .map{ it -> ['MAF', it]} + ) + file_for_sha512 = intersect_VCFs_BCFtools.out.intersect_vcf .flatten() .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-vcf", it]} .mix(intersect_VCFs_BCFtools.out.intersect_idx @@ -68,7 +70,7 @@ workflow intersect { .mix(compress_index_VCF.out.index_out .map{ it -> ["concat-${it[0]}-index", it[2]] } ) -// .mix(compress_file_blarchive.out.concat_maf_bz2 +// .mix(compress_file_blarchive.out // .map{ it -> ["concat-${it[0]}", it[1]]} // ) generate_sha512sum(file_for_sha512) diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index a338083f..8b322c7c 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -6,8 +6,6 @@ log.info """\ Docker Images: - docker_image_somaticsniper: ${params.docker_image_somaticsniper} - docker_image_bam_readcount: ${params.docker_image_bam_readcount} -- docker_image_blarchive: ${params.docker_image_blarchive} - """ // Call SomaticSniper diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index d927972d..2d6c16e8 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -15,8 +15,9 @@ include { compress_index_VCF as compress_index_VCF_fix } from '../external/pipel bgzip_extra_args: params.bgzip_extra_args, tabix_extra_args: params.tabix_extra_args ]) -include { compress_file_blarchive} from './common' params( - params: [blarchive_publishDir : "${params.workflow_output_dir}/QC"]) +include { compress_file_blarchive} from './common' addParams( + blarchive_publishDir : "${params.workflow_output_dir}/QC" + ) workflow somaticsniper { take: From 3c71cbe76340af466e188a1b17406e9f8618c7a1 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sun, 20 Aug 2023 13:10:05 -0700 Subject: [PATCH 16/22] final 
bz2, fix log output dirs, indentation --- main.nf | 4 +-- module/common.nf | 2 +- module/intersect-processes.nf | 48 +++++++++++++++++------------------ module/intersect.nf | 6 ++--- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/main.nf b/main.nf index 3ce44f91..4f14961b 100755 --- a/main.nf +++ b/main.nf @@ -83,8 +83,8 @@ include { muse } from './module/muse' addParams( [:])) include { intersect } from './module/intersect' addParams( - workflow_output_dir: "${params.output_dir_base}/intersect-BCFtools-${params.BCFtools_version}", - workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}", + workflow_output_dir: "${params.output_dir_base}/Intersect-BCFtools-${params.BCFtools_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/Intersect-BCFtools-${params.BCFtools_version}", output_filename: generate_standard_filename("BCFtools-${params.BCFtools_version}", params.dataset_id, params.sample_id, diff --git a/module/common.nf b/module/common.nf index 2c381611..b9b10649 100644 --- a/module/common.nf +++ b/module/common.nf @@ -79,7 +79,7 @@ process compress_file_blarchive { tuple val(file_type), path(file_to_compress) output: - tuple val(file_type), path("*.bz2") + tuple val(file_type), path("*.bz2"), emit: compressed_file path ".command.*" script: diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 3823c193..6fac58de 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -26,7 +26,7 @@ process intersect_VCFs_BCFtools { publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } input: path vcfs @@ -68,30 +68,30 @@ process intersect_VCFs_BCFtools { """ } - process plot_VennDiagram_R { - container params.docker_image_r_VennDiagram - publishDir path: 
"${params.workflow_output_dir}/output", - mode: "copy", - pattern: "*.tiff" - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } +process plot_VennDiagram_R { + container params.docker_image_r_VennDiagram + publishDir path: "${params.workflow_output_dir}/output", + mode: "copy", + pattern: "*.tiff" + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } - input: - path script_dir - path isec + input: + path script_dir + path isec - output: - path ".command.*" - path "*.tiff" + output: + path ".command.*" + path "*.tiff" - script: - """ - set -euo pipefail - Rscript ${script_dir}/plot-venn.R --isec_readme README.txt --isec_sites sites.txt --outfile ${params.output_filename}_Venn-diagram.tiff - """ - } + script: + """ + set -euo pipefail + Rscript ${script_dir}/plot-venn.R --isec_readme README.txt --isec_sites sites.txt --outfile ${params.output_filename}_Venn-diagram.tiff + """ + } process concat_VCFs_BCFtools { container params.docker_image_BCFtools @@ -102,7 +102,7 @@ process concat_VCFs_BCFtools { publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } input: path vcfs @@ -137,7 +137,7 @@ process convert_VCF_vcf2maf { publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } input: path vcf diff --git a/module/intersect.nf b/module/intersect.nf index d0e71440..49e6caf9 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -70,8 +70,8 @@ workflow intersect { 
.mix(compress_index_VCF.out.index_out .map{ it -> ["concat-${it[0]}-index", it[2]] } ) -// .mix(compress_file_blarchive.out -// .map{ it -> ["concat-${it[0]}", it[1]]} -// ) + .mix(compress_file_blarchive.out.compressed_file + .map{ it -> ["concat-${it[0]}", it[1]]} + ) generate_sha512sum(file_for_sha512) } From 3bca0be1375ab09021fad7e750f10adfb6a9a0e5 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sun, 20 Aug 2023 13:14:26 -0700 Subject: [PATCH 17/22] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15000f27..b34b37ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `split_VCF_bcftools` to `Mutect2` workflow, separating SNVs, MNVs and Indels ### Changed +- Remove redundant directories in Intersect log output directories - Change compression of intersect MAF file to bzip2 - Update `README.md` - Use `set_env` from `pipeline-Nextflow-config` From ee8b4a1808261169733dc27b7078b02cd828b4d3 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 23 Aug 2023 13:56:07 -0700 Subject: [PATCH 18/22] update to blarchive v2.0.0 --- config/default.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/default.config b/config/default.config index 6d6909d8..4e05233d 100644 --- a/config/default.config +++ b/config/default.config @@ -19,7 +19,7 @@ params { GATK_version = "4.4.0.0" somaticsniper_version = "1.0.5.0" bam_readcount_version = "0.8.0" - blarchive_version = "dev" + blarchive_version = "2.0.0" strelka2_version = "2.9.10" manta_version = "1.6.0" MuSE_version = "2.0.2" From 204a08fbf3b4ccd357d950d1d4d26f5be25e6de2 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 24 Aug 2023 17:28:24 -0700 Subject: [PATCH 19/22] rm readcount from intermediate and add compress_file_blarchive to readcount.bz2 path --- module/somaticsniper-processes.nf | 4 ---- 
module/somaticsniper.nf | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index 8b322c7c..f7b6f5b1 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -205,10 +205,6 @@ process create_ReadCountPosition_SomaticSniper { // Recommend to use the same mapping quality -q setting as SomaticSniper process generate_ReadCount_bam_readcount { container params.docker_image_bam_readcount - publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", - mode: "copy", - pattern: "*.readcount", - enabled: params.save_intermediate_files publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index 2d6c16e8..c0a3cb33 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -16,7 +16,7 @@ include { compress_index_VCF as compress_index_VCF_fix } from '../external/pipel tabix_extra_args: params.tabix_extra_args ]) include { compress_file_blarchive} from './common' addParams( - blarchive_publishDir : "${params.workflow_output_dir}/QC" + blarchive_publishDir : "${params.workflow_output_dir}/QC/compress_file_blarchive" ) workflow somaticsniper { From 8d66e1554da89c0e4f91161f7a3225a6d2dd314c Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 28 Aug 2023 12:54:37 -0700 Subject: [PATCH 20/22] change readcount log folder name --- module/somaticsniper.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index c0a3cb33..48f46cf3 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -16,7 +16,7 @@ include { compress_index_VCF as compress_index_VCF_fix } from '../external/pipel tabix_extra_args: params.tabix_extra_args ]) include { compress_file_blarchive} from './common' addParams( - blarchive_publishDir : 
"${params.workflow_output_dir}/QC/compress_file_blarchive" + blarchive_publishDir : "${params.workflow_output_dir}/QC/generate_ReadCount_bam_readcount" ) workflow somaticsniper { From c09c3c8611e5dfc70beb5e51c2d8e73766da7827 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 30 Aug 2023 14:06:13 -0700 Subject: [PATCH 21/22] move compressed readcount output to intermediate --- module/common.nf | 3 ++- module/intersect.nf | 3 ++- module/somaticsniper.nf | 3 ++- test/config/a_mini-all-tools.config | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/module/common.nf b/module/common.nf index b9b10649..b5185a8e 100644 --- a/module/common.nf +++ b/module/common.nf @@ -69,7 +69,8 @@ process compress_file_blarchive { container params.docker_image_blarchive publishDir path: params.blarchive_publishDir, mode: "copy", - pattern: "*.bz2" + pattern: "*.bz2", + enabled: params.blarchive_enabled publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", diff --git a/module/intersect.nf b/module/intersect.nf index 49e6caf9..94803ba3 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -1,6 +1,7 @@ include { generate_sha512sum } from './common' include { compress_file_blarchive} from './common' addParams( - blarchive_publishDir : "${params.workflow_output_dir}/output" + blarchive_publishDir : "${params.workflow_output_dir}/output", + blarchive_enabled : true ) include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf } from './intersect-processes.nf' include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index 48f46cf3..1fa95f31 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -16,7 +16,8 @@ include { compress_index_VCF as compress_index_VCF_fix } from '../external/pipel tabix_extra_args: params.tabix_extra_args ]) include { 
compress_file_blarchive} from './common' addParams( - blarchive_publishDir : "${params.workflow_output_dir}/QC/generate_ReadCount_bam_readcount" + blarchive_publishDir : "${params.workflow_output_dir}/intermediate/generate_ReadCount_bam_readcount", + blarchive_enabled : params.save_intermediate_files ) workflow somaticsniper { diff --git a/test/config/a_mini-all-tools.config b/test/config/a_mini-all-tools.config index 8c2c2e41..bf5f9a4a 100644 --- a/test/config/a_mini-all-tools.config +++ b/test/config/a_mini-all-tools.config @@ -17,7 +17,7 @@ params { dataset_id = 'TWGSAMIN' // setting params.exome to TRUE will add the '--exome' option when running manta and strelka2 and the -E option when running MuSE exome = false - save_intermediate_files = true + save_intermediate_files = false // module options bgzip_extra_args = '' From a1b797ec66ce35d253d1eb9bfb9e19b31e6caca0 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 30 Aug 2023 15:30:41 -0700 Subject: [PATCH 22/22] fix changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b34b37ce..e94a97cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Add compression of `SomaticSniper` `bam-readcount` QC output +- Add compression of `SomaticSniper` `bam-readcount` output and move to `intermediate` directory - Add `ncbi_build` parameter - Add conversion of concatenated VCF to MAF - Add concatenation of consensus variants to one VCF