From 2187c417de21b24c6a4c460048e52c7c26feeef7 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 29 Jun 2023 17:27:10 -0700 Subject: [PATCH 01/16] add r-script and docker --- config/default.config | 4 +++ metadata.yaml | 1 + module/intersect-processes.nf | 32 ++++++++++++++++++++--- module/intersect.nf | 7 +++++ r-scripts/plot-intersect.R | 48 +++++++++++++++++++++++++++++++++++ 5 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 r-scripts/plot-intersect.R diff --git a/config/default.config b/config/default.config index 0f732270..77c03041 100644 --- a/config/default.config +++ b/config/default.config @@ -23,6 +23,7 @@ params { manta_version = "1.6.0" MuSE_version = "2.0" BCFtools_version = "1.17" + call_ssnv_r_version = "${manifest.version}" docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}" @@ -32,6 +33,9 @@ params { docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}" docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}" docker_image_BCFtools = "${-> params.docker_container_registry}/bcftools:${params.BCFtools_version}" + docker_image_r_scripts = "${-> params.docker_container_registry}/call-ssnv-r:${params.call_ssnv_r_version}" + " + } diff --git a/metadata.yaml b/metadata.yaml index aea10576..34d89290 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -6,3 +6,4 @@ Contributors: ['Mao Tian', 'Bugh Caden', 'Helena Winata', 'Yash Patel', 'Sorel F Languages: ['Docker', 'Nextflow'] Dependencies: ['Docker', 'Nextflow'] Tools: ['GATK 4.4.0.0', 'SomaticSniper v1.0.5.0', 'SAMtools v1.16.1', 'Strelka2 v2.9.10', 'Manta v1.6.0', 'MuSE v2.0', BCFtools v1.17] +image_name: 'call-ssnv-r' diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 7bf8696d..f9c8794b 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -4,7 +4,8 @@ log.info """\ ==================================== Docker Images: - docker_image_BCFtools: ${params.docker_image_BCFtools} - +- docker_image_r_scripts: ${params.docker_image_r_scripts} +==================================== """ process intersect_VCFs_BCFtools { container params.docker_image_BCFtools @@ -33,8 +34,7 @@ process intersect_VCFs_BCFtools { path "*.vcf.gz.tbi", emit: consensus_idx path ".command.*" path "isec-2-or-more" - path "isec-1-or-more/sites.txt" - path "isec-1-or-more/README.txt" + path "isec-1-or-more" emit: isec_dir script: vcf_list = vcfs.join(' ') @@ -45,3 +45,29 @@ process intersect_VCFs_BCFtools { bcftools isec --output-type z --prefix isec-1-or-more --regions-file ${call_region} ${vcf_list} """ } + + +process plot_venn_R + container params.docker_image_r_scripts + publishDir path: "${params.workflow_output_dir}/output", + mode: "copy", + pattern: "*.tiff" + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.replace(':', '/')}-${task.index}/log${file(it).getName()}" } + + input: + path isec_dir + path script_dir + + output: + path ".command.*" + path "*.tiff" + + script: + """ + set -euo pipefail + Rscript ${script_dir}/plot_venn.R --isec_dir ${isec_dir} --dataset params.dataset_id + """ + } diff --git a/module/intersect.nf b/module/intersect.nf index 2e99f7a5..ce2fd046 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -2,6 +2,9 @@ include { generate_sha512sum } from './common' include { intersect_VCFs_BCFtools } from './intersect-processes.nf' workflow intersect { + // pass bin directory in project folder as channel into docker + script_dir_ch = Channel.fromPath("$projectDir/r-scripts", checkIfExists: true) + take: tool_vcfs tool_indices @@ -21,4 +24,8 @@ workflow intersect { .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]} ) generate_sha512sum(file_for_sha512) + plot_venn_R( + intersect_VCFs_BCFtools.out.isec_dir, + script_dir_ch + ) } diff --git a/r-scripts/plot-intersect.R b/r-scripts/plot-intersect.R new file mode 100644 index 00000000..1537a5b7 --- /dev/null +++ b/r-scripts/plot-intersect.R @@ -0,0 +1,48 @@ +# Script to plot the Venn diagram of the intersection of the VCF files +# Initial commit: Sorel Fitz-Gibbon 2023-06-29 + +## 1. Setup the environment ######################################################################## +library('argparse'); +library('BoutrosLab.utilities'); +library('VennDiagram'); + +# tmp: for testing +#args <- list(); +#args$path <- '/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/unreleased/sfitz-intersect-vcfs/call-sSNV-6.0.0/CPCG0000000196-T001-P01' +#args$valid_algorithm <- 'NULL'; +#args$dataset.id <- 'CPCG'; +#args$output.dir <- paste(getwd(), paste(gsub(' |:', '-', Sys.time()), 'VCF-Comparison-Result', sep = '_'), sep = '/'); + +## 2. Parse the arguments ########################################################################## +parser <- ArgumentParser(); +parser$add_argument('-i', '--isec_dir', help = 'The directory containing the output from BCFtools intersect', type = 'character'); +parser$add_argument('-d', '--dataset', help = 'The dataset ID passed from nextflow', type = 'character'); +args <- parser$parse_args(); + +### Main ########################################################## +algorithms <- readLines(paste0(args$isec_dir,'/README.txt')); +algorithms <- algorithms[grep(paste0('^', args$isec_dir), algorithms)]; +algorithms <- gsub(paste0(args$isec_dir,'.*\t'), '', algorithms); +algorithms <- gsub('-.*', '', algorithms); +sites <- read.table(paste0(args$isec_dir,'/sites.txt'), header = FALSE, colClasses = 'character'); +split_col <- strsplit(as.character(sites$V5), ""); +sites$col1 <- sapply(split_col, "[", 1); +sites$col2 <- sapply(split_col, "[", 2); +sites$col3 <- sapply(split_col, "[", 3); +sites$col4 <- sapply(split_col, "[", 4); +sites$V5 <- NULL; +header <- c('chrom', 'pos', 'ref', 'alt', algorithms); +colnames(sites) <- header +variants <- paste(sites$chrom, sites$pos, sep = '_'); +tool.variants <- lapply(sites[, algorithms], function(x) variants[x == 1]) +tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing = TRUE)] + +VennDiagram::venn.diagram( + tool.variants.ordered, + filename = generate.filename(args$dataset, 'Venn-diagram', 'tiff' ), + fill = c("orange", "red", "green", "blue"), + lty = "dashed", + cex = 1, + cat.cex = 0.8, + cat.col = 'black' + ); From bd020d324be75f3d312db2c6827369812f64c733 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 30 Jun 2023 08:03:30 -0700 Subject: [PATCH 02/16] add Dockerfile --- Dockerfile | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..ef5fc836 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +ARG MINIFORGE_VERSION=22.9.0-2 +ARG UBUNTU_VERSION=20.04 + +FROM condaforge/mambaforge:${MINIFORGE_VERSION} AS builder + +RUN mamba create -qy -p /usr/local \ + 'r-base>=4.2.1' \ + # R packages + r-argparse \ + r-VennDiagram + +# Copy from builder into final image +FROM ubuntu:${UBUNTU_VERSION} AS final +COPY --from=builder /usr/local /usr/local + +# Add a new user/group called bldocker +RUN groupadd -g 500001 bldocker \ + && useradd -r -u 500001 -g bldocker bldocker + +WORKDIR /usr/src +# where's a better place to get this package? +COPY r-scripts/BoutrosLab.utilities_1.9.10.tar.gz /usr/src +RUN R -e "install.packages('BoutrosLab.utilities_1.9.10.tar.gz', repos = NULL, type = 'source')" +RUN rm BoutrosLab.utilities_1.9.10.tar.gz + +# Change the default user to bldocker from root +USER bldocker + +LABEL maintainer="Sorel Fitz-Gibbon " +LABEL org.opencontainers.image.source=https://github.com/uclahs-cds/call-ssnv-r From 82ec5d210b183d3bea64c067d36dbaba65b0b15c Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 30 Jun 2023 13:02:48 -0700 Subject: [PATCH 03/16] debugging --- r-scripts/plot-venn.R | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 r-scripts/plot-venn.R diff --git a/r-scripts/plot-venn.R b/r-scripts/plot-venn.R new file mode 100644 index 00000000..1537a5b7 --- /dev/null +++ b/r-scripts/plot-venn.R @@ -0,0 +1,48 @@ +# Script to plot the Venn diagram of the intersection of the VCF files +# Initial commit: Sorel Fitz-Gibbon 2023-06-29 + +## 1. Setup the environment ######################################################################## +library('argparse'); +library('BoutrosLab.utilities'); +library('VennDiagram'); + +# tmp: for testing +#args <- list(); +#args$path <- '/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/unreleased/sfitz-intersect-vcfs/call-sSNV-6.0.0/CPCG0000000196-T001-P01' +#args$valid_algorithm <- 'NULL'; +#args$dataset.id <- 'CPCG'; +#args$output.dir <- paste(getwd(), paste(gsub(' |:', '-', Sys.time()), 'VCF-Comparison-Result', sep = '_'), sep = '/'); + +## 2. Parse the arguments ########################################################################## +parser <- ArgumentParser(); +parser$add_argument('-i', '--isec_dir', help = 'The directory containing the output from BCFtools intersect', type = 'character'); +parser$add_argument('-d', '--dataset', help = 'The dataset ID passed from nextflow', type = 'character'); +args <- parser$parse_args(); + +### Main ########################################################## +algorithms <- readLines(paste0(args$isec_dir,'/README.txt')); +algorithms <- algorithms[grep(paste0('^', args$isec_dir), algorithms)]; +algorithms <- gsub(paste0(args$isec_dir,'.*\t'), '', algorithms); +algorithms <- gsub('-.*', '', algorithms); +sites <- read.table(paste0(args$isec_dir,'/sites.txt'), header = FALSE, colClasses = 'character'); +split_col <- strsplit(as.character(sites$V5), ""); +sites$col1 <- sapply(split_col, "[", 1); +sites$col2 <- sapply(split_col, "[", 2); +sites$col3 <- sapply(split_col, "[", 3); +sites$col4 <- sapply(split_col, "[", 4); +sites$V5 <- NULL; +header <- c('chrom', 'pos', 'ref', 'alt', algorithms); +colnames(sites) <- header +variants <- paste(sites$chrom, sites$pos, sep = '_'); +tool.variants <- lapply(sites[, algorithms], function(x) variants[x == 1]) +tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing = TRUE)] + +VennDiagram::venn.diagram( + tool.variants.ordered, + filename = generate.filename(args$dataset, 'Venn-diagram', 'tiff' ), + fill = c("orange", "red", "green", "blue"), + lty = "dashed", + cex = 1, + cat.cex = 0.8, + cat.col = 'black' + ); From accf0d6ee89d58428b3464758616e47b0c37ebe1 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 30 Jun 2023 13:04:01 -0700 Subject: [PATCH 04/16] debugging --- Dockerfile | 3 +-- config/default.config | 1 - module/intersect-processes.nf | 4 +-- r-scripts/plot-intersect.R | 48 ----------------------------------- 4 files changed, 3 insertions(+), 53 deletions(-) delete mode 100644 r-scripts/plot-intersect.R diff --git a/Dockerfile b/Dockerfile index ef5fc836..804e72b2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,11 +17,10 @@ COPY --from=builder /usr/local /usr/local RUN groupadd -g 500001 bldocker \ && useradd -r -u 500001 -g bldocker bldocker -WORKDIR /usr/src # where's a better place to get this package? COPY r-scripts/BoutrosLab.utilities_1.9.10.tar.gz /usr/src RUN R -e "install.packages('BoutrosLab.utilities_1.9.10.tar.gz', repos = NULL, type = 'source')" -RUN rm BoutrosLab.utilities_1.9.10.tar.gz +# RUN rm BoutrosLab.utilities_1.9.10.tar.gz # Change the default user to bldocker from root USER bldocker diff --git a/config/default.config b/config/default.config index 77c03041..00313b9b 100644 --- a/config/default.config +++ b/config/default.config @@ -34,7 +34,6 @@ params { docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}" docker_image_BCFtools = "${-> params.docker_container_registry}/bcftools:${params.BCFtools_version}" docker_image_r_scripts = "${-> params.docker_container_registry}/call-ssnv-r:${params.call_ssnv_r_version}" - " } diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index f9c8794b..26429064 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -48,7 +48,7 @@ process intersect_VCFs_BCFtools { process plot_venn_R - container params.docker_image_r_scripts + container test-rvenn publishDir path: "${params.workflow_output_dir}/output", mode: "copy", pattern: "*.tiff" @@ -68,6 +68,6 @@ process plot_venn_R script: """ set -euo pipefail - Rscript ${script_dir}/plot_venn.R --isec_dir ${isec_dir} --dataset params.dataset_id + Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --dataset params.dataset_id """ } diff --git a/r-scripts/plot-intersect.R b/r-scripts/plot-intersect.R deleted file mode 100644 index 1537a5b7..00000000 --- a/r-scripts/plot-intersect.R +++ /dev/null @@ -1,48 +0,0 @@ -# Script to plot the Venn diagram of the intersection of the VCF files -# Initial commit: Sorel Fitz-Gibbon 2023-06-29 - -## 1. Setup the environment ######################################################################## -library('argparse'); -library('BoutrosLab.utilities'); -library('VennDiagram'); - -# tmp: for testing -#args <- list(); -#args$path <- '/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/unreleased/sfitz-intersect-vcfs/call-sSNV-6.0.0/CPCG0000000196-T001-P01' -#args$valid_algorithm <- 'NULL'; -#args$dataset.id <- 'CPCG'; -#args$output.dir <- paste(getwd(), paste(gsub(' |:', '-', Sys.time()), 'VCF-Comparison-Result', sep = '_'), sep = '/'); - -## 2. Parse the arguments ########################################################################## -parser <- ArgumentParser(); -parser$add_argument('-i', '--isec_dir', help = 'The directory containing the output from BCFtools intersect', type = 'character'); -parser$add_argument('-d', '--dataset', help = 'The dataset ID passed from nextflow', type = 'character'); -args <- parser$parse_args(); - -### Main ########################################################## -algorithms <- readLines(paste0(args$isec_dir,'/README.txt')); -algorithms <- algorithms[grep(paste0('^', args$isec_dir), algorithms)]; -algorithms <- gsub(paste0(args$isec_dir,'.*\t'), '', algorithms); -algorithms <- gsub('-.*', '', algorithms); -sites <- read.table(paste0(args$isec_dir,'/sites.txt'), header = FALSE, colClasses = 'character'); -split_col <- strsplit(as.character(sites$V5), ""); -sites$col1 <- sapply(split_col, "[", 1); -sites$col2 <- sapply(split_col, "[", 2); -sites$col3 <- sapply(split_col, "[", 3); -sites$col4 <- sapply(split_col, "[", 4); -sites$V5 <- NULL; -header <- c('chrom', 'pos', 'ref', 'alt', algorithms); -colnames(sites) <- header -variants <- paste(sites$chrom, sites$pos, sep = '_'); -tool.variants <- lapply(sites[, algorithms], function(x) variants[x == 1]) -tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing = TRUE)] - -VennDiagram::venn.diagram( - tool.variants.ordered, - filename = generate.filename(args$dataset, 'Venn-diagram', 'tiff' ), - fill = c("orange", "red", "green", "blue"), - lty = "dashed", - cex = 1, - cat.cex = 0.8, - cat.col = 'black' - ); From b1d6f5feb20a3b765e49d5a346fe8e61ef544d56 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 30 Jun 2023 14:59:52 -0700 Subject: [PATCH 05/16] lintr errors --- r-scripts/plot-venn.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/r-scripts/plot-venn.R b/r-scripts/plot-venn.R index 1537a5b7..464838fc 100644 --- a/r-scripts/plot-venn.R +++ b/r-scripts/plot-venn.R @@ -25,11 +25,11 @@ algorithms <- algorithms[grep(paste0('^', args$isec_dir), algorithms)]; algorithms <- gsub(paste0(args$isec_dir,'.*\t'), '', algorithms); algorithms <- gsub('-.*', '', algorithms); sites <- read.table(paste0(args$isec_dir,'/sites.txt'), header = FALSE, colClasses = 'character'); -split_col <- strsplit(as.character(sites$V5), ""); -sites$col1 <- sapply(split_col, "[", 1); -sites$col2 <- sapply(split_col, "[", 2); -sites$col3 <- sapply(split_col, "[", 3); -sites$col4 <- sapply(split_col, "[", 4); +split.col <- strsplit(as.character(sites$V5), ''); +sites$col1 <- sapply(split.col, '[', 1); +sites$col2 <- sapply(split.col, '[', 2); +sites$col3 <- sapply(split.col, '[', 3); +sites$col4 <- sapply(split.col, '[', 4); sites$V5 <- NULL; header <- c('chrom', 'pos', 'ref', 'alt', algorithms); colnames(sites) <- header @@ -40,8 +40,8 @@ tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing VennDiagram::venn.diagram( tool.variants.ordered, filename = generate.filename(args$dataset, 'Venn-diagram', 'tiff' ), - fill = c("orange", "red", "green", "blue"), - lty = "dashed", + fill = c('orange', 'red', 'green', 'blue'), + lty = 'dashed', cex = 1, cat.cex = 0.8, cat.col = 'black' From 86e1af52c11f05c9fa86b6d758145500c46450c5 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 30 Jun 2023 17:41:43 -0700 Subject: [PATCH 06/16] debug --- config/default.config | 4 ++-- module/intersect-processes.nf | 7 +++---- module/intersect.nf | 2 +- test/config/a_mini-all-tools.config | 1 - 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/config/default.config b/config/default.config index 00313b9b..852b7b36 100644 --- a/config/default.config +++ b/config/default.config @@ -23,7 +23,7 @@ params { manta_version = "1.6.0" MuSE_version = "2.0" BCFtools_version = "1.17" - call_ssnv_r_version = "${manifest.version}" + call_ssnv_r_version = "${->workflow.manifest.version}" docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}" @@ -33,7 +33,7 @@ params { docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}" docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}" docker_image_BCFtools = "${-> params.docker_container_registry}/bcftools:${params.BCFtools_version}" - docker_image_r_scripts = "${-> params.docker_container_registry}/call-ssnv-r:${params.call_ssnv_r_version}" + docker_image_r_scripts = "${-> params.docker_container_registry}/call-ssnv-r:${->params.call_ssnv_r_version}" } diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 26429064..1308030a 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -34,7 +34,7 @@ process intersect_VCFs_BCFtools { path "*.vcf.gz.tbi", emit: consensus_idx path ".command.*" path "isec-2-or-more" - path "isec-1-or-more" emit: isec_dir + path "isec-1-or-more", emit: isec_dir script: vcf_list = vcfs.join(' ') @@ -46,9 +46,8 @@ process intersect_VCFs_BCFtools { """ } - -process plot_venn_R - container test-rvenn +process plot_venn_R { + container 'test-rvenn' publishDir path: "${params.workflow_output_dir}/output", mode: "copy", pattern: "*.tiff" diff --git a/module/intersect.nf b/module/intersect.nf index ce2fd046..1d5b016f 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -1,5 +1,5 @@ include { generate_sha512sum } from './common' -include { intersect_VCFs_BCFtools } from './intersect-processes.nf' +include { intersect_VCFs_BCFtools; plot_venn_R } from './intersect-processes.nf' workflow intersect { // pass bin directory in project folder as channel into docker diff --git a/test/config/a_mini-all-tools.config b/test/config/a_mini-all-tools.config index 60d71945..ed70c7b6 100644 --- a/test/config/a_mini-all-tools.config +++ b/test/config/a_mini-all-tools.config @@ -38,5 +38,4 @@ params { // MuSE options dbSNP = '/hot/ref/database/dbSNP-155/original/GRCh38/GCF_000001405.39.gz' } - methods.setup() From dd08db6db6ae95f91d617c783079185506fd4261 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sat, 1 Jul 2023 15:05:51 -0700 Subject: [PATCH 07/16] minor edits --- Dockerfile | 2 +- r-scripts/plot-venn.R | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 804e72b2..3a32673b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ COPY --from=builder /usr/local /usr/local RUN groupadd -g 500001 bldocker \ && useradd -r -u 500001 -g bldocker bldocker -# where's a better place to get this package? +# BoutrosLab.utilities_1.9.10.tar.gz is soft-linked to /hot/resource/R-package/Bout...gz COPY r-scripts/BoutrosLab.utilities_1.9.10.tar.gz /usr/src RUN R -e "install.packages('BoutrosLab.utilities_1.9.10.tar.gz', repos = NULL, type = 'source')" # RUN rm BoutrosLab.utilities_1.9.10.tar.gz diff --git a/r-scripts/plot-venn.R b/r-scripts/plot-venn.R index 464838fc..bfb87186 100644 --- a/r-scripts/plot-venn.R +++ b/r-scripts/plot-venn.R @@ -1,5 +1,10 @@ # Script to plot the Venn diagram of the intersection of the VCF files # Initial commit: Sorel Fitz-Gibbon 2023-06-29 +# Input: +# -i, --isec_dir: The directory containing the output from BCFtools intersect +# -d, --dataset: The dataset ID passed from nextflow +# Output: +# - A Venn diagram of the intersection of the VCF files ## 1. Setup the environment ######################################################################## library('argparse'); From 58fd8b04f7e00f45e1fa2fc9d5ef27144137196e Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 3 Jul 2023 15:07:14 -0700 Subject: [PATCH 08/16] docker call-ssnv-r integrated --- Dockerfile | 2 +- module/intersect-processes.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3a32673b..54c3884d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,7 @@ RUN groupadd -g 500001 bldocker \ # BoutrosLab.utilities_1.9.10.tar.gz is soft-linked to /hot/resource/R-package/Bout...gz COPY r-scripts/BoutrosLab.utilities_1.9.10.tar.gz /usr/src -RUN R -e "install.packages('BoutrosLab.utilities_1.9.10.tar.gz', repos = NULL, type = 'source')" +RUN R -e "install.packages('/usr/src/BoutrosLab.utilities_1.9.10.tar.gz', repos = NULL, type = 'source')" # RUN rm BoutrosLab.utilities_1.9.10.tar.gz # Change the default user to bldocker from root diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 1308030a..8230b522 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -47,7 +47,7 @@ process intersect_VCFs_BCFtools { } process plot_venn_R { - container 'test-rvenn' + container params.docker_image_r_scripts publishDir path: "${params.workflow_output_dir}/output", mode: "copy", pattern: "*.tiff" From cc603878df0a264bff66b344862be4f066d28da9 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 7 Jul 2023 11:49:36 -0700 Subject: [PATCH 09/16] add regions to plot script and start per chr QC --- config/default.config | 2 +- module/intersect-processes.nf | 5 ++-- module/intersect.nf | 3 ++- r-scripts/plot-venn.R | 50 +++++++++++++++++++++++------------ 4 files changed, 39 insertions(+), 21 deletions(-) diff --git a/config/default.config b/config/default.config index 852b7b36..f90d4bb1 100644 --- a/config/default.config +++ b/config/default.config @@ -23,7 +23,7 @@ params { manta_version = "1.6.0" MuSE_version = "2.0" BCFtools_version = "1.17" - call_ssnv_r_version = "${->workflow.manifest.version}" + call_ssnv_r_version = "dev" docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}" diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 8230b522..c150addd 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -57,8 +57,9 @@ process plot_venn_R { saveAs: { "${task.process.replace(':', '/')}-${task.index}/log${file(it).getName()}" } input: - path isec_dir path script_dir + path isec_dir + path call_region output: path ".command.*" @@ -67,6 +68,6 @@ process plot_venn_R { script: """ set -euo pipefail - Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --dataset params.dataset_id + Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --dataset ${params.dataset_id} --regions ${params.call_region} """ } diff --git a/module/intersect.nf b/module/intersect.nf index 1d5b016f..1b8238f9 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -25,7 +25,8 @@ workflow intersect { ) generate_sha512sum(file_for_sha512) plot_venn_R( + script_dir_ch, intersect_VCFs_BCFtools.out.isec_dir, - script_dir_ch + params.call_region ) } diff --git a/r-scripts/plot-venn.R b/r-scripts/plot-venn.R index bfb87186..55264366 100644 --- a/r-scripts/plot-venn.R +++ b/r-scripts/plot-venn.R @@ -3,28 +3,42 @@ # Input: # -i, --isec_dir: The directory containing the output from BCFtools intersect # -d, --dataset: The dataset ID passed from nextflow +# -r, --regions: call regions (for single chromosome QC plotting) # Output: # - A Venn diagram of the intersection of the VCF files -## 1. Setup the environment ######################################################################## +## Setup the environment ########################################################################### library('argparse'); library('BoutrosLab.utilities'); library('VennDiagram'); # tmp: for testing #args <- list(); -#args$path <- '/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/unreleased/sfitz-intersect-vcfs/call-sSNV-6.0.0/CPCG0000000196-T001-P01' -#args$valid_algorithm <- 'NULL'; -#args$dataset.id <- 'CPCG'; -#args$output.dir <- paste(getwd(), paste(gsub(' |:', '-', Sys.time()), 'VCF-Comparison-Result', sep = '_'), sep = '/'); +#args$isec_dir <- 'isec-1-or-more'; +#args$dataset<- 'CPCG'; +#args$regions_file <- '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'; -## 2. Parse the arguments ########################################################################## +## Parse the arguments ############################################################################# parser <- ArgumentParser(); parser$add_argument('-i', '--isec_dir', help = 'The directory containing the output from BCFtools intersect', type = 'character'); parser$add_argument('-d', '--dataset', help = 'The dataset ID passed from nextflow', type = 'character'); +parser$add_argument('-r', '--regions', help = 'call regions (for single chromosome QC plotting)', type = 'character'); args <- parser$parse_args(); -### Main ########################################################## +## Function: plot venn diagram ##################################################################### +plot.venn <- function(tool.variants, outfile) { + VennDiagram::venn.diagram( + tool.variants.ordered, + filename = outfile, + fill = c('orange', 'red', 'green', 'blue'), + lty = 'dashed', + cex = 1, + cat.cex = 0.8, + cat.col = 'black' + ); + } + +### Main ########################################################################################### algorithms <- readLines(paste0(args$isec_dir,'/README.txt')); algorithms <- algorithms[grep(paste0('^', args$isec_dir), algorithms)]; algorithms <- gsub(paste0(args$isec_dir,'.*\t'), '', algorithms); @@ -40,14 +54,16 @@ header <- c('chrom', 'pos', 'ref', 'alt', algorithms); colnames(sites) <- header variants <- paste(sites$chrom, sites$pos, sep = '_'); tool.variants <- lapply(sites[, algorithms], function(x) variants[x == 1]) -tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing = TRUE)] +tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing = TRUE)]; +plot.venn(tool.variants.ordered, generate.filename(args$dataset, 'Venn-diagram', 'tiff')); -VennDiagram::venn.diagram( - tool.variants.ordered, - filename = generate.filename(args$dataset, 'Venn-diagram', 'tiff' ), - fill = c('orange', 'red', 'green', 'blue'), - lty = 'dashed', - cex = 1, - cat.cex = 0.8, - cat.col = 'black' - ); +## Single chromosome QC ############################################################################ +regions <- read.table(args$regions, header = FALSE, colClasses = c('character', 'NULL', 'NULL')); +# keep tools ordered by number of variants in full set +tool.order <- names(tool.variants.ordered); +for chr in unique(regions$V1) { + chr.sites <- sites[sites$chrom == chr,]; + chr.variants <- paste(chr.sites$chrom, chr.sites$pos, sep = '_'); + chr.tool.variants <- lapply(chr.sites[, tool.order], function(x) chr.variants[x == 1]) + #... heatmap + } From 8450b7f752e18d5081feb246310febba4e9ba816 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 25 Jul 2023 15:23:50 -0700 Subject: [PATCH 10/16] moving per chr QC to another PR. still need to get BoutrosLab.utilities from github --- CHANGELOG.md | 7 ++---- config/default.config | 6 ++--- module/intersect-processes.nf | 43 +++++++++++++++++------------------ module/intersect.nf | 1 - r-scripts/plot-venn.R | 24 +++---------------- 5 files changed, 28 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41d201da..27b2df33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Add variant intersection Venn diagram - Add regions filter to variant intersections - Add second BCFtools step to create full presence/absence variant table (including private) - Add workflow to create a `consensus.vcf` that includes SNVs found by two or more variant callers @@ -14,12 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `split_VCF_bcftools` to `Mutect2` workflow, separating SNVs, MNVs and Indels ### Changed -- Keep `bam-readcount` output in `SomaticSniper` QC folder -- reconfigure call_regions to intersect_regions -- Update to BCFtools v1.17 -======= - reconfigure intersect_regions to use all contigs except `decoy` -- reconfigure call_regions to intersect_regions +- reconfigure call_regions to intersect_regions - Update to BCFtools v1.17 - Keep `bam-readcount` output in `SomaticSniper` QC folder - Update `MuSE` to `v2.0.2` diff --git a/config/default.config b/config/default.config index cc9d3514..4f3a2d12 100644 --- a/config/default.config +++ b/config/default.config @@ -23,6 +23,7 @@ params { manta_version = "1.6.0" MuSE_version = "2.0.2" BCFtools_version = "1.17" + call_ssnv_r_version = "dev" docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}" @@ -32,9 +33,7 @@ params { docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}" docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}" docker_image_BCFtools = "${-> params.docker_container_registry}/bcftools:${params.BCFtools_version}" - docker_image_r_scripts = "${-> params.docker_container_registry}/call-ssnv-r:${->params.call_ssnv_r_version}" - - + docker_image_r_scripts = "${-> params.docker_container_registry}/call-ssnv-r:${params.call_ssnv_r_version}" } docker { @@ -44,7 +43,6 @@ docker { runOptions = "${uid_and_gid} ${all_group_ids}" } - process { executor = 'local' } diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 02743757..fb902c1b 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -50,28 +50,27 @@ process intersect_VCFs_BCFtools { """ } -process plot_venn_R { - container params.docker_image_r_scripts - publishDir path: "${params.workflow_output_dir}/output", - mode: "copy", - pattern: "*.tiff" - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}-${task.index}/log${file(it).getName()}" } + process plot_venn_R { + container params.docker_image_r_scripts + publishDir path: "${params.workflow_output_dir}/output", + mode: "copy", + pattern: "*.tiff" + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.replace(':', '/')}-${task.index}/log${file(it).getName()}" } - input: - path script_dir - path isec_dir - path call_region + input: + path script_dir + path isec_dir - output: - path ".command.*" - path "*.tiff" + output: + path ".command.*" + path "*.tiff" - script: - """ - set -euo pipefail - Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --dataset ${params.dataset_id} --regions ${params.call_region} - """ - } + script: + """ + set -euo pipefail + Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --dataset ${params.dataset_id} + """ + } diff --git a/module/intersect.nf b/module/intersect.nf index 4cd90475..c6d0da24 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -27,6 +27,5 @@ workflow intersect { plot_venn_R( script_dir_ch, intersect_VCFs_BCFtools.out.isec_dir, - params.call_region ) } diff --git a/r-scripts/plot-venn.R b/r-scripts/plot-venn.R index 55264366..c51ff605 100644 --- a/r-scripts/plot-venn.R +++ b/r-scripts/plot-venn.R @@ -1,28 +1,20 @@ -# Script to plot the Venn diagram of the intersection of the VCF files +# Script to plot a Venn diagram of shared variants from the different SNV calling algorithms, using the output of BCFtools isec # Initial commit: Sorel Fitz-Gibbon 2023-06-29 # Input: # -i, --isec_dir: The directory containing the output from BCFtools intersect # -d, --dataset: The dataset ID passed from nextflow -# -r, --regions: call regions (for single chromosome QC plotting) # Output: -# - A Venn diagram of the intersection of the VCF files +# - A Venn diagram of shared variant counts from the BCFtools intersection of the VCF files ## Setup the environment ########################################################################### library('argparse'); library('BoutrosLab.utilities'); library('VennDiagram'); -# tmp: for testing -#args <- list(); -#args$isec_dir <- 'isec-1-or-more'; -#args$dataset<- 'CPCG'; -#args$regions_file <- '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'; - ## Parse the arguments ############################################################################# parser <- ArgumentParser(); parser$add_argument('-i', '--isec_dir', help = 'The directory containing the output from BCFtools intersect', type = 'character'); parser$add_argument('-d', '--dataset', help = 'The dataset ID passed from nextflow', type = 'character'); -parser$add_argument('-r', '--regions', help = 'call regions (for single chromosome QC plotting)', type = 'character'); args <- parser$parse_args(); ## Function: plot venn diagram ##################################################################### @@ -39,6 +31,7 @@ plot.venn <- function(tool.variants, outfile) { } ### Main ########################################################################################### +# Get intersection counts from BCFtools isec output and format for plotting algorithms <- readLines(paste0(args$isec_dir,'/README.txt')); algorithms <- algorithms[grep(paste0('^', args$isec_dir), algorithms)]; algorithms <- gsub(paste0(args$isec_dir,'.*\t'), '', algorithms); @@ -56,14 +49,3 @@ variants <- paste(sites$chrom, sites$pos, sep = '_'); tool.variants <- lapply(sites[, algorithms], function(x) variants[x == 1]) tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing = TRUE)]; plot.venn(tool.variants.ordered, generate.filename(args$dataset, 'Venn-diagram', 'tiff')); - -## Single chromosome QC ############################################################################ -regions <- read.table(args$regions, header = FALSE, colClasses = c('character', 'NULL', 'NULL')); -# keep tools ordered by number of variants in full set -tool.order <- names(tool.variants.ordered); -for chr in unique(regions$V1) { - chr.sites <- sites[sites$chrom == chr,]; - chr.variants <- paste(chr.sites$chrom, chr.sites$pos, sep = '_'); - chr.tool.variants <- lapply(chr.sites[, tool.order], function(x) chr.variants[x == 1]) - #... heatmap - } From 1e9591819b8fbb632b3ea9602d5c66a43630adfd Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 1 Aug 2023 08:25:12 -0700 Subject: [PATCH 11/16] still working on Dockerfile --- Dockerfile | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 54c3884d..99eb9991 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,15 +12,43 @@ RUN mamba create -qy -p /usr/local \ # Copy from builder into final image FROM ubuntu:${UBUNTU_VERSION} AS final COPY --from=builder /usr/local /usr/local +ARG DEBIAN_FRONTEND=noninteractive # Add a new user/group called bldocker RUN groupadd -g 500001 bldocker \ && useradd -r -u 500001 -g bldocker bldocker -# BoutrosLab.utilities_1.9.10.tar.gz is soft-linked to /hot/resource/R-package/Bout...gz -COPY r-scripts/BoutrosLab.utilities_1.9.10.tar.gz /usr/src -RUN R -e "install.packages('/usr/src/BoutrosLab.utilities_1.9.10.tar.gz', repos = NULL, type = 'source')" -# RUN rm BoutrosLab.utilities_1.9.10.tar.gz +RUN apt-get update && \ + apt-get install -y --no-install-recommends libxml2 libxml2-dev libcurl4-gnutls-dev build-essential \ + libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev r-cran-rgl git libssl-dev r-cran-curl && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN R -q -e 'install.packages("devtools", dependencies = TRUE)' && \ + R -q -e 'devtools::install_github("uclahs-cds/public-R-BoutrosLab-utilities")' +## Install required tools to build R packages +#RUN apt-get update && apt-get install -y \ +# --no-install-recommends \ +# build-essential \ +# libcurl4-gnutls-dev \ +# libxml2-dev \ +# libssl-dev \ + +# +## Install required tools to build R packages +#RUN apt-get update && apt-get install -y \ +# --no-install-recommends \ +# build-essential \ +# libcurl4-gnutls-dev \ +# libxml2-dev \ +# libssl-dev \ +# git \ +# +## Clone BoutrosLab.utilities repository +#RUN git clone https://github.com/uclahs-cds/public-R-BoutrosLab-utilities.git /usr/src/BoutrosLab.utilities +# +## Build and install the package from the cloned repository +#RUN R CMD build /usr/src/BoutrosLab.utilities +#RUN R CMD INSTALL /usr/src/BoutrosLab.utilities/BoutrosLab.utilities_*.tar.gz # Change the default user to bldocker from root USER bldocker From 874f15c92c8003b3e5a6028fe23193eb49688437 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 1 Aug 2023 16:29:14 -0700 Subject: [PATCH 12/16] fix dockerfile and add comments to main.nf --- Dockerfile | 48 ++++++++++-------------------------------------- main.nf | 3 +++ 2 files changed, 13 insertions(+), 38 deletions(-) diff --git a/Dockerfile b/Dockerfile index 99eb9991..bc6f8f86 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,52 +3,24 @@ ARG UBUNTU_VERSION=20.04 FROM condaforge/mambaforge:${MINIFORGE_VERSION} AS builder -RUN mamba create -qy -p /usr/local \ - 'r-base>=4.2.1' \ - # R packages - r-argparse \ - r-VennDiagram - # Copy from builder into final image -FROM ubuntu:${UBUNTU_VERSION} AS final +FROM ubuntu:${UBUNTU_VERSION} COPY --from=builder /usr/local /usr/local -ARG DEBIAN_FRONTEND=noninteractive -# Add a new user/group called bldocker -RUN groupadd -g 500001 bldocker \ - && useradd -r -u 500001 -g bldocker bldocker +ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - apt-get install -y --no-install-recommends libxml2 libxml2-dev libcurl4-gnutls-dev build-essential \ + apt-get install -y --no-install-recommends libxml2 libxml2-dev libcurl4-gnutls-dev build-essential r-base r-base-dev \ libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev r-cran-rgl git libssl-dev r-cran-curl && \ + git libssl-dev r-cran-curl && \ apt-get clean && rm -rf /var/lib/apt/lists/* -RUN R -q -e 'install.packages("devtools", dependencies = TRUE)' && \ - R -q -e 'devtools::install_github("uclahs-cds/public-R-BoutrosLab-utilities")' -## Install required tools to build R packages -#RUN apt-get update && apt-get install -y \ -# --no-install-recommends \ -# build-essential \ -# libcurl4-gnutls-dev \ -# libxml2-dev \ -# libssl-dev \ - -# -## Install required tools to build R packages -#RUN apt-get update && apt-get install -y \ -# --no-install-recommends \ -# build-essential \ -# libcurl4-gnutls-dev \ -# libxml2-dev \ -# libssl-dev \ -# git \ -# -## Clone BoutrosLab.utilities repository -#RUN git clone https://github.com/uclahs-cds/public-R-BoutrosLab-utilities.git /usr/src/BoutrosLab.utilities -# -## Build and install the package from the cloned repository -#RUN R CMD build /usr/src/BoutrosLab.utilities -#RUN R CMD INSTALL /usr/src/BoutrosLab.utilities/BoutrosLab.utilities_*.tar.gz +RUN R -q -e 'install.packages(c("devtools", "argparse", "VennDiagram"))' +RUN R -q -e 'devtools::install_github("uclahs-cds/public-R-BoutrosLab-utilities")' + +# Add a new user/group called bldocker +RUN groupadd -g 500001 bldocker \ + && useradd -r -u 500001 -g bldocker bldocker # Change the default user to bldocker from root USER bldocker diff --git a/main.nf b/main.nf index 5b536791..82745a97 100755 --- a/main.nf +++ b/main.nf @@ -152,6 +152,7 @@ workflow { run_GetSampleName_Mutect2_tumor(tumor_input.tumor_bam) } + // Set empty channels so any unused tools don't cause failure at intersect step Channel.empty().set { somaticsniper_vcf_ch } Channel.empty().set { strelka2_vcf_ch } Channel.empty().set { mutect2_vcf_ch } @@ -209,6 +210,8 @@ workflow { muse.out.vcf.set { muse_vcf_ch } muse.out.idx.set { muse_idx_ch } } + + // Intersect all vcf files if (params.algorithm.size() > 1) { tool_vcfs = (somaticsniper_vcf_ch .mix(strelka2_vcf_ch) From 29e4fb381d91b7859052adf62785632d742e220f Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 2 Aug 2023 11:54:57 -0700 Subject: [PATCH 13/16] move and update Dockerfile --- Dockerfile => docker/plot-venn/Dockerfile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) rename Dockerfile => docker/plot-venn/Dockerfile (77%) diff --git a/Dockerfile b/docker/plot-venn/Dockerfile similarity index 77% rename from Dockerfile rename to docker/plot-venn/Dockerfile index bc6f8f86..ef47a4a3 100644 --- a/Dockerfile +++ b/docker/plot-venn/Dockerfile @@ -1,17 +1,11 @@ -ARG MINIFORGE_VERSION=22.9.0-2 ARG UBUNTU_VERSION=20.04 - -FROM condaforge/mambaforge:${MINIFORGE_VERSION} AS builder - -# Copy from builder into final image FROM ubuntu:${UBUNTU_VERSION} -COPY --from=builder /usr/local /usr/local ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends libxml2 libxml2-dev libcurl4-gnutls-dev build-essential r-base r-base-dev \ - libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev r-cran-rgl git libssl-dev r-cran-curl && \ + libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev r-cran-rgl \ git libssl-dev r-cran-curl && \ apt-get clean && rm -rf /var/lib/apt/lists/* From 887ab0b3e8af1249fd91617f56bb902061267b8e Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 2 Aug 2023 11:57:13 -0700 Subject: [PATCH 14/16] update docker image parameter name --- config/default.config | 2 +- module/intersect-processes.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/default.config b/config/default.config index 4f3a2d12..01b47390 100644 --- a/config/default.config +++ b/config/default.config @@ -33,7 +33,7 @@ params { docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}" docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}" docker_image_BCFtools = "${-> params.docker_container_registry}/bcftools:${params.BCFtools_version}" - docker_image_r_scripts = "${-> params.docker_container_registry}/call-ssnv-r:${params.call_ssnv_r_version}" + docker_image_r_VennDiagram = "${-> params.docker_container_registry}/call-ssnv-r:${params.call_ssnv_r_version}" } docker { diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index fb902c1b..d5f5b700 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -4,7 +4,7 @@ log.info """\ ==================================== Docker Images: - docker_image_BCFtools: ${params.docker_image_BCFtools} -- docker_image_r_scripts: ${params.docker_image_r_scripts} +- docker_image_r_scripts: ${params.docker_image_r_VennDiagram} ==================================== """ process intersect_VCFs_BCFtools { @@ -51,7 +51,7 @@ process intersect_VCFs_BCFtools { } process plot_venn_R { - container params.docker_image_r_scripts + container params.docker_image_r_VennDiagram publishDir path: "${params.workflow_output_dir}/output", mode: "copy", pattern: "*.tiff" From d003dd7c7c468adece389cc026a36d86d307c734 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 2 Aug 2023 12:02:11 -0700 Subject: [PATCH 15/16] plot_venn_R -> plot_VennDiagram_R --- module/intersect-processes.nf | 2 +- module/intersect.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index d5f5b700..6a8edf8a 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -50,7 +50,7 @@ process intersect_VCFs_BCFtools { """ } - process plot_venn_R { + process plot_VennDiagram_R { container params.docker_image_r_VennDiagram publishDir path: "${params.workflow_output_dir}/output", mode: "copy", diff --git a/module/intersect.nf b/module/intersect.nf index c6d0da24..cb4e2c7b 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -1,5 +1,5 @@ include { generate_sha512sum } from './common' -include { intersect_VCFs_BCFtools; plot_venn_R } from './intersect-processes.nf' +include { intersect_VCFs_BCFtools; plot_VennDiagram_R } from './intersect-processes.nf' workflow intersect { // pass bin directory in project folder as channel into docker @@ -24,7 +24,7 @@ workflow intersect { .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]} ) generate_sha512sum(file_for_sha512) - plot_venn_R( + plot_VennDiagram_R( script_dir_ch, intersect_VCFs_BCFtools.out.isec_dir, ) From 33a591568d814de0d847dcff4977744ecb0da262 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 2 Aug 2023 14:56:25 -0700 Subject: [PATCH 16/16] rm BL-utils and move script_dir_ch definition --- config/F2.config | 2 +- docker/plot-venn/Dockerfile | 18 +++++++++--------- main.nf | 16 +++++++++++++--- module/intersect-processes.nf | 4 ++-- module/intersect.nf | 4 +--- r-scripts/plot-venn.R | 5 ++--- 6 files changed, 28 insertions(+), 21 deletions(-) diff --git a/config/F2.config b/config/F2.config index b7334aec..aaea5be5 100644 --- a/config/F2.config +++ b/config/F2.config @@ -1,4 +1,4 @@ -// Other processes after create_IndelCandidate_SAMtools will only run one at a time, so +// Other processes will only run one at a time, so // we don't need to control their resources. // The configuration below forces processes to run one at a time by needing // the total memory available on a lowmem node. diff --git a/docker/plot-venn/Dockerfile b/docker/plot-venn/Dockerfile index ef47a4a3..12e5afcf 100644 --- a/docker/plot-venn/Dockerfile +++ b/docker/plot-venn/Dockerfile @@ -1,16 +1,16 @@ +ARG MINIFORGE_VERSION=22.9.0-2 ARG UBUNTU_VERSION=20.04 -FROM ubuntu:${UBUNTU_VERSION} -ARG DEBIAN_FRONTEND=noninteractive +FROM condaforge/mambaforge:${MINIFORGE_VERSION} AS builder -RUN apt-get update && \ - apt-get install -y --no-install-recommends libxml2 libxml2-dev libcurl4-gnutls-dev build-essential r-base r-base-dev \ - libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev r-cran-rgl \ - git libssl-dev r-cran-curl && \ - apt-get clean && rm -rf /var/lib/apt/lists/* +RUN mamba create -qy -p /usr/local \ + 'r-base>=4.2.1' \ + r-argparse \ + r-VennDiagram -RUN R -q -e 'install.packages(c("devtools", "argparse", "VennDiagram"))' -RUN R -q -e 'devtools::install_github("uclahs-cds/public-R-BoutrosLab-utilities")' +# Copy from builder into final image +FROM ubuntu:${UBUNTU_VERSION} AS final +COPY --from=builder /usr/local /usr/local # Add a new user/group called bldocker RUN groupadd -g 500001 bldocker \ diff --git a/main.nf b/main.nf index 82745a97..d55b9b3b 100755 --- a/main.nf +++ b/main.nf @@ -84,8 +84,11 @@ include { muse } from './module/muse' addParams( include { intersect } from './module/intersect' addParams( workflow_output_dir: "${params.output_dir_base}/intersect-BCFtools-${params.BCFtools_version}", - workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}" - ) + workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}", + output_filename: generate_standard_filename("Consensus", + params.dataset_id, + params.sample_id, + [:])) // Returns the index file for the given bam or vcf def indexFile(bam_or_vcf) { @@ -117,12 +120,18 @@ Channel } .set { normal_input } + script_dir_ch = Channel.fromPath( + "$projectDir/r-scripts", + checkIfExists: true + ) + workflow { reference_ch = Channel.from( params.reference, params.reference_index, params.reference_dict ) + // Input file validation if (params.tumor_only_mode) { file_to_validate = reference_ch @@ -227,7 +236,8 @@ workflow { intersect( tool_vcfs, - tool_indices + tool_indices, + script_dir_ch ) } } diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 6a8edf8a..91e50374 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -4,7 +4,7 @@ log.info """\ ==================================== Docker Images: - docker_image_BCFtools: ${params.docker_image_BCFtools} -- docker_image_r_scripts: ${params.docker_image_r_VennDiagram} +- docker_image_r_VennDiagram: ${params.docker_image_r_VennDiagram} ==================================== """ process intersect_VCFs_BCFtools { @@ -71,6 +71,6 @@ process intersect_VCFs_BCFtools { script: """ set -euo pipefail - Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --dataset ${params.dataset_id} + Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --outfile ${params.output_filename}_Venn-diagram.tiff """ } diff --git a/module/intersect.nf b/module/intersect.nf index cb4e2c7b..c0084dec 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -2,12 +2,10 @@ include { generate_sha512sum } from './common' include { intersect_VCFs_BCFtools; plot_VennDiagram_R } from './intersect-processes.nf' workflow intersect { - // pass bin directory in project folder as channel into docker - script_dir_ch = Channel.fromPath("$projectDir/r-scripts", checkIfExists: true) - take: tool_vcfs tool_indices + script_dir_ch main: intersect_VCFs_BCFtools( diff --git a/r-scripts/plot-venn.R b/r-scripts/plot-venn.R index c51ff605..501264e7 100644 --- a/r-scripts/plot-venn.R +++ b/r-scripts/plot-venn.R @@ -8,13 +8,12 @@ ## Setup the environment ########################################################################### library('argparse'); -library('BoutrosLab.utilities'); library('VennDiagram'); ## Parse the arguments ############################################################################# parser <- ArgumentParser(); parser$add_argument('-i', '--isec_dir', help = 'The directory containing the output from BCFtools intersect', type = 'character'); -parser$add_argument('-d', '--dataset', help = 'The dataset ID passed from nextflow', type = 'character'); +parser$add_argument('-o', '--outfile', help = 'Output filename', type = 'character'); args <- parser$parse_args(); ## Function: plot venn diagram ##################################################################### @@ -48,4 +47,4 @@ colnames(sites) <- header variants <- paste(sites$chrom, sites$pos, sep = '_'); tool.variants <- lapply(sites[, algorithms], function(x) variants[x == 1]) tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing = TRUE)]; -plot.venn(tool.variants.ordered, generate.filename(args$dataset, 'Venn-diagram', 'tiff')); +plot.venn(tool.variants.ordered, args$outfile);