Skip to content

Commit

Permalink
Merge pull request #218 from uclahs-cds/sfitz-plot-intersections
Browse files Browse the repository at this point in the history
Sfitz plot intersections
  • Loading branch information
sorelfitzgibbon authored Aug 2, 2023
2 parents 33dc9e1 + 33a5915 commit f70f244
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Add variant intersection Venn diagram
- Add regions filter to variant intersections
- Add second BCFtools step to create full presence/absence variant table (including private)
- Add workflow to create a `consensus.vcf` that includes SNVs found by two or more variant callers
Expand Down
2 changes: 1 addition & 1 deletion config/F2.config
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Other processes after create_IndelCandidate_SAMtools will only run one at a time, so
// Other processes will only run one at a time, so
// we don't need to control their resources.
// The configuration below forces processes to run one at a time by needing
// the total memory available on a lowmem node.
Expand Down
4 changes: 2 additions & 2 deletions config/default.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ params {
manta_version = "1.6.0"
MuSE_version = "2.0.2"
BCFtools_version = "1.17"
call_ssnv_r_version = "dev"
docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}"
docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}"
docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}"
Expand All @@ -32,7 +33,7 @@ params {
docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}"
docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}"
docker_image_BCFtools = "${-> params.docker_container_registry}/bcftools:${params.BCFtools_version}"

docker_image_r_VennDiagram = "${-> params.docker_container_registry}/call-ssnv-r:${params.call_ssnv_r_version}"
}

docker {
Expand All @@ -42,7 +43,6 @@ docker {
runOptions = "${uid_and_gid} ${all_group_ids}"
}


process {
executor = 'local'
}
23 changes: 23 additions & 0 deletions docker/plot-venn/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
ARG MINIFORGE_VERSION=22.9.0-2
ARG UBUNTU_VERSION=20.04

FROM condaforge/mambaforge:${MINIFORGE_VERSION} AS builder

RUN mamba create -qy -p /usr/local \
'r-base>=4.2.1' \
r-argparse \
r-VennDiagram

# Copy from builder into final image
FROM ubuntu:${UBUNTU_VERSION} AS final
COPY --from=builder /usr/local /usr/local

# Add a new user/group called bldocker
RUN groupadd -g 500001 bldocker \
&& useradd -r -u 500001 -g bldocker bldocker

# Change the default user to bldocker from root
USER bldocker

LABEL maintainer="Sorel Fitz-Gibbon <sfitzgibbon@mednet.ucla.edu>"
LABEL org.opencontainers.image.source=https://github.com/uclahs-cds/call-ssnv-r
19 changes: 16 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,11 @@ include { muse } from './module/muse' addParams(

include { intersect } from './module/intersect' addParams(
workflow_output_dir: "${params.output_dir_base}/intersect-BCFtools-${params.BCFtools_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}"
)
workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}",
output_filename: generate_standard_filename("Consensus",
params.dataset_id,
params.sample_id,
[:]))

// Returns the index file for the given bam or vcf
def indexFile(bam_or_vcf) {
Expand Down Expand Up @@ -117,12 +120,18 @@ Channel
}
.set { normal_input }

script_dir_ch = Channel.fromPath(
"$projectDir/r-scripts",
checkIfExists: true
)

workflow {
reference_ch = Channel.from(
params.reference,
params.reference_index,
params.reference_dict
)

// Input file validation
if (params.tumor_only_mode) {
file_to_validate = reference_ch
Expand Down Expand Up @@ -152,6 +161,7 @@ workflow {
run_GetSampleName_Mutect2_tumor(tumor_input.tumor_bam)
}

// Set empty channels so any unused tools don't cause failure at intersect step
Channel.empty().set { somaticsniper_vcf_ch }
Channel.empty().set { strelka2_vcf_ch }
Channel.empty().set { mutect2_vcf_ch }
Expand Down Expand Up @@ -209,6 +219,8 @@ workflow {
muse.out.vcf.set { muse_vcf_ch }
muse.out.idx.set { muse_idx_ch }
}

// Intersect all vcf files
if (params.algorithm.size() > 1) {
tool_vcfs = (somaticsniper_vcf_ch
.mix(strelka2_vcf_ch)
Expand All @@ -224,7 +236,8 @@ workflow {

intersect(
tool_vcfs,
tool_indices
tool_indices,
script_dir_ch
)
}
}
31 changes: 28 additions & 3 deletions module/intersect-processes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ log.info """\
====================================
Docker Images:
- docker_image_BCFtools: ${params.docker_image_BCFtools}
- docker_image_r_VennDiagram: ${params.docker_image_r_VennDiagram}
====================================
"""
process intersect_VCFs_BCFtools {
container params.docker_image_BCFtools
Expand Down Expand Up @@ -33,8 +34,7 @@ process intersect_VCFs_BCFtools {
path "*.vcf.gz.tbi", emit: consensus_idx
path ".command.*"
path "isec-2-or-more"
path "isec-1-or-more/sites.txt"
path "isec-1-or-more/README.txt"
path "isec-1-or-more", emit: isec_dir

script:
vcf_list = vcfs.join(' ')
Expand All @@ -49,3 +49,28 @@ process intersect_VCFs_BCFtools {
bcftools isec --output-type z --prefix isec-1-or-more ${regions_command} ${vcf_list}
"""
}

process plot_VennDiagram_R {
container params.docker_image_r_VennDiagram
publishDir path: "${params.workflow_output_dir}/output",
mode: "copy",
pattern: "*.tiff"
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${task.index}/log${file(it).getName()}" }

input:
path script_dir
path isec_dir

output:
path ".command.*"
path "*.tiff"

script:
"""
set -euo pipefail
Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --outfile ${params.output_filename}_Venn-diagram.tiff
"""
}
7 changes: 6 additions & 1 deletion module/intersect.nf
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
include { generate_sha512sum } from './common'
include { intersect_VCFs_BCFtools } from './intersect-processes.nf'
include { intersect_VCFs_BCFtools; plot_VennDiagram_R } from './intersect-processes.nf'

workflow intersect {
take:
tool_vcfs
tool_indices
script_dir_ch

main:
intersect_VCFs_BCFtools(
Expand All @@ -21,4 +22,8 @@ workflow intersect {
.map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]}
)
generate_sha512sum(file_for_sha512)
plot_VennDiagram_R(
script_dir_ch,
intersect_VCFs_BCFtools.out.isec_dir,
)
}
50 changes: 50 additions & 0 deletions r-scripts/plot-venn.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Script to plot a Venn diagram of shared variants from the different SNV calling algorithms, using the output of BCFtools isec
# Initial commit: Sorel Fitz-Gibbon 2023-06-29
# Input:
# -i, --isec_dir: The directory containing the output from BCFtools intersect
# -d, --dataset: The dataset ID passed from nextflow
# Output:
# - A Venn diagram of shared variant counts from the BCFtools intersection of the VCF files

## Setup the environment ###########################################################################
library('argparse');
library('VennDiagram');

## Parse the arguments #############################################################################
parser <- ArgumentParser();
parser$add_argument('-i', '--isec_dir', help = 'The directory containing the output from BCFtools intersect', type = 'character');
parser$add_argument('-o', '--outfile', help = 'Output filename', type = 'character');
args <- parser$parse_args();

## Function: plot venn diagram #####################################################################
plot.venn <- function(tool.variants, outfile) {
VennDiagram::venn.diagram(
tool.variants.ordered,
filename = outfile,
fill = c('orange', 'red', 'green', 'blue'),
lty = 'dashed',
cex = 1,
cat.cex = 0.8,
cat.col = 'black'
);
}

### Main ###########################################################################################
# Get intersection counts from BCFtools isec output and format for plotting
algorithms <- readLines(paste0(args$isec_dir,'/README.txt'));
algorithms <- algorithms[grep(paste0('^', args$isec_dir), algorithms)];
algorithms <- gsub(paste0(args$isec_dir,'.*\t'), '', algorithms);
algorithms <- gsub('-.*', '', algorithms);
sites <- read.table(paste0(args$isec_dir,'/sites.txt'), header = FALSE, colClasses = 'character');
split.col <- strsplit(as.character(sites$V5), '');
sites$col1 <- sapply(split.col, '[', 1);
sites$col2 <- sapply(split.col, '[', 2);
sites$col3 <- sapply(split.col, '[', 3);
sites$col4 <- sapply(split.col, '[', 4);
sites$V5 <- NULL;
header <- c('chrom', 'pos', 'ref', 'alt', algorithms);
colnames(sites) <- header
variants <- paste(sites$chrom, sites$pos, sep = '_');
tool.variants <- lapply(sites[, algorithms], function(x) variants[x == 1])
tool.variants.ordered <- tool.variants[order(lengths(tool.variants), decreasing = TRUE)];
plot.venn(tool.variants.ordered, args$outfile);
1 change: 0 additions & 1 deletion test/config/a_mini-all-tools.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,4 @@ params {
// MuSE options
dbSNP = '/hot/ref/database/dbSNP-155/original/GRCh38/GCF_000001405.39.gz'
}

methods.setup()

0 comments on commit f70f244

Please sign in to comment.