Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sfitz concat vcf #213

Merged
merged 18 commits into from
Aug 10, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Add concatenation of consensus variants to one VCF
- Add variant intersection Venn diagram
- Add regions filter to variant intersections
- Add second BCFtools step to create full presence/absence variant table (including private)
Expand Down
7 changes: 5 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,11 @@ include { muse } from './module/muse' addParams(

include { intersect } from './module/intersect' addParams(
workflow_output_dir: "${params.output_dir_base}/intersect-BCFtools-${params.BCFtools_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}"
)
workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}",
output_filename: generate_standard_filename("Intersect",
tyamaguchi-ucla marked this conversation as resolved.
Show resolved Hide resolved
params.dataset_id,
params.sample_id,
[:]))

// Returns the index file for the given bam or vcf
def indexFile(bam_or_vcf) {
Expand Down
30 changes: 30 additions & 0 deletions module/intersect-processes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,33 @@ process intersect_VCFs_BCFtools {
Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --dataset ${params.dataset_id}
"""
}

process concat_VCFs_BCFtools {
container params.docker_image_BCFtools
publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}",
mode: "copy",
pattern: "*concat.vcf",
enabled: params.save_intermediate_files
Copy link
Contributor Author

@sorelfitzgibbon sorelfitzgibbon Jul 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This intermediate file will be used by vcf2maf (and has to be uncompressed).

publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${task.index}/log${file(it).getName()}" }

input:
path vcfs
path indices

output:
path "*concat.vcf", emit: concat_vcf
path ".command.*"

script:
vcf_list = vcfs.join(' ')
"""
set -euo pipefail
# BCFtools concat to create a single VCF containing all of the `--nfiles +2` consensus variants
# output header is a uniquified concatenation of all headers
# output `INFO`, `FORMAT`, `NORMAL` and `TUMOR` fields are taken from the first listed VCF that has the variant
yashpatel6 marked this conversation as resolved.
Show resolved Hide resolved
bcftools concat --output-type v --output ${params.output_filename}_SNV-concat.vcf --allow-overlaps --rm-dups all ${vcf_list}
yashpatel6 marked this conversation as resolved.
Show resolved Hide resolved
"""
}
30 changes: 25 additions & 5 deletions module/intersect.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
include { generate_sha512sum } from './common'
include { intersect_VCFs_BCFtools; plot_venn_R } from './intersect-processes.nf'
include { intersect_VCFs_BCFtools; plot_venn_R; concat_VCFs_BCFtools } from './intersect-processes.nf'
include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams(
options: [
output_dir: params.workflow_output_dir,
log_output_dir: params.workflow_log_output_dir,
bgzip_extra_args: params.bgzip_extra_args,
tabix_extra_args: params.tabix_extra_args
])

workflow intersect {
// pass bin directory in project folder as channel into docker
Expand All @@ -16,16 +23,29 @@ workflow intersect {
params.intersect_regions,
params.intersect_regions_index
)
plot_venn_R(
script_dir_ch,
intersect_VCFs_BCFtools.out.isec_dir
)
concat_VCFs_BCFtools(
intersect_VCFs_BCFtools.out.consensus_vcf,
intersect_VCFs_BCFtools.out.consensus_idx
)
compress_index_VCF(concat_VCFs_BCFtools.out.concat_vcf
.map{ it -> ['SNV', it]}
)
file_for_sha512 = intersect_VCFs_BCFtools.out.consensus_vcf
.flatten()
.map{ it -> ["${file(it).getName().split('_')[0]}-SNV-vcf", it]}
.mix(intersect_VCFs_BCFtools.out.consensus_idx
.flatten()
.map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]}
)
.mix(compress_index_VCF.out.index_out
.map{ it -> ["intersect-${it[0]}-vcf", it[1]] }
)
.mix(compress_index_VCF.out.index_out
.map{ it -> ["intersect-${it[0]}-index", it[2]] }
)
generate_sha512sum(file_for_sha512)
plot_venn_R(
script_dir_ch,
intersect_VCFs_BCFtools.out.isec_dir,
)
}