-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
submitting PR for bwa for dragen duplex
- Loading branch information
Showing
8 changed files
with
475 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 1 addition & 0 deletions
1
Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.inputs.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"AlignRawReadsBwaAln.CopyUmiTask.bloodbiopsydocker":"${}","AlignRawReadsBwaAln.GetBwaVersion.bwa_path":"/usr/gitc/bwa","AlignRawReadsBwaAln.GetBwaVersion.preemptible_attempts":"${}","AlignRawReadsBwaAln.MBATask.bwa_tool":"bwa","AlignRawReadsBwaAln.MBATask.bwa_version":"0.7.15-r1140","AlignRawReadsBwaAln.MBATask.compression_level":"${workspace.compression_level}","AlignRawReadsBwaAln.MBATask.cpu":"${}","AlignRawReadsBwaAln.MBATask.disk_size":"${250}","AlignRawReadsBwaAln.MBATask.extra_mem":"${}","AlignRawReadsBwaAln.MBATask.gatk_docker":"${}","AlignRawReadsBwaAln.MBATask.mba_extra_args":"${}","AlignRawReadsBwaAln.MBATask.preemptible_tries":"${}","AlignRawReadsBwaAln.MBATask.sort_order":"${}","AlignRawReadsBwaAln.bwa_alignment.cpu":"${8}","AlignRawReadsBwaAln.bwa_alignment.diskSpaceGb":"${500}","AlignRawReadsBwaAln.bwa_alignment.memoryGb":"${32}","AlignRawReadsBwaAln.extract_umis":"${true}","AlignRawReadsBwaAln.gitc_docker":"us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135","AlignRawReadsBwaAln.input_bam":"${this.bam_file}","AlignRawReadsBwaAln.input_bam_index":"${this.bai_file}","AlignRawReadsBwaAln.ref_alt":"${workspace.reference_alt}","AlignRawReadsBwaAln.ref_amb":"${workspace.reference_amb}","AlignRawReadsBwaAln.ref_ann":"${workspace.reference_ann}","AlignRawReadsBwaAln.ref_bwt":"${workspace.reference_bwt}","AlignRawReadsBwaAln.ref_dict":"${workspace.reference_dict}","AlignRawReadsBwaAln.ref_fai":"${workspace.reference_index}","AlignRawReadsBwaAln.ref_fasta":"${workspace.reference}","AlignRawReadsBwaAln.ref_pac":"${workspace.reference_pac}","AlignRawReadsBwaAln.ref_sa":"${workspace.reference_sa}","AlignRawReadsBwaAln.revertsam_task.additional_args":"-RHC 
false","AlignRawReadsBwaAln.revertsam_task.disk_buffer":"${}","AlignRawReadsBwaAln.revertsam_task.docker_override":"${}","AlignRawReadsBwaAln.revertsam_task.gatk_path":"${}","AlignRawReadsBwaAln.revertsam_task.maxRetries":"${}","AlignRawReadsBwaAln.revertsam_task.mem":"${}","AlignRawReadsBwaAln.revertsam_task.preemptible_count":"${}","AlignRawReadsBwaAln.revertsam_task.sort_order":"${}","AlignRawReadsBwaAln.revertsam_task.threads":"${}","AlignRawReadsBwaAln.sample_name":"${this.sample_id}","AlignRawReadsBwaAln.samtofastq_task.disk_space":"${}","AlignRawReadsBwaAln.samtofastq_task.docker_override":"${}","AlignRawReadsBwaAln.samtofastq_task.gatk_override":"${}","AlignRawReadsBwaAln.samtofastq_task.memory":"${}","AlignRawReadsBwaAln.samtofastq_task.num_preempt":"${0}","AlignRawReadsBwaAln.samtofastq_task.num_threads":"${}","AlignRawReadsBwaAln.sortbam.diskgb_buffer":"${200}"} |
143 changes: 143 additions & 0 deletions
143
Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.wdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import "./subworkflows/CopyUmiFromReadName.wdl" as CopyUmiFromReadName | ||
import "./subworkflows/RevertSam.wdl" as RevertSam | ||
import "./subworkflows/BwaAlignment.wdl" as bwa_aln | ||
import "./subworkflows/MergeBamAlignment.wdl" as MergeBamAlignment | ||
import "./subworkflows/SamToFastq.wdl" as samtofastq | ||
|
||
# Re-aligns an existing BAM: optional UMI copy -> RevertSam -> samtofastq -> | ||
# per-FASTQ-pair bwa alignment (scattered) -> MergeBamAlignment -> coordinate sort. | ||
# | ||
# Inputs wired here: the BAM and its index, a sample name used as the output basename, | ||
# the reference FASTA with its index/dict and bwa index files, and an optional gitc | ||
# docker image. Task inputs that are not wired in a call below (for example | ||
# MBATask.bwa_version, MBATask.bwa_tool, bwa_alignment.cpu) must be supplied externally. | ||
workflow AlignRawReadsBwaAln { | ||
File input_bam | ||
File input_bam_index | ||
# When true, CopyUmiTask runs first and its BAM replaces input_bam downstream. | ||
Boolean extract_umis | ||
String sample_name | ||
String? gitc_docker | ||
# Falls back to a pinned genomes-in-the-cloud image when gitc_docker is not provided. | ||
String gitc_docker_or_default = select_first([gitc_docker, "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135"]) | ||
File ref_fasta | ||
File ref_fai | ||
File ref_dict | ||
File ref_alt | ||
File ref_amb | ||
File ref_ann | ||
File ref_bwt | ||
File ref_pac | ||
File ref_sa | ||
|
||
# NOTE(review): GetBwaVersion.version is not referenced anywhere in this workflow; | ||
# confirm whether it was meant to feed MBATask.bwa_version. | ||
call GetBwaVersion { | ||
input: gitc_docker = gitc_docker_or_default | ||
} | ||
|
||
# Conditional call: CopyUmiTask.umi_extracted_bam is only defined when extract_umis is true. | ||
if(extract_umis){ | ||
call CopyUmiFromReadName.CopyUmiTask as CopyUmiTask { | ||
input: bam_file = input_bam, | ||
bam_index = input_bam_index, | ||
base_name = sample_name | ||
} | ||
} | ||
|
||
# select_first picks the UMI-extracted BAM when the conditional ran, otherwise the original BAM. | ||
call RevertSam.RevertSam as revertsam_task { | ||
input: input_bam = select_first([CopyUmiTask.umi_extracted_bam, input_bam]), | ||
base_name = sample_name, | ||
ref_fasta = ref_fasta, | ||
ref_fasta_index = ref_fai, | ||
ref_fasta_dict = ref_dict | ||
} | ||
|
||
# The imported samtofastq task is not visible here; it is used below as a producer of the | ||
# parallel arrays firstEndFastqs and secondEndFastqs. | ||
call samtofastq.samtofastq as samtofastq_task { | ||
input: input_bam = revertsam_task.output_bam | ||
} | ||
|
||
# One alignment shard per index i, pairing firstEndFastqs[i] with secondEndFastqs[i]. | ||
# NOTE(review): assumes both arrays have the same length - confirm against SamToFastq.wdl. | ||
scatter(i in range(length(samtofastq_task.firstEndFastqs))){ | ||
call bwa_aln.BwaAlignment as bwa_alignment { | ||
input: refFasta = ref_fasta, | ||
refFastaIndex = ref_fai, | ||
refFastaDict = ref_dict, | ||
ref_alt = ref_alt, | ||
ref_amb = ref_amb, | ||
ref_ann = ref_ann, | ||
ref_bwt = ref_bwt, | ||
ref_pac = ref_pac, | ||
ref_sa = ref_sa, | ||
firstEndFastq = samtofastq_task.firstEndFastqs[i], | ||
secondEndFastq = samtofastq_task.secondEndFastqs[i], | ||
sampleName = sample_name, | ||
gitc_docker = gitc_docker_or_default | ||
} | ||
} | ||
|
||
# Outside the scatter, bwa_alignment outputs are arrays (one element per shard); they are | ||
# merged against the single reverted (unmapped) BAM. | ||
call MergeBamAlignment.MergeBamAlignmentTask as MBATask { | ||
input: mapped_bam = bwa_alignment.raw_aligned_bam, | ||
unmapped_bam = revertsam_task.output_bam, | ||
bwa_commandline = bwa_alignment.bwa_command, | ||
ref_fasta = ref_fasta, | ||
ref_fasta_index = ref_fai, | ||
ref_dict = ref_dict, | ||
output_bam_basename = sample_name | ||
} | ||
|
||
# Final coordinate sort of the merged BAM using the sortbam task defined in this file. | ||
call sortbam { | ||
input: input_bam = MBATask.output_bam, | ||
output_bam_basename = sample_name | ||
} | ||
} | ||
|
||
# Captures the version string that the bwa executable prints when run without arguments. | ||
# | ||
# Inputs: | ||
#   gitc_docker          - docker image the command runs in | ||
#   bwa_path             - path to the bwa executable inside that image | ||
#   preemptible_attempts - optional preemptible retry count (2 when omitted) | ||
# Output: | ||
#   version - stdout of the command, i.e. the text following "Version: " | ||
task GetBwaVersion { | ||
  Int? preemptible_attempts | ||
  String bwa_path | ||
  String gitc_docker | ||
|
||
  # bwa writes its usage banner to stderr, hence the 2>&1 before filtering for the Version line. | ||
  command { | ||
    ${bwa_path} 2>&1 | \ | ||
    grep -e '^Version' | \ | ||
    sed 's/Version: //' | ||
  } | ||
  output { | ||
    String version = read_string(stdout()) | ||
  } | ||
  runtime { | ||
    preemptible: select_first([preemptible_attempts, 2]) | ||
    maxRetries: 3 | ||
    memory: "1 GB" | ||
    docker: gitc_docker | ||
  } | ||
} | ||
|
||
# Coordinate-sorts a BAM with Picard SortSam, also requesting an index and an MD5 file. | ||
# | ||
# Inputs: | ||
#   input_bam           - BAM to sort | ||
#   output_bam_basename - basename of the outputs (<basename>.bam / .bai / .bam.md5) | ||
#   compression_level   - samjdk compression level passed to the JVM (default 2) | ||
#   preemptible_tries   - preemptible attempts (default 1) | ||
#   extra_mem           - optional GB added on top of the 10 GB base memory | ||
#   diskgb_buffer       - optional GB added on top of the 50 GB base disk | ||
task sortbam { | ||
  String output_bam_basename | ||
  File input_bam | ||
  Int? compression_level = 2 | ||
  Int? preemptible_tries = 1 | ||
  Float? extra_mem | ||
  Float memory = 10 + select_first([extra_mem, 0]) | ||
  Int? diskgb_buffer | ||
  Int diskSpaceGb = 50 + select_first([diskgb_buffer, 0]) | ||
|
||
  command <<< | ||
    set -euxo pipefail | ||
|
||
    java -Dsamjdk.compression_level=${compression_level} -Xms4000m -jar /usr/gitc/picard.jar \ | ||
    SortSam \ | ||
    INPUT=${input_bam} \ | ||
    OUTPUT=${output_bam_basename}.bam \ | ||
    SORT_ORDER="coordinate" \ | ||
    CREATE_INDEX=true \ | ||
    CREATE_MD5_FILE=true \ | ||
    MAX_RECORDS_IN_RAM=300000 | ||
  >>> | ||
|
||
  output { | ||
    File output_bam = "${output_bam_basename}.bam" | ||
    File output_bam_index = "${output_bam_basename}.bai" | ||
    File output_bam_md5 = "${output_bam_basename}.bam.md5" | ||
  } | ||
|
||
  runtime { | ||
    preemptible: select_first([preemptible_tries]) | ||
    memory: memory + " GB" | ||
    bootDiskSizeGb: 12 | ||
    disks: "local-disk ${diskSpaceGb} HDD" | ||
    docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" | ||
  } | ||
} |
58 changes: 58 additions & 0 deletions
58
Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/BwaAlignment.wdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Minimal wrapper workflow that calls BwaAlignment with no inputs wired, so every task | ||
# input must be supplied externally. Presumably a standalone test harness - confirm. | ||
workflow BwaAlignmentTest { | ||
call BwaAlignment | ||
} | ||
|
||
# Aligns one pair of FASTQ files with `bwa aln` (once per end) followed by `bwa sampe`, | ||
# then queryname-sorts the resulting SAM into a BAM with samtools. | ||
# | ||
# Inputs: | ||
#   refFasta, refFastaIndex, refFastaDict - reference FASTA, its index and dictionary | ||
#   ref_alt/amb/ann/bwt/pac/sa            - bwa index files; declared as File inputs but not | ||
#                                           referenced by name in the command (presumably so | ||
#                                           they are localized next to refFasta - confirm) | ||
#   firstEndFastq, secondEndFastq         - the two ends of the read pairs | ||
#   sampleName                            - basename of the SAM/BAM outputs | ||
#   gitc_docker                           - docker image providing /usr/gitc/bwa | ||
#   memoryGb, diskSpaceGb, cpu            - runtime resources; cpu is also bwa's -t thread count | ||
# Outputs: | ||
#   raw_aligned_bam - <sampleName>.aligned.bam, sorted by read name (samtools sort -n) | ||
#   bwa_command     - the bwa command lines that were run, joined into a single string | ||
task BwaAlignment { | ||
  File refFasta | ||
  File refFastaIndex | ||
  File refFastaDict | ||
  File ref_alt | ||
  File ref_amb | ||
  File ref_ann | ||
  File ref_bwt | ||
  File ref_pac | ||
  File ref_sa | ||
  File firstEndFastq | ||
  String fq1 = basename(firstEndFastq) | ||
  String basename1 = basename(firstEndFastq, ".fastq.gz") | ||
  File secondEndFastq | ||
  String fq2 = basename(secondEndFastq) | ||
  String basename2 = basename(secondEndFastq, ".fastq.gz") | ||
  String sampleName | ||
  String gitc_docker | ||
  Int memoryGb | ||
  Int diskSpaceGb | ||
  Int cpu | ||
|
||
  command <<< | ||
    # Stop at the first failing step; without this only the exit status of the final | ||
    # samtools call decides whether the task is reported as failed. | ||
    set -euo pipefail | ||
|
||
    # Work on the FASTQs from the current directory under their plain file names. | ||
    mv ${firstEndFastq} ./${fq1} | ||
    mv ${secondEndFastq} ./${fq2} | ||
|
||
    # Each bwa step is followed by an export that appends the same command line to bwa_cmd, | ||
    # so the recorded string must be kept in sync with the command actually executed. | ||
    /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t ${cpu} -o 1 ${refFasta} ./${fq1} -f ./${basename1}.sai | ||
    export bwa_cmd="/usr/gitc/bwa aln -q 5 -l 32 -k 2 -t "${cpu}" -o 1 "${refFasta}" ./"${fq1}" -f ./"${basename1}".sai\;" | ||
|
||
    /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t ${cpu} -o 1 ${refFasta} ./${fq2} -f ./${basename2}.sai | ||
    export bwa_cmd=$bwa_cmd" /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t "${cpu}" -o 1 "${refFasta}" ./"${fq2}" -f ./"${basename2}".sai\;" | ||
|
||
    /usr/gitc/bwa sampe -t ${cpu} -P -T ${refFasta} ./${basename1}.sai ./${basename2}.sai ./${fq1} ./${fq2} -f ./${sampleName}.aligned.sam | ||
    # The recorded sampe line now carries the same -t/-T flags as the executed command above. | ||
    export bwa_cmd=$bwa_cmd" /usr/gitc/bwa sampe -t "${cpu}" -P -T "${refFasta}" ./"${basename1}".sai ./"${basename2}".sai ./"${fq1}" ./"${fq2}" -f ./"${sampleName}".aligned.sam" | ||
    echo "$bwa_cmd" > bwa_cmd.txt | ||
|
||
    samtools sort -n ${sampleName}.aligned.sam -o ${sampleName}.aligned.bam | ||
  >>> | ||
|
||
  output { | ||
    File raw_aligned_bam = "${sampleName}.aligned.bam" | ||
    String bwa_command = read_string("bwa_cmd.txt") | ||
  } | ||
|
||
  runtime { | ||
    docker: gitc_docker | ||
    memory: "${memoryGb} GB" | ||
    cpu: "${cpu}" | ||
    disks: "local-disk ${diskSpaceGb} HDD" | ||
  } | ||
} |
46 changes: 46 additions & 0 deletions
46
Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/CopyUmiFromReadName.wdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Minimal wrapper workflow that calls CopyUmiTask with no inputs wired, so every task | ||
# input must be supplied externally. | ||
workflow CopyUmiFromReadName { | ||
call CopyUmiTask | ||
} | ||
|
||
# Runs the fgbio CopyUmiFromReadName tool on a BAM and writes <base_name>.bam. | ||
# | ||
# Inputs: | ||
#   bam_file, bam_index       - input BAM and its index (the index is not named in the command) | ||
#   base_name                 - basename of the output BAM and index | ||
#   fgbio_override            - optional fgbio jar path; /usr/fgbio-2.0.2.jar when omitted | ||
#   remove_umi_from_read_name - value passed to --remove-umi (default true) | ||
#   bloodbiopsydocker         - docker image (defaults to the pinned liquidbiopsy image) | ||
#   disk_pad, extra_mem       - optional GB added to the computed disk size / 25 GB base memory | ||
#   cpu, preemptible, maxRetries - runtime settings with defaults 4 / 2 / 1 | ||
# Outputs: | ||
#   umi_extracted_bam, umi_extracted_bam_index - <base_name>.bam and <base_name>.bai | ||
task CopyUmiTask { | ||
  File bam_file | ||
  File bam_index | ||
  String base_name | ||
|
||
  String? fgbio_override | ||
  Boolean? remove_umi_from_read_name = true | ||
  String? bloodbiopsydocker = "us.gcr.io/tag-team-160914/liquidbiopsy:0.0.4.5" | ||
|
||
  Int? cpu = 4 | ||
  Int? maxRetries = 1 | ||
  Int? preemptible = 2 | ||
  Float? extra_mem | ||
  Float mem = 25 + select_first([extra_mem, 0]) | ||
  # JVM heap in MB: the rounded-up task memory (counted as 1000 MB per GB) minus 500 MB. | ||
  Int compute_mem = ceil(mem) * 1000 - 500 | ||
  Int? disk_pad | ||
  # Disk is sized at five times the input BAM plus the optional pad. | ||
  Int disk_size = ceil(size(bam_file, "GB") * 5) + select_first([disk_pad,0]) | ||
|
||
  command { | ||
    export FGBIO_LOCAL_JAR=${default="/usr/fgbio-2.0.2.jar" fgbio_override} | ||
|
||
    java -Xmx${compute_mem}m -jar $FGBIO_LOCAL_JAR \ | ||
    CopyUmiFromReadName \ | ||
    -i ${bam_file} \ | ||
    -o ${base_name}.bam \ | ||
    --remove-umi ${remove_umi_from_read_name} | ||
  } | ||
|
||
  runtime { | ||
    cpu: select_first([cpu]) | ||
    memory: mem + " GB" | ||
    disks: "local-disk " + disk_size + " HDD, /cromwell_root/tmp 500 HDD" | ||
    docker: select_first([bloodbiopsydocker]) | ||
    preemptible: select_first([preemptible]) | ||
    maxRetries: select_first([maxRetries]) | ||
  } | ||
|
||
  output { | ||
    File umi_extracted_bam = "${base_name}.bam" | ||
    File umi_extracted_bam_index = "${base_name}.bai" | ||
  } | ||
} |
123 changes: 123 additions & 0 deletions
123
Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/MergeBamAlignment.wdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# Standalone wrapper: runs MergeBamAlignmentTask and then coordinate-sorts its output | ||
# with the sortbam task defined in this file. Every MergeBamAlignmentTask input other | ||
# than output_bam_basename is left unwired and must be supplied externally. | ||
workflow RunMBA{ | ||
  # Declared as String (it was File): the value is only ever used as the output basename | ||
  # of both tasks and is never read as a file. | ||
  String sample_name | ||
|
||
  call MergeBamAlignmentTask{ | ||
    input: output_bam_basename = sample_name | ||
  } | ||
|
||
  call sortbam { | ||
    input: input_bam = MergeBamAlignmentTask.output_bam, | ||
    output_bam_basename = sample_name | ||
  } | ||
} | ||
|
||
# Merges aligned BAM(s) with the corresponding unmapped BAM using GATK/Picard | ||
# MergeBamAlignment and reports the size of the merged BAM. | ||
# | ||
# Inputs: | ||
#   mapped_bam          - aligned BAMs; each is passed as its own --ALIGNED_BAM argument | ||
#   unmapped_bam        - unmapped BAM passed as --UNMAPPED_BAM | ||
#   bwa_commandline     - aligner command lines, joined with " / " into the program-group command line | ||
#   bwa_version, bwa_tool - program-group version and id/name | ||
#   output_bam_basename - basename of the merged BAM | ||
#   ref_fasta, ref_fasta_index, ref_dict - reference; only ref_fasta is named in the command | ||
#   sort_order          - value for --SORT_ORDER (default "coordinate") | ||
#   compression_level   - samjdk compression level applied to the tool's JVM | ||
#   mba_extra_args      - optional extra arguments appended verbatim to the command | ||
#   disk_size, extra_mem, cpu, preemptible_tries, gatk_docker - runtime settings | ||
# Outputs: | ||
#   output_bam      - <output_bam_basename>.bam | ||
#   output_bam_size - value written by the du/awk pipeline (kB count divided by 1,000,000) | ||
task MergeBamAlignmentTask { | ||
  Array[File] mapped_bam | ||
  File unmapped_bam | ||
  Array[String] bwa_commandline | ||
  String bwa_version | ||
  String bwa_tool | ||
  String output_bam_basename | ||
  File ref_fasta | ||
  File ref_fasta_index | ||
  File ref_dict | ||
  Int? extra_mem | ||
  String? mba_extra_args | ||
  Int? memGb = 64 + select_first([extra_mem,0]) | ||
  String? sort_order = "coordinate" | ||
|
||
  Float disk_size | ||
  Int compression_level | ||
  Int? preemptible_tries = 1 | ||
  String? gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0" | ||
  Int? cpu = 16 | ||
|
||
  command <<< | ||
    set -o pipefail | ||
    set -e | ||
|
||
    # compression_level was a required input that the command never used; it is now applied | ||
    # as a JVM system property, the same mechanism the sortbam task uses. A JVM property is | ||
    # used rather than a tool argument so it cannot collide with an explicit | ||
    # --COMPRESSION_LEVEL supplied through mba_extra_args. | ||
    /gatk/gatk --java-options "-Dsamjdk.compression_level=${compression_level}" \ | ||
    MergeBamAlignment \ | ||
    --VALIDATION_STRINGENCY SILENT \ | ||
    --EXPECTED_ORIENTATIONS FR \ | ||
    --ATTRIBUTES_TO_RETAIN X0 \ | ||
    --ATTRIBUTES_TO_REMOVE NM \ | ||
    --ATTRIBUTES_TO_REMOVE MD \ | ||
    --ALIGNED_BAM ${sep=" --ALIGNED_BAM " mapped_bam} \ | ||
    --UNMAPPED_BAM ${unmapped_bam} \ | ||
    --OUTPUT ${output_bam_basename}.bam \ | ||
    --REFERENCE_SEQUENCE ${ref_fasta} \ | ||
    --PAIRED_RUN true \ | ||
    --SORT_ORDER ${sort_order} \ | ||
    --IS_BISULFITE_SEQUENCE false \ | ||
    --ALIGNED_READS_ONLY false \ | ||
    --CLIP_ADAPTERS false \ | ||
    --MAX_RECORDS_IN_RAM 2000000 \ | ||
    --ADD_MATE_CIGAR true \ | ||
    --MAX_INSERTIONS_OR_DELETIONS -1 \ | ||
    --PRIMARY_ALIGNMENT_STRATEGY MostDistant \ | ||
    --PROGRAM_RECORD_ID "${bwa_tool}" \ | ||
    --PROGRAM_GROUP_VERSION "${bwa_version}" \ | ||
    --PROGRAM_GROUP_COMMAND_LINE "${sep=' / ' bwa_commandline}" \ | ||
    --PROGRAM_GROUP_NAME "${bwa_tool}" \ | ||
    --ADD_PG_TAG_TO_READS false \ | ||
    ${mba_extra_args} | ||
|
||
    # du reports the BAM size in kB; awk strips the unit and divides by 1,000,000. | ||
    du --block-size=kB ${output_bam_basename}.bam | \ | ||
    awk -F "kB" '{print $1/1000000}' > output_bam_size.txt | ||
  >>> | ||
  runtime { | ||
    preemptible: select_first([preemptible_tries]) | ||
    memory: memGb + " GB" | ||
    bootDiskSizeGb: 12 | ||
    docker: select_first([gatk_docker]) | ||
    cpu: select_first([cpu]) | ||
    disks: "local-disk " + ceil(disk_size) + " HDD" | ||
  } | ||
  output { | ||
    File output_bam = "${output_bam_basename}.bam" | ||
    Float output_bam_size = read_float("output_bam_size.txt") | ||
  } | ||
} | ||
|
||
# Coordinate-sorts a BAM with Picard SortSam, also requesting an index and an MD5 file. | ||
# | ||
# Inputs: | ||
#   input_bam           - BAM to sort | ||
#   output_bam_basename - basename of the outputs (<basename>.bam / .bai / .bam.md5) | ||
#   compression_level   - samjdk compression level passed to the JVM (default 2) | ||
#   preemptible_tries   - preemptible attempts (default 1) | ||
#   extra_mem           - optional GB added on top of the 10 GB base memory | ||
#   diskgb_buffer       - optional GB added on top of the 50 GB base disk | ||
task sortbam { | ||
  String output_bam_basename | ||
  File input_bam | ||
  Int? compression_level = 2 | ||
  Int? preemptible_tries = 1 | ||
  Float? extra_mem | ||
  Float memory = 10 + select_first([extra_mem, 0]) | ||
  Int? diskgb_buffer | ||
  Int diskSpaceGb = 50 + select_first([diskgb_buffer, 0]) | ||
|
||
  command <<< | ||
    set -euxo pipefail | ||
|
||
    java -Dsamjdk.compression_level=${compression_level} -Xms4000m -jar /usr/gitc/picard.jar \ | ||
    SortSam \ | ||
    INPUT=${input_bam} \ | ||
    OUTPUT=${output_bam_basename}.bam \ | ||
    SORT_ORDER="coordinate" \ | ||
    CREATE_INDEX=true \ | ||
    CREATE_MD5_FILE=true \ | ||
    MAX_RECORDS_IN_RAM=300000 | ||
  >>> | ||
|
||
  output { | ||
    File output_bam = "${output_bam_basename}.bam" | ||
    File output_bam_index = "${output_bam_basename}.bai" | ||
    File output_bam_md5 = "${output_bam_basename}.bam.md5" | ||
  } | ||
|
||
  runtime { | ||
    preemptible: select_first([preemptible_tries]) | ||
    memory: memory + " GB" | ||
    bootDiskSizeGb: 12 | ||
    disks: "local-disk ${diskSpaceGb} HDD" | ||
    docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" | ||
  } | ||
} |
Oops, something went wrong.