-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add files for new module Signature Profiler of CODEC pipeline and mod…
…ified the dockstore.yml to add this pipeline to Public workflows
- Loading branch information
1 parent
aaa259e
commit e0dc64b
Showing
6 changed files
with
170 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Signature Profiling WDL for CODEC Mutlist Output | ||
The CODEC SingleSampleCODEC pipeline writes the mutations it discovers to text files (mutlists), which are the input to this workflow. | ||
|
||
This workflow summarizes and plots mutation spectra in the 96 trinucleotide contexts and generates a mutation matrix, which is then used to assign SBS (Single Base Substitution) signatures to the SNVs using the reference signatures from COSMIC (https://cancer.sanger.ac.uk/signatures/). | ||
|
||
The Signature Profiling tool is from https://github.com/AlexandrovLab/SigProfilerAssignment and is installed in the Docker image. | ||
|
||
The output of this WDL includes: | ||
1) SpectrumPlots | ||
2) MutationMetrics | ||
3) SignatureCount | ||
4) SignatureProportionPDF | ||
5) SignatureStackedPlot | ||
6) TMBPlot | ||
7) DecomposedSignatureProbabilities | ||
|
||
|
||
### Citation | ||
Díaz-Gay et al. 2023 Bioinformatics and Tate et al. 2019 Nucleic Acids Research |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"SigProfiler.GenomeFasta":"${workspace.referenceData_hg38_ref_fasta}","SigProfiler.MutlistFiles":"${this.samples.variants_called}","SigProfiler.mutlist_to_96_contexts.GenomeFastaIndex":"${workspace.referenceData_hg38_ref_fasta_index}"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
version 1.0 | ||
|
||
# SigProfiler: chains three tasks over a set of mutlist files.
#   1. mutlist_to_96_contexts -> mutation matrix + spectrum plots
#   2. sigprofiler_analysis   -> signature activities, probabilities and plots
#   3. PlotSignatures         -> per-sample signature proportion PDF
workflow SigProfiler {
    input {
        Array[File] MutlistFiles
        File GenomeFasta
    }

    # mutlist_to_96_contexts also declares a GenomeFastaIndex input. It is not
    # wired through here, so it has to be supplied as a task-level input
    # (SigProfiler.mutlist_to_96_contexts.GenomeFastaIndex).
    call mutlist_to_96_contexts {
        input:
            GenomeFasta = GenomeFasta,
            MutlistFiles = MutlistFiles
    }

    # The mutation matrix from step 1 is the only input of the signature fit.
    call sigprofiler_analysis {
        input:
            MutationMetrics = mutlist_to_96_contexts.MutationMetrics
    }

    # The activities table from step 2 drives the proportion plot.
    call PlotSignatures {
        input:
            SignatureCount = sigprofiler_analysis.SignatureCount
    }

    output {
        # Produced by mutlist_to_96_contexts
        File SpectrumPlots = mutlist_to_96_contexts.SpectrumPlots
        File MutationMetrics = mutlist_to_96_contexts.MutationMetrics

        # Produced by sigprofiler_analysis
        File SignatureCount = sigprofiler_analysis.SignatureCount
        File DecomposedSignatureProbabilities = sigprofiler_analysis.DecomposedSignatureProbabilities
        File SignatureStackedPlot = sigprofiler_analysis.SignatureStackedPlot
        File TMBPlot = sigprofiler_analysis.TMBPlot

        # Produced by PlotSignatures
        File SignatureProportionPDF = PlotSignatures.signature_proportions_pdf
    }
}
|
||
|
||
|
||
# Runs /scripts/96_contexts_mutations.R from the task's Docker image on the
# mutlist files and the genome FASTA, and collects the two files the script is
# expected to leave in the working directory.
task mutlist_to_96_contexts {
    input {
        Array[File] MutlistFiles
        File GenomeFasta
        # Not referenced in the command below; presumably declared so the index
        # is localized alongside the FASTA -- TODO confirm against the R script.
        File GenomeFastaIndex
    }

    # All mutlist paths are joined with single spaces and handed to the script
    # as ONE quoted argument; the FASTA path is the second argument.
    command <<<
        Rscript /scripts/96_contexts_mutations.R "~{sep=' ' MutlistFiles}" ~{GenomeFasta}
    >>>

    runtime {
        docker: "us.gcr.io/tag-public/sigprofiler:v1"
        memory: "8 GB"
        disks: "local-disk 20 HDD"
    }

    output {
        File MutationMetrics = "trinuc_mutation_metrics.txt"
        File SpectrumPlots = "all_sample_spectrums.pdf"
    }
}
|
||
# Runs SigProfilerAssignment's cosmic_fit on a mutation matrix and exposes four
# files from <OutputFolder>/Assignment_Solution/Activities as task outputs.
task sigprofiler_analysis {
    input {
        File MutationMetrics
        # Directory (relative to the working directory) that cosmic_fit writes into.
        String OutputFolder = "SigProfiler-output"
    }

    command <<<
        python3 <<EOF
        import sys
        from SigProfilerMatrixGenerator import install as genInstall
        # Run the SigProfilerMatrixGenerator installer for GRCh38 before the fit.
        genInstall.install("GRCh38")
        from SigProfilerAssignment import Analyzer as Analyze
        # The input is already a matrix, fitted against COSMIC v3.3 on GRCh38.
        fit_options = dict(
            samples="~{MutationMetrics}",
            output="~{OutputFolder}",
            input_type="matrix",
            genome_build="GRCh38",
            cosmic_version=3.3,
        )
        Analyze.cosmic_fit(**fit_options)
        EOF
    >>>

    runtime {
        docker: "us.gcr.io/tag-public/sigprofiler:v1"
        memory: "8 GB"
        disks: "local-disk 20 HDD"
    }

    output {
        File SignatureCount = "~{OutputFolder}/Assignment_Solution/Activities/Assignment_Solution_Activities.txt"
        File DecomposedSignatureProbabilities = "~{OutputFolder}/Assignment_Solution/Activities/Decomposed_MutationType_Probabilities.txt"
        File SignatureStackedPlot = "~{OutputFolder}/Assignment_Solution/Activities/Assignment_Solution_Activity_Plots.pdf"
        File TMBPlot = "~{OutputFolder}/Assignment_Solution/Activities/Assignment_Solution_TMB_plot.pdf"
    }
}
# Draws a bubble plot of per-sample signature proportions from a tab-separated
# activities table (first column = sample id, remaining columns = one count
# column per signature) and writes it to signature_proportions.pdf.
#
# Input:
#   SignatureCount - tab-separated table with a header row.
# Output:
#   signature_proportions_pdf - the rendered PDF.
task PlotSignatures {
    input {
        File SignatureCount
    }

    # Heredoc-style command section with ~{} placeholders, matching the other
    # tasks in this file. The quoted 'EOF' delimiter stops the shell from
    # expanding $ or backticks inside the Python source.
    command <<<
        python3 <<'EOF'
        import matplotlib
        # Select a non-interactive backend before pyplot is imported; the
        # container has no display.
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        import pandas as pd
        import seaborn as sns
        SigCounts = pd.read_csv("~{SignatureCount}", sep='\t', header=0)
        # The first column identifies the sample; every other column is a signature.
        sample_col = SigCounts.columns[0]
        signature_cols = SigCounts.columns[1:]
        # Convert counts to per-sample proportions (each row sums to 1).
        # A row whose total is 0 becomes NaN and is dropped by the filter below.
        row_totals = SigCounts[signature_cols].sum(axis=1)
        SigCounts[signature_cols] = SigCounts[signature_cols].div(row_totals, axis=0)
        # Reshape to long form: one row per (sample, signature) pair.
        SigCounts_long = SigCounts.melt(id_vars=[sample_col], var_name="Signature", value_name="Proportion")
        SigCounts_long = SigCounts_long[SigCounts_long["Proportion"] > 0]
        # Plot: marker size encodes the proportion.
        plt.figure(figsize=(16, 9))
        sns.scatterplot(data=SigCounts_long, x=sample_col, y="Signature", size="Proportion", sizes=(20, 200), legend=False)
        plt.xticks(rotation=90)
        plt.xlabel("Sample Name", fontsize=16)
        plt.ylabel("Signature", fontsize=16)
        plt.title("Signature Proportions by Sample", fontsize=20, pad = 20)
        plt.grid(axis='y')
        # Hide all four axes spines.
        ax = plt.gca()
        for side in ('top', 'right', 'left', 'bottom'):
            ax.spines[side].set_visible(False)
        plt.tight_layout()
        plt.savefig("signature_proportions.pdf", format="pdf")
        EOF
    >>>

    output {
        File signature_proportions_pdf = "signature_proportions.pdf"
    }

    runtime {
        docker: "us.gcr.io/tag-public/sigprofiler:v1"
        memory: "8 GB"
        disks: "local-disk 20 HDD"
    }
}
This file was deleted.
Oops, something went wrong.