Skip to content

Commit

Permalink
Adding last steps for novel miR identification and quantification usi…
Browse files Browse the repository at this point in the history
…ng 2-pass approach.
  • Loading branch information
skchronicles committed Sep 20, 2024
1 parent 0d74b75 commit 26e989b
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 0 deletions.
29 changes: 29 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,34 @@ rule all:
join(workpath, "fastqc", "{sample}_trimmed_fastqc.zip"),
sample=samples
),
# Two-pass approach to quantify novel miRNAs,
# In the 1st pass, we concatenate all cleaned
# reads across all samples, align them to the
# reference genome, and run mirdeep2 to get a
# joint callset of novel miRs in fasta format.
# In the 2nd pass, we quantify novel miRs for
# each sample using the novel miR fasta that
# was generated in the first pass.
# First-pass rules for novel miR quantification,
# @imported from `rule mirdeep2_novel_p1_concatenate` in rules/novel.smk
join(workpath, "novel", "cohort_trimmed_cleaned.fa"),
# @imported from `rule mirdeep2_novel_p1_mapper` in rules/novel.smk
join(workpath, "novel", "mapper", "cohort_mapped.arf"),
join(workpath, "novel", "mapper", "cohort_collapsed.fa"),
# @imported from `rule mirdeep2_novel_p1_run` in rules/novel.smk
join(workpath, "novel", "pass1", "cohort_novel_mature_miRNA.tsv"),
join(workpath, "novel", "pass1", "cohort_novel_hairpin_miRNA.tsv"),
# Second-pass rules for novel miR quantification,
# @imported from `rule mirdeep2_novel_p2_quantifier` in rules/novel.smk
expand(
join(workpath, "novel", "counts", "{sample}_novel_miRNA_expressed.tsv"),
sample=samples
),
expand(
join(workpath, "novel", "counts", "{sample}_novel_mature_miRNA_expression.tsv"),
sample=samples
),
join(workpath, "novel", "counts", "miRDeep2_novel_mature_miRNA_counts.tsv"),
# Aggregated quality-control report,
# @imported from `rule multiqc` in rules/qc.smk
join(workpath, "reports", "multiqc_report.html"),
Expand All @@ -106,3 +134,4 @@ include: join("rules", "trim.smk")
include: join("rules", "align.smk")
include: join("rules", "quant.smk")
include: join("rules", "qc.smk")
include: join("rules", "novel.smk")
67 changes: 67 additions & 0 deletions workflow/rules/novel.smk
Original file line number Diff line number Diff line change
Expand Up @@ -296,4 +296,71 @@ rule mirdeep2_novel_p2_quantifier:
-quit
)
ln -sf "${{exp}}" {output.mirna}
"""


rule mirdeep2_novel_p2_mature_expression:
"""
Data-processing step to calculate avergae expression across the same mature novel miRNA.
The relationship between mature to precursor novel miRNA is 1:many. This step averages
the expression of multiple precursor miRNA pointing to the same mature miRNA to find
average novel mature miRNA expression.
@Input:
Known novel miRNA expression (scatter)
@Ouput:
Average novel mature miRNA expression
"""
input:
mirna = join(workpath, "novel", "counts", "{sample}_novel_miRNA_expressed.tsv"),
output:
avg_exp = join(workpath, "novel", "counts", "{sample}_novel_mature_miRNA_expression.tsv"),
params:
rname = "novel_matrexp",
container: config['images']['mir-seek'],
threads: int(allocated("threads", "mirdeep2_novel_p2_mature_expression", cluster)),
shell: """
# Removes comment character from
# header and calculates average
# mature miRNA expression
head -1 {input.mirna} \\
| sed '1 s/^#//g' \\
| cut -f1,2 \\
> {output.avg_exp}
# Find average expression due to
# 1:many relationship between
# mature and precursor miRNA
tail -n+2 {input.mirna} \\
| cut -f1,2 \\
| awk -F '\\t' -v OFS='\\t' '{{seen[$1]+=$2; count[$1]++}} END {{for (x in seen) print x, seen[x]/count[x]}}' \\
>> {output.avg_exp}
"""


rule mirdeep2_novel_p2_merge_results:
"""
Data-processing step to aggreagte per-sample mature miRNA
counts into a counts matrix.
@Input:
Average novel mature miRNA expression files (gather)
@Output:
Average novel mature miRNA counts matrix
"""
input:
counts = expand(join(workpath, "novel", "counts", "{sample}_novel_mature_miRNA_expression.tsv"), sample=samples),
output:
matrix = join(workpath, "novel", "counts", "miRDeep2_novel_mature_miRNA_counts.tsv"),
params:
rname = "novel_mirmatrix",
script = join("workflow", "scripts", "create_matrix.py"),
container: config['images']['mir-seek'],
threads: int(allocated("threads", "mirdeep2_novel_p2_merge_results", cluster)),
shell: """
# Create counts matrix of mature miRNAs
{params.script} \\
--input {input.counts} \\
--output {output.matrix} \\
--join-on miRNA \\
--extract read_count \\
--clean-suffix '_novel_mature_miRNA_expression.tsv' \\
--nan-values 0.0
"""

0 comments on commit 26e989b

Please sign in to comment.