From d5476d93b60e0d14c9e94c7ceae4c1e3a994b644 Mon Sep 17 00:00:00 2001 From: skchronicles Date: Fri, 27 Sep 2024 16:55:40 -0400 Subject: [PATCH] Remove star sequence concatenation from mature fasta file --- workflow/Snakefile | 4 ++-- workflow/rules/novel.smk | 24 ++++++++---------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 738bc93..5659a44 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -127,11 +127,11 @@ rule all: # @imported from `rule mirdeep2_novel_p1_run` in rules/novel.smk # This rule is only run if `identify_novel_mirs` is set to True. provided( - [join(workpath, "novel", "pass1", "cohort_novel_mature_miRNA.tsv")], + [join(workpath, "novel", "pass1", "cohort_novel_mature_miRNA.fa")], identify_novel_mirs ), provided( - [join(workpath, "novel", "pass1", "cohort_novel_hairpin_miRNA.tsv")], + [join(workpath, "novel", "pass1", "cohort_novel_hairpin_miRNA.fa")], identify_novel_mirs ), # Second-pass rules for novel miR quantification, diff --git a/workflow/rules/novel.smk b/workflow/rules/novel.smk index abfd268..45e2364 100644 --- a/workflow/rules/novel.smk +++ b/workflow/rules/novel.smk @@ -151,8 +151,8 @@ rule mirdeep2_novel_p1_run: arf = join(workpath, "novel", "mapper", "cohort_mapped.arf"), collapsed = join(workpath, "novel", "mapper", "cohort_collapsed.fa"), output: - mature = join(workpath, "novel", "pass1", "cohort_novel_mature_miRNA.tsv"), - hairpin = join(workpath, "novel", "pass1", "cohort_novel_hairpin_miRNA.tsv"), + mature = join(workpath, "novel", "pass1", "cohort_novel_mature_miRNA.fa"), + hairpin = join(workpath, "novel", "pass1", "cohort_novel_hairpin_miRNA.fa"), log: report = join(workpath, "novel", "pass1", "mirdeep2.log") params: @@ -211,18 +211,10 @@ rule mirdeep2_novel_p1_run: -print \\ -quit ) - star=$( - find "${{tmp}}/" \\ - -type f \\ - -iname "novel_star_*.fa" \\ - -print \\ - -quit - ) - # Rename the identifier to contain the - # suffix _star to indicate if its a star - # sequence before merging the two files - sed -i '/^>.*/s/$/_star/' "${{star}}" - cat "${{mature}}" "${{star}}" > {output.mature} + + # Create a symbolic link to the novel + # mature sequences for quantification + ln -sf "${{mature}}" {output.mature} # Create a symbolic link to the novel # hairpin sequences for quantification @@ -257,8 +249,8 @@ rule mirdeep2_novel_p2_quantifier: input: arf = join(workpath, "mirdeep2", "mapper", "{sample}_mapped.arf"), collapsed = join(workpath, "mirdeep2", "mapper", "{sample}_collapsed.fa"), - mature = join(workpath, "novel", "pass1", "cohort_novel_mature_miRNA.tsv"), - hairpin = join(workpath, "novel", "pass1", "cohort_novel_hairpin_miRNA.tsv"), + mature = join(workpath, "novel", "pass1", "cohort_novel_mature_miRNA.fa"), + hairpin = join(workpath, "novel", "pass1", "cohort_novel_hairpin_miRNA.fa"), output: mirna = join(workpath, "novel", "counts", "{sample}_novel_miRNA_expressed.tsv"), params: