Merge pull request #251 from danilodileo/other-review
Update aesthetics in the main workflow and subworkflows
danilodileo authored Jan 26, 2024
2 parents 58a128d + 8696cba commit 75f0338
Showing 4 changed files with 55 additions and 49 deletions.
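Apart from `modules/local/eggnog/mapper.nf`, where three chained `sed` calls are folded into a single `sed` expression, the edits below all follow one pattern: channel operator closures that pulled tuple elements out by position (`it[0]`, `it[1]`) are rewritten to destructure the `[ meta, file ]` tuple into named parameters. A minimal sketch of the two styles, using hypothetical sample names rather than anything from the pipeline:

```nextflow
// Hypothetical channel contents for illustration only; in metatdenovo the
// items come from process outputs, but the shape is the same [ meta, file ].
workflow {

    ch_proteins = Channel.of(
        [ [ id: 'sample1' ], file('sample1.faa') ],
        [ [ id: 'sample2' ], file('sample2.faa') ]
    )

    // Before: positional indexing -- it[1] is the protein file only by convention.
    ch_proteins
        .map { it[1] }
        .view { "positional: ${it.name}" }

    // After: the tuple is destructured into named parameters, so dropping the
    // meta map and keeping the file is stated explicitly at the point of use.
    ch_proteins
        .map { meta, protein -> protein }
        .view { "named: ${it.name}" }
}
```

Both forms emit the same values; the named form only documents what each tuple slot holds. The handful of other additions below mix previously untracked `versions.yml` outputs into `ch_versions`.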
4 changes: 3 additions & 1 deletion modules/local/eggnog/mapper.nf
@@ -41,7 +41,9 @@ process EGGNOG_MAPPER {
-i $input
gzip ${prefix}.emapper.*
zgrep -v '^##' ${prefix}.emapper.annotations |sed 's/^#//' | sed '/^query/s/.*/\\L&/' | sed 's/query/orf/' | gzip -c > ${prefix}.emapper.tsv.gz
zgrep -v '^##' ${prefix}.emapper.annotations | \\
sed 's/^#// ; /^query/s/.*/\\L&/ ; s/query/orf/' | \\
gzip -c > ${prefix}.emapper.tsv.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
4 changes: 2 additions & 2 deletions subworkflows/local/hmmclassify.nf
@@ -18,9 +18,9 @@ workflow HMMCLASSIFY {

HMMRANK (
ch_hmmclassify
.map { it[0] }
.map { meta, hmm, seqdb -> meta }
.distinct()
.combine ( HMMER_HMMSEARCH.out.target_summary.collect { it[1] } )
.combine ( HMMER_HMMSEARCH.out.target_summary.collect { meta, summary -> summary } )
.map { [ it[0], it[1..-1] ] }
)
ch_versions = ch_versions.mix(HMMRANK.out.versions)
14 changes: 7 additions & 7 deletions subworkflows/local/prokka_subsets.nf
@@ -16,24 +16,24 @@ workflow PROKKA_SUBSETS {
main:
ch_versions = Channel.empty()

PROKKA ( contigs.map{ it[1] }.splitFasta(size: batchsize, file: true).map { contigs -> [ [ id: contigs.getBaseName() ], contigs] }, [], [] )
PROKKA ( contigs.map{ meta, contigs -> contigs }.splitFasta(size: batchsize, file: true).map { contigs -> [ [ id: contigs.getBaseName() ], contigs] }, [], [] )
ch_versions = ch_versions.mix(PROKKA.out.versions)
ch_log = PROKKA.out.txt.collect()

contigs.map{ [ id:"${it[0].id}.prokka" ] }
.combine(PROKKA.out.gff.collect { it[1] }.map { [ it ] })
contigs.map{ meta, contigs -> [ id:"${meta.id}.prokka" ] }
.combine(PROKKA.out.gff.collect { meta, gff -> gff }.map { [ it ] })
.set { ch_gff }
GFF_CAT ( ch_gff )
ch_versions = ch_versions.mix(GFF_CAT.out.versions)

contigs.map{ [ id:"${it[0].id}.prokka" ] }
.combine(PROKKA.out.faa.collect { it[1] }.map { [ it ] })
contigs.map{ meta, contigs -> [ id:"${meta.id}.prokka" ] }
.combine(PROKKA.out.faa.collect { meta, protein -> protein }.map { [ it ] })
.set { ch_faa }
FAA_CAT ( ch_faa )
ch_versions = ch_versions.mix(FAA_CAT.out.versions)

contigs.map{ [ id:"${it[0].id}.prokka" ] }
.combine(PROKKA.out.ffn.collect { it[1] }.map { [ it ] })
contigs.map{ meta, contigs -> [ id:"${meta.id}.prokka" ] }
.combine(PROKKA.out.ffn.collect { meta, fnn -> fnn }.map { [ it ] })
.set { ch_ffn }
FFN_CAT ( ch_ffn )
ch_versions = ch_versions.mix(FFN_CAT.out.versions)
82 changes: 43 additions & 39 deletions workflows/metatdenovo.nf
@@ -184,22 +184,20 @@ workflow METATDENOVO {
params.skip_trimming
)
ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions)

FASTQC_TRIMGALORE.out.trim_log.collect { it[1] }
ch_collect_stats = ch_cat_fastq.collect { it[0].id }.map { [ [ id:"${assembler}.${orf_caller}" ], it ] }

ch_collect_stats = ch_cat_fastq.collect { meta, fasta -> meta.id }.map { [ [ id:"${assembler}.${orf_caller}" ], it ] }
if ( params.skip_trimming ) {
ch_collect_stats
.map { [ it[0], it[1], [] ] }
.map { meta, samples -> [ meta, samples, [] ] }
.set { ch_collect_stats }

} else {
if ( params.se_reads ) {
ch_collect_stats
.combine(FASTQC_TRIMGALORE.out.trim_log.collect { it[1] }.map { [ it ] })
.combine(FASTQC_TRIMGALORE.out.trim_log.collect { meta, report -> report }.map { [ it ] })
.set { ch_collect_stats }
} else {
ch_collect_stats
.combine(FASTQC_TRIMGALORE.out.trim_log.collect { it[1][0] }.map { [ it ] })
.combine(FASTQC_TRIMGALORE.out.trim_log.collect { meta, report -> report[0] }.map { [ it ] })
.set { ch_collect_stats }
}
}
@@ -210,17 +208,17 @@
if ( params.sequence_filter ) {
BBMAP_BBDUK ( FASTQC_TRIMGALORE.out.reads, params.sequence_filter )
ch_clean_reads = BBMAP_BBDUK.out.reads
ch_bbduk_logs = BBMAP_BBDUK.out.log.collect { it[1] }.map { [ it ] }
ch_versions = ch_versions.mix(BBMAP_BBDUK.out.versions)
ch_bbduk_logs = BBMAP_BBDUK.out.log.collect { meta, log -> log }.map { [ it ] }
ch_versions = ch_versions.mix(BBMAP_BBDUK.out.versions.first())
ch_collect_stats
.combine(ch_bbduk_logs)
.set {ch_collect_stats}
ch_multiqc_files = ch_multiqc_files.mix(BBMAP_BBDUK.out.log.collect{it[1]})
ch_multiqc_files = ch_multiqc_files.mix(BBMAP_BBDUK.out.log.collect{ meta, log -> log })
} else {
ch_clean_reads = FASTQC_TRIMGALORE.out.reads
ch_bbduk_logs = Channel.empty()
ch_collect_stats
.map { [ it[0], it[1], it[2], [] ] }
.map { meta, samples, report -> [ meta, samples, report, [] ] }
.set { ch_collect_stats }
}

@@ -246,17 +244,19 @@
if ( params.se_reads ) {
if ( params.bbnorm ) {
BBMAP_BBNORM(ch_single_end.collect { meta, fastq -> fastq }.map {[ [id:'all_samples', single_end:true], it ] } )
ch_se_reads_to_assembly = BBMAP_BBNORM.out.fastq.map { it[1] }
ch_se_reads_to_assembly = BBMAP_BBNORM.out.fastq.map { meta, fasta -> fasta }
ch_pe_reads_to_assembly = Channel.empty()
ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions)
} else {
ch_se_reads_to_assembly = ch_single_end.map { meta, fastq -> fastq }
ch_pe_reads_to_assembly = Channel.empty()
}
}
else if ( params.bbnorm ) {
BBMAP_BBNORM(ch_interleaved.collect { meta, fastq -> fastq }.map {[ [id:'all_samples', single_end:true], it ] } )
ch_pe_reads_to_assembly = BBMAP_BBNORM.out.fastq.map { it[1] }
ch_pe_reads_to_assembly = BBMAP_BBNORM.out.fastq.map { meta, fasta -> fasta }
ch_se_reads_to_assembly = Channel.empty()
ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions)
} else {
ch_pe_reads_to_assembly = ch_interleaved.map { meta, fastq -> fastq }
ch_se_reads_to_assembly = Channel.empty()
@@ -276,6 +276,7 @@
ch_pe_reads_to_assembly.collect().ifEmpty([]),
ch_se_reads_to_assembly.collect().ifEmpty([])
)
ch_versions = ch_versions.mix(WRITESPADESYAML.out.versions)
// 2. Call the module with a channel with all fastq files plus the yaml
Channel.empty()
.mix(ch_pe_reads_to_assembly)
@@ -292,6 +293,7 @@
ch_versions = ch_versions.mix(SPADES.out.versions)
FORMATSPADES( ch_assembly )
ch_assembly_contigs = FORMATSPADES.out.assembly
ch_versions = ch_versions.mix(FORMATSPADES.out.versions)
} else if ( assembler == 'megahit' ) {
MEGAHIT_INTERLEAVED(
ch_pe_reads_to_assembly.collect().ifEmpty([]),
@@ -308,6 +310,7 @@
if ( params.min_contig_length > 0 ) {
SEQTK_SEQ_CONTIG_FILTER ( ch_assembly_contigs )
ch_assembly_contigs = SEQTK_SEQ_CONTIG_FILTER.out.fastx
ch_versions = ch_versions.mix(SEQTK_SEQ_CONTIG_FILTER.out.versions)
}

//
@@ -321,19 +324,19 @@
//
if ( params.orf_caller == 'prokka' ) {
PROKKA_SUBSETS(ch_assembly_contigs, params.prokka_batchsize)
UNPIGZ_GFF(PROKKA_SUBSETS.out.gff.map { [ [id: "${params.orf_caller}.${it[0].id}"], it[1] ] })
UNPIGZ_GFF(PROKKA_SUBSETS.out.gff.map { meta, gff -> [ [id: "${params.orf_caller}.${meta}"], gff ] })
ch_versions = ch_versions.mix(PROKKA_SUBSETS.out.versions)
ch_gff = UNPIGZ_GFF.out.unzipped
ch_protein = PROKKA_SUBSETS.out.faa
ch_multiqc_files = ch_multiqc_files.mix(PROKKA_SUBSETS.out.prokka_log.collect{it[1]})
ch_multiqc_files = ch_multiqc_files.mix(PROKKA_SUBSETS.out.prokka_log.collect{ meta, log -> log } )
}

//
// MODULE: Run PRODIGAL on assembly output.
//
if ( orf_caller == 'prodigal' ) {
PRODIGAL( ch_assembly_contigs.map { [ [id: "${assembler}.${orf_caller}"], it[1] ] } )
UNPIGZ_GFF(PRODIGAL.out.gff.map { [ [id: "${it[0].id}.${orf_caller}"], it[1] ] })
PRODIGAL( ch_assembly_contigs.map { meta, contigs -> [ [id: "${assembler}.${orf_caller}"], contigs ] } )
UNPIGZ_GFF(PRODIGAL.out.gff.map { meta, gff -> [ [id: "${meta.id}.${orf_caller}"], gff ] })
ch_gff = UNPIGZ_GFF.out.unzipped
ch_protein = PRODIGAL.out.faa
ch_versions = ch_versions.mix(PRODIGAL.out.versions)
@@ -343,7 +346,7 @@
// SUBWORKFLOW: run TRANSDECODER. Orf caller alternative for eukaryotes.
//
if ( orf_caller == 'transdecoder' ) {
TRANSDECODER ( ch_assembly_contigs.map { [ [id: "transdecoder.${it[0].id}" ], it[1] ] } )
TRANSDECODER ( ch_assembly_contigs.map { meta, contigs -> [ [id: "transdecoder.${meta.id}" ], contigs ] } )
ch_gff = TRANSDECODER.out.gff
ch_protein = TRANSDECODER.out.pep
ch_versions = ch_versions.mix(TRANSDECODER.out.versions)
@@ -362,7 +365,7 @@
//
// MODULE: Create a BBMap index
//
BBMAP_INDEX(ch_assembly_contigs.map { it[1] })
BBMAP_INDEX(ch_assembly_contigs.map { meta, contigs -> contigs })
ch_versions = ch_versions.mix(BBMAP_INDEX.out.versions)

//
@@ -376,7 +379,7 @@
//
ch_hmmrs
.combine(ch_protein)
.map { [ [ id: "${assembler}.${orf_caller}" ], it[0], it[2] ] }
.map { hmm, meta, protein ->[ [ id: "${assembler}.${orf_caller}" ], hmm, protein ] }
.set { ch_hmmclassify }
HMMCLASSIFY ( ch_hmmclassify )
ch_versions = ch_versions.mix(HMMCLASSIFY.out.versions)
@@ -389,11 +392,11 @@
ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)

BAM_SORT_STATS_SAMTOOLS.out.bam
.combine(ch_gff.map { it[1] } )
.combine(ch_gff.map { meta, bam -> bam } )
.set { ch_featurecounts }

ch_collect_stats
.combine(BAM_SORT_STATS_SAMTOOLS.out.idxstats.collect { it[1]}.map { [ it ] })
.combine(BAM_SORT_STATS_SAMTOOLS.out.idxstats.collect { meta, idxstats -> idxstats }.map { [ it ] } )
.set { ch_collect_stats }

FEATURECOUNTS_CDS ( ch_featurecounts)
Expand All @@ -403,14 +406,14 @@ workflow METATDENOVO {
// MODULE: Collect featurecounts output counts in one table
//
FEATURECOUNTS_CDS.out.counts
.collect() { it[1] }
.map { [ [ id:"${assembler}.${orf_caller}" ], it ] }
.collect() { meta, featurecounts -> featurecounts }
.map { featurecounts -> [ [ id:"${assembler}.${orf_caller}" ], featurecounts ] }
.set { ch_collect_feature }

COLLECT_FEATURECOUNTS ( ch_collect_feature )
ch_versions = ch_versions.mix(COLLECT_FEATURECOUNTS.out.versions)
ch_fcs_for_stats = COLLECT_FEATURECOUNTS.out.counts.collect { it[1]}.map { [ it ] }
ch_fcs_for_summary = COLLECT_FEATURECOUNTS.out.counts.map { it[1]}
ch_fcs_for_stats = COLLECT_FEATURECOUNTS.out.counts.collect { meta, tsv -> tsv }.map { [ it ] }
ch_fcs_for_summary = COLLECT_FEATURECOUNTS.out.counts.map { meta, tsv -> tsv }
ch_collect_stats
.combine(ch_fcs_for_stats)
.set { ch_collect_stats }
@@ -424,7 +427,7 @@
ch_merge_tables = EGGNOG.out.sumtable
} else {
ch_protein
.map { [ it[0], [] ] }
.map { meta, protein -> [ meta, [] ] }
.set { ch_merge_tables }
}

@@ -434,17 +437,17 @@
//
if( !params.skip_kofamscan ) {
ch_protein
.map { [ it[0], it[1] ] }
.map { meta, protein -> [ meta, protein ] }
.set { ch_kofamscan }
KOFAMSCAN( ch_kofamscan, ch_fcs_for_summary)
ch_versions = ch_versions.mix(KOFAMSCAN.out.versions)
ch_kofamscan_summary = KOFAMSCAN.out.kofamscan_summary.collect().map { it[1] }
ch_kofamscan_summary = KOFAMSCAN.out.kofamscan_summary.collect().map { meta, tsv -> tsv }
ch_merge_tables
.combine( ch_kofamscan_summary )
.set { ch_merge_tables }
} else {
ch_merge_tables
.map { [ it[0], it[1], [] ] }
.map { meta, tsv -> [ meta, tsv, [] ] }
.set { ch_merge_tables }
}

@@ -476,18 +479,18 @@
.set { ch_eukulele_db }
}
ch_protein
.map {[ [ id:"${it[0].id}" ], it[1] ] }
.map { meta, protein -> [ [ id:"${meta.id}" ], protein ] }
.combine( ch_eukulele_db )
.set { ch_eukulele }
SUB_EUKULELE( ch_eukulele, ch_fcs_for_summary )
ch_taxonomy_summary = SUB_EUKULELE.out.taxonomy_summary.collect().map { it[1] }
ch_taxonomy_summary = SUB_EUKULELE.out.taxonomy_summary.collect().map { meta, tsv -> tsv }
ch_versions = ch_versions.mix(SUB_EUKULELE.out.versions)
ch_merge_tables
.combine( ch_taxonomy_summary )
.set { ch_merge_tables }
} else {
ch_merge_tables
.map { [ it[0], it[1], it[2], [] ] }
.map { meta, tsv1, tsv2 -> [ meta, tsv1, tsv2, [] ] }
.set { ch_merge_tables }
}

@@ -497,11 +500,12 @@
if( !params.skip_eggnog || !params.skip_eukulele || !params.skip_kofamscan) {
MERGE_TABLES ( ch_merge_tables )
ch_collect_stats
.combine(MERGE_TABLES.out.merged_table.collect{ it[1]}.map { [ it ] })
.combine(MERGE_TABLES.out.merged_table.collect{ meta, tblout -> tblout }.map { [ it ] })
.set { ch_collect_stats }
ch_versions = ch_versions.mix(MERGE_TABLES.out.versions)
} else {
ch_collect_stats
.map { [ it[0], it[1], it[2], it[3], it[4], it[5], [] ] }
.map { meta, samples, report, tsv, idxstats, counts -> [ meta, samples, report, tsv, idxstats, counts, [] ] }
.set { ch_collect_stats }
}

@@ -522,10 +526,10 @@
ch_methods_description = Channel.value(methods_description)

ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMGALORE.out.trim_zip.collect{it[1]})
ch_multiqc_files = ch_multiqc_files.mix(TRANSRATE.out.assembly_qc.collect{it[1]})
ch_multiqc_files = ch_multiqc_files.mix(BAM_SORT_STATS_SAMTOOLS.out.idxstats.collect{it[1]})
ch_multiqc_files = ch_multiqc_files.mix(FEATURECOUNTS_CDS.out.summary.collect{it[1]})
ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMGALORE.out.trim_zip.collect{ meta, zip -> zip })
ch_multiqc_files = ch_multiqc_files.mix(TRANSRATE.out.assembly_qc.collect{ meta, tbl -> tbl })
ch_multiqc_files = ch_multiqc_files.mix(BAM_SORT_STATS_SAMTOOLS.out.idxstats.collect{ meta, idxstats -> idxstats })
ch_multiqc_files = ch_multiqc_files.mix(FEATURECOUNTS_CDS.out.summary.collect{ meta, summary -> summary })


MULTIQC (