diff --git a/CHANGELOG.md b/CHANGELOG.md index e6e05ffe..8c1aae43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.0.1 - [2024-04-02] + +- [#277](https://github.com/nf-core/metatdenovo/pull/277) - Fix how the `--eukulele_db` parameter is handled. It is no longer possible to specify a list of databases in the same run. +- [#277](https://github.com/nf-core/metatdenovo/pull/277) - Gzip user-provided assembly files that are not already compressed; the pipeline previously assumed they were gzipped, which could lead to files being overwritten. + ## v1.0.0 - [date] Initial release of nf-core/metatdenovo, created with the [nf-core](https://nf-co.re/) template. diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4c8e1f76..e0817b8b 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/metatdenovo + This report has been generated by the nf-core/metatdenovo analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-metatdenovo-methods-description": order: -1000 diff --git a/modules.json b/modules.json index 7ad9351c..34e00d49 100644 --- a/modules.json +++ b/modules.json @@ -55,6 +55,11 @@ "git_sha": "9e71d8519dfbfc328c078bba14d4bd4c99e39a94", "installed_by": ["modules"] }, + "pigz/compress": { + "branch": "master", + "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", + "installed_by": ["modules"] + }, "prodigal": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml new file mode 100644 index 00000000..7551d187 --- /dev/null +++ b/modules/nf-core/pigz/compress/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "pigz_compress" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "pigz=2.8" diff --git a/modules/nf-core/pigz/compress/main.nf b/modules/nf-core/pigz/compress/main.nf new file mode 100644 index 00000000..152e7006 --- /dev/null +++ b/modules/nf-core/pigz/compress/main.nf @@ -0,0 +1,45 @@ +process PIGZ_COMPRESS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(raw_file) + + output: + tuple val(meta), path("$archive"), emit: archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + archive = raw_file.toString() + ".gz" + """ + # Note: needs --stdout for pigz to avoid the following issue: + # pigz: skipping: ${raw_file} is a symbolic link + pigz --processes $task.cpus --stdout --force ${args} ${raw_file} > ${archive} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + archive = raw_file.toString() + ".gz" + """ + touch ${archive} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml new file mode 100644 index 00000000..42efd735 --- /dev/null +++ b/modules/nf-core/pigz/compress/meta.yml @@ -0,0 +1,47 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "pigz_compress" +description: Compresses files with pigz. +keywords: + - compress + - gzip + - parallelized +tools: + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - raw_file: + type: file + description: File to be compressed + pattern: "*.*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + + - archive: + type: file + description: The compressed file + pattern: "*.gz" + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@leoisl" +maintainers: + - "@leoisl" diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test new file mode 100644 index 00000000..248d40fb --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + name "Test Process PIGZ_COMPRESS" + script "../main.nf" + process "PIGZ_COMPRESS" + + tag "modules" + tag "modules_nfcore" + tag "pigz" + tag "pigz/compress" + + test("sarscov2 - genome - fasta") { + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - genome - fasta - stub") { + options "-stub-run" + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.archive[0][1]).name).match() } + ) + } + } +} diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap new file mode 100644 index 00000000..6e50456f --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "sarscov2 - genome - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "1": [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ], + "archive": [ + [ + { + "id": "test" + }, + "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + 
"versions": [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ] + } + ], + "timestamp": "2023-12-11T22:39:53.350546" + }, + "sarscov2 - genome - fasta - stub": { + "content": [ + "genome.fasta.gz" + ], + "timestamp": "2023-12-11T22:52:24.309192" + } +} \ No newline at end of file diff --git a/modules/nf-core/pigz/compress/tests/tags.yml b/modules/nf-core/pigz/compress/tests/tags.yml new file mode 100644 index 00000000..42c46bfa --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/tags.yml @@ -0,0 +1,2 @@ +pigz/compress: + - "modules/nf-core/pigz/compress/**" diff --git a/nextflow.config b/nextflow.config index 266de3d6..8af2eac3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -288,7 +288,7 @@ manifest { description = """Assembly and annotation of metatranscriptomic data, both prokaryotic and eukaryotic""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.0.0' + version = '1.0.1' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index ed156e20..accea5f2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -326,7 +326,7 @@ "type": "string", "enum": ["gtdb", "phylodb", "marmmetsp", "mmetsp", "eukprot"], "description": "EUKulele database.", - "help_text": "This option allows the user to specify which database (or set of databases, separated by comma) to use with EUKulele. Databases that are provided with EUKulele will be downloaded if not already present inside the database directory (see --eukulele_dbpath). Possible alternatives: phylodb, mmetsp, marmmetsp, eukprot. NB: you can't use this option with a custom database as eukulele will not recognize the name and it will start to download phylodb by default. If you want to use a custom database, please skip this option and specify only --eukulele_dbpath.", + "help_text": "This option allows the user to specify which database to use with EUKulele. 
Databases that are provided with EUKulele will be downloaded if not already present inside the database directory (see --eukulele_dbpath). Possible alternatives: gtdb, phylodb, mmetsp, marmmetsp, eukprot. NB: you can't use this option with a custom database as eukulele will not recognize the name and it will start to download phylodb by default. If you want to use a custom database, please skip this option and specify only --eukulele_dbpath.", "fa_icon": "far fa-file-code" }, "eukulele_dbpath": { diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf index ea51fb76..48d53c54 100644 --- a/workflows/metatdenovo.nf +++ b/workflows/metatdenovo.nf @@ -121,6 +121,7 @@ include { CAT_FASTQ } from '../modules/nf-core/ include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { PIGZ_COMPRESS as PIGZ_ASSEMBLY } from '../modules/nf-core/pigz/compress/main' // // SUBWORKFLOWS: Installed directly from nf-core/modules @@ -267,9 +268,16 @@ workflow METATDENOVO { // MODULE: Run Megahit or RNAspades on all interleaved fastq files // if ( params.assembly ) { - Channel - .value ( [ [ id: 'user_assembly' ], file(params.assembly) ] ) - .set { ch_assembly_contigs } + // Downstream calls assume a gzipped assembly: compress a plain input with pigz (same meta id as the gzipped case, versions recorded), otherwise use the file as is + if ( ! params.assembly.endsWith('.gz') ) { + PIGZ_ASSEMBLY(Channel.fromPath(params.assembly).map { [ [ id: 'user_assembly' ], it ] } ) + PIGZ_ASSEMBLY.out.archive.first().set { ch_assembly_contigs } + ch_versions = ch_versions.mix(PIGZ_ASSEMBLY.out.versions) + } else { + Channel + .value ( [ [ id: 'user_assembly' ], file(params.assembly) ] ) + .set { ch_assembly_contigs } + } } else if ( assembler == 'rnaspades' ) { // 1. 
Write a yaml file for Spades WRITESPADESYAML ( @@ -303,7 +311,9 @@ workflow METATDENOVO { .map { [ [ id: 'megahit' ], it ] } .set { ch_assembly_contigs } ch_versions = ch_versions.mix(MEGAHIT_INTERLEAVED.out.versions) - } else { error 'Assembler not specified!' } + } else { + error 'Assembler not specified!' + } // If the user asked for length filtering, perform that with SEQTK_SEQ (the actual length parameter is used in modules.config) if ( params.min_contig_length > 0 ) { @@ -469,11 +479,11 @@ workflow METATDENOVO { // SUBWORKFLOW: Eukulele // ch_eukulele_db = Channel.empty() - if( !params.skip_eukulele){ + if( ! params.skip_eukulele ) { // Create a channel for EUKulele either with a named database or not. The latter means a user-provided database in a directory. if ( params.eukulele_db ) { Channel - .fromList ( params.eukulele_db.split(',') ) + .of ( params.eukulele_db ) .map { [ it, file(params.eukulele_dbpath) ] } .set { ch_eukulele_db } } else {