From 0b3da14468c2669947e9f185a0b953e0d2099c20 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Fri, 9 Feb 2024 10:43:05 +0100 Subject: [PATCH] Finalize eggnog download/map rework --- modules/local/eggnog/download.nf | 13 ++++++++----- modules/local/eggnog/mapper.nf | 1 + nextflow.config | 1 - nextflow_schema.json | 9 +-------- subworkflows/local/eggnog.nf | 20 ++++++-------------- workflows/metatdenovo.nf | 5 +---- 6 files changed, 17 insertions(+), 32 deletions(-) diff --git a/modules/local/eggnog/download.nf b/modules/local/eggnog/download.nf index b070f538..ffca10e4 100644 --- a/modules/local/eggnog/download.nf +++ b/modules/local/eggnog/download.nf @@ -8,22 +8,25 @@ process EGGNOG_DOWNLOAD { 'biocontainers/eggnog-mapper:2.1.9--pyhdfd78af_0' }" input: - //path "eggnog_dbpath" output: - path("eggnog_db") , emit: db - path "versions.yml", emit: versions + path "eggnog.db" , emit: eggnog_db + path "eggnog_proteins.dmnd" , emit: dmnd + path "eggnog.taxa.db" , emit: taxa_db + path "eggnog.taxa.db.traverse.pkl", emit: pkl + path "*" , emit: all + path "versions.yml" , emit: versions, optional: true // Optional to allow skipping if this is the only file that's missing script: def args = task.ext.args ?: '' """ - mkdir eggnog_db + #mkdir eggnog_db download_eggnog_data.py \\ $args \\ -y \\ - --data_dir eggnog_db + --data_dir . 
cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/eggnog/mapper.nf b/modules/local/eggnog/mapper.nf index 6e0dd2fe..59701917 100644 --- a/modules/local/eggnog/mapper.nf +++ b/modules/local/eggnog/mapper.nf @@ -10,6 +10,7 @@ process EGGNOG_MAPPER { input: tuple val(meta), path(fasta) path(db) + path(eggnog_db) // Marker purpose only; to make execution wait for the download process to finish output: tuple val(meta), path("*.emapper.hits.gz") , emit: hits diff --git a/nextflow.config b/nextflow.config index 2770b02f..365f764e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,7 +59,6 @@ params { // Eggnog options eggnog_dbpath = 'eggnog' - create_eggnogdb = false skip_eggnog = false // KOfamscan options diff --git a/nextflow_schema.json b/nextflow_schema.json index d3e30390..ed156e20 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -264,14 +264,7 @@ "default": "eggnog", "description": "Specify EGGNOG database path", "fa_icon": "far fa-file-code", - "help_text": "This parameter specifies where you have an EGGNOG database, or, where it will be created using the --create_eggnog_db parameter." - }, - "create_eggnogdb": { - "type": "boolean", - "default": false, - "fa_icon": "fas fa-forward", - "description": "create an EGGNOG database", - "help_text": "Use this parameter if you want to download the EGGNOG database." + "help_text": "This parameter specifies where you have an EGGNOG database, or where it will be created. The directory must exist." }, "skip_kofamscan": { "type": "boolean", diff --git a/subworkflows/local/eggnog.nf b/subworkflows/local/eggnog.nf index 553fe333..2d892848 100644 --- a/subworkflows/local/eggnog.nf +++ b/subworkflows/local/eggnog.nf @@ -11,24 +11,16 @@ workflow EGGNOG { faa collect_fcs dbpath - createdb main: ch_versions = Channel.empty() - if ( createdb ) { - // Not allowed, or? - if ( ! 
file(dbpath).exists() ) { - file(dbpath).mkdir() - } - EGGNOG_DOWNLOAD( ) - ch_dbpath = EGGNOG_DOWNLOAD.out.db - ch_versions = ch_versions.mix ( EGGNOG_DOWNLOAD.out.versions ) - } else { - ch_dbpath = Channel.fromPath(dbpath, checkIfExists: true) - } - - EGGNOG_MAPPER ( faa, ch_dbpath ) + ch_dbpath = Channel.fromPath(dbpath, checkIfExists: true) + + EGGNOG_DOWNLOAD() + ch_versions = ch_versions.mix ( EGGNOG_DOWNLOAD.out.versions ) + + EGGNOG_MAPPER ( faa, ch_dbpath, EGGNOG_DOWNLOAD.out.eggnog_db ) ch_versions = ch_versions.mix ( EGGNOG_MAPPER.out.versions ) EGGNOG_SUM ( EGGNOG_MAPPER.out.emappertsv, collect_fcs ) diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf index 0a7dccf7..1d5d9fbe 100644 --- a/workflows/metatdenovo.nf +++ b/workflows/metatdenovo.nf @@ -420,11 +420,8 @@ workflow METATDENOVO { // // SUBWORKFLOW: run eggnog_mapper on the ORF-called amino acid sequences // - //ch_eggnog_dbpath = Channel.fromPath(params.eggnog_dbpath, checkIfExists: true) - //ch_create_eggnogdb = Channel.of(params.create_eggnogdb) if ( ! params.skip_eggnog ) { - //EGGNOG(ch_protein, ch_fcs_for_summary, ch_eggnog_dbpath, ch_create_eggnogdb ) - EGGNOG(ch_protein, ch_fcs_for_summary, params.eggnog_dbpath, params.create_eggnogdb ) + EGGNOG(ch_protein, ch_fcs_for_summary, params.eggnog_dbpath) ch_versions = ch_versions.mix(EGGNOG.out.versions) ch_merge_tables = EGGNOG.out.sumtable } else {