From a076464ceb8d1d9ad70fa4e4e3751c17c1c21cd9 Mon Sep 17 00:00:00 2001 From: Taylor Falk Date: Tue, 5 Sep 2023 15:42:05 -0400 Subject: [PATCH 1/5] issue-168 added eggnog handling to kofam and checked for contents --- nextflow.config | 6 +++--- subworkflows/local/eggnog.nf | 2 +- subworkflows/local/kofamscan.nf | 26 ++++++++++++++++++++++---- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index e6fd1dad..3191b249 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,7 +55,7 @@ params { // KOfamscan options skip_kofamscan = false - kofam_dir = './kofam' + kofam_dir = './kofam/' // CAT cat = false @@ -67,7 +67,7 @@ params { // Eukulele options eukulele_db = null skip_eukulele = false - eukulele_dbpath = './eukulele' + eukulele_dbpath = './eukulele/' eukulele_method = 'mets' // MultiQC options multiqc_config = null @@ -94,7 +94,7 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - + // Max resource options // Defaults only, expecting to be overwritten diff --git a/subworkflows/local/eggnog.nf b/subworkflows/local/eggnog.nf index 83cf3485..b9aa6ff6 100644 --- a/subworkflows/local/eggnog.nf +++ b/subworkflows/local/eggnog.nf @@ -26,7 +26,7 @@ workflow EGGNOG { if ( ! eggnogfile.exists() ) { EGGNOG_DOWNLOAD() - ch_dpath = EGGNOG_DOWNLOAD.out.db + ch_dbpath = EGGNOG_DOWNLOAD.out.db ch_versions = ch_versions.mix ( EGGNOG_DOWNLOAD.out.versions ) } else { diff --git a/subworkflows/local/kofamscan.nf b/subworkflows/local/kofamscan.nf index a5619b7c..90723b44 100644 --- a/subworkflows/local/kofamscan.nf +++ b/subworkflows/local/kofamscan.nf @@ -15,12 +15,30 @@ workflow KOFAMSCAN { main: ch_versions = Channel.empty() - - KOFAMSCAN_DOWNLOAD ( kofam_dir ) - KOFAMSCAN_SCAN( kofamscan, KOFAMSCAN_DOWNLOAD.out.ko_list, KOFAMSCAN_DOWNLOAD.out.koprofiles ) + String directoryName = kofam_dir + File directory = new File(directoryName) + String kofamdb = directoryName + "ko_list" + File kolistfile = new File(kofamdb) + + if ( ! directory.exists() ) { + directory.mkdir() + } + + if ( ! kolistfile.exists() ) { + KOFAMSCAN_DOWNLOAD ( kofam_dir ) + ch_dbpath = KOFAMSCAN_DOWNLOAD.out.ko_list + ch_profiles = KOFAMSCAN_DOWNLOAD.out.koprofiles + + ch_versions = ch_versions.mix ( KOFAMSCAN_DOWNLOAD.out.versions ) + } else { + ch_dbpath = Channel.fromPath(kolistfile) + ch_profiles = Channel.fromPath(kofam_dir + "profiles") + } + + KOFAMSCAN_SCAN( kofamscan, ch_dbpath, ch_profiles ) ch_versions = ch_versions.mix(KOFAMSCAN_SCAN.out.versions) - + SUM_KOFAMSCAN( KOFAMSCAN_SCAN.out.kout, fcs ) From ba85c9c13e225665baba01c8f9207c53fe13c5c0 Mon Sep 17 00:00:00 2001 From: Taylor Falk Date: Tue, 5 Sep 2023 16:30:38 -0400 Subject: [PATCH 2/5] issue-168 removed mkdirs from main script --- subworkflows/local/eukulele.nf | 14 ++++++++++++-- workflows/metatdenovo.nf | 5 +---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/eukulele.nf b/subworkflows/local/eukulele.nf index a483f57d..6636a500 100644 --- a/subworkflows/local/eukulele.nf +++ b/subworkflows/local/eukulele.nf @@ -10,12 +10,22 @@ include { SUM_TAXONOMY } from '../../modules/local/sum_taxo workflow SUB_EUKULELE { take: - eukulele // Channel: val(meta), path(fasta), val(database), path(directory) + eukulele // Channel: val(meta), path(fasta), val(database), path(directory) collect_fcs main: ch_versions = Channel.empty() + String directoryName = eukulele + File directory = new File(directoryName) + String eukdb = directoryName + "**/reference.pep.fa" + File eukpepfa = new File(eukdb) + + if ( ! directory.exists() ) { + directory.mkdir() + } + + EUKULELE_DOWNLOAD ( eukulele.filter{ it[2] }.map { [ it[2], it[3] ] } ) ch_download = EUKULELE_DOWNLOAD.out.db @@ -23,7 +33,7 @@ workflow SUB_EUKULELE { .mix ( EUKULELE_DOWNLOAD.out.db ) .mix(eukulele.filter{ ! it[2] }.map { [ [], it[3] ] } ) .merge( eukulele.map{ [ it[0], it[1] ] } ) - .map { [ [ id: "${it[2].id}.${it[0]}" ], it[3], it[0], it[1] ] } + .map { [ [ id: "${it[2].id}.${it[0]}" ], it[3], it[0], it[1] ] } .set { ch_eukulele } EUKULELE_SEARCH( ch_eukulele ) diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf index 9f2df9cd..c318c02c 100644 --- a/workflows/metatdenovo.nf +++ b/workflows/metatdenovo.nf @@ -286,7 +286,7 @@ workflow METATDENOVO { ch_se_reads_to_assembly = ch_interleaved_se.map { meta, fastq -> fastq } ch_pe_reads_to_assembly = Channel.empty() } - } + } else if ( params.bbnorm ) { BBMAP_BBNORM(ch_interleaved.collect { meta, fastq -> fastq }.map {[ [id:'all_samples', single_end:true], it ] } ) ch_pe_reads_to_assembly = BBMAP_BBNORM.out.fastq.map { it[1] } @@ -452,8 +452,6 @@ workflow METATDENOVO { // SUBWORKFLOW: run kofamscan on the ORF-called amino acid sequences // if( !params.skip_kofamscan ) { - File kofam_dir = new File(params.kofam_dir) - if ( ! kofam_dir.exists() ) { kofam_dir.mkdir() } ch_aa //.map { [ [ id:"${it[0].id}" ], it[1] ] } .map { [ it[0], it[1] ] } @@ -468,7 +466,6 @@ workflow METATDENOVO { ch_merge_tables .map { [ it[0], it[1], [] ] } .set { ch_merge_tables } - } // set up contig channel to use in CAT and TransRate From 60f6d73e8719b42b17978c4108a50fc3834f45e9 Mon Sep 17 00:00:00 2001 From: Taylor Falk Date: Thu, 21 Sep 2023 16:34:31 -0400 Subject: [PATCH 3/5] issue-168 added some logic to download submodules to try and avoid downloads if a database is present and complete locally --- subworkflows/local/eggnog.nf | 2 +- subworkflows/local/eukulele.nf | 24 +++++++++++++++++------- subworkflows/local/kofamscan.nf | 4 ++-- workflows/metatdenovo.nf | 23 ++++++++++------------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/subworkflows/local/eggnog.nf b/subworkflows/local/eggnog.nf index b9aa6ff6..16369570 100644 --- a/subworkflows/local/eggnog.nf +++ b/subworkflows/local/eggnog.nf @@ -17,7 +17,7 @@ workflow EGGNOG { String directoryName = eggnog_dbpath File directory = new File(directoryName) - String eggnogDB = eggnog_dbpath + "eggnog.db" + String eggnogDB = eggnog_dbpath + "/eggnog.db" File eggnogfile = new File(eggnogDB) if ( ! directory.exists() ) { diff --git a/subworkflows/local/eukulele.nf b/subworkflows/local/eukulele.nf index 6636a500..138a37d1 100644 --- a/subworkflows/local/eukulele.nf +++ b/subworkflows/local/eukulele.nf @@ -16,21 +16,31 @@ workflow SUB_EUKULELE { main: ch_versions = Channel.empty() - String directoryName = eukulele + String directoryName = params.eukulele_dbpath File directory = new File(directoryName) - String eukdb = directoryName + "**/reference.pep.fa" - File eukpepfa = new File(eukdb) + // get files in euk directory, and checks if there is a reference.pep.fa in the + // first one. Not the most robust but if this fails it will simply download + // the database anyways. + List euk_files = [] + new File(directoryName).eachFile() { + file-> euk_files.add(file) + } + String eukdb = euk_files.get(0).toString().plus("/reference.pep.fa") + File eukpepfa = new File(eukdb) if ( ! directory.exists() ) { directory.mkdir() } - - EUKULELE_DOWNLOAD ( eukulele.filter{ it[2] }.map { [ it[2], it[3] ] } ) - ch_download = EUKULELE_DOWNLOAD.out.db + if ( ! eukpepfa.exists() ) { + EUKULELE_DOWNLOAD ( eukulele.filter{ it[2] }.map { [ it[2], it[3] ] } ) + ch_download = EUKULELE_DOWNLOAD.out.db + } else { + ch_download = Channel.fromPath(directory) + } Channel.empty() - .mix ( EUKULELE_DOWNLOAD.out.db ) + .mix ( ch_download ) .mix(eukulele.filter{ ! it[2] }.map { [ [], it[3] ] } ) .merge( eukulele.map{ [ it[0], it[1] ] } ) .map { [ [ id: "${it[2].id}.${it[0]}" ], it[3], it[0], it[1] ] } diff --git a/subworkflows/local/kofamscan.nf b/subworkflows/local/kofamscan.nf index 90723b44..2bbe1669 100644 --- a/subworkflows/local/kofamscan.nf +++ b/subworkflows/local/kofamscan.nf @@ -18,7 +18,7 @@ workflow KOFAMSCAN { String directoryName = kofam_dir File directory = new File(directoryName) - String kofamdb = directoryName + "ko_list" + String kofamdb = directoryName + "/ko_list" File kolistfile = new File(kofamdb) if ( ! directory.exists() ) { @@ -33,7 +33,7 @@ workflow KOFAMSCAN { ch_versions = ch_versions.mix ( KOFAMSCAN_DOWNLOAD.out.versions ) } else { ch_dbpath = Channel.fromPath(kolistfile) - ch_profiles = Channel.fromPath(kofam_dir + "profiles") + ch_profiles = Channel.fromPath(kofam_dir + "/profiles") } KOFAMSCAN_SCAN( kofamscan, ch_dbpath, ch_profiles ) diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf index c318c02c..5debdd06 100644 --- a/workflows/metatdenovo.nf +++ b/workflows/metatdenovo.nf @@ -456,7 +456,7 @@ workflow METATDENOVO { //.map { [ [ id:"${it[0].id}" ], it[1] ] } .map { [ it[0], it[1] ] } .set { ch_kofamscan } - KOFAMSCAN( ch_kofamscan, Channel.fromPath(params.kofam_dir), ch_fcs_for_summary) + KOFAMSCAN( ch_kofamscan, params.kofam_dir, ch_fcs_for_summary) ch_versions = ch_versions.mix(KOFAMSCAN.out.versions) ch_kofamscan_summary = KOFAMSCAN.out.kofamscan_summary.collect().map { it[1] } ch_merge_tables @@ -506,18 +506,15 @@ workflow METATDENOVO { // SUBWORKFLOW: Eukulele // if( !params.skip_eukulele){ - File directory = new File(params.eukulele_dbpath) - if ( ! directory.exists() ) { directory.mkdir() } - ch_directory = Channel.fromPath( directory ) - ch_aa - .map {[ [ id:"${it[0].id}" ], it[1] ] } - .combine( ch_eukulele_db ) - .set { ch_eukulele } - SUB_EUKULELE( ch_eukulele, ch_fcs_for_summary ) - ch_taxonomy_summary = SUB_EUKULELE.out.taxonomy_summary.collect().map { it[1] } - ch_merge_tables - .combine( ch_taxonomy_summary ) - .set { ch_merge_tables } + ch_aa + .map {[ [ id:"${it[0].id}" ], it[1] ] } + .combine( ch_eukulele_db ) + .set { ch_eukulele } + SUB_EUKULELE( ch_eukulele, ch_fcs_for_summary ) + ch_taxonomy_summary = SUB_EUKULELE.out.taxonomy_summary.collect().map { it[1] } + ch_merge_tables + .combine( ch_taxonomy_summary ) + .set { ch_merge_tables } } else { ch_merge_tables .map { [ it[0], it[1], it[2], [] ] } From 06b6b7bd4ab0f80519ce22be09dcf923b88c8f30 Mon Sep 17 00:00:00 2001 From: Taylor Falk Date: Thu, 21 Sep 2023 17:31:37 -0400 Subject: [PATCH 4/5] issue-168 specified the euk db to maintain consistency with download output --- subworkflows/local/eukulele.nf | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/eukulele.nf b/subworkflows/local/eukulele.nf index 138a37d1..c3d7a23c 100644 --- a/subworkflows/local/eukulele.nf +++ b/subworkflows/local/eukulele.nf @@ -25,8 +25,8 @@ workflow SUB_EUKULELE { new File(directoryName).eachFile() { file-> euk_files.add(file) } - String eukdb = euk_files.get(0).toString().plus("/reference.pep.fa") - File eukpepfa = new File(eukdb) + String eukdb = euk_files.get(0).toString() + File eukpepfa = new File(eukdb.plus("/reference.pep.fa")) if ( ! directory.exists() ) { directory.mkdir() @@ -36,7 +36,12 @@ workflow SUB_EUKULELE { EUKULELE_DOWNLOAD ( eukulele.filter{ it[2] }.map { [ it[2], it[3] ] } ) ch_download = EUKULELE_DOWNLOAD.out.db } else { - ch_download = Channel.fromPath(directory) + // tuple val("${db}"), path("${directory}/${db}"), emit: db + // mimic download output with subdir and db name + String db = eukdb.split("/")[0] + Channel.empty() + .map { tuple val(db), Channel.fromPath(eukdb) } + .set { ch_download } } Channel.empty() From 49c363b3d305d4789d2a75d82412761612548638 Mon Sep 17 00:00:00 2001 From: Taylor Falk Date: Thu, 21 Sep 2023 17:40:50 -0400 Subject: [PATCH 5/5] issue-168 added details in docs about skipping db downloads --- docs/usage.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index f64f6b51..00049270 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -129,10 +129,14 @@ These options are: - [kofamscan](https://github.com/takaram/kofam_scan) (`--kofam_dir`) +- [EUKulele](https://github.com/AlexanderLabWHOI/EUKulele) (`--eukulele_dbpath`) + All the options can run in the same time (e.g. `nextflow run main.nf -profile test,docker --eggnog --hmmdir hmms/ --rundbcan`) but each program has its own options that you will need to read carefully before running the pipeline. You can find more information about the different options in the [parameters page](https://nf-co.re/metatdenovo/parameters). For details about individual programs used, see their respective home pages. +If an Eggnog, kofam, or EUKulele database is already available, they can be specified with the above commands to skip the automatic download that the pipeline performs. + If you don't want run eggNOG-mapper, you will need to add the flag `--skip_eggnog`, otherwise metatdenovo will run the program automatically. ## Example pipeline command with some common features