From 06b0a5ecad62e75ba0197685aee0d87ee59ba9a7 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 16:56:00 +0100 Subject: [PATCH 01/16] test with diamond --- nextflow.config | 9 ++++- pipeline.nf | 90 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 88 insertions(+), 11 deletions(-) diff --git a/nextflow.config b/nextflow.config index 20e5e04..82920dd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,7 +19,14 @@ process{ queue='biocore-el7,long-sl7' time='48h' cpus='8' - container="ncbi/blast:2.9.0" + container="ncbi/blast:2.10.0" + } + + withLabel: diamond { + queue='biocore-el7,long-sl7' + time='48h' + cpus='8' + container="quay.io/biocontainers/diamond:0.9.30--h56fc30b_0" } withLabel: blastannotator { diff --git a/pipeline.nf b/pipeline.nf index 4643bd2..1447d6e 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -156,6 +156,12 @@ if(params.oboFile == "" || params.oboFile == null ) { obofile=file(params.oboFile) +// TODO: To change for different aligners +diamond = false + +if(params.diamond=="TRUE"||params.diamond=="true") { + diamond = true +} if (params.blastFile == "" || params.blastFile == null ){ @@ -163,21 +169,85 @@ if (params.blastFile == "" || params.blastFile == null ){ db_name = file(params.blastDB_path).name db_path = file(params.blastDB_path).parent -process blast{ +// Handling Database formatting +formatdbDetect = false - label 'blast' +if ( diamond ) { - // publishDir "results", mode: 'copy' + formatDbFileName = db_path+"/"+db_name+".dmnd" + formatDbFile = file(formatDbFileName) + if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { + formatdbDetect = true + } + + if ( formatdbDetect == false ) { + + process diamondFormat{ + + label 'diamond' + + output: + file "${dbname}_formatdb" into formatdb + + """ + diamond makedb --in ${db_path}/${db_name} --db "${dbname}_formatdb" + """ + } + + } + +} else { + // TODO Need to detect if formatted with BLAST + // formatDbFileName = db_path+"/"+db_name+".dmnd" + // formatDbFile = file(formatDbFileName) + // if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { + // formatdbDetect = true + // } + // For now exists true + formatdbDetect = true + formatdb = Channel.fromPath( params.blastDB_path ) +} - input: - file seq from seq_file6 - output: - file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) +if ( diamond == true ) { + + process diamond{ + + label 'diamond' + + input: + file seq from seq_file6 + file formatdb_file from formatdb + + output: + file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) + + """ + diamond blastp --db ${formatdb_file}--query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out "blastXml${seq}" + """ + } + +} else { + + process blast{ + + label 'blast' + + // publishDir "results", mode: 'copy' + + input: + file seq from seq_file6 + file formatdb_file from formatdb + + + output: + file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) + + """ + blastp -db ${formatdb_file} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out "blastXml${seq}" -outfmt 5 + """ + } - """ - blastp -db ${db_path}/${db_name} -query $seq -num_threads 8 -evalue 0.00001 -out "blastXml${seq}" -outfmt 5 - """ } } else { From ce3c110e2363c7685fd53a7a51c0a01438d9e704 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 17:30:25 +0100 Subject: [PATCH 02/16] db_name --- pipeline.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline.nf b/pipeline.nf index 1447d6e..19df7eb 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -187,10 +187,10 @@ if ( diamond ) { label 'diamond' output: - file "${dbname}_formatdb" into formatdb + file "${db_name}_formatdb" into formatdb """ - diamond makedb --in ${db_path}/${db_name} --db "${dbname}_formatdb" + diamond makedb --in ${db_path}/${db_name} --db "${db_name}_formatdb" """ } From e2b1c2ea164d63bf1e39223fcdb7a4a36f4815ee Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 17:35:40 +0100 Subject: [PATCH 03/16] diamond --- pipeline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline.nf b/pipeline.nf index 19df7eb..c02e3cb 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -187,7 +187,7 @@ if ( diamond ) { label 'diamond' output: - file "${db_name}_formatdb" into formatdb + file "${db_name}_formatdb.dmnd" into formatdb """ diamond makedb --in ${db_path}/${db_name} --db "${db_name}_formatdb" From 0cf2bab1485ccf08c4f5bb3343e91c31c8cc1ea4 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 17:37:11 +0100 Subject: [PATCH 04/16] typo diamond --- pipeline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline.nf b/pipeline.nf index c02e3cb..d101b41 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -223,7 +223,7 @@ if ( diamond == true ) { file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) """ - diamond blastp --db ${formatdb_file}--query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out "blastXml${seq}" + diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out "blastXml${seq}" """ } From 1469bfac9a7983f14b4375fcfeb8c2180bd8a6c7 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 17:57:57 +0100 Subject: [PATCH 05/16] formatdb for BLAST --- pipeline.nf | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/pipeline.nf b/pipeline.nf index d101b41..3e17fc0 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -194,18 +194,39 @@ if ( diamond ) { """ } + } else { + + formatdb = Channel.fromPath( params.blastDB_path ) + } } else { - // TODO Need to detect if formatted with BLAST - // formatDbFileName = db_path+"/"+db_name+".dmnd" - // formatDbFile = file(formatDbFileName) - // if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { - // formatdbDetect = true - // } - // For now exists true - formatdbDetect = true - formatdb = Channel.fromPath( params.blastDB_path ) + + formatDbFileName = db_path+"/"+db_name+"*.phr" + formatDbFile = file(formatDbFileName) + if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { + formatdbDetect = true + } + + if ( formatdbDetect == false ) { + + process blastFormat{ + + label 'blast' + + output: + file "${db_name}.p*" into formatdb + + """ + makeblastdb -dbtype prot -in ${db_path}/${db_name} -parse_seqids -out ${db_name} + """ + } + + } else { + + formatdb = Channel.fromPath( params.blastDB_path ) + + } } From 06ac4ceda12bf7c04b752b5048270ff29befc3ce Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 18:22:45 +0100 Subject: [PATCH 06/16] fix --- pipeline.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipeline.nf b/pipeline.nf index 3e17fc0..70e6312 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -202,9 +202,10 @@ if ( diamond ) { } else { + // TODO: This needs more work formatDbFileName = db_path+"/"+db_name+"*.phr" formatDbFile = file(formatDbFileName) - if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { + if ( formatDbFile.size() > 0 ) { formatdbDetect = true } From fa53e941cffd2fd5bbdc7d98b65ab1c36c73f288 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 21:15:27 +0100 Subject: [PATCH 07/16] some checking --- pipeline.nf | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pipeline.nf b/pipeline.nf index 3e17fc0..8f1a21b 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -170,17 +170,17 @@ db_name = file(params.blastDB_path).name db_path = file(params.blastDB_path).parent // Handling Database formatting -formatdbDetect = false +formatdbDetect = "false" if ( diamond ) { formatDbFileName = db_path+"/"+db_name+".dmnd" formatDbFile = file(formatDbFileName) if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { - formatdbDetect = true + formatdbDetect = "true" } - if ( formatdbDetect == false ) { + if ( formatdbDetect == "false" ) { process diamondFormat{ @@ -203,16 +203,18 @@ if ( diamond ) { } else { formatDbFileName = db_path+"/"+db_name+"*.phr" - formatDbFile = file(formatDbFileName) - if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { - formatdbDetect = true + formatDbFile = FileNameFinder().getFileNames( formatDbFileName ) + // println( formatDbFile.size() ) + if ( formatDbFile.size() > 0 ) { + formatdbDetect = "true" } - - if ( formatdbDetect == false ) { + +println( formatdbDetect ) + if ( formatdbDetect == "false" ) { + + // println( "TUR" ) process blastFormat{ - - label 'blast' output: file "${db_name}.p*" into formatdb @@ -224,8 +226,9 @@ if ( diamond ) { } else { - formatdb = Channel.fromPath( params.blastDB_path ) - + // println( "HERE" ) + formatdb = params.blastDB_path + } } From 802b4763a63a2655527e6f82c4fa190fb14cbd7c Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 22:53:48 +0100 Subject: [PATCH 08/16] detect files in Groovy --- pipeline.nf | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pipeline.nf b/pipeline.nf index 8679ef0..fc2801f 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -201,12 +201,14 @@ if ( diamond ) { } } else { - - // TODO: This needs more work - formatDbFileName = db_path+"/"+db_name+"*.phr" - formatDbFile = FileNameFinder().getFileNames( formatDbFileName ) - // println( formatDbFile.size() ) - if ( formatDbFile.size() > 0 ) { + + formatDbDir = file( db_path ) + filter = ~/${db_name}.*.phr/ + def fcount = 0 + formatDbDir.list().eachFileMatch( filter ) { it -> + fcount = fcount + 1 + } + if ( fcount > 0 ) { formatdbDetect = "true" } From 378d7dd4c1bb6a03a026b4985a719e0f972ea411 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 23:43:58 +0100 Subject: [PATCH 09/16] handling blast --- pipeline.nf | 194 ++++++++++++++++++++++++++-------------------------- 1 file changed, 97 insertions(+), 97 deletions(-) diff --git a/pipeline.nf b/pipeline.nf index fc2801f..3cb39e2 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -165,139 +165,139 @@ if(params.diamond=="TRUE"||params.diamond=="true") { if (params.blastFile == "" || params.blastFile == null ){ -// program-specific parameters -db_name = file(params.blastDB_path).name -db_path = file(params.blastDB_path).parent - -// Handling Database formatting -formatdbDetect = "false" - -if ( diamond ) { - - formatDbFileName = db_path+"/"+db_name+".dmnd" - formatDbFile = file(formatDbFileName) - if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { - formatdbDetect = "true" - } + // program-specific parameters + db_name = file(params.blastDB_path).name + db_path = file(params.blastDB_path).parent - if ( formatdbDetect == "false" ) { + // Handling Database formatting + formatdbDetect = "false" - process diamondFormat{ + if ( diamond ) { - label 'diamond' + formatDbFileName = db_path+"/"+db_name+".dmnd" + formatDbFile = file(formatDbFileName) + if ( formatDbFile.exists() && formatDbFile.size() > 0 ) { + formatdbDetect = "true" + } - output: - file "${db_name}_formatdb.dmnd" into formatdb + if ( formatdbDetect == "false" ) { + + process diamondFormat{ + + label 'diamond' + + output: + file "${db_name}_formatdb.dmnd" into formatdb + + """ + diamond makedb --in ${db_path}/${db_name} --db "${db_name}_formatdb" + """ + } - """ - diamond makedb --in ${db_path}/${db_name} --db "${db_name}_formatdb" - """ } - + } else { + + formatDbDir = file( db_path ) + filter = ~/${db_name}.*.phr/ + def fcount = 0 + formatDbDir.list().eachFileMatch( filter ) { it -> + fcount = fcount + 1 + } + if ( fcount > 0 ) { + formatdbDetect = "true" + } - formatdb = Channel.fromPath( params.blastDB_path ) - - } + println( formatdbDetect ) + if ( formatdbDetect == "false" ) { -} else { + // println( "TUR" ) - formatDbDir = file( db_path ) - filter = ~/${db_name}.*.phr/ - def fcount = 0 - formatDbDir.list().eachFileMatch( filter ) { it -> - fcount = fcount + 1 - } - if ( fcount > 0 ) { - formatdbDetect = "true" + process blastFormat{ + + label 'blast' + + output: + file "${db_name}.p*" into formatdb + + """ + makeblastdb -dbtype prot -in ${db_path}/${db_name} -parse_seqids -out ${db_name} + """ + } + + } } - -println( formatdbDetect ) - if ( formatdbDetect == "false" ) { - - // println( "TUR" ) - - process blastFormat{ - label 'blast' + if ( diamond == true ) { + process diamond{ + + label 'diamond' + + input: + file seq from seq_file6 + file formatdb_file from formatdb + output: - file "${db_name}.p*" into formatdb + file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) - """ - makeblastdb -dbtype prot -in ${db_path}/${db_name} -parse_seqids -out ${db_name} - """ + if ( formatdbDetect == "false" ) { + command = "diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}" + } else { + command = "diamond blastp --db ${db_path}/${db_name} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}" + } + + command + } - - } else { - - // println( "HERE" ) - formatdb = params.blastDB_path - } -} - - -if ( diamond == true ) { - - process diamond{ + } else { - label 'diamond' + process blast{ - input: - file seq from seq_file6 - file formatdb_file from formatdb + label 'blast' + + // publishDir "results", mode: 'copy' + + input: + file seq from seq_file6 + file formatdb_file from formatdb - output: - file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) + + output: + file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) + + + if ( formatdbDetect == "false" ) { + command = "blastp -db ${formatdb_file} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out blastXml${seq} -outfmt 5" + } else { + command = "blastp -db ${db_path}/${db_name} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out blastXml${seq} -outfmt 5" + } + + command + } - """ - diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out "blastXml${seq}" - """ } } else { - process blast{ + blastInput=file(params.blastFile) - label 'blast' + process convertBlast{ // publishDir "results", mode: 'copy' input: - file seq from seq_file6 - file formatdb_file from formatdb - + file blastFile from blastInput output: - file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) + file("*.xml") into (blastXmlResults1, blastXmlResults2, blastXmlResults3) """ - blastp -db ${formatdb_file} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out "blastXml${seq}" -outfmt 5 + hugeBlast2XML.pl -blast $blastFile -n 1000 -out blast.res """ + } - -} - -} else { - -blastInput=file(params.blastFile) - -process convertBlast{ - - // publishDir "results", mode: 'copy' - - input: - file blastFile from blastInput - - output: - file("*.xml") into (blastXmlResults1, blastXmlResults2, blastXmlResults3) - - """ - hugeBlast2XML.pl -blast $blastFile -n 1000 -out blast.res - """ - -} } if (params.kolist != "" || params.kolist != null ){ From cb8088d26918c18a0b9d4e26e6ae9d1e2782adcb Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 23:49:30 +0100 Subject: [PATCH 10/16] script --- pipeline.nf | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pipeline.nf b/pipeline.nf index 3cb39e2..6cccc09 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -240,14 +240,15 @@ if (params.blastFile == "" || params.blastFile == null ){ output: file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) - - if ( formatdbDetect == "false" ) { - command = "diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}" - } else { - command = "diamond blastp --db ${db_path}/${db_name} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}" - } - - command + + script: + if ( formatdbDetect == "false" ) { + command = "diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}" + } else { + command = "diamond blastp --db ${db_path}/${db_name} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}" + } + + command } @@ -263,11 +264,10 @@ if (params.blastFile == "" || params.blastFile == null ){ file seq from seq_file6 file formatdb_file from formatdb - output: file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3) - + script: if ( formatdbDetect == "false" ) { command = "blastp -db ${formatdb_file} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out blastXml${seq} -outfmt 5" } else { From c8a70415677895777512aa590e811ec617221817 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 23:51:17 +0100 Subject: [PATCH 11/16] fix access --- pipeline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline.nf b/pipeline.nf index 6cccc09..754b3e7 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -201,7 +201,7 @@ if (params.blastFile == "" || params.blastFile == null ){ formatDbDir = file( db_path ) filter = ~/${db_name}.*.phr/ def fcount = 0 - formatDbDir.list().eachFileMatch( filter ) { it -> + formatDbDir.eachFileMatch( filter ) { it -> fcount = fcount + 1 } if ( fcount > 0 ) { From 21f05ec011796c5e1de67b6f39f6753a03630554 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Mon, 16 Mar 2020 23:56:12 +0100 Subject: [PATCH 12/16] avoid missing formatdb --- pipeline.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pipeline.nf b/pipeline.nf index 754b3e7..05c29f9 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -194,6 +194,8 @@ if (params.blastFile == "" || params.blastFile == null ){ """ } + } else { + formatdb = params.blastDB_path } } else { @@ -225,6 +227,8 @@ if (params.blastFile == "" || params.blastFile == null ){ """ } + } else { + formatdb = params.blastDB_path } } From b1e9f264ce04ada97c0c788660edfbeb8f790899 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Tue, 17 Mar 2020 09:53:31 +0100 Subject: [PATCH 13/16] params additional --- main_configuration.config | 2 ++ pipeline.nf | 2 ++ 2 files changed, 4 insertions(+) diff --git a/main_configuration.config b/main_configuration.config index fe86d82..7a8c279 100644 --- a/main_configuration.config +++ b/main_configuration.config @@ -4,6 +4,8 @@ params { gffclean = "true" gffstats = "true" evalue = "0.00001" + blastFile = "" + diamond = "false" blastDB_path = "/nfs/db/ncbi/201908/blastdb/db/nr" speciesName = "P.vulgaris" chunkSize = 25 diff --git a/pipeline.nf b/pipeline.nf index 05c29f9..a621603 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -114,7 +114,9 @@ log.info "General parameters" log.info "------------------" log.info "Protein sequence file : ${params.proteinFile}" log.info "Annotation file : ${params.gffFile}" +if ( ${params.blastFile} != "" ) { log.info "BLAST results file : ${params.blastFile}" +} log.info "Species name : ${params.speciesName}" log.info "KEGG species : ${params.kegg_species}" if ( mysql ) { From e2c956494869261e3a0019984ac887eab040270a Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Tue, 17 Mar 2020 09:54:16 +0100 Subject: [PATCH 14/16] Update stuff --- TODO.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/TODO.md b/TODO.md index 06b836a..dd022af 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,4 @@ * Split KEGG retrieval and upload -* Add Format option for DIAMOND and BLAST -* Add DIAMOND option * Include some testing and CI * Add PANNZER in analysis * Add PhylomeDB in analysis @@ -13,6 +11,6 @@ * Allow reports from KEGG orthologs (number of potential orthologs from KEGG species) * blast_hit reconsider * Allow more flexibility for input parameters batch -* Consider other programs for BLAST process or similar annotation processes: e.g. [GHOSTZ](http://www.bi.cs.titech.ac.jp/ghostz/) and [Argot2.5](http://www.medcomp.medicina.unipd.it/Argot2-5/) +* Generalize and consider other programs for BLAST process or similar annotation processes: e.g. [GHOSTZ](http://www.bi.cs.titech.ac.jp/ghostz/) and [Argot2.5](http://www.medcomp.medicina.unipd.it/Argot2-5/) * Add option to detect if possible contamination from BLAST (inspiration from MEGAN) * Allow more customization of chunks for programs. Fallback one and program specific From ff7722cab8ada7519d01b11f4dbf1401a5a68d50 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Tue, 17 Mar 2020 10:08:18 +0100 Subject: [PATCH 15/16] params and a bit more doc --- README.md | 10 +++++++--- main_configuration.config | 2 +- pipeline.nf | 8 ++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index d3f1bc0..83d3977 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ More information can be found in the [Nextflow documentation](https://www.nextfl ## Pipeline steps * **blast**: it perfoms BLAST search against defined database from input files +* **diamond**: the same as above but using DIAMOND ( ```diamond = "true"``` in config file ) * **ipscn**: it performs InterProScan analyses from input files * **signalP**: it performs signalP analyses from input files * **targetP**: it performs targetP analyses from input files @@ -69,6 +70,11 @@ More information can be found in the [Nextflow documentation](https://www.nextfl * **generateResultFiles**: it generates report files * **generateGFF3File**: if GFF provided as input, it provides a modified GFF with additional information +### Formatted databases + +* For BLAST: ```blastDbPath = "/path/to/db"``` It looks for formatted database files (normally named db.p* for protein type based ones), otherwise it will try to format FASTA file with that name +* For DIAMOND: ```blastDbPath = "/path/to/db"``` It looks for a single formatted database file (normally named db.dmnd), otherwise it will try to format the FASTA file with that name (gzip compressed files accepted) + ### About blast_annotator Retrieval of GO terms from BLAST results can be performed either from [BLAST2GO](https://www.blast2go.com/) results or from other methods as far as a BLAST2GO-compatible output format is provided. @@ -81,12 +87,10 @@ We recommend installing either [Docker](https://www.docker.com/) of [Singularity The software used all along this pipeline is encapsulated in, at least, 4 containers: -Whenever possible, we try to provide necessary images in a public repository (e.g. Docker hub). However, for some software that includes privative components, we suggest to build the container image by yourself. +As written down in ```nextflow.config``` file, whenever possible, we try to provide necessary images in a public repository (e.g. [Docker hub](https://hub.docker.com/) or quay.io from [Biocontainers](https://biocontainers.pro/)). However, for some software that includes privative components, we suggest to build the container image by yourself. -* [NCBI Blast](https://hub.docker.com/r/ncbi/blast) * [SignalP and TargetP](https://github.com/biocorecrg/sigtarp_docker) (user needs to build this) * [Interproscan and 3rd party tools](https://github.com/biocorecrg/interproscan_docker) (user needs to build this) -* [Environment for annotation scripts](https://hub.docker.com/r/guigolab/fa-nf) ## How to build a container diff --git a/main_configuration.config b/main_configuration.config index 7a8c279..e81e7e1 100644 --- a/main_configuration.config +++ b/main_configuration.config @@ -6,7 +6,7 @@ params { evalue = "0.00001" blastFile = "" diamond = "false" - blastDB_path = "/nfs/db/ncbi/201908/blastdb/db/nr" + blastDbpath = "/nfs/db/ncbi/201908/blastdb/db/nr" speciesName = "P.vulgaris" chunkSize = 25 chunkWebSize = 100 diff --git a/pipeline.nf b/pipeline.nf index a621603..b6fd744 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -168,8 +168,8 @@ if(params.diamond=="TRUE"||params.diamond=="true") { if (params.blastFile == "" || params.blastFile == null ){ // program-specific parameters - db_name = file(params.blastDB_path).name - db_path = file(params.blastDB_path).parent + db_name = file(params.blastDbPath).name + db_path = file(params.blastDbPath).parent // Handling Database formatting formatdbDetect = "false" @@ -197,7 +197,7 @@ if (params.blastFile == "" || params.blastFile == null ){ } } else { - formatdb = params.blastDB_path + formatdb = params.blastDbPath } } else { @@ -230,7 +230,7 @@ if (params.blastFile == "" || params.blastFile == null ){ } } else { - formatdb = params.blastDB_path + formatdb = params.blastDbPath } } From b6822cda2299a2e99f76994b6d0b00acb96f15e1 Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Tue, 17 Mar 2020 16:23:01 +0100 Subject: [PATCH 16/16] GenBank conversion option --- TODO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index dd022af..a234b60 100644 --- a/TODO.md +++ b/TODO.md @@ -6,8 +6,8 @@ * Venn Diagrams --- +* Allow conversion from GenBank https://metacpan.org/pod/bp_genbank2gff3.pl * In reports, put select distincts again tables -* Check why CDSearch why starts later * Allow reports from KEGG orthologs (number of potential orthologs from KEGG species) * blast_hit reconsider * Allow more flexibility for input parameters batch