From 06b0a5ecad62e75ba0197685aee0d87ee59ba9a7 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 16:56:00 +0100
Subject: [PATCH 01/16] test with diamond

---
 nextflow.config |  9 ++++-
 pipeline.nf     | 90 +++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 20e5e04..82920dd 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -19,7 +19,14 @@ process{
                 queue='biocore-el7,long-sl7'
                 time='48h'
                 cpus='8'
-                container="ncbi/blast:2.9.0"
+                container="ncbi/blast:2.10.0"
+        }
+        
+        withLabel: diamond {
+                queue='biocore-el7,long-sl7'
+                time='48h'
+                cpus='8'
+                container="quay.io/biocontainers/diamond:0.9.30--h56fc30b_0"
         }
         
         withLabel: blastannotator {
diff --git a/pipeline.nf b/pipeline.nf
index 4643bd2..1447d6e 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -156,6 +156,12 @@ if(params.oboFile == "" ||  params.oboFile == null ) {
 
 obofile=file(params.oboFile)
 
+// TODO: To change for different aligners
+diamond = false
+
+if(params.diamond=="TRUE"||params.diamond=="true") {
+ diamond = true
+}
 
 if (params.blastFile == "" ||  params.blastFile == null ){
 
@@ -163,21 +169,85 @@ if (params.blastFile == "" ||  params.blastFile == null ){
 db_name = file(params.blastDB_path).name
 db_path = file(params.blastDB_path).parent
 
-process blast{
+// Handling Database formatting
+formatdbDetect = false
 
- label 'blast'
+if ( diamond ) {
 
- // publishDir "results", mode: 'copy'
+ formatDbFileName = db_path+"/"+db_name+".dmnd"
+ formatDbFile = file(formatDbFileName)
+ if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
+  formatdbDetect = true
+ }
+ 
+ if ( formatdbDetect == false ) {
+ 
+  process diamondFormat{
+ 
+   label 'diamond'
+  
+   output:
+   file "${dbname}_formatdb" into formatdb
+  
+   """
+    diamond makedb --in ${db_path}/${db_name} --db "${dbname}_formatdb"
+   """
+  }
+ 
+ }
+ 
+} else {
+ // TODO Need to detect if formatted with BLAST
+ // formatDbFileName = db_path+"/"+db_name+".dmnd"
+ // formatDbFile = file(formatDbFileName)
+ // if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
+ //  formatdbDetect = true
+ // }
+ // For now exists true
+ formatdbDetect = true
+ formatdb = Channel.fromPath( params.blastDB_path )
+}
 
- input:
- file seq from seq_file6
 
- output:
- file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
+if ( diamond == true ) {
+
+ process diamond{
+ 
+  label 'diamond'
+  
+  input:
+  file seq from seq_file6
+  file formatdb_file from formatdb
+ 
+  output:
+  file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
+ 
+  """
+   diamond blastp --db ${formatdb_file}--query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out "blastXml${seq}"
+  """
+ }
+
+} else {
+
+ process blast{
+ 
+  label 'blast'
+ 
+  // publishDir "results", mode: 'copy'
+ 
+  input:
+  file seq from seq_file6
+  file formatdb_file from formatdb
+
+ 
+  output:
+  file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
+ 
+  """
+   blastp -db ${formatdb_file} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out "blastXml${seq}" -outfmt 5
+  """
+ }
 
- """
-  blastp -db ${db_path}/${db_name} -query $seq -num_threads 8 -evalue  0.00001 -out "blastXml${seq}" -outfmt 5
- """
 }
 
 } else {

From ce3c110e2363c7685fd53a7a51c0a01438d9e704 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 17:30:25 +0100
Subject: [PATCH 02/16] db_name

---
 pipeline.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipeline.nf b/pipeline.nf
index 1447d6e..19df7eb 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -187,10 +187,10 @@ if ( diamond ) {
    label 'diamond'
   
    output:
-   file "${dbname}_formatdb" into formatdb
+   file "${db_name}_formatdb" into formatdb
   
    """
-    diamond makedb --in ${db_path}/${db_name} --db "${dbname}_formatdb"
+    diamond makedb --in ${db_path}/${db_name} --db "${db_name}_formatdb"
    """
   }
  

From e2b1c2ea164d63bf1e39223fcdb7a4a36f4815ee Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 17:35:40 +0100
Subject: [PATCH 03/16] diamond

---
 pipeline.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipeline.nf b/pipeline.nf
index 19df7eb..c02e3cb 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -187,7 +187,7 @@ if ( diamond ) {
    label 'diamond'
   
    output:
-   file "${db_name}_formatdb" into formatdb
+   file "${db_name}_formatdb.dmnd" into formatdb
   
    """
     diamond makedb --in ${db_path}/${db_name} --db "${db_name}_formatdb"

From 0cf2bab1485ccf08c4f5bb3343e91c31c8cc1ea4 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 17:37:11 +0100
Subject: [PATCH 04/16] typo diamond

---
 pipeline.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipeline.nf b/pipeline.nf
index c02e3cb..d101b41 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -223,7 +223,7 @@ if ( diamond == true ) {
   file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
  
   """
-   diamond blastp --db ${formatdb_file}--query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out "blastXml${seq}"
+   diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out "blastXml${seq}"
   """
  }
 

From 1469bfac9a7983f14b4375fcfeb8c2180bd8a6c7 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 17:57:57 +0100
Subject: [PATCH 05/16] formatdb for BLAST

---
 pipeline.nf | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/pipeline.nf b/pipeline.nf
index d101b41..3e17fc0 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -194,18 +194,39 @@ if ( diamond ) {
    """
   }
  
+ } else {
+ 
+  formatdb = Channel.fromPath( params.blastDB_path )
+
  }
  
 } else {
- // TODO Need to detect if formatted with BLAST
- // formatDbFileName = db_path+"/"+db_name+".dmnd"
- // formatDbFile = file(formatDbFileName)
- // if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
- //  formatdbDetect = true
- // }
- // For now exists true
- formatdbDetect = true
- formatdb = Channel.fromPath( params.blastDB_path )
+
+ formatDbFileName = db_path+"/"+db_name+"*.phr"
+ formatDbFile = file(formatDbFileName)
+ if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
+   formatdbDetect = true
+ }
+ 
+ if ( formatdbDetect == false ) {
+
+  process blastFormat{
+ 
+   label 'blast'
+  
+   output:
+   file "${db_name}.p*" into formatdb
+  
+   """
+    makeblastdb -dbtype prot -in ${db_path}/${db_name} -parse_seqids -out ${db_name}
+   """
+  }
+
+ } else {
+
+  formatdb = Channel.fromPath( params.blastDB_path )
+  
+ }
 }
 
 

From 06ac4ceda12bf7c04b752b5048270ff29befc3ce Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 18:22:45 +0100
Subject: [PATCH 06/16] fix

---
 pipeline.nf | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pipeline.nf b/pipeline.nf
index 3e17fc0..70e6312 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -202,9 +202,10 @@ if ( diamond ) {
  
 } else {
 
+ // TODO: This needs more work
  formatDbFileName = db_path+"/"+db_name+"*.phr"
  formatDbFile = file(formatDbFileName)
- if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
+ if ( formatDbFile.size() > 0 ) {
    formatdbDetect = true
  }
  

From fa53e941cffd2fd5bbdc7d98b65ab1c36c73f288 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toni.hermoso@crg.eu>
Date: Mon, 16 Mar 2020 21:15:27 +0100
Subject: [PATCH 07/16] some checking

---
 pipeline.nf | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/pipeline.nf b/pipeline.nf
index 3e17fc0..8f1a21b 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -170,17 +170,17 @@ db_name = file(params.blastDB_path).name
 db_path = file(params.blastDB_path).parent
 
 // Handling Database formatting
-formatdbDetect = false
+formatdbDetect = "false"
 
 if ( diamond ) {
 
  formatDbFileName = db_path+"/"+db_name+".dmnd"
  formatDbFile = file(formatDbFileName)
  if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
-  formatdbDetect = true
+  formatdbDetect = "true"
  }
  
- if ( formatdbDetect == false ) {
+ if ( formatdbDetect == "false" ) {
  
   process diamondFormat{
  
@@ -203,16 +203,18 @@ if ( diamond ) {
 } else {
 
  formatDbFileName = db_path+"/"+db_name+"*.phr"
- formatDbFile = file(formatDbFileName)
- if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
-   formatdbDetect = true
+ formatDbFile = FileNameFinder().getFileNames( formatDbFileName )
+ // println( formatDbFile.size() )
+ if ( formatDbFile.size() > 0 ) {
+   formatdbDetect = "true"
  }
- 
- if ( formatdbDetect == false ) {
+
+println( formatdbDetect ) 
+ if ( formatdbDetect == "false" ) {
+
+  // println( "TUR" )
 
   process blastFormat{
- 
-   label 'blast'
   
    output:
    file "${db_name}.p*" into formatdb
@@ -224,8 +226,9 @@ if ( diamond ) {
 
  } else {
 
-  formatdb = Channel.fromPath( params.blastDB_path )
-  
+  // println( "HERE" )
+  formatdb = params.blastDB_path
+ 
  }
 }
 

From 802b4763a63a2655527e6f82c4fa190fb14cbd7c Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 22:53:48 +0100
Subject: [PATCH 08/16] detect files in Groovy

---
 pipeline.nf | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pipeline.nf b/pipeline.nf
index 8679ef0..fc2801f 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -201,12 +201,14 @@ if ( diamond ) {
  }
  
 } else {
-
- // TODO: This needs more work
- formatDbFileName = db_path+"/"+db_name+"*.phr"
- formatDbFile = FileNameFinder().getFileNames( formatDbFileName )
- // println( formatDbFile.size() )
- if ( formatDbFile.size() > 0 ) {
+ 
+ formatDbDir = file( db_path ) 
+ filter =  ~/${db_name}.*.phr/
+ def fcount = 0
+ formatDbDir.list().eachFileMatch( filter ) { it ->
+  fcount = fcount + 1
+ }
+ if ( fcount > 0 ) {
    formatdbDetect = "true"
  }
 

From 378d7dd4c1bb6a03a026b4985a719e0f972ea411 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 23:43:58 +0100
Subject: [PATCH 09/16] handling blast

---
 pipeline.nf | 194 ++++++++++++++++++++++++++--------------------------
 1 file changed, 97 insertions(+), 97 deletions(-)

diff --git a/pipeline.nf b/pipeline.nf
index fc2801f..3cb39e2 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -165,139 +165,139 @@ if(params.diamond=="TRUE"||params.diamond=="true") {
 
 if (params.blastFile == "" ||  params.blastFile == null ){
 
-// program-specific parameters
-db_name = file(params.blastDB_path).name
-db_path = file(params.blastDB_path).parent
-
-// Handling Database formatting
-formatdbDetect = "false"
-
-if ( diamond ) {
-
- formatDbFileName = db_path+"/"+db_name+".dmnd"
- formatDbFile = file(formatDbFileName)
- if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
-  formatdbDetect = "true"
- }
+ // program-specific parameters
+ db_name = file(params.blastDB_path).name
+ db_path = file(params.blastDB_path).parent
  
- if ( formatdbDetect == "false" ) {
+ // Handling Database formatting
+ formatdbDetect = "false"
  
-  process diamondFormat{
+ if ( diamond ) {
  
-   label 'diamond'
+  formatDbFileName = db_path+"/"+db_name+".dmnd"
+  formatDbFile = file(formatDbFileName)
+  if ( formatDbFile.exists() && formatDbFile.size() > 0 ) {
+   formatdbDetect = "true"
+  }
   
-   output:
-   file "${db_name}_formatdb.dmnd" into formatdb
+  if ( formatdbDetect == "false" ) {
+  
+   process diamondFormat{
+  
+    label 'diamond'
+   
+    output:
+    file "${db_name}_formatdb.dmnd" into formatdb
+   
+    """
+     diamond makedb --in ${db_path}/${db_name} --db "${db_name}_formatdb"
+    """
+   }
   
-   """
-    diamond makedb --in ${db_path}/${db_name} --db "${db_name}_formatdb"
-   """
   }
- 
+  
  } else {
+  
+  formatDbDir = file( db_path ) 
+  filter =  ~/${db_name}.*.phr/
+  def fcount = 0
+  formatDbDir.list().eachFileMatch( filter ) { it ->
+   fcount = fcount + 1
+  }
+  if ( fcount > 0 ) {
+    formatdbDetect = "true"
+  }
  
-  formatdb = Channel.fromPath( params.blastDB_path )
-
- }
+  println( formatdbDetect ) 
+  if ( formatdbDetect == "false" ) {
  
-} else {
+   // println( "TUR" )
  
- formatDbDir = file( db_path ) 
- filter =  ~/${db_name}.*.phr/
- def fcount = 0
- formatDbDir.list().eachFileMatch( filter ) { it ->
-  fcount = fcount + 1
- }
- if ( fcount > 0 ) {
-   formatdbDetect = "true"
+   process blastFormat{
+  
+    label 'blast'
+  
+    output:
+    file "${db_name}.p*" into formatdb
+   
+    """
+     makeblastdb -dbtype prot -in ${db_path}/${db_name} -parse_seqids -out ${db_name}
+    """
+   }
+ 
+  }
  }
-
-println( formatdbDetect ) 
- if ( formatdbDetect == "false" ) {
-
-  // println( "TUR" )
-
-  process blastFormat{
  
-   label 'blast'
+ if ( diamond == true ) {
  
+  process diamond{
+  
+   label 'diamond'
+   
+   input:
+   file seq from seq_file6
+   file formatdb_file from formatdb
+  
    output:
-   file "${db_name}.p*" into formatdb
+   file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
   
-   """
-    makeblastdb -dbtype prot -in ${db_path}/${db_name} -parse_seqids -out ${db_name}
-   """
+    if ( formatdbDetect == "false" ) {
+     command = "diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}"
+    } else {
+     command = "diamond blastp --db ${db_path}/${db_name} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}"
+    }
+    
+    command
+   
   }
-
- } else {
-
-  // println( "HERE" )
-  formatdb = params.blastDB_path
  
- }
-}
-
-
-if ( diamond == true ) {
-
- process diamond{
+ } else {
  
-  label 'diamond'
+  process blast{
   
-  input:
-  file seq from seq_file6
-  file formatdb_file from formatdb
+   label 'blast'
+  
+   // publishDir "results", mode: 'copy'
+  
+   input:
+   file seq from seq_file6
+   file formatdb_file from formatdb
  
-  output:
-  file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
+  
+   output:
+   file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
+  
+  
+   if ( formatdbDetect == "false" ) {
+    command = "blastp -db ${formatdb_file} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out blastXml${seq} -outfmt 5"
+   } else {
+    command = "blastp -db ${db_path}/${db_name} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out blastXml${seq} -outfmt 5"
+   }
+  
+   command
+  }
  
-  """
-   diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out "blastXml${seq}"
-  """
  }
 
 } else {
 
- process blast{
+ blastInput=file(params.blastFile)
  
-  label 'blast'
+ process convertBlast{
  
   // publishDir "results", mode: 'copy'
  
   input:
-  file seq from seq_file6
-  file formatdb_file from formatdb
-
+  file blastFile from blastInput
  
   output:
-  file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
+  file("*.xml") into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
  
   """
-   blastp -db ${formatdb_file} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out "blastXml${seq}" -outfmt 5
+   hugeBlast2XML.pl -blast $blastFile -n 1000 -out blast.res
   """
+ 
  }
-
-}
-
-} else {
-
-blastInput=file(params.blastFile)
-
-process convertBlast{
-
- // publishDir "results", mode: 'copy'
-
- input:
- file blastFile from blastInput
-
- output:
- file("*.xml") into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
-
- """
-  hugeBlast2XML.pl -blast $blastFile -n 1000 -out blast.res
- """
-
-}
 }
 
 if (params.kolist != "" ||  params.kolist != null ){

From cb8088d26918c18a0b9d4e26e6ae9d1e2782adcb Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 23:49:30 +0100
Subject: [PATCH 10/16] script

---
 pipeline.nf | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pipeline.nf b/pipeline.nf
index 3cb39e2..6cccc09 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -240,14 +240,15 @@ if (params.blastFile == "" ||  params.blastFile == null ){
   
    output:
    file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
-  
-    if ( formatdbDetect == "false" ) {
-     command = "diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}"
-    } else {
-     command = "diamond blastp --db ${db_path}/${db_name} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}"
-    }
-    
-    command
+ 
+   script:
+   if ( formatdbDetect == "false" ) {
+    command = "diamond blastp --db ${formatdb_file} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}"
+   } else {
+    command = "diamond blastp --db ${db_path}/${db_name} --query $seq --outfmt 5 --threads ${task.cpus} --evalue ${evalue} --out blastXml${seq}"
+   }
+   
+   command
    
   }
  
@@ -263,11 +264,10 @@ if (params.blastFile == "" ||  params.blastFile == null ){
    file seq from seq_file6
    file formatdb_file from formatdb
  
-  
    output:
    file "blastXml${seq}" into (blastXmlResults1, blastXmlResults2, blastXmlResults3)
   
-  
+   script:
    if ( formatdbDetect == "false" ) {
     command = "blastp -db ${formatdb_file} -query $seq -num_threads ${task.cpus} -evalue ${evalue} -out blastXml${seq} -outfmt 5"
    } else {

From c8a70415677895777512aa590e811ec617221817 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 23:51:17 +0100
Subject: [PATCH 11/16] fix access

---
 pipeline.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipeline.nf b/pipeline.nf
index 6cccc09..754b3e7 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -201,7 +201,7 @@ if (params.blastFile == "" ||  params.blastFile == null ){
   formatDbDir = file( db_path ) 
   filter =  ~/${db_name}.*.phr/
   def fcount = 0
-  formatDbDir.list().eachFileMatch( filter ) { it ->
+  formatDbDir.eachFileMatch( filter ) { it ->
    fcount = fcount + 1
   }
   if ( fcount > 0 ) {

From 21f05ec011796c5e1de67b6f39f6753a03630554 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Mon, 16 Mar 2020 23:56:12 +0100
Subject: [PATCH 12/16] avoid missing formatdb

---
 pipeline.nf | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pipeline.nf b/pipeline.nf
index 754b3e7..05c29f9 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -194,6 +194,8 @@ if (params.blastFile == "" ||  params.blastFile == null ){
     """
    }
   
+  } else {
+   formatdb = params.blastDB_path
   }
   
  } else {
@@ -225,6 +227,8 @@ if (params.blastFile == "" ||  params.blastFile == null ){
     """
    }
  
+  } else {
+   formatdb = params.blastDB_path
   }
  }
  

From b1e9f264ce04ada97c0c788660edfbeb8f790899 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Tue, 17 Mar 2020 09:53:31 +0100
Subject: [PATCH 13/16] params additional

---
 main_configuration.config | 2 ++
 pipeline.nf               | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/main_configuration.config b/main_configuration.config
index fe86d82..7a8c279 100644
--- a/main_configuration.config
+++ b/main_configuration.config
@@ -4,6 +4,8 @@ params {
  gffclean = "true"
  gffstats = "true"
  evalue = "0.00001"
+ blastFile = ""
+ diamond = "false"
  blastDB_path = "/nfs/db/ncbi/201908/blastdb/db/nr"
  speciesName = "P.vulgaris"
  chunkSize = 25
diff --git a/pipeline.nf b/pipeline.nf
index 05c29f9..a621603 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -114,7 +114,9 @@ log.info "General parameters"
 log.info "------------------"
 log.info "Protein sequence file        : ${params.proteinFile}"
 log.info "Annotation file              : ${params.gffFile}"
+if ( params.blastFile != "" && params.blastFile != null ) { // only report a precomputed BLAST results file when one is configured
 log.info "BLAST results file           : ${params.blastFile}"
+}
 log.info "Species name                  : ${params.speciesName}"
 log.info "KEGG species                 : ${params.kegg_species}"
 if ( mysql ) {

From e2c956494869261e3a0019984ac887eab040270a Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Tue, 17 Mar 2020 09:54:16 +0100
Subject: [PATCH 14/16] Update stuff

---
 TODO.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/TODO.md b/TODO.md
index 06b836a..dd022af 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,6 +1,4 @@
 * Split KEGG retrieval and upload
-* Add Format option for DIAMOND and BLAST
-* Add DIAMOND option
 * Include some testing and CI
 * Add PANNZER in analysis
 * Add PhylomeDB in analysis
@@ -13,6 +11,6 @@
 * Allow reports from KEGG orthologs (number of potential orthologs from KEGG species)
 * blast_hit reconsider
 * Allow more flexibility for input parameters batch
-* Consider other programs for BLAST process or similar annotation processes: e.g. [GHOSTZ](http://www.bi.cs.titech.ac.jp/ghostz/) and [Argot2.5](http://www.medcomp.medicina.unipd.it/Argot2-5/)
+* Generalize and consider other programs for BLAST process or similar annotation processes: e.g. [GHOSTZ](http://www.bi.cs.titech.ac.jp/ghostz/) and [Argot2.5](http://www.medcomp.medicina.unipd.it/Argot2-5/)
 * Add option to detect if possible contamination from BLAST (inspiration from MEGAN)
 * Allow more customization of chunks for programs. Fallback one and program specific

From ff7722cab8ada7519d01b11f4dbf1401a5a68d50 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Tue, 17 Mar 2020 10:08:18 +0100
Subject: [PATCH 15/16] params and a bit more doc

---
 README.md                 | 10 +++++++---
 main_configuration.config |  2 +-
 pipeline.nf               |  8 ++++----
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index d3f1bc0..83d3977 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,7 @@ More information can be found in the [Nextflow documentation](https://www.nextfl
 ## Pipeline steps
 
 * **blast**: it perfoms BLAST search against defined database from input files
+* **diamond**: the same as above but using DIAMOND ( ```diamond = "true"``` in config file )
 * **ipscn**: it performs InterProScan analyses from input files
 * **signalP**: it performs signalP analyses from input files
 * **targetP**: it performs targetP analyses from input files
@@ -69,6 +70,11 @@ More information can be found in the [Nextflow documentation](https://www.nextfl
 * **generateResultFiles**: it generates report files
 * **generateGFF3File**: if GFF provided as input, it provides a modified GFF with additional information
 
+### Formatted databases
+
+* For BLAST: ```blastDbPath = "/path/to/db"``` It looks for formatted database files (normally named db.p* for protein databases); if none are found, it will try to format the FASTA file with that name
+* For DIAMOND: ```blastDbPath = "/path/to/db"``` It looks for a single formatted database file (normally named db.dmnd); if it is not found, it will try to format the FASTA file with that name (gzip-compressed files are accepted)
+
 ### About blast_annotator
 
 Retrieval of GO terms from BLAST results can be performed either from [BLAST2GO](https://www.blast2go.com/) results or from other methods as far as a BLAST2GO-compatible output format is provided.
@@ -81,12 +87,10 @@ We recommend installing either [Docker](https://www.docker.com/) of [Singularity
 
 The software used all along this pipeline is encapsulated in, at least, 4 containers:
 
-Whenever possible, we try to provide necessary images in a public repository (e.g. Docker hub). However, for some software that includes privative components, we suggest to build the container image by yourself.
+As specified in the ```nextflow.config``` file, whenever possible we try to provide the necessary images in a public repository (e.g. [Docker hub](https://hub.docker.com/) or quay.io from [Biocontainers](https://biocontainers.pro/)). However, for software that includes proprietary components, we suggest building the container image yourself.
 
-* [NCBI Blast](https://hub.docker.com/r/ncbi/blast)
 * [SignalP and TargetP](https://github.com/biocorecrg/sigtarp_docker) (user needs to build this)
 * [Interproscan and 3rd party tools](https://github.com/biocorecrg/interproscan_docker) (user needs to build this)
-* [Environment for annotation scripts](https://hub.docker.com/r/guigolab/fa-nf)
 
 ## How to build a container
 
diff --git a/main_configuration.config b/main_configuration.config
index 7a8c279..e81e7e1 100644
--- a/main_configuration.config
+++ b/main_configuration.config
@@ -6,7 +6,7 @@ params {
  evalue = "0.00001"
  blastFile = ""
  diamond = "false"
- blastDB_path = "/nfs/db/ncbi/201908/blastdb/db/nr"
+ blastDbPath = "/nfs/db/ncbi/201908/blastdb/db/nr" // name must match params.blastDbPath as read in pipeline.nf
  speciesName = "P.vulgaris"
  chunkSize = 25
  chunkWebSize = 100
diff --git a/pipeline.nf b/pipeline.nf
index a621603..b6fd744 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -168,8 +168,8 @@ if(params.diamond=="TRUE"||params.diamond=="true") {
 if (params.blastFile == "" ||  params.blastFile == null ){
 
  // program-specific parameters
- db_name = file(params.blastDB_path).name
- db_path = file(params.blastDB_path).parent
+ db_name = file(params.blastDbPath).name
+ db_path = file(params.blastDbPath).parent
  
  // Handling Database formatting
  formatdbDetect = "false"
@@ -197,7 +197,7 @@ if (params.blastFile == "" ||  params.blastFile == null ){
    }
   
   } else {
-   formatdb = params.blastDB_path
+   formatdb = params.blastDbPath
   }
   
  } else {
@@ -230,7 +230,7 @@ if (params.blastFile == "" ||  params.blastFile == null ){
    }
  
   } else {
-   formatdb = params.blastDB_path
+   formatdb = params.blastDbPath
   }
  }
  

From b6822cda2299a2e99f76994b6d0b00acb96f15e1 Mon Sep 17 00:00:00 2001
From: Toni Hermoso Pulido <toniher@cau.cat>
Date: Tue, 17 Mar 2020 16:23:01 +0100
Subject: [PATCH 16/16] GenBank conversion option

---
 TODO.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TODO.md b/TODO.md
index dd022af..a234b60 100644
--- a/TODO.md
+++ b/TODO.md
@@ -6,8 +6,8 @@
     * Venn Diagrams
 
 ---
+* Allow conversion from GenBank https://metacpan.org/pod/bp_genbank2gff3.pl
 * In reports, put select distincts again tables
-* Check why CDSearch why starts later
 * Allow reports from KEGG orthologs (number of potential orthologs from KEGG species)
 * blast_hit reconsider
 * Allow more flexibility for input parameters batch