-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.nf
executable file
·86 lines (66 loc) · 2.57 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env nextflow
/*
* @authors
* Ferriol Calvet <ferriol.calvet@crg.eu>
* Emilio Righi <emilio.righi@crg.eu>
*/
/*
* Input parameters: genome, protein evidence, parameter file, and
* additional values for the generation of the parameter file.
* Params are stored in the params.config file.
*
* The configuration is in the nextflow.config file.
*/
// Enable the DSL2 syntax (workflow blocks and `include` statements used in this script).
nextflow.enable.dsl=2
// Print a start-up banner echoing the parameters this script reads,
// so a run log records which inputs and options were used.
log.info """
GENOMEMASKING+GENEID+BLASTx - NextflowPipeline
=============================================
output path: ${params.outdir}
tsv input path: ${params.tsv}
id column: ${params.column_id_value}
taxid column: ${params.column_taxid_value}
path column: ${params.column_path_value}
mask genomes? ${params.use_masking}
"""
// Locations, relative to the project directory, from which the workflows and
// subworkflows are included.
wk_folder = "${projectDir}/workflows"
subwk_folder = "${projectDir}/subworkflows"
// Per-species output folder under the output directory.
// NOTE(review): not referenced anywhere else in the visible part of this file — confirm it is still needed.
species_dir = "${params.outdir}/species"
include { GENEIDX } from "${wk_folder}/GENEIDX"
include { GENOMEANNOTATOR } from "${wk_folder}/GENOMEANNOTATOR"
include { ASSEMBLY_PREPROCESS } from "${subwk_folder}/ASSEMBLY_PREPROCESS"
include { UNZIP_FASTA } from "${subwk_folder}/ASSEMBLY_PREPROCESS"
include { parseFastaHeader } from "${subwk_folder}/tools"
/*
 * Entry workflow.
 *
 * 1. Reads the TSV samplesheet (params.tsv) and stages every assembly listed
 *    in it into params.assemblies_dir as "<id>-<taxid>-.fa.gz".
 * 2. Splits the staged assemblies into valid (at least one FASTA record) and
 *    invalid ones; the invalid ones are reported and written to
 *    "<params.outdir>/INVALID_ASSEMBLIES.txt".
 * 3. Runs UNZIP_FASTA | ASSEMBLY_PREPROCESS | parseFastaHeader on the valid
 *    assemblies, optionally GENOMEANNOTATOR (when params.use_masking is set),
 *    and finally GENEIDX.
 */
workflow {
    // Guard clause: nothing can be done without a samplesheet.
    if (!params.tsv) {
        exit 1, 'TSV samplesheet not specified!'
    }

    tsv = channel.fromPath(params.tsv)
        .splitCsv(sep: '\t', header: true)
        // The id and taxid are encoded in the staged file name so they can be
        // recovered after collectFile, which only emits the resulting path.
        // The trailing '-' before ".fa.gz" keeps the extension out of the taxid token.
        .collectFile(storeDir: params.assemblies_dir) { row ->
            [
                "${row[params.column_id_value]}-${row[params.column_taxid_value]}-.fa.gz",
                file(row[params.column_path_value])
            ]
        }
        // Recover (id, taxid) from the file name: "<id>-<taxid>-.fa.gz".
        // NOTE(review): an id or taxid containing '-' would be split incorrectly here.
        .map { fasta ->
            def elements = fasta.baseName.tokenize('-')
            tuple(elements[0], elements[1], fasta)
        }
        .branch {
            validFasta: it[2].countFasta() > 0
            invalidFasta: true
        }

    invalid_fasta = tsv.invalidFasta

    // `count()` returns a channel, not a number, so it cannot be used in a
    // plain `if`: the condition would always be true. Filter the emitted count
    // instead, so the message only appears when something is actually invalid.
    invalid_fasta
        .count()
        .filter { it > 0 }
        .view { it -> "A total of ${it} invalid assemblies have been found" }

    // collectFile emits nothing for an empty channel, so the report file and
    // the message below are only produced when invalid assemblies exist.
    invalid_fasta
        .collectFile(storeDir: params.outdir) { item ->
            [ "INVALID_ASSEMBLIES.txt", "${item[0]}\t${item[1]}\t${item[2]}" + '\n' ]
        }
        .subscribe {
            println "Invalid assemblies have been saved in: $it"
        }

    // (id, fasta) pairs feed the preprocessing; (id, taxid) pairs travel as metadata.
    assemblies = tsv.validFasta.map { it -> tuple(it[0], it[2]) }
    metadata = tsv.validFasta.map { row -> tuple(row[0], row[1]) }

    filtered_assemblies = UNZIP_FASTA(assemblies) | ASSEMBLY_PREPROCESS | parseFastaHeader

    // Masking is optional: without it GENEIDX runs on the preprocessed assemblies.
    target_genomes = filtered_assemblies
    if (params.use_masking) {
        target_genomes = GENOMEANNOTATOR(filtered_assemblies, metadata)
    }

    results = GENEIDX(target_genomes, metadata)
}
// Completion handler: prints a final confirmation when the run finishes.
workflow.onComplete {
    def farewell = "\nDone!\n"
    println(farewell)
}