From 45a8db47130b32ac9c89382093df5f082e4daef3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= <famke.baeuerle@gmail.com>
Date: Mon, 25 Nov 2024 10:34:29 +0100
Subject: [PATCH] add annotation sbwf

---
 .../nf-core/vcf_annotate_ensemblvep/main.nf   | 45 +++++++++++++
 .../nf-core/vcf_annotate_ensemblvep/meta.yml  | 65 +++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf
 create mode 100644 subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml

diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf
new file mode 100644
index 0000000..291eddc
--- /dev/null
+++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf
@@ -0,0 +1,45 @@
+//
+// Run VEP to annotate VCF files
+//
+
+include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main'
+include { TABIX_TABIX    } from '../../../modules/nf-core/tabix/tabix/main'
+
+workflow VCF_ANNOTATE_ENSEMBLVEP { // runs ENSEMBLVEP_VEP on the input VCFs, then TABIX_TABIX on the annotated VCF
+    take:
+    ch_vcf                      // channel: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... (optional)] ]
+    ch_fasta                    // channel: [ val(meta2), path(fasta) ] (optional)
+    val_genome                  //   value: genome to use
+    val_species                 //   value: species to use
+    val_cache_version           //   value: cache version to use
+    ch_cache                    // channel: [ val(meta3), path(cache) ] (optional)
+    ch_extra_files              // channel: [ path(file1), path(file2)... ] (optional)
+
+    main:
+    ch_versions = Channel.empty()
+
+    ENSEMBLVEP_VEP(
+        ch_vcf,
+        val_genome,
+        val_species,
+        val_cache_version,
+        ch_cache,               // NOTE: cache is passed before fasta here, unlike the take: order above
+        ch_fasta,
+        ch_extra_files
+    )
+    // Build the index (tbi) for the annotated VCF emitted by VEP
+    TABIX_TABIX(ENSEMBLVEP_VEP.out.vcf)
+    // Pair each annotated VCF with its index; the join errors out on duplicate or unmatched keys
+    ch_vcf_tbi = ENSEMBLVEP_VEP.out.vcf.join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true)
+
+    // Gather versions of all tools used (VEP and tabix)
+    ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions)
+    ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
+
+    emit:
+    vcf_tbi  = ch_vcf_tbi                  // channel: [ val(meta), path(vcf), path(tbi) ]
+    json     = ENSEMBLVEP_VEP.out.json     // channel: [ val(meta), path(json) ]
+    tab      = ENSEMBLVEP_VEP.out.tab      // channel: [ val(meta), path(tab) ]
+    reports  = ENSEMBLVEP_VEP.out.report   // channel: [ path(html) ]
+    versions = ch_versions                 // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml
new file mode 100644
index 0000000..15d42da
--- /dev/null
+++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml
@@ -0,0 +1,65 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: vcf_annotate_ensemblvep
+description: Perform annotation with ensemblvep and bgzip + tabix index the resulting VCF file
+keywords:
+  - vcf
+  - annotation
+  - ensemblvep
+components:
+  - ensemblvep/vep
+  - tabix/tabix
+input:
+  - ch_vcf:
+      description: |
+        vcf file to annotate
+        Structure: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... (optional)] ]
+  - ch_fasta:
+      description: |
+        Reference genome fasta file (optional)
+        Structure: [ val(meta2), path(fasta) ]
+  - val_genome:
+      type: string
+      description: genome to use
+  - val_species:
+      type: string
+      description: species to use
+  - val_cache_version:
+      type: integer
+      description: cache version to use
+  - ch_cache:
+      description: |
+        the root cache folder for ensemblvep (optional)
+        Structure: [ val(meta3), path(cache) ]
+  - ch_extra_files:
+      description: |
+        any extra files needed by plugins for ensemblvep (optional)
+        Structure: [ path(file1), path(file2)... ]
+output:
+  - vcf_tbi:
+      description: |
+        Compressed vcf file + tabix index
+        Structure: [ val(meta), path(vcf), path(tbi) ]
+  - json:
+      description: |
+        json file
+        Structure: [ val(meta), path(json) ]
+  - tab:
+      description: |
+        tab file
+        Structure: [ val(meta), path(tab) ]
+  - reports:
+      type: file
+      description: html reports
+      pattern: "*.html"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@maxulysse"
+  - "@matthdsm"
+  - "@nvnieuwk"
+maintainers:
+  - "@maxulysse"
+  - "@matthdsm"
+  - "@nvnieuwk"