// nextflow.config

/*
========================================================================================
    nf-core/radseq Nextflow config file
========================================================================================
    Default config options for all compute environments
----------------------------------------------------------------------------------------
*/

// Global default params, used in configs
// Pipeline-wide default parameters. Every value here can be overridden on the
// command line (--<name>), in a params file, or by a profile/config loaded later.
params {

    // Input options
    input                          = null
    step                           = null

    // Workflow options
    method                         = null // e.g. 'denovo' or 'reference'

    // Type of RADseq
    sequence_type                  = null // e.g. 'SE', 'PE', 'RPE', 'ROL', and 'OL'

    // Reference options
    genome                         = null
    faidx                          = null

    igenomes_base                  = 's3://ngi-igenomes/igenomes'
    igenomes_ignore                = false

    // Trimming options
    cut_right                      = true
    window_size                    = 25
    mean_min_quality               = 20
    percent_unqualified            = 40
    pairedend_bp_corr              = true
    overlap_dif_limit              = 1
    denovo_clip_r1                 = null
    denovo_clip_r2                 = null
    trim_polyg                     = true
    dont_eval_duplicates           = true
    disable_quality_filtering      = false
    disable_length_filtering       = false
    // NOTE(review): the trailing comment below reads as if it describes
    // `only_use_best_reference` rather than a UMI read structure -- confirm and move.
    fastp_umi_read_structure       = null // uses the reference based on lowest mismatch and highest paired reads

    // DENOVO
    // trimming options
    need_to_trim_fastq             = false
    minreaddepth_withinindividual  = null // defaults to 2,3
    minreaddepth_betweenindividual = null // defaults to 2,3
    clip_umi_r1                    = null // remove UMI barcodes
    clip_umi_r2                    = 10
    only_use_best_reference        = true

    // cdhit options
    cluster_algorithm              = 1 // slow but accurate algorithm (0 or 1)
    description_length             = 100
    sequence_simularity            = '.9' // may need to change depending on taxa

    // rainbow div options
    similarity_fraction            = 0.5
    max_variants                   = 10

    // rainbow merge options
    min_reads                      = 2
    max_clusters_for_merge         = 10000
    max_reads_for_assembly         = 10000
    min_overlap                    = 20
    min_similarity_fraction        = 0.75

    // General
    // Alignment options
    // NOTE(review): the default 'bwa-mem2' is not one of the spellings listed in
    // `aligner_options` (' bwa, bwamem2') further down -- confirm which one the workflow expects.
    aligner                        = 'bwa-mem2'
    clipping_penalty               = '20,5'
    output_secondary               = true
    mark_short_as_sec              = true
    min_aln_quality                = 30
    matching_score                 = 1
    mismatch_score                 = 3
    gap_penalty                    = 5
    quality_score                  = 0
    off_diagonal                   = 95
    min_seed_length                = 35

    // UMI BARCODE FGBIO SUBWORKFLOW
    dedup_tool                     = 'umitools'

    // fgbio
    // (`extract_umi_from_readname` used to be declared here as `null` and again
    // under "fastqtobam" as `false`; only the single declaration below remains.)
    duplex                         = false // bool: depending on UMI structure
    fgbio_umi_read_structure       = null
    umi_strategy                   = 'Identity'
    number_of_allowable_edits      = 0 // should be 0 when strategy is Identity
    grpumi_min_mapq                = 30
    cuc_min_reads                  = 1
    cuc_min_base_quality           = 5
    fc_minreads                    = 2
    fc_minbasequality              = 5
    fc_maxbaseerrorrate            = 0.3 // written with a leading zero, like the other fractions in this file

    // fastqtobam
    // Single source of truth for this flag; `false` is the value that the later
    // of the two former assignments set.
    extract_umi_from_readname      = false

    // Filtering BAM channel based on mapping rate
    check_alignment_mapping_rates  = true
    min_percent_aligned            = 0.5

    // Interval options
    variant_calling_interval_strategy = 'faidx' // support bedtools or faidx
    splitNLines                    = 1
    subset_intervals_channel       = null // applies to bedtools option
    max_read_coverage_to_split     = '50'
    winsize                        = 1000000

    // Freebayes options
    popmap                         = null
    min_map_qual                   = 5
    min_base_qual                  = 5
    complex_gap                    = 3
    use_best_n_alleles             = 10
    min_alt_fraction               = 0.1
    min_repeat_entropy             = 1
    skip_coverage                  = null

    // VCF norm
    atomize_variants               = false
    split_multiallelic             = '-any'

    // VCF Filtering Options
    // The quoted "-i'...'" values are complete include-expression arguments stored as strings.
    fraction_missingness_list      = [0.95]
    minor_allele_count_list        = [6]
    allele_balance                 = "-i'AB > 0.2 && AB < 0.8 || AB < 0.01 || AB > 0.99 && MIN(FORMAT/QR)>0 || MIN(FORMAT/QA)>0'"
    mapping_quality_ratio          = "-i'MQM / MQMR > 0.25 & MQM / MQMR < 1.75'"
    number_of_observations         = "-i'PAIRED > 0.05 & PAIREDR > 0.05 & PAIREDR / PAIRED < 1.75 & PAIREDR / PAIRED > 0.25 | PAIRED < 0.05 & PAIREDR < 0.05'"
    strand_prop_alleles            = "-i'SAF / SAR > 100 & SRF / SRR > 100 | SAR / SAF > 100 & SRR / SRF > 50'"
    fraction_genotypes_missing     = '.2'
    minor_allele_count             = '2'
    minimum_genotype_depth         = '3'
    maximum_genotype_depth         = null
    minimum_indv_fraction_missing  = '.3'

    // Intermediate files
    save_trimmed                   = true
    denovo_intermediate_files      = true
    save_reference_indices         = false
    save_bam_files                 = false
    save_bed_intervals             = true
    save_freebayes_intervals       = true

    // MultiQC options
    multiqc_config                 = null
    multiqc_title                  = null
    max_multiqc_email_size         = '25.MB'

    // Boilerplate options
    outdir                         = './results'
    tracedir                       = "${params.outdir}/pipeline_info"
    publish_dir_mode               = 'symlink'
    email                          = null
    email_on_fail                  = null
    plaintext_email                = false
    monochrome_logs                = false
    help                           = false
    validate_params                = true
    show_hidden_params             = false
    // NOTE(review): this list repeats 'method_options,publish_dir_mode' and contains names
    // that do not match any parameter declared in this block (e.g. 'denovo_intermediate_file',
    // 'clip_r1', 'clip_r2', 'umi_read_structure'). Left untouched because it affects
    // validation -- reconcile against the parameter schema.
    schema_ignore_params           = 'aligner_options,genomes,method_options,publish_dir_mode,save_reference_indices,denovo_intermediate_file,method_options,publish_dir_mode,save_freebayes_intervals,save_intervals,save_reference_fai,save_trim_adapters_fastp,save_cdhit_clstr,save_seqtk_seq_fasta,save_uniq_full_fasta,save_uniqseq,save_trimmed,min_repeat_entropy,min_alt_fraction,use_best_n_alleles,complex_gap,min_base_qual,min_map_qual,max_read_coverage_to_split,subset_intervals_channel,quality_score,gap_penalty,mismatch_score,matching_score,min_aln_quality,mark_short_as_sec,output_secondary,clipping_penalty,aligner,min_similarity_fraction,min_overlap,max_reads_for_assembly,max_clusters_for_merge,min_reads,max_variants,similarity_fraction,sequence_simularity,description_length,cluster_algorithm,minreaddepth_betweenindividual,minreaddepth_withinindividual,umi_read_structure,dont_eval_duplicates,trim_polyg,clip_r2,clip_r1,overlap_dif_limit,pairedend_bp_corr,mean_min_quality,window_size,cut_right,sequence_type,method,popmap,skip_coverage,disable_length_filtering,disable_quality_filtering'
    method_options                 = ' denovo, reference'
    aligner_options                = ' bwa, bwamem2'
    min_reads_after_fastp          = 10

    // Config options
    custom_config_version          = 'master'
    custom_config_base             = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
    config_profile_description     = null
    config_profile_contact         = null
    config_profile_url             = null
    config_profile_name            = null

    // Max resource options
    // Defaults only, expecting to be overwritten.
    // These are the ceilings that check_max() (bottom of this file) compares against.
    max_memory                     = '128.GB'
    max_cpus                       = 16
    max_time                       = '240.h'

}

// Load base.config by default for all pipelines
// (path is relative to this file).
includeConfig 'conf/base.config'

// Load nf-core custom profiles from different Institutions.
// The URL is built from params.custom_config_base, so it follows any override of
// custom_config_base / custom_config_version. Loading is best-effort: if the include
// throws, a warning is written to stderr and configuration continues
// without the institutional profiles.
try {
    includeConfig "${params.custom_config_base}/nfcore_custom.config"
} catch (Exception e) {
    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
}

// Execution profiles, selected with `-profile <name>[,<name>...]`.
// Each software profile switches exactly one engine on and explicitly switches
// the other engines listed in this block off.
profiles {
    // Runs `echo $HOSTNAME` before every process script.
    debug { process.beforeScript = 'echo $HOSTNAME' }
    // Conda environments; all container engines disabled.
    conda {
        conda.enabled          = true
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
    }
    // Docker containers, with quay.io set as the registry.
    docker {
        docker.enabled         = true
        docker.registry        = 'quay.io'
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
        conda.enabled          = false
    }
    // Singularity containers with autoMounts turned on.
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
        conda.enabled          = false
    }
    // Podman containers.
    podman {
        podman.enabled         = true
        docker.enabled         = false
        singularity.enabled    = false
        shifter.enabled        = false
        charliecloud.enabled   = false
        conda.enabled          = false
    }
    // Shifter containers.
    shifter {
        shifter.enabled        = true
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        charliecloud.enabled   = false
        conda.enabled          = false
    }
    // Charliecloud containers.
    charliecloud {
        charliecloud.enabled   = true
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        conda.enabled          = false
    }
    // Test profiles: each one pulls in the matching file under conf/.
    test       { includeConfig 'conf/test.config'       }
    test_umi   { includeConfig 'conf/test_umi.config'   }
    test_full  { includeConfig 'conf/test_full.config'  }
    test_demux { includeConfig 'conf/test_demux.config' }
}

// Load igenomes.config if required.
// When params.igenomes_ignore is true the include is skipped and params.genomes
// is set to an empty map instead, so lookups into params.genomes still have a map to hit.
if (!params.igenomes_ignore) {
    includeConfig 'conf/igenomes.config'
} else {
    params.genomes = [:]
}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container.
// The Julia depot path is pinned to the fixed location `/usr/local/share/julia`, which must be used for
// packages inside the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details. This can be adjusted once there
// is a common agreement on where to keep Julia packages.

env {
    PYTHONNOUSERSITE = 1
    R_PROFILE_USER   = "/.Rprofile"
    R_ENVIRON_USER   = "/.Renviron"
    JULIA_DEPOT_PATH = "/usr/local/share/julia"
}

// Capture exit codes from upstream processes when piping:
// process scripts are run by /bin/bash with the options `-euo pipefail`.
process.shell = ['/bin/bash', '-euo', 'pipefail']

// Execution reports. All four are enabled by default and written under params.tracedir.
// The timestamp is computed once, when this config is evaluated, and embedded in every
// file name, so the four files of one run share the same suffix.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
// Timeline report (HTML).
timeline {
    enabled = true
    file    = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
}
// Execution report (HTML).
report {
    enabled = true
    file    = "${params.tracedir}/execution_report_${trace_timestamp}.html"
}
// Trace file (plain text).
trace {
    enabled = true
    file    = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
}
// Pipeline DAG rendering (SVG).
dag {
    enabled = true
    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg"
}

// Pipeline metadata: name, author, home page, entry script, required Nextflow
// version and pipeline version.
manifest {
    name            = 'nf-core/radseq'
    author          = 'Gabriel Barrett'
    homePage        = 'https://github.com/nf-core/radseq'
    description     = 'variant calling pipeline for radseq'
    mainScript      = 'main.nf'
    // NOTE(review): the leading '!' is understood to make the version requirement
    // mandatory rather than advisory -- confirm against the Nextflow manifest docs.
    nextflowVersion = '!>=21.10.3'
    version         = '1.0dev'
}

// Load modules.config for DSL2 module specific options.
// It is included after the params, profiles and manifest sections above.
includeConfig 'conf/modules.config'

// Function to ensure that resource requirements don't go beyond
// a maximum limit.
//
// Arguments:
//   obj  - the requested resource value; it is compared with compareTo() for
//          'memory' and 'time', and passed to Math.min() for 'cpus'.
//   type - one of 'memory', 'time' or 'cpus'; selects which params.max_* ceiling applies.
//
// Returns the ceiling (params.max_memory / params.max_time converted to the matching
// nextflow.util type, or params.max_cpus as an int) when `obj` exceeds it, otherwise
// `obj` unchanged. If the configured ceiling cannot be converted or compared, an error
// line is printed and `obj` is returned as-is. Any other `type` yields null.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            def max_memory = params.max_memory as nextflow.util.MemoryUnit
            // compareTo() only promises the sign of its result, so test "> 0"
            // rather than "== 1".
            return obj.compareTo(max_memory) > 0 ? max_memory : obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            def max_time = params.max_time as nextflow.util.Duration
            // Same sign-only comparison as for memory.
            return obj.compareTo(max_time) > 0 ? max_time : obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // Unrecognised resource type: no limit is applied and nothing is returned
    // (this was the implicit result before; it is now spelled out).
    return null
}