diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..4ecfbfe3 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,28 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. 
+ "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig index cf88ed60..8719a7f9 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,13 +8,11 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,cff}] +[*.{md,yml,yaml,html,css,scss,js,cff}] indent_size = 2 -[*.nf.test] -insert_final_newline = false - -[{LICENSE,LEGAL.txt}] +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] charset = unset end_of_line = unset insert_final_newline = unset @@ -22,5 +20,17 @@ trim_trailing_whitespace = unset indent_style = unset indent_size = unset +[/assets/email*] +indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py}] +indent_style = unset + +# ignore perl [*.{pl,pm}] indent_size = unset diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..7a2dabc2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +*.config linguist-language=nextflow +*.nf.test linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 00000000..191fabd2 --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000..edd71437 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,114 @@ +# plant-food-research-open/assemblyqc: Contributing Guidelines + +Hi there! +Many thanks for taking an interest in improving plant-food-research-open/assemblyqc. + +We try to manage the required tasks for plant-food-research-open/assemblyqc using GitHub issues, you probably came to this page when creating one. 
+Please use the pre-filled template to save time. + +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) + +## Contribution workflow + +If you'd like to write some code for plant-food-research-open/assemblyqc, the standard workflow is as follows: + +1. Check that there isn't already an issue about your idea in the [plant-food-research-open/assemblyqc issues](https://github.com/plant-food-research-open/assemblyqc/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [plant-food-research-open/assemblyqc repository](https://github.com/plant-food-research-open/assemblyqc) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged + +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). + +## Tests + +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`. + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. 
+Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. +If there are any failures then the automated tests fail. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regretful event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly address this particular bug. + +## Pipeline contribution conventions + +To make the plant-food-research-open/assemblyqc code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. 
Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflows/ci.yml`. +9. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. 
+ +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/plant-food-research-open/assemblyqc/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](../.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..af436aa6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,54 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used + to launch the pipeline and the output from your terminal. + render: console + placeholder: "$ nextflow run ... + + + Some output where something broke + + " + - type: textarea + id: files + attributes: + label: Relevant files + description: "Please drag and drop the relevant files here. Create a `.zip` archive + if the extension is not allowed. 
+ + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file + in the directory where you launched the pipeline)_ as well as custom Nextflow + configuration files. + + " + - type: textarea + id: system + attributes: + label: System information + description: "* Nextflow version _(eg. 23.04.0)_ + + * Hardware _(eg. HPC, Desktop, Cloud)_ + + * Executor _(eg. slurm, local, awsbatch)_ + + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, + or Apptainer)_ + + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + + * Version of plant-food-research-open/assemblyqc _(eg. 1.1, 1.5, 1.8.2)_ + + " diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..6287e290 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the plant-food-research-open/assemblyqc pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..50053a1f --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,25 @@ + + +## PR checklist + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/plant-food-research-open/assemblyqc/tree/master/.github/CONTRIBUTING.md) +- [ ] Make sure your code lints (`nf-core lint`) and (`pre-commit run --all`) +- [ ] Ensure the test suite passes (`nextflow run . 
-profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 00000000..752ea1bf --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,44 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request_target: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'plant-food-research-open/assemblyqc' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name }} == plant-food-research-open/assemblyqc ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v2 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` 
branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..4ff4e1f8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,43 @@ +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +env: + NXF_ANSI_LOG: false + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: Run pipeline with test data + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'plant-food-research-open/assemblyqc') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.04.0" + RUN_CONFIG: + - "-profile test,docker" + - "-profile docker -c ./tests/stub/stub.config -stub" + - "-profile docker -params-file ./tests/invalid/params.json" + steps: + - name: Check out pipeline code + uses: actions/checkout@v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: ${{ matrix.NXF_VER }} + + - name: Run pipeline with test data + run: | + nextflow run ${GITHUB_WORKSPACE} --outdir ./results ${{ 
matrix.RUN_CONFIG }} diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..e37cfda5 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..69574376 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,67 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. 
+on: + workflow_dispatch: + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ["self-hosted"] + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${GITHUB_REF#refs/heads/}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..41963bec --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,48 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains 
the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'plant-food-research-open/assemblyqc' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v4 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + cache: "pip" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files || echo "status=fail" >> $GITHUB_ENV + + - name: Commit & push changes + if: env.status == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix linting with pre-commit" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 00000000..f807f5e2 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,69 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
+on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + cache: "pip" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + # nf-core: + # runs-on: ubuntu-latest + # steps: + # - name: Check out pipeline code + # uses: actions/checkout@v4 + + # - name: Install Nextflow + # uses: nf-core/setup-nextflow@v1 + + # - uses: actions/setup-python@v5 + # with: + # python-version: "3.11" + # architecture: "x64" + + # - name: Install dependencies + # run: | + # python -m pip install --upgrade pip + # pip install nf-core + + # - name: Run nf-core lint + # env: + # GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + # run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + # - name: Save PR number + # if: ${{ always() }} + # run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + # - name: Upload linting log file artifact + # if: ${{ always() }} + # uses: actions/upload-artifact@v4 + # with: + # name: linting-logs + # path: | + # lint_log.txt + # lint_results.md + # PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 00000000..147bcd10 --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: 
dawidd6/action-download-artifact@v3 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.gitignore b/.gitignore index 8698da13..00c62de8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,12 @@ -.DS_Store -*.pyc -__pycahce__ .nextflow* work/ +data/ results/ +.DS_Store +testing/ +testing* +*.pyc + +# PFR files *.stdout *.stderr diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 00000000..363d5b1d --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,22 @@ +image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - codezombiech.gitignore # Language support for .gitignore files + # - cssho.vscode-svgviewer # SVG viewer + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..61d19b4d 100644 --- a/.nf-core.yml +++ 
b/.nf-core.yml @@ -1 +1,24 @@ +lint: + files_exist: + - CODE_OF_CONDUCT.md + - assets/nf-core-assemblyqc_logo_light.png + - docs/images/nf-core-assemblyqc_logo_light.png + - docs/images/nf-core-assemblyqc_logo_dark.png + - .github/ISSUE_TEMPLATE/config.yml + - .github/workflows/awstest.yml + - .github/workflows/awsfulltest.yml + files_unchanged: + - CODE_OF_CONDUCT.md + - assets/nf-core-assemblyqc_logo_light.png + - docs/images/nf-core-assemblyqc_logo_light.png + - docs/images/nf-core-assemblyqc_logo_dark.png + - .github/ISSUE_TEMPLATE/bug_report.yml + multiqc_config: + - report_comment + nextflow_config: + - manifest.name + - manifest.homePage repository_type: pipeline +template: + prefix: plant-food-research-open + skip: [] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82855944..bc85d767 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,12 +3,18 @@ repos: rev: "v3.1.0" hooks: - id: prettier - - repo: https://github.com/psf/black - rev: 22.10.0 - hooks: - - id: black - repo: https://github.com/editorconfig-checker/editorconfig-checker.python - rev: 2.7.3 + rev: "2.7.3" hooks: - id: editorconfig-checker alias: ec + - repo: local + hooks: + - id: version_checks + name: Version checks + language: system + entry: > + ./version_check.sh + always_run: true + fail_fast: true + pass_filenames: false diff --git a/.prettierignore b/.prettierignore index e5fc455d..437d763d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,9 +1,12 @@ -# gitignore -.DS_Store -*.pyc -__pycahce__ +email_template.html +adaptivecard.json +slackreport.json .nextflow* work/ +data/ results/ -*.stdout -*.stderr +.DS_Store +testing/ +testing* +*.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index ded6ca14..dc2e51c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,46 @@ -# Change Log +# plant-food-research-open/assemblyqc: Changelog -## Version 1.3 (08-Feb-2023) +The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## 1.4 - [04-Mar-2024] + +### `Added` + +1. Now it is possible to skip FASTP and FASTQC for the HIC module +2. Renamed ASSEMBLY_QC workflow to ASSEMBLYQC +3. Now using nf-core/FASTA_EXPLORE_SEARCH_PLOT_TIDK +4. Now redirecting validation errors to AssemblyQC report +5. Simplified layout of CITATIONS.md file +6. Now using pfr/gff3_validate sub-workflow for gff3 validation +7. Now listing software versions from the versions.yml file +8. Replaced custom GUNZIP module with nf-core/gunzip +9. Replaced custom gt/stat with pfr/gt/stat +10. Replaced custom fasta_validator with nf-core/fastavalidator +11. Added pre-commit version checking +12. Now gt/stat reports extended stats and multiple distribution plots have been added to the report +13. Added a tools tab to the report which lists the tools used by the pipeline to create the report +14. Refactored and cleaned data flows for all the custom sub-workflows +15. Started using nf-core template +16. Started using semantic versioning +17. Moved all Python-dependent packages to 'docker.io/gallvp/python3npkgs:v0.6' + +### `Fixed` + +1. All modules are now emitting versioning information +2. Fixed a bug which caused LAI to run with null assembly fasta +3. Fixed FASTA_LTRRETRIEVER_LAI sub-workflow so that it respects `monoploid_ids` parameter. + +### `Dependencies` + +1. NextFlow!>=23.04.0 +2. nf-validation@1.1.3 + +### `Deprecated` + +1. Removed BIOCODE GFF3 STATS owing to its frequent failures + +## v1.3 [08-Feb-2024] 1. Docker engine is now also supported 2. Added Amazon Genomics CLI project file and a minimal test params file: [./docs/test_params/test_agc.json](./docs/test_params/test_agc.json) @@ -14,7 +54,7 @@ 10. BWA_INDEX_AND_MEM can now run for two days 11. 
Now using FASTQ_BWA_MEM_SAMBLASTER subworkflow to optimize SAM file transfer on AWS -## Version 1.2 (18-Dec-2023) +## v1.2 [18-Dec-2023] 1. Switched to apptainer from singularity 2. Now requiring Nextflow/23.04.4 @@ -35,21 +75,21 @@ For a ~600 MB assembly, EDTA (without sensitive flag) takes ~25 hours of compute | red5/v2.1 | 18.75 | 16.59 | | tair/v10 | 18.06 | 17.42 | -## Version 1.1 (09-Nov-2023) +## v1.1 [09-Nov-2023] 1. Now running kraken2 with a single cpu. 2. Now pulling containers from https://depot.galaxyproject.org/singularity/ -## Version 1.0.1 (07-Sep-2023) +## v1.0.1 [07-Sep-2023] 1. Now pipeline timeline, report, and trace are enabled by default. 2. Included `procps` package where needed to allow NextFlow to collect system statistics. -## Version 1 (25-Jul-2023) +## v1 [25-Jul-2023] -Same as Version 1 RC6c +Same as v1rc6c -## Version 1 (RC6c; 20-Jul-2023) +## v1rc6c [20-Jul-2023] 1. Added logic for the `-mono` parameter in LAI. This parameter allows correct LAI calculation for polyploid assemblies. 2. Fixed the typo in `assemblathon_stats` in nextflow.config. @@ -62,26 +102,26 @@ Same as Version 1 RC6c 9. (RC6c) Krona plot for Kraken2 now uses sequence length for abundance calculation. 10. Made ASSEMBLATHON_STATS robust to missing paths declared in the PATH variable. -## Version 1 (RC5; 22-Jun-2023) +## v1rc5 [22-Jun-2023] 1. Updated README in accordance with SPO Editor. 2. Added a note on LTR sequence identity in the nextflow.config. 3. Split MATLOCK_BAM2_JUICER module into MATLOCK_BAM2_JUICER and JUICER_SORT and using `--parallel` with `sort`. -## Version 1 (RC4; 15-Jun-2023) +## v1rc4 [15-Jun-2023] 1. Fixed a bug in the BIOCODE GFF3 STATS module which resulted in a cramped up plot of CDS vs mRNA counts. -## Version 1 (RC3; 14-Jun-2023) +## v1rc3 [14-Jun-2023] 1. Fixed a bug in the BIOCODE GFF3 STATS module which prevented it from processing valid gff3 files. -## Version 1 (RC2; 13-Jun-2023) +## v1rc2 [13-Jun-2023] 1. 
Added labels to the pipeline flowchart. 2. Update the README based on team feedback. -## Version 1 (RC1; 12-Jun-2023) +## v1rc1 [12-Jun-2023] 1. Added validation for fasta and gff3 files. 2. Added support for compressed files (fasta.gz, gff3.gz). @@ -98,22 +138,22 @@ Same as Version 1 RC6c 13. Added test configuration for a Transcriptome of a Nematode. 14. Now allowed up to 7 days for SYNTENY::DNADIFF based on recent evidence from two ~2.5 GB genomes. -## Version 0.10.9 (01-Jun-2023) +## v0.10.9 [01-Jun-2023] 1. CRITICAL: Fixed a bug in LAI::EDTA which prevented it from renaming fasta ids in case they were longer than 13 characters. -## Version 0.10.8 (30-May-2023) +## v0.10.8 [30-May-2023] 1. Now NCBI FCS Adaptor and NCBI FCS GX both run in parallel so that both contamination checks are part of the final report even if there is adaptor contamination. -## Version 0.10.7 (29-May-2023) +## v0.10.7 [29-May-2023] 1. CRITICAL: Fixed a bug in LAI::EDTA which prevented it from renaming fasta ids in case they were longer than 13 characters. 2. Now the HiC module does not require the storage_server parameter and the HiC contact map does not disappear when the report is moved across folders. 3. Further developed the tutorials section. 4. Improved presentation of tables for BUSCO and LAI in the report. -## Version 0.10.6 (25-May-2023) +## v0.10.6 [25-May-2023] 1. CRITICAL: Fixed a bug in LAI::EDTA which prevented it from renaming fasta ids in case they were longer than 13 characters. 2. CRITICAL: Fixed a bug in LAI::EDTA which prevented it from accessing the tmp directory. @@ -125,7 +165,7 @@ Same as Version 1 RC6c 8. Now only saving the renamed.ids.tsv instead of the whole fasta file from EDTA. 9. Now also saving the EDTA.intact.gff3 file as EDTA sometimes does not store all the annotations in the EDTA.TEanno.gff3 file. -## Version 0.10.5 (19-May-2023) +## v0.10.5 [19-May-2023] 1. 
CRITICAL: Fixed a bug in RUN_ASSEMBLY_VISUALIZER, HIC_QC introduced by the specification of the temporary directory in version 0.10.4. 2. MATLOCK_BAM2_JUICER now has a two-hour time limit. @@ -133,7 +173,7 @@ Same as Version 1 RC6c 4. Started adding detailed tutorials. 5. Now TIDK supports a filter by size parameter to filter out small contigs from its output. By default this filter is turned off. -## Version 0.10.4 (16-May-2023) +## v0.10.4 [16-May-2023] 1. Moved the main workflow into `workflows/assembly_qc.nf` so that it can be imported by other NextFlow pipelines. 2. Fixed a bug in synteny due to which the pipeline did not resume properly sometimes. @@ -143,7 +183,7 @@ Same as Version 1 RC6c 6. CRITICAL: Now explicitly setting the temporary directory to avoid "No space left" errors. This problem may have affected container build and NCBI FCS Adaptor/GX modules in the past. 7. Now reporting max_gap and min_bundle size in the report for improved readability. -## Version 0.10.3 (08-May-2023) +## v0.10.3 [08-May-2023] 1. Improved annotation of the config file. 2. Now using natural sort in the synteny color generator so that chr10's color is assigned after chr9's color. @@ -153,13 +193,13 @@ Same as Version 1 RC6c 6. Added GPLv3 license. 7. Now assembly tags in the dropdown menus of the report are in natural sort order. -## Version 0.10.2 (04-May-2023) +## v0.10.2 [04-May-2023] 1. Allowed 2 hours for DNADIFF and CIRCOS_BUNDLE_LINKS modules. 2. Contigs are now ordered by number on the synteny plot. 3. Added `color_by_contig` option to the synteny module along with a maximum contrast color generator. -## Version 0.10.1 (28-April-2023) +## v0.10.1 [28-April-2023] 1. Fixed a bug in the TIDK module which resulted in genome fasta file emptying in some cases. 2. Added a contributors section to README.md @@ -173,7 +213,7 @@ Same as Version 1 RC6c 10. Added the `plot_1_vs_all` option in the synteny module. 11.
Added `max_gap` and `min_bundle_size` options to the synteny module. -## Version 0.10 (20-April-2023) +## v0.10 [20-April-2023] 1. Added Synteny Analysis. 2. Added "-q" and "-qq" option to LAI. "-qq" is the default. @@ -186,27 +226,27 @@ Same as Version 1 RC6c 9. Allowed 8 hours for BWA MEM. 10. Fixed a bug in LAI where the output was not parsed correctly due to file name mismatch. -## Version 0.9 (31-Mar-2023) +## v0.9 [31-Mar-2023] 1. Added NCBI FCS GX module. 2. Added additional annotation to config file. 3. Removed unnecessary species argument in BUSCO module. 4. Moved NCBI FCS Adaptor/GX scripts to user home directory for sharing across pipeline downloads to different directories. -## Version 0.8 (29-Mar-2023) +## v0.8 [29-Mar-2023] 1. Now using system-wide DBs for BUSCO and KRAKEN2. 2. Added HiC Contact Map module. 3. Further simplified and annotated the config file. -## Version 0.7.2 (24-Mar-2023) +## v0.7.2 [24-Mar-2023] 1. Fixed a potential bug in ncbi fcs adaptor. 2. Fixed rm -f bug in KRAKEN2. 3. Added additional info for LAI 4. Fixed a few typos in the config file. -## Version 0.7.1 (23-Mar-2023) +## v0.7.1 [23-Mar-2023] 1. Fixed a bug in the slurm job submission script. 2. Fixed a bug in the ASSEMBLATHON_STATS module. @@ -214,7 +254,7 @@ Same as Version 1 RC6c 4. Now using uniform naming in the TIDK sub-workflow. 5. Max time for LAI now set to 2 hours. -## Version 0.7 (17-Mar-2023) +## v0.7 [17-Mar-2023] 1. Added Kraken2 and NCBI FCS Adaptor tools. 2. Added Assemblathon stats. @@ -225,13 +265,13 @@ Same as Version 1 RC6c 7. Fixed css styling browser conflicts 8. TIDK process now uses a container instead of conda. -## Version 0.6.1 (8-Mar-2023) +## v0.6.1 [8-Mar-2023] 1. Included results_dict and dependencies dict (without html formatting) to json. 2. Removed completed items in readme. 3. Fixed json dump repeating image url. -## Version 0.6 (17-Feb-2023) +## v0.6 [17-Feb-2023] 1. Added LAI. 2. Now sorting sequences by size before feeding to TIDK. 
@@ -239,15 +279,15 @@ Same as Version 1 RC6c 4. Added configuration annotations. 5. Optimised resource allocation. -## Version 0.5.1 +## v0.5.1 1. Changed report parsers to allow alphanumeric ([a-zA-Z0-9_]) characters in the haplotype names. -## Version 0.5 +## v0.5 1. Added TIDK -## Version 0.4 +## v0.4 1. Added ability to run BUSCO for multiple augustus species simultaneously 2. Formatted tabs into a drop down list for ease of navigation @@ -255,14 +295,14 @@ Same as Version 1 RC6c 4. BUSCO plots are now rendered on the summary page 5. Styling has been changed for better user experience -## Version 0.3 +## v0.3 1. Added ability to run BUSCO for multiple haplotypes simultaneously 2. Updated README for new functionality 3. Adjusted styling for easier comparisons between reports 4. Incorporated conda instead of python venv -## Version 0.2 +## v0.2 1. Added ability to run BUSCO for multiple lineages simultaneously 2. Removed intermediary outputDir diff --git a/CITATION.cff b/CITATION.cff index 97253e43..efb1b209 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -21,7 +21,7 @@ authors: - family-names: "Deng" given-names: "Cecilia" title: "AssemblyQC: A NextFlow pipeline for evaluating assembly quality" -version: 1.3 +version: 1.4 date-released: 2024-02-12 url: "https://github.com/Plant-Food-Research-Open/assembly_qc" doi: 10.5281/zenodo.10647870 diff --git a/CITATIONS.md b/CITATIONS.md index 00c1e7ad..63cbdd2b 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,107 +1,135 @@ -## Citations +# plant-food-research-open/assemblyqc: Citations -- nf-core/modules([MIT](https://github.com/nf-core/modules/blob/master/LICENSE)) +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) - > Ewels PA, Peltzer A, Fillinger S et al. 2020. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol 38, 276–278 (2020). doi: +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S.
The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. -- FASTA_VALIDATE ([MIT](https://github.com/GallVp/fasta_validator/blob/master/LICENSE)) - > - > - > Forked from: - > - > Edwards RA. 2019. fasta_validate: a fast and efficient fasta validator written in pure C. doi: -- GT_GFF3VALIDATOR ([ISC](http://genometools.org/license.html)) +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) - > Gremme G, Steinbiss S, Kurtz S. 2013. "GenomeTools: A Comprehensive Software Library for Efficient Processing of Structured Genome Annotations," in IEEE/ACM Transactions on Computational Biology and Bioinformatics, vol. 10, no. 3, pp. 645-656, May 2013, doi: . +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. - GT_GFF3VALIDATOR workflow also employs: +## Pipeline tools - - SAMTOOLS (1.16.1, [MIT/Expat](https://github.com/samtools/samtools/blob/develop/LICENSE)) - > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. 2021. Twelve years of SAMtools and BCFtools, GigaScience, Volume 10, Issue 2, February 2021, giab008, +- py_fasta_validator, [MIT](https://github.com/linsalrob/py_fasta_validator/blob/master/LICENSE) -- NCBI-FCS-ADAPTOR (0.4, [License](https://github.com/ncbi/fcs/blob/main/LICENSE.txt)) - > -- NCBI-FCS-GX (0.4, [License](https://github.com/ncbi/fcs/blob/main/LICENSE.txt)) + > Edwards, R.A. 2019. fasta_validate: a fast and efficient fasta validator written in pure C. doi: - > - > - > Astashyn A, Tvedte ES, Sweeney D, Sapojnikov V, Bouk N, Joukov V, Mozes E, Strope PK, Sylla PM, Wagner L, Bidwell SL, Clark K, Davis EW, Smith-White B, Hlavina W, Pruitt KD, Schneider VA, Murphy TD. 2023. 
bioRxiv 2023.06.02.543519; doi: +- GenomeTools, [ISC](http://genometools.org/license.html) + + > Gremme G, Steinbiss S, Kurtz S. 2013. "GenomeTools: A Comprehensive Software Library for Efficient Processing of Structured Genome Annotations," in IEEE/ACM Transactions on Computational Biology and Bioinformatics, vol. 10, no. 3, pp. 645-656, May 2013, doi: + +- SAMTOOLS, [MIT/Expat](https://github.com/samtools/samtools/blob/develop/LICENSE) + + > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. 2021. Twelve years of SAMtools and BCFtools, GigaScience, Volume 10, Issue 2, February 2021, giab008, - NCBI-FCS-GX workflow also employs: +- NCBI/FCS, [License](https://github.com/ncbi/fcs/blob/main/LICENSE.txt) - - KRONA (2.7.1, [License](https://github.com/marbl/Krona/blob/master/KronaTools/LICENSE.txt)) - > Ondov BD, Bergman NH, Phillippy AM. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics. 2011 Sep 30;12:385. doi: + > Astashyn A, Tvedte ES, Sweeney D, Sapojnikov V, Bouk N, Joukov V, Mozes E, Strope PK, Sylla PM, Wagner L, Bidwell SL, Clark K, Davis EW, Smith-White B, Hlavina W, Pruitt KD, Schneider VA, Murphy TD. 2023. Rapid and sensitive detection of genome contamination at scale with FCS-GX. bioRxiv 2023.06.02.543519; doi: + +- KRONA, [License](https://github.com/marbl/Krona/blob/master/KronaTools/LICENSE.txt) + + > Ondov BD, Bergman NH, Phillippy AM. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics. 2011 Sep 30;12:385. 
doi: + +- assemblathon_stats, [CC BY-NC-SA 3.0](https://creativecommons.org/licenses/by-nc-sa/3.0/) -- ASSEMBLATHON_STATS ([CC BY-NC-SA 3.0](https://creativecommons.org/licenses/by-nc-sa/3.0/)) > [github/PlantandFoodResearch/assemblathon2-analysis/a93cba2](https://github.com/PlantandFoodResearch/assemblathon2-analysis/blob/a93cba25d847434f7eadc04e63b58c567c46a56d/assemblathon_stats.pl) > > Forked from: -- GENOMETOOLS_GT_STAT (1.6.2, [ISC](http://genometools.org/license.html)): - > Gremme G, Steinbiss S, Kurtz S. 2013. "GenomeTools: A Comprehensive Software Library for Efficient Processing of Structured Genome Annotations," in IEEE/ACM Transactions on Computational Biology and Bioinformatics, vol. 10, no. 3, pp. 645-656, May 2013, doi: . -- BIOCODE_GFF3_STATS (0.10.0, [MIT](https://github.com/jorvis/biocode/blob/master/LICENSE)) - > -- BUSCO (5.2.2, [MIT](https://gitlab.com/ezlab/busco/-/blob/master/LICENSE)) + +- BUSCO, [MIT](https://gitlab.com/ezlab/busco/-/blob/master/LICENSE) + > Manni M, Berkeley MR, Seppey M, Simão FA, Zdobnov EM. 2021. BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes, Molecular Biology and Evolution, Volume 38, Issue 10, October 2021, Pages 4647–4654, -- TIDK (0.2.31, [MIT](https://github.com/tolkit/telomeric-identifier/blob/main/LICENSE)) + +- TIDK, [MIT](https://github.com/tolkit/telomeric-identifier/blob/main/LICENSE) > - TIDK workflow also employs: +- SEQKIT, [MIT](https://github.com/shenwei356/seqkit/blob/master/LICENSE) - - SEQKIT (2.3.1, [MIT](https://github.com/shenwei356/seqkit/blob/master/LICENSE)) - > Shen W, Le S, Li Y, Hu F. 2016. SeqKit: A Cross-Platform and Ultrafast Toolkit for FASTA/Q File Manipulation. PLoS ONE 11(10): e0163962. + > Shen W, Le S, Li Y, Hu F. 2016. SeqKit: A Cross-Platform and Ultrafast Toolkit for FASTA/Q File Manipulation. PLoS ONE 11(10): e0163962. 
-- LAI (beta3.2, [GPL v3](https://github.com/oushujun/LTR_retriever/blob/master/LICENSE)) +- LAI, [GPL v3](https://github.com/oushujun/LTR_retriever/blob/master/LICENSE) > Ou S, Chen J, Jiang N. 2018. Assessing genome assembly quality using the LTR Assembly Index (LAI), Nucleic Acids Research, Volume 46, Issue 21, 30 November 2018, Page e126, - LAI workflow also employs: +- LTR_FINDER_parallel, [MIT](https://github.com/oushujun/LTR_FINDER_parallel/blob/master/LICENSE) - - LTR_FINDER_parallel (1.2, [MIT](https://github.com/oushujun/LTR_FINDER_parallel/blob/master/LICENSE)) - > Ou S, Jiang N 2019. LTR_FINDER_parallel: parallelization of LTR_FINDER enabling rapid identification of long terminal repeat retrotransposons. Mobile DNA 10, 48 (2019). - - GT_LTRHARVEST (1.6.2, [ISC](http://genometools.org/license.html)) + > Ou S, Jiang N 2019. LTR_FINDER_parallel: parallelization of LTR_FINDER enabling rapid identification of long terminal repeat retrotransposons. Mobile DNA 10, 48 (2019). - > Gremme G, Steinbiss S, Kurtz S. 2013. "GenomeTools: A Comprehensive Software Library for Efficient Processing of Structured Genome Annotations," in IEEE/ACM Transactions on Computational Biology and Bioinformatics, vol. 10, no. 3, pp. 645-656, May 2013, doi: . +- LTRharvest, [ISC](http://genometools.org/license.html) - > Ellinghaus, D, Kurtz, S & Willhoeft, U 2008. LTRharvest, an efficient and flexible software for de novo detection of LTR retrotransposons. BMC Bioinformatics 9, 18 (2008). + > Ellinghaus, D, Kurtz, S & Willhoeft, U 2008. LTRharvest, an efficient and flexible software for de novo detection of LTR retrotransposons. BMC Bioinformatics 9, 18 (2008). - - LTR_retriever (2.9.0 [GPL v3](https://github.com/oushujun/LTR_retriever/blob/master/LICENSE)) - > Shujun O, Ning J 2018. LTR_retriever: A Highly Accurate and Sensitive Program for Identification of Long Terminal Repeat Retrotransposons, Plant Physiology, 176, 2 (2018). 
+- LTR_retriever, [GPL v3](https://github.com/oushujun/LTR_retriever/blob/master/LICENSE) -- KRAKEN2 (2.1.2, [MIT](https://github.com/DerrickWood/kraken2/blob/master/LICENSE)) + > Shujun O, Ning J 2018. LTR_retriever: A Highly Accurate and Sensitive Program for Identification of Long Terminal Repeat Retrotransposons, Plant Physiology, 176, 2 (2018). + +- KRAKEN2, [MIT](https://github.com/DerrickWood/kraken2/blob/master/LICENSE) > Wood DE, Salzberg SL, Wood DE, Lu J, Langmead B. 2019. Improved metagenomic analysis with Kraken 2. Genome Biol 20, 257 (2019). - KRAKEN2 workflow also employs: - - - KRONA (2.7.1, [License](https://github.com/marbl/Krona/blob/master/KronaTools/LICENSE.txt)) - > Ondov BD, Bergman NH, Phillippy AM. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics. 2011 Sep 30;12:385. doi: - -- HIC CONTACT MAP - - JUICEBOX.JS (2.4.3, [MIT](https://github.com/igvteam/juicebox.js/blob/master/LICENSE)) - > Robinson JT, Turner D, Durand NC, Thorvaldsdóttir H, Mesirov JP, Aiden EL. 2018. Juicebox.js Provides a Cloud-Based Visualization System for Hi-C Data. Cell Syst. 2018 Feb 28;6(2):256-258.e1. doi: . Epub 2018 Feb 7. PMID: 29428417; PMCID: PMC6047755. - - FASTP (0.23.2, [MIT](https://github.com/OpenGene/fastp/blob/master/LICENSE)) - > Chen S, Zhou Y, Chen Y, Gu J. 2018. fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018, Pages i884–i890, - - FASTQC (0.11.9, [GPL v3](https://github.com/s-andrews/FastQC/blob/master/LICENSE.txt)) - > - - RUN_ASSEMBLY_VISUALIZER (commit: 63029aa, [MIT](https://github.com/aidenlab/3d-dna/blob/master/LICENSE)) - > Dudchenko O, Batra SS, Omer AD, Nyquist SK, Hoeger M, Durand NC, Shamim MS, Machol I, Lander, Aiden AP, Aiden EL 2017. De novo assembly of the Aedes aegypti genome using Hi-C yields chromosome-length scaffolds.Science356, 92-95(2017). doi: . 
Available at: - - HIC_QC (commit: 6881c33, [AGPL v3](https://github.com/phasegenomics/hic_qc/blob/master/LICENSE)) - > - - JUICEBOX_SCRIPTS (commit: a7ae991, [AGPL v3](https://github.com/phasegenomics/juicebox_scripts/blob/master/LICENSE)) - > - - BWA (0.7.17, [GPL v3](https://github.com/lh3/bwa/blob/master/COPYING)) - > Li H. 2013. Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. - - MATLOCK ([AGPL v3](https://github.com/phasegenomics/matlock/blob/master/LICENSE)) - > ; - - SAMBLASTER (0.1.26, [MIT](https://github.com/GregoryFaust/samblaster/blob/master/LICENSE.txt)) - > Faust GG, Hall IM. 2014. SAMBLASTER: fast duplicate marking and structural variant read extraction, Bioinformatics, Volume 30, Issue 17, September 2014, Pages 2503–2505, - - SAMTOOLS (1.18, [MIT/Expat](https://github.com/samtools/samtools/blob/develop/LICENSE)) - > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. 2021. Twelve years of SAMtools and BCFtools, GigaScience, Volume 10, Issue 2, February 2021, giab008, -- SYNTENY - - CIRCOS (0.23-1, [GPL v3](https://www.gnu.org/licenses/gpl-3.0.txt)) - > Krzywinski M, Schein J, Birol I, Connors J, Gascoyne R. Horsman D, ... Marra MA. 2009. Circos: an information aesthetic for comparative genomics. Genome research, 19(9), 1639-1645. - - MUMMER (4.0.0, [Artistic 2.0](https://github.com/mummer4/mummer/blob/master/LICENSE.md)) - > Marçais G, Delcher AL, Phillippy AM, Coston R, Salzberg SL, Zimin A. 2018. MUMmer4: A fast and versatile genome alignment system. PLoS Comput Biol. 2018 Jan 26;14(1):e1005944. doi: . PMID: 29373581; PMCID: PMC5802927. - - SAMTOOLS (1.16.1, [MIT/Expat](https://github.com/samtools/samtools/blob/develop/LICENSE)) - > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, M Davies RM, Li H. 2021. 
Twelve years of SAMtools and BCFtools, GigaScience, Volume 10, Issue 2, February 2021, giab008, +- JUICEBOX.JS, [MIT](https://github.com/igvteam/juicebox.js/blob/master/LICENSE) + + > Robinson JT, Turner D, Durand NC, Thorvaldsdóttir H, Mesirov JP, Aiden EL. 2018. Juicebox.js Provides a Cloud-Based Visualization System for Hi-C Data. Cell Syst. 2018 Feb 28;6(2):256-258.e1. doi: . Epub 2018 Feb 7. PMID: 29428417; PMCID: PMC6047755. + +- FASTP, [MIT](https://github.com/OpenGene/fastp/blob/master/LICENSE) + + > Chen S, Zhou Y, Chen Y, Gu J. 2018. fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018, Pages i884–i890, + +- FASTQC, [GPL v3](https://github.com/s-andrews/FastQC/blob/master/LICENSE.txt) + + > + +- run-assembly-visualizer.sh, [MIT](https://github.com/aidenlab/3d-dna/blob/master/LICENSE) + + > Dudchenko O, Batra SS, Omer AD, Nyquist SK, Hoeger M, Durand NC, Shamim MS, Machol I, Lander ES, Aiden AP, Aiden EL 2017. De novo assembly of the Aedes aegypti genome using Hi-C yields chromosome-length scaffolds. Science 356, 92-95 (2017). doi: . Available at: + +- HIC_QC, [AGPL v3](https://github.com/phasegenomics/hic_qc/blob/master/LICENSE) + + > + +- JUICEBOX_SCRIPTS, [AGPL v3](https://github.com/phasegenomics/juicebox_scripts/blob/master/LICENSE) + + > + +- BWA, [GPL v3](https://github.com/lh3/bwa/blob/master/COPYING) + + > Li H. 2013. Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. + +- MATLOCK, [AGPL v3](https://github.com/phasegenomics/matlock/blob/master/LICENSE) + + > ; + +- SAMBLASTER, [MIT](https://github.com/GregoryFaust/samblaster/blob/master/LICENSE.txt) + + > Faust GG, Hall IM. 2014. SAMBLASTER: fast duplicate marking and structural variant read extraction, Bioinformatics, Volume 30, Issue 17, September 2014, Pages 2503–2505, + +- CIRCOS, [GPL v3](https://www.gnu.org/licenses/gpl-3.0.txt) + + > Krzywinski M, Schein J, Birol I, Connors J, Gascoyne R, Horsman D, ... Marra MA. 2009.
Circos: an information aesthetic for comparative genomics. Genome research, 19(9), 1639-1645. + +- MUMMER, [Artistic 2.0](https://github.com/mummer4/mummer/blob/master/LICENSE.md) + + > Marçais G, Delcher AL, Phillippy AM, Coston R, Salzberg SL, Zimin A. 2018. MUMmer4: A fast and versatile genome alignment system. PLoS Comput Biol. 2018 Jan 26;14(1):e1005944. doi: . PMID: 29373581; PMCID: PMC5802927. + +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/LEGAL.txt b/LEGAL.txt deleted file mode 100644 index 9e813c16..00000000 --- a/LEGAL.txt +++ /dev/null @@ -1,6083 +0,0 @@ -fasta_validator - -MIT License - -Copyright (c) 2019 Rob Edwards - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -=============================================================================== - -GenomeTools - -/* - Copyright (c) 2003-2016 G. Gremme, S. Steinbiss, S. Kurtz, and CONTRIBUTORS - Copyright (c) 2003-2016 Center for Bioinformatics, University of Hamburg - - Permission to use, copy, modify, and distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -=============================================================================== - -SAMTOOLS - -The MIT/Expat License - -Copyright (C) 2008-2023 Genome Research Ltd. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - - -[The use of a range of years within a copyright notice in this distribution -should be interpreted as being equivalent to a list of years including the -first and last year specified and all consecutive years between them. 
- -For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009, -2011-2012" should be interpreted as being identical to a notice that reads -"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice -that reads "Copyright (C) 2005-2012" should be interpreted as being identical -to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, -2011, 2012".] - -=============================================================================== - -NCBI FCS ADAPTOR -NCBI FCS GX - -LICENSING TERMS - -NCBI FCS tool suite - -This software authored by NCBI is a "United States -Government Work" under the terms of the United States Copyright -Act. It was written as part of the authors' official duties as United -States Government employees and thus cannot be copyrighted. This -software is freely available to the public for use. The National -Library of Medicine and the U.S. Government have not placed any -restriction on its use or reproduction. - -Although all reasonable efforts have been taken to ensure the accuracy -and reliability of the software and data, the NLM and the -U.S. Government do not and cannot warrant the performance or results -that may be obtained by using this software or data. The NLM and the -U.S. Government disclaim all warranties, express or implied, including -warranties of performance, merchantability or fitness for any -particular purpose. - -Please cite NCBI in any work or product based on this material. - -=============================================================================== - -KRONA - -PURPOSE - -Krona is a flexible tool for exploring the relative proportions of -hierarchical data, such as metagenomic classifications, using a -radial, space-filling display. It is implemented using HTML5 and -JavaScript, allowing charts to be explored locally or served over the -Internet, requiring only a current version of any major web -browser. 
Krona charts can be created using an Excel template or from -common bioinformatic formats using the provided conversion scripts. - - -COPYRIGHT LICENSE - -Copyright © 2011, Battelle National Biodefense Institute (BNBI); -all rights reserved. Authored by: Brian Ondov, Nicholas Bergman, and -Adam Phillippy - -This Software was prepared for the Department of Homeland Security -(DHS) by the Battelle National Biodefense Institute, LLC (BNBI) as -part of contract HSHQDC-07-C-00020 to manage and operate the National -Biodefense Analysis and Countermeasures Center (NBACC), a Federally -Funded Research and Development Center. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of the Battelle National Biodefense Institute nor - the names of its contributors may be used to endorse or promote - products derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -TRADEMARK LICENSE - -KRONA™ is a trademark of the Department of Homeland Security, and use -of the trademark is subject to the following conditions: - -* Distribution of the unchanged, official code/software using the - KRONA™ mark is hereby permitted by the Department of Homeland - Security, provided that the software is distributed without charge - and modification. - -* Distribution of altered source code/software using the KRONA™ mark - is not permitted unless written permission has been granted by the - Department of Homeland Security. - - -PATENTS - -The Krona software is not eligible for patent because there have been -previous and open implementations of this visualization method since -the year 2000 (e.g. "Focus+context display and navigation techniques -for enhancing radial, space-filling hierarchy visualizations." Stasko, -J. and Zhang, E. InfoVis, 2000). Krona represents a reimplementation and -application of an existing method to the new domain of -metagenomics. BNBI, as the copyright holders, wish to release Krona -freely and openly as a service to the bioinformatics community. 
- - -POINT OF CONTACT - -Todd Harrington -General Counsel -Battelle National Biodefense Institute -harringtont@nbacc.net - -=============================================================================== - -ASSEMBLATHON STATS - -Author: Keith Bradnam, Genome Center, UC Davis -This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. -This software is provided AS IS, without warranty of any kind. - -License - -THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. - -BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. - -1. Definitions - -"Adaptation" means a work based upon the Work, or upon the Work and other pre-existing works, such as a translation, adaptation, derivative work, arrangement of music or other alterations of a literary or artistic work, or phonogram or performance and includes cinematographic adaptations or any other form in which the Work may be recast, transformed, or adapted including in any form recognizably derived from the original, except that a work that constitutes a Collection will not be considered an Adaptation for the purpose of this License. For the avoidance of doubt, where the Work is a musical work, performance or phonogram, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered an Adaptation for the purpose of this License. 
-"Collection" means a collection of literary or artistic works, such as encyclopedias and anthologies, or performances, phonograms or broadcasts, or other works or subject matter other than works listed in Section 1(g) below, which, by reason of the selection and arrangement of their contents, constitute intellectual creations, in which the Work is included in its entirety in unmodified form along with one or more other contributions, each constituting separate and independent works in themselves, which together are assembled into a collective whole. A work that constitutes a Collection will not be considered an Adaptation (as defined above) for the purposes of this License. -"Distribute" means to make available to the public the original and copies of the Work or Adaptation, as appropriate, through sale or other transfer of ownership. -"License Elements" means the following high-level license attributes as selected by Licensor and indicated in the title of this License: Attribution, Noncommercial, ShareAlike. -"Licensor" means the individual, individuals, entity or entities that offer(s) the Work under the terms of this License. -"Original Author" means, in the case of a literary or artistic work, the individual, individuals, entity or entities who created the Work or if no individual or entity can be identified, the publisher; and in addition (i) in the case of a performance the actors, singers, musicians, dancers, and other persons who act, sing, deliver, declaim, play in, interpret or otherwise perform literary or artistic works or expressions of folklore; (ii) in the case of a phonogram the producer being the person or legal entity who first fixes the sounds of a performance or other sounds; and, (iii) in the case of broadcasts, the organization that transmits the broadcast. 
-"Work" means the literary and/or artistic work offered under the terms of this License including without limitation any production in the literary, scientific and artistic domain, whatever may be the mode or form of its expression including digital form, such as a book, pamphlet and other writing; a lecture, address, sermon or other work of the same nature; a dramatic or dramatico-musical work; a choreographic work or entertainment in dumb show; a musical composition with or without words; a cinematographic work to which are assimilated works expressed by a process analogous to cinematography; a work of drawing, painting, architecture, sculpture, engraving or lithography; a photographic work to which are assimilated works expressed by a process analogous to photography; a work of applied art; an illustration, map, plan, sketch or three-dimensional work relative to geography, topography, architecture or science; a performance; a broadcast; a phonogram; a compilation of data to the extent it is protected as a copyrightable work; or a work performed by a variety or circus performer to the extent it is not otherwise considered a literary or artistic work. -"You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation. 
-"Publicly Perform" means to perform public recitations of the Work and to communicate to the public those public recitations, by any means or process, including by wire or wireless means or public digital performances; to make available to the public Works in such a way that members of the public may access these Works from a place and at a place individually chosen by them; to perform the Work to the public by any means or process and the communication to the public of the performances of the Work, including by public digital performance; to broadcast and rebroadcast the Work by any means including signs, sounds or images. -"Reproduce" means to make copies of the Work by any means including without limitation by sound or visual recordings and the right of fixation and reproducing fixations of the Work, including storage of a protected performance or phonogram in digital form or other electronic medium. -2. Fair Dealing Rights. Nothing in this License is intended to reduce, limit, or restrict any uses free from copyright or rights arising from limitations or exceptions that are provided for in connection with the copyright protection under copyright law or other applicable laws. - -3. License Grant. Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below: - -to Reproduce the Work, to incorporate the Work into one or more Collections, and to Reproduce the Work as incorporated in the Collections; -to create and Reproduce Adaptations provided that any such Adaptation, including any translation in any medium, takes reasonable steps to clearly label, demarcate or otherwise identify that changes were made to the original Work. 
For example, a translation could be marked "The original work was translated from English to Spanish," or a modification could indicate "The original work has been modified."; -to Distribute and Publicly Perform the Work including as incorporated in Collections; and, -to Distribute and Publicly Perform Adaptations. -The above rights may be exercised in all media and formats whether now known or hereafter devised. The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. Subject to Section 8(f), all rights not expressly granted by Licensor are hereby reserved, including but not limited to the rights described in Section 4(e). - -4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions: - -You may Distribute or Publicly Perform the Work only under the terms of this License. You must include a copy of, or the Uniform Resource Identifier (URI) for, this License with every copy of the Work You Distribute or Publicly Perform. You may not offer or impose any terms on the Work that restrict the terms of this License or the ability of the recipient of the Work to exercise the rights granted to that recipient under the terms of the License. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties with every copy of the Work You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Work, You may not impose any effective technological measures on the Work that restrict the ability of a recipient of the Work from You to exercise the rights granted to that recipient under the terms of the License. This Section 4(a) applies to the Work as incorporated in a Collection, but this does not require the Collection apart from the Work itself to be made subject to the terms of this License. 
If You create a Collection, upon notice from any Licensor You must, to the extent practicable, remove from the Collection any credit as required by Section 4(d), as requested. If You create an Adaptation, upon notice from any Licensor You must, to the extent practicable, remove from the Adaptation any credit as required by Section 4(d), as requested. -You may Distribute or Publicly Perform an Adaptation only under: (i) the terms of this License; (ii) a later version of this License with the same License Elements as this License; (iii) a Creative Commons jurisdiction license (either this or a later license version) that contains the same License Elements as this License (e.g., Attribution-NonCommercial-ShareAlike 3.0 US) ("Applicable License"). You must include a copy of, or the URI, for Applicable License with every copy of each Adaptation You Distribute or Publicly Perform. You may not offer or impose any terms on the Adaptation that restrict the terms of the Applicable License or the ability of the recipient of the Adaptation to exercise the rights granted to that recipient under the terms of the Applicable License. You must keep intact all notices that refer to the Applicable License and to the disclaimer of warranties with every copy of the Work as included in the Adaptation You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Adaptation, You may not impose any effective technological measures on the Adaptation that restrict the ability of a recipient of the Adaptation from You to exercise the rights granted to that recipient under the terms of the Applicable License. This Section 4(b) applies to the Adaptation as incorporated in a Collection, but this does not require the Collection apart from the Adaptation itself to be made subject to the terms of the Applicable License. 
-You may not exercise any of the rights granted to You in Section 3 above in any manner that is primarily intended for or directed toward commercial advantage or private monetary compensation. The exchange of the Work for other copyrighted works by means of digital file-sharing or otherwise shall not be considered to be intended for or directed toward commercial advantage or private monetary compensation, provided there is no payment of any monetary compensation in connection with the exchange of copyrighted works. -If You Distribute, or Publicly Perform the Work or any Adaptations or Collections, You must, unless a request has been made pursuant to Section 4(a), keep intact all copyright notices for the Work and provide, reasonable to the medium or means You are utilizing: (i) the name of the Original Author (or pseudonym, if applicable) if supplied, and/or if the Original Author and/or Licensor designate another party or parties (e.g., a sponsor institute, publishing entity, journal) for attribution ("Attribution Parties") in Licensor's copyright notice, terms of service or by other reasonable means, the name of such party or parties; (ii) the title of the Work if supplied; (iii) to the extent reasonably practicable, the URI, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and, (iv) consistent with Section 3(b), in the case of an Adaptation, a credit identifying the use of the Work in the Adaptation (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author").
The credit required by this Section 4(d) may be implemented in any reasonable manner; provided, however, that in the case of an Adaptation or Collection, at a minimum such credit will appear, if a credit for all contributing authors of the Adaptation or Collection appears, then as part of these credits and in a manner at least as prominent as the credits for the other contributing authors. For the avoidance of doubt, You may only use the credit required by this Section for the purpose of attribution in the manner set out above and, by exercising Your rights under this License, You may not implicitly or explicitly assert or imply any connection with, sponsorship or endorsement by the Original Author, Licensor and/or Attribution Parties, as appropriate, of You or Your use of the Work, without the separate, express prior written permission of the Original Author, Licensor and/or Attribution Parties. -For the avoidance of doubt: - -Non-waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; -Waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License if Your exercise of such rights is for a purpose or use which is otherwise than noncommercial as permitted under Section 4(c) and otherwise waives the right to collect royalties through any statutory or compulsory licensing scheme; and, -Voluntary License Schemes.
The Licensor reserves the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License that is for a purpose or use which is otherwise than noncommercial as permitted under Section 4(c). -Except as otherwise agreed in writing by the Licensor or as may be otherwise permitted by applicable law, if You Reproduce, Distribute or Publicly Perform the Work either by itself or as part of any Adaptations or Collections, You must not distort, mutilate, modify or take other derogatory action in relation to the Work which would be prejudicial to the Original Author's honor or reputation. Licensor agrees that in those jurisdictions (e.g. Japan), in which any exercise of the right granted in Section 3(b) of this License (the right to make Adaptations) would be deemed to be a distortion, mutilation, modification or other derogatory action prejudicial to the Original Author's honor and reputation, the Licensor will waive or not assert, as appropriate, this Section, to the fullest extent permitted by the applicable national law, to enable You to reasonably exercise Your right under Section 3(b) of this License (right to make Adaptations) but not otherwise. -5. Representations, Warranties and Disclaimer - -UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING AND TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. 
SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO THIS EXCLUSION MAY NOT APPLY TO YOU. - -6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -7. Termination - -This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Adaptations or Collections from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License. -Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above. -8. Miscellaneous - -Each time You Distribute or Publicly Perform the Work or a Collection, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License. -Each time You Distribute or Publicly Perform an Adaptation, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License. 
-If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. -No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent. -This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. This License may not be modified without the mutual written agreement of the Licensor and You. -The rights granted under, and the subject matter referenced, in this License were drafted utilizing the terminology of the Berne Convention for the Protection of Literary and Artistic Works (as amended on September 28, 1979), the Rome Convention of 1961, the WIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 and the Universal Copyright Convention (as revised on July 24, 1971). These rights and subject matter take effect in the relevant jurisdiction in which the License terms are sought to be enforced according to the corresponding provisions of the implementation of those treaty provisions in the applicable national law. If the standard suite of rights granted under applicable copyright law includes additional rights not granted under this License, such additional rights are deemed to be included in the License; this License is not intended to restrict the license of any rights under applicable law. 
- -=============================================================================== - -BIOCODE GFF3 STATS - -MIT License - -Copyright (c) 2016 Joshua Orvis - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -=============================================================================== - -BUSCO - -The MIT License (MIT) - -Copyright (c) 2016-2023, Evgeny Zdobnov (ez@ezlab.org) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -=============================================================================== - -TIDK - -Copyright 2023 Max Brown, Wellcome Sanger Institute - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -=============================================================================== - -SEQKIT - -The MIT License (MIT) - -Copyright © 2016-2019 Wei Shen, 2019 Oxford Nanopore Technologies. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -=============================================================================== - -LAI - - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. 
You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. 
Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. 
- - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. 
- - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. 
Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. 
- - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. 
If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. 
- - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. 
- - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the 
material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. 
If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. 
- - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - {one line to give the program's name and a brief idea of what it does.} - Copyright (C) {year} {name of author} - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - {project} Copyright (C) {year} {fullname} - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>.
- -=============================================================================== - -EDTA - - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights.
- - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. 
- - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. 
- - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. 
Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. 
- - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. 
This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - {one line to give the program's name and a brief idea of what it does.} - Copyright (C) {year} {name of author} - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - {project} Copyright (C) {year} {fullname} - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>.
- -=============================================================================== - -KRAKEN2 - -The MIT License (MIT) - -Copyright (c) 2017-2023 Derrick Wood - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -=============================================================================== - -JUICEBOX - -MIT License - -Copyright (c) 2016-2019 The Regents of the University of California - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -=============================================================================== - -FASTP - -MIT License - -Copyright (c) 2016 OpenGene - Open Source Genetics Toolbox - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -=============================================================================== - -FASTQC - - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. 
- - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. 
- - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. 
The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
- - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<https://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<https://www.gnu.org/licenses/why-not-lgpl.html>.
- -=============================================================================== - -RUN ASSEMBLY VISUALIZER - -MIT License - -Copyright (c) 2018 Aiden Lab - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -=============================================================================== - -HIC QC - - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works.
By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. 
This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. 
- - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. 
However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. 
- - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. 
- - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. 
- - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code.
There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -<https://www.gnu.org/licenses/>. - -=============================================================================== - -JUICEBOX SCRIPTS - - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software.
- - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. 
The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. 
The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
- - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. 
- - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code.
There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -<https://www.gnu.org/licenses/>. - -=============================================================================== - -BWA - - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights.
Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. 
To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. 
If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. 
For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. 
This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<https://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<https://www.gnu.org/licenses/why-not-lgpl.html>.
- -=============================================================================== - -MATLOCK - - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about.
-The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. 
- - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. 
The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
- - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. 
- - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code.
There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -<https://www.gnu.org/licenses/>. - -=============================================================================== - -SAMBLASTER - -The MIT License (MIT) - -Copyright (c) 2013-2020 Gregory G. Faust - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -=============================================================================== - -CIRCOS - - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed.
- - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. 
For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. 
The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. 
The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
- - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<https://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<https://www.gnu.org/licenses/why-not-lgpl.html>.
- -=============================================================================== - -MUMMER - -The Artistic License 2.0 -======================== - -_Copyright © 2000-2006, The Perl Foundation._ - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. - -### Preamble - -This license establishes the terms under which a given free software -Package may be copied, modified, distributed, and/or redistributed. -The intent is that the Copyright Holder maintains some artistic -control over the development of that Package while still keeping the -Package available as open source and free software. - -You are always permitted to make arrangements wholly outside of this -license directly with the Copyright Holder of a given Package. If the -terms of this license do not permit the full use that you propose to -make of the Package, you should contact the Copyright Holder and seek -a different licensing arrangement. - -### Definitions - -“Copyright Holder” means the individual(s) or organization(s) -named in the copyright notice for the entire Package. - -“Contributor” means any party that has contributed code or other -material to the Package, in accordance with the Copyright Holder's -procedures. - -“You” and “your” means any person who would like to copy, -distribute, or modify the Package. - -“Package” means the collection of files distributed by the -Copyright Holder, and derivatives of that collection and/or of -those files. A given Package may consist of either the Standard -Version, or a Modified Version. - -“Distribute” means providing a copy of the Package or making it -accessible to anyone else, or in the case of a company or -organization, to others outside of your company or organization. - -“Distributor Fee” means any fee that you charge for Distributing -this Package or providing support for this Package to another -party. It does not mean licensing fees. 
- -“Standard Version” refers to the Package if it has not been -modified, or has been modified only in ways explicitly requested -by the Copyright Holder. - -“Modified Version” means the Package, if it has been changed, and -such changes were not explicitly requested by the Copyright -Holder. - -“Original License” means this Artistic License as Distributed with -the Standard Version of the Package, in its current version or as -it may be modified by The Perl Foundation in the future. - -“Source” form means the source code, documentation source, and -configuration files for the Package. - -“Compiled” form means the compiled bytecode, object code, binary, -or any other form resulting from mechanical transformation or -translation of the Source form. - - -### Permission for Use and Modification Without Distribution - -**(1)** You are permitted to use the Standard Version and create and use -Modified Versions for any purpose without restriction, provided that -you do not Distribute the Modified Version. - - -### Permissions for Redistribution of the Standard Version - -**(2)** You may Distribute verbatim copies of the Source form of the -Standard Version of this Package in any medium without restriction, -either gratis or for a Distributor Fee, provided that you duplicate -all of the original copyright notices and associated disclaimers. At -your discretion, such verbatim copies may or may not include a -Compiled form of the Package. - -**(3)** You may apply any bug fixes, portability changes, and other -modifications made available from the Copyright Holder. The resulting -Package will still be considered the Standard Version, and as such -will be subject to the Original License. 
- - -### Distribution of Modified Versions of the Package as Source - -**(4)** You may Distribute your Modified Version as Source (either gratis -or for a Distributor Fee, and with or without a Compiled form of the -Modified Version) provided that you clearly document how it differs -from the Standard Version, including, but not limited to, documenting -any non-standard features, executables, or modules, and provided that -you do at least ONE of the following: - -* **(a)** make the Modified Version available to the Copyright Holder -of the Standard Version, under the Original License, so that the -Copyright Holder may include your modifications in the Standard -Version. -* **(b)** ensure that installation of your Modified Version does not -prevent the user installing or running the Standard Version. In -addition, the Modified Version must bear a name that is different -from the name of the Standard Version. -* **(c)** allow anyone who receives a copy of the Modified Version to -make the Source form of the Modified Version available to others -under - * **(i)** the Original License or - * **(ii)** a license that permits the licensee to freely copy, - modify and redistribute the Modified Version using the same - licensing terms that apply to the copy that the licensee - received, and requires that the Source form of the Modified - Version, and of any works derived from it, be made freely - available in that license fees are prohibited but Distributor - Fees are allowed. - - -### Distribution of Compiled Forms of the Standard Version -### or Modified Versions without the Source - -**(5)** You may Distribute Compiled forms of the Standard Version without -the Source, provided that you include complete instructions on how to -get the Source of the Standard Version. Such instructions must be -valid at the time of your distribution. 
If these instructions, at any -time while you are carrying out such distribution, become invalid, you -must provide new instructions on demand or cease further distribution. -If you provide valid instructions or cease distribution within thirty -days after you become aware that the instructions are invalid, then -you do not forfeit any of your rights under this license. - -**(6)** You may Distribute a Modified Version in Compiled form without -the Source, provided that you comply with Section 4 with respect to -the Source of the Modified Version. - - -### Aggregating or Linking the Package - -**(7)** You may aggregate the Package (either the Standard Version or -Modified Version) with other packages and Distribute the resulting -aggregation provided that you do not charge a licensing fee for the -Package. Distributor Fees are permitted, and licensing fees for other -components in the aggregation are permitted. The terms of this license -apply to the use and Distribution of the Standard or Modified Versions -as included in the aggregation. - -**(8)** You are permitted to link Modified and Standard Versions with -other works, to embed the Package in a larger work of your own, or to -build stand-alone binary or bytecode versions of applications that -include the Package, and Distribute the result without restriction, -provided the result does not expose a direct interface to the Package. - - -### Items That are Not Considered Part of a Modified Version - -**(9)** Works (including, but not limited to, modules and scripts) that -merely extend or make use of the Package, do not, by themselves, cause -the Package to be a Modified Version. In addition, such works are not -considered parts of the Package itself, and are not subject to the -terms of this license. - - -### General Provisions - -**(10)** Any use, modification, and distribution of the Standard or -Modified Versions is governed by this Artistic License. 
By using, -modifying or distributing the Package, you accept this license. Do not -use, modify, or distribute the Package, if you do not accept this -license. - -**(11)** If your Modified Version has been derived from a Modified -Version made by someone other than you, you are nevertheless required -to ensure that your Modified Version complies with the requirements of -this license. - -**(12)** This license does not grant you the right to use any trademark, -service mark, tradename, or logo of the Copyright Holder. - -**(13)** This license includes the non-exclusive, worldwide, -free-of-charge patent license to make, have made, use, offer to sell, -sell, import and otherwise transfer the Package with respect to any -patent claims licensable by the Copyright Holder that are necessarily -infringed by the Package. If you institute patent litigation -(including a cross-claim or counterclaim) against any party alleging -that the Package constitutes direct or contributory patent -infringement, then this Artistic License to you shall terminate on the -date that such litigation is filed. - -**(14)** **Disclaimer of Warranty:** - -THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS -IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED -WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR -NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL -LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL -DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE b/LICENSE index f288702d..96e3eb88 100644 --- a/LICENSE +++ b/LICENSE @@ -1,674 +1,21 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. 
- Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. 
- - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. 
- - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. 
The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
- - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<https://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<https://www.gnu.org/licenses/why-not-lgpl.html>.
+MIT License + +Copyright (c) Usman Rashid, Ken Smith, Ross Crowhurst, Chen Wu, Marcus Davy + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/MANIFEST.json b/MANIFEST.json deleted file mode 100644 index 7a525a2c..00000000 --- a/MANIFEST.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "mainWorkflowURL": "main.nf", - "inputFileURLs": ["./docs/test_params/test_agc.json"], - "engineOptions": "-resume" -} diff --git a/README.md b/README.md index 7b126557..ba0154c5 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,15 @@ -# AssemblyQC - -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10647870.svg)](https://doi.org/10.5281/zenodo.10647870) - -- [AssemblyQC](#assemblyqc) - - [Introduction](#introduction) - - [Pipeline Flowchart](#pipeline-flowchart) - - [Running the Pipeline](#running-the-pipeline) - - [Minimal Test Run](#minimal-test-run) - - [Quick Start for Plant \& Food Research Users](#quick-start-for-plant--food-research-users) - - [Post-run clean-up](#post-run-clean-up) - - [AssemblyQC Report](#assemblyqc-report) - - [Known Issues](#known-issues) - - [Contributors](#contributors) - - [Citations](#citations) +[![GitHub Actions CI Status](https://github.com/plant-food-research-open/assemblyqc/workflows/nf-core%20CI/badge.svg)](https://github.com/plant-food-research-open/assemblyqc/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/plant-food-research-open/assemblyqc/workflows/nf-core%20linting/badge.svg)](https://github.com/plant-food-research-open/assemblyqc/actions?query=workflow%3A%22nf-core+linting%22)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10647870-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10647870) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![run with conda ❌](http://img.shields.io/badge/run%20with-conda%20❌-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/plant-food-research-open/assemblyqc) ## Introduction -AssemblyQC is a [NextFlow](https://www.nextflow.io/docs/latest/index.html) pipeline which evaluates assembly quality with well-established tools and presents the results in a unified html report. The tools are shown in the [Pipeline Flowchart](#pipeline-flowchart) and their version are listed in [CITATIONS.md](./CITATIONS.md). +**plant-food-research-open/assemblyqc** is a [Nextflow](https://www.nextflow.io/docs/latest/index.html) pipeline which evaluates assembly quality with multiple QC tools and presents the results in a unified HTML report. The tools are shown in the [Pipeline Flowchart](#pipeline-flowchart) and their versions are listed in [CITATIONS.md](./CITATIONS.md).
## Pipeline Flowchart @@ -35,7 +28,6 @@ flowchart LR Skip --> REPORT VALIDATE_FORMAT --> GFF_STATS[GENOMETOOLS GT STAT] - VALIDATE_FORMAT --> GFF_STATS_II[BIOCODE GFF3 STATS] Run --> ASS_STATS[ASSEMBLATHON STATS] Run --> BUSCO @@ -48,7 +40,6 @@ flowchart LR ASS_STATS --> REPORT GFF_STATS --> REPORT - GFF_STATS_II --> REPORT BUSCO --> REPORT TIDK --> REPORT LAI --> REPORT @@ -60,74 +51,80 @@ flowchart LR - [FASTA VALIDATION](https://github.com/GallVp/fasta_validator) - [GFF3 VALIDATION](https://github.com/genometools/genometools) - [ASSEMBLATHON STATS](https://github.com/PlantandFoodResearch/assemblathon2-analysis/blob/a93cba25d847434f7eadc04e63b58c567c46a56d/assemblathon_stats.pl): Assembly statistics -- [GENOMETOOLS GT STAT](https://github.com/genometools/genometools)/[BIOCODE GFF3 STATS](https://github.com/jorvis/biocode): Annotation statistics +- [GENOMETOOLS GT STAT](https://github.com/genometools/genometools): Annotation statistics - [NCBI FCS ADAPTOR](https://github.com/ncbi/fcs): Adaptor contamination pass/fail - [NCBI FCS GX](https://github.com/ncbi/fcs): Foreign organism contamination pass/fail - [BUSCO](https://gitlab.com/ezlab/busco/-/tree/master): Gene-space completeness estimation - [TIDK](https://github.com/tolkit/telomeric-identifier): Telomere repeat identification - [LAI](https://github.com/oushujun/LTR_retriever/blob/master/LAI): Continuity of repetitive sequences -- [LAI::LTRRETRIEVER](https://github.com/oushujun/LTR_retriever): Repeat identification - [KRAKEN2](https://github.com/DerrickWood/kraken2): Taxonomy classification - [HIC CONTACT MAP](https://github.com/igvteam/juicebox-web): Alignment and visualisation of HiC data - SYNTENY: Synteny analysis using [MUMMER](https://github.com/mummer4/mummer) and [CIRCOS](http://circos.ca/documentation/) -## Running the Pipeline - -See the [tutorials](./docs/README.md) for detailed instructions on how to use the pipeline. 
The pipeline can be executed on a range of executors including AWS, LSF, Slurm, and others supported by [NextFlow](https://www.nextflow.io/docs/latest/executor.html#executors). - -### Minimal Test Run +## Usage -```bash -nextflow main.nf \ - -profile local,docker \ - -c conf/test_minimal.config -``` +Refer to the [usage](./docs/usage.md), [parameters](./docs/parameters.md) and [output](./docs/output.md) documents for details. -### Quick Start for Plant & Food Research Users +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. -To run the pipeline, first edit the nextflow.config. The following parameters must be checked and modified accordingly: +Prepare an `assemblysheet.csv` file with the following columns representing target assemblies and associated meta-data.
See an example [assemblysheet.csv](./assets/assemblysheet.csv). -- target_assemblies -- assembly_gff3 -- assemblathon_stats::n_limit -- ncbi_fcs_adaptor::empire -- ncbi_fcs_gx::tax_id -- busco::lineage_datasets -- busco::mode -- tidk::repeat_seq -- hic::paired_reads -- synteny::assembly_seq_list -- synteny::xref_assemblies +- `tag:` A unique tag which represents the target assembly throughout the pipeline and in the final report +- `fasta:` FASTA file +- `gff3 [Optional]:` GFF3 annotation file if available +- `monoploid_ids [Optional]:` A txt file listing the IDs used to calculate LAI in monoploid mode if necessary +- `synteny_labels [Optional]:` A two column tsv file listing fasta sequence ids (first column) and labels for the synteny plots (second column) when performing synteny analysis -Then, the pipeline should be posted to Slurm for execution with the following command: +Now, you can run the pipeline using: ```bash -sbatch ./pfr_assemblyqc +nextflow run plant-food-research-open/assemblyqc \ + -profile <docker/singularity/.../institute> \ + --input assemblysheet.csv \ + --outdir <OUTDIR> ``` -### Post-run clean-up +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). + +### Quick Start for Plant&Food Users -The intermediary files produced by the pipeline are stored in the "work" folder. After running the pipeline, if you wish to clean up the logs and "work" folder, run the following command: +Download the pipeline to your `/workspace/$USER` folder. Change the parameters defined in the [pfr/params.json](./pfr/params.json) file. Submit the pipeline to SLURM for execution.
```bash -./cleanNXF.sh +sbatch ./pfr_assemblyqc ``` -## AssemblyQC Report +## Credits -Once the pipeline has finished execution, the results folder specified in the config file should contain a file named 'report.html'. The 'report.html' is a standalone file for all the modules except HiC and Kraken2. Thus, if you move the report to another folder, make sure to also move the 'hic' folder and the 'kraken2' folder with it. +plant-food-research-open/assemblyqc was originally written by Usman Rashid and Ken Smith. Ross Crowhurst, Chen Wu and Marcus Davy generously contributed their QC scripts. -## Known Issues +We thank the following people for their extensive assistance in the development of this pipeline: -- On its first run, the pipeline has to download a lot many software containers. This download may fail. If it happens, resume the pipeline and it should be able to download the required containers. -- The pipeline may fail more frequently at building singularity containers when the temporary directory is not the system "/tmp" directory. 
+- Cecilia Deng [@CeciliaDeng](https://github.com/CeciliaDeng) +- Chen Wu [@christinawu2008](https://github.com/christinawu2008) +- Jason Shiller [@jasonshiller](https://github.com/jasonshiller) +- Marcus Davy [@mdavy86](https://github.com/mdavy86) +- Ross Crowhurst [@rosscrowhurst](https://github.com/rosscrowhurst) +- Susan Thomson [@cflsjt](https://github.com/cflsjt) +- Ting-Hsuan Chen [@ting-hsuan-chen](https://github.com/ting-hsuan-chen) -## Contributors +## Contributions and Support -Cecilia Deng [@CeciliaDeng](https://github.com/CeciliaDeng), Chen Wu [@christinawu2008](https://github.com/christinawu2008), Jason Shiller [@jasonshiller](https://github.com/jasonshiller), Ken Smith [@hzlnutspread](https://github.com/hzlnutspread), Marcus Davy [@mdavy86](https://github.com/mdavy86), Ross Crowhurst [@rosscrowhurst](https://github.com/rosscrowhurst), Susan Thomson [@cflsjt](https://github.com/cflsjt), Ting-Hsuan Chen [@ting-hsuan-chen](https://github.com/ting-hsuan-chen), Usman Rashid [@GallVp](https://github.com/GallVp) +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). ## Citations -For a comprehensive list of references and versions for the tools, see [CITATIONS.md](./CITATIONS.md). +If you use plant-food-research-open/assemblyqc for your analysis, please cite it using the following doi: [10.5281/zenodo.10647870](https://doi.org/10.5281/zenodo.10647870) + +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. + +This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). -> Rashid, U., Wu, C., Shiller, J., Smith, K., Crowhurst, R., Davy, M., Chen, T.-H., Thomson, S., & Deng, C. (2024). AssemblyQC: A NextFlow pipeline for evaluating assembly quality (1.3). Zenodo. 
https://doi.org/10.5281/zenodo.10647870 +> **The nf-core framework for community-curated bioinformatics pipelines.** +> +> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. +> +> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). diff --git a/agc-project.yaml b/agc-project.yaml deleted file mode 100644 index 7d7cc54d..00000000 --- a/agc-project.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: AssemblyQC -schemaVersion: 1 -workflows: - PFR_ASSEMBLY_QC: - type: - language: nextflow - version: 1.0 - sourceURL: ./ -contexts: - CtxAssemblyQC: - instanceTypes: - - "c5.large" - - "c5.2xlarge" - - "c5.4xlarge" - - "m5.2xlarge" - - "m5.4xlarge" - - "r5.2xlarge" - - "r5.4xlarge" - - "r5.8xlarge" # process_high_memory (200.GB) -> { 32, 256 } - - "r5.24xlarge" # process_very_high_memory (512.GB) -> { 96, 768 } - engines: - - type: nextflow - engine: nextflow - requestSpotInstances: true diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..6477ced0 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "plant-food-research-open/assemblyqc v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed 
successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/assemblysheet.csv b/assets/assemblysheet.csv new file mode 100644 index 00000000..06b8a783 --- /dev/null +++ b/assets/assemblysheet.csv @@ -0,0 +1,2 @@ +tag,fasta,gff3,monoploid_ids,synteny_labels +FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/FI1.monoploid.seqs.txt,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/FI1.seq.labels.tsv diff --git a/assets/email_template.html b/assets/email_template.html new file mode 100644 index 00000000..5a56215f --- /dev/null +++ b/assets/email_template.html @@ -0,0 +1,53 @@ + + + + + + + + plant-food-research-open/assemblyqc Pipeline Report + + +
+ + + +

plant-food-research-open/assemblyqc ${version}

+

Run Name: $runName

+ +<% if (!success){ + out << """ +
+

plant-food-research-open/assemblyqc execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+ """ +} else { + out << """ +
+ plant-food-research-open/assemblyqc execution completed successfully! +
+ """ +} +%> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
+ +

plant-food-research-open/assemblyqc

+

https://github.com/plant-food-research-open/assemblyqc

+ +
+ + + diff --git a/assets/email_template.txt b/assets/email_template.txt new file mode 100644 index 00000000..720fd719 --- /dev/null +++ b/assets/email_template.txt @@ -0,0 +1,31 @@ +Run Name: $runName + +<% if (success){ + out << "## plant-food-research-open/assemblyqc execution completed successfully! ##" +} else { + out << """#################################################### +## plant-food-research-open/assemblyqc execution completed unsuccessfully! ## +#################################################### +The exit status of the task that caused the workflow execution to fail was: $exitStatus. +The full error message was: + +${errorReport} +""" +} %> + + +The workflow was completed at $dateComplete (duration: $duration) + +The command used to launch the workflow was as follows: + + $commandLine + + + +Pipeline Configuration: +----------------------- +<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> + +-- +plant-food-research-open/assemblyqc +https://github.com/plant-food-research-open/assemblyqc diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 00000000..2aab4188 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,54 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/master/assets/schema_input.json", + "title": "plant-food-research-open/assemblyqc pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "tag": { + "type": "string", + "pattern": "^\\w+$", + "errorMessage": "Assembly tags must be provided and can only contain alphanumeric characters including '_'" + }, + "fasta": { + "type": "string", + "pattern": "^\\S+\\.f(a|asta|as|sa|na)(\\.gz)?$", + "errorMessage": "FASTA file path cannot contain spaces and must have extension '.f(a|asta|as|sa|na)' or '.f(a|asta|as|sa|na).gz'" + }, + "gff3": { + 
"type": "string", + "pattern": "^\\S+\\.gff(3)?(\\.gz)?$", + "errorMessage": "GFF3 file path cannot contain spaces and must have extension '.gff.gz', '.gff3.gz', '.gff' or '.gff3'" + }, + "monoploid_ids": { + "errorMessage": "Monoploid IDs txt file path cannot contain spaces and must have extension '.txt'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.txt$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "synteny_labels": { + "errorMessage": "Synteny labels tsv path cannot contain spaces and must have extension '.tsv'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.tsv$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + } + }, + "required": ["tag", "fasta"] + } +} diff --git a/assets/schema_xref_assemblies.json b/assets/schema_xref_assemblies.json new file mode 100644 index 00000000..47ea1ed0 --- /dev/null +++ b/assets/schema_xref_assemblies.json @@ -0,0 +1,32 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/master/assets/schema_xref_assemblies.json", + "title": "plant-food-research-open/assemblyqc pipeline - params.synteny_xref_assemblies schema", + "description": "Schema for the file provided with params.synteny_xref_assemblies", + "type": "array", + "items": { + "type": "object", + "properties": { + "tag": { + "type": "string", + "pattern": "^\\w+$", + "errorMessage": "Assembly tags must be provided and can only contain alphanumeric characters including '_'" + }, + "fasta": { + "type": "string", + "pattern": "^\\S+\\.f(a|asta|as|sa|na)(\\.gz)?$", + "errorMessage": "FASTA file path cannot contain spaces and must have extension '.f(a|asta|as|sa|na)' or '.f(a|asta|as|sa|na).gz'" + }, + "synteny_labels": { + "errorMessage": "Synteny labels tsv path cannot contain spaces and must have extension '.tsv'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.tsv$" + } + ] + } + }, + "required": ["tag", "fasta", "synteny_labels"] 
+ } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt new file mode 100644 index 00000000..0ba5f393 --- /dev/null +++ b/assets/sendmail_template.txt @@ -0,0 +1,53 @@ +To: $email +Subject: $subject +Mime-Version: 1.0 +Content-Type: multipart/related;boundary="nfcoremimeboundary" + +--nfcoremimeboundary +Content-Type: text/html; charset=utf-8 + +$email_html + +--nfcoremimeboundary +Content-Type: image/png;name="plant-food-research-open-assemblyqc_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; filename="plant-food-research-open-assemblyqc_logo_light.png" + +<% out << new File("$projectDir/assets/plant-food-research-open-assemblyqc_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). 
+ join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..2786203f --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "plant-food-research-open/assemblyqc ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/assets/xrefsheet.csv b/assets/xrefsheet.csv new file mode 100644 index 00000000..5c218707 --- /dev/null +++ b/assets/xrefsheet.csv @@ -0,0 +1,2 @@ +tag,fasta,synteny_labels +TT_2021a,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/950/295/GCA_021950295.1_ASM2195029v1/GCA_021950295.1_ASM2195029v1_genomic.fna.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/TT_2021a.seq.labels.tsv diff --git a/bin/FAlite_943e0fb.pm b/bin/FAlite_a93cba2.pm similarity index 80% rename from bin/FAlite_943e0fb.pm rename to bin/FAlite_a93cba2.pm index 262728b4..bebd8ed5 100644 --- a/bin/FAlite_943e0fb.pm +++ b/bin/FAlite_a93cba2.pm @@ -1,15 +1,15 @@ -package FAlite_943e0fb; +package FAlite_a93cba2; use strict; sub new { my ($class, $fh) = @_; if (ref $fh !~ /GLOB/) - {die ref $fh, "\n", "FAlite_943e0fb ERROR: expect a GLOB reference\n"} + {die ref $fh, "\n", "FAlite_a93cba2 ERROR: expect a GLOB reference\n"} my $this = bless {}; $this->{FH} = $fh; while(<$fh>) {last if $_ =~ /\S/} # not supposed to have blanks, but... 
my $firstline = $_; - if (not defined $firstline) {warn "FAlite_943e0fb: Empty\n"; return $this} - if ($firstline !~ /^>/) {warn "FAlite_943e0fb: Not FASTA formatted\n"; return $this} + if (not defined $firstline) {warn "FAlite_a93cba2: Empty\n"; return $this} + if ($firstline !~ /^>/) {warn "FAlite_a93cba2: Not FASTA formatted\n"; return $this} $this->{LASTLINE} = $firstline; chomp $this->{LASTLINE}; return $this; @@ -32,11 +32,11 @@ sub nextEntry { } return 0 if $lines_read == 0; chomp @seq; - my $entry = FAlite_943e0fb::Entry::new($def, \@seq); + my $entry = FAlite_a93cba2::Entry::new($def, \@seq); return $entry; } -package FAlite_943e0fb::Entry; +package FAlite_a93cba2::Entry; use overload '""' => 'all'; sub new { my ($def, $seqarry) = @_; @@ -56,12 +56,12 @@ __END__ =head1 NAME -FAlite_943e0fb; +FAlite_a93cba2; =head1 SYNOPSIS - use FAlite_943e0fb; - my $fasta = new FAlite_943e0fb(\*STDIN); + use FAlite_a93cba2; + my $fasta = new FAlite_a93cba2(\*STDIN); while(my $entry = $fasta->nextEntry) { $entry->def; $entry->seq; @@ -69,7 +69,7 @@ FAlite_943e0fb; =head1 DESCRIPTION -FAlite_943e0fb is a package for parsing FASTA files and databases. The FASTA format is +FAlite_a93cba2 is a package for parsing FASTA files and databases. The FASTA format is widely used in bioinformatics. It consists of a definition line followed by sequence with an arbitrary number of lines and line lengths. @@ -89,9 +89,9 @@ A FASTA database looks like this: =head2 Object -FAlite_943e0fb has two kinds of objects, the file and the entry. +FAlite_a93cba2 has two kinds of objects, the file and the entry. 
- my $fasta_file = new FAlite_943e0fb(\*STDIN); # or any other filehandle + my $fasta_file = new FAlite_a93cba2(\*STDIN); # or any other filehandle $entry = $fasta_file->nextEntry; # single fasta fle while(my $entry = $fasta_file->nextEntry) { # canonical form of use for fasta database diff --git a/bin/assemblathon_stats_943e0fb.pl b/bin/assemblathon_stats_a93cba2.pl similarity index 99% rename from bin/assemblathon_stats_943e0fb.pl rename to bin/assemblathon_stats_a93cba2.pl index 002d121d..d10ba565 100755 --- a/bin/assemblathon_stats_943e0fb.pl +++ b/bin/assemblathon_stats_a93cba2.pl @@ -10,7 +10,7 @@ use strict; use warnings; -use FAlite_943e0fb; +use FAlite_a93cba2; use Getopt::Long; use List::Util qw(sum max min); @@ -118,7 +118,7 @@ sub process_FASTA{ open($input, "<", "$seqs") or die "Can't open $seqs\n"; } - my $fasta = new FAlite_943e0fb(\*$input); + my $fasta = new FAlite_a93cba2(\*$input); # want to keep track of various contig + scaffold counts my $seq_count = 0; diff --git a/bin/assembly_2_bedpe_943e0fb.py b/bin/assembly2bedpe.py similarity index 100% rename from bin/assembly_2_bedpe_943e0fb.py rename to bin/assembly2bedpe.py diff --git a/bin/assembly_qc_report_943e0fb.py b/bin/assemblyqc.py similarity index 66% rename from bin/assembly_qc_report_943e0fb.py rename to bin/assemblyqc.py index a862be8d..fc42eeed 100755 --- a/bin/assembly_qc_report_943e0fb.py +++ b/bin/assemblyqc.py @@ -1,6 +1,15 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import json +import yaml + +from report_modules.report_printer import ReportPrinter + +from report_modules.parsers.params_parser import parse_params_json +from report_modules.parsers.tools_parser import parse_tools_yaml + +from report_modules.parsers.gff3_validate_parser import parse_gff3_validate_folder +from report_modules.parsers.fasta_validate_parser import parse_fasta_validate_folder from report_modules.parsers.ncbi_fcs_adaptor_parser import parse_ncbi_fcs_adaptor_folder from 
report_modules.parsers.ncbi_fcs_gx_parser import parse_ncbi_fcs_gx_folder @@ -10,28 +19,28 @@ from report_modules.parsers.genometools_gt_stat_parser import ( parse_genometools_gt_stat_folder, ) -from report_modules.parsers.biocode_gff3_stats_parser import ( - parse_biocode_gff3_stats_folder, -) from report_modules.parsers.busco_parser import parse_busco_folder from report_modules.parsers.tidk_parser import parse_tidk_folder from report_modules.parsers.lai_parser import parse_lai_folder from report_modules.parsers.kraken2_parser import parse_kraken2_folder from report_modules.parsers.hic_parser import parse_hic_folder from report_modules.parsers.circos_parser import parse_circos_folder -from report_modules.report_printer import ReportPrinter -from report_modules.parsers.params_parser import parse_params_json if __name__ == "__main__": - params_dict, params_table = parse_params_json() + params_dict, params_table = parse_params_json("params_json.json") + params_summary_dict, params_summary_table = parse_params_json( + "params_summary_json.json" + ) + tools_dict, tools_table = parse_tools_yaml() data_from_tools = {} + data_from_tools = {**data_from_tools, **parse_gff3_validate_folder()} + data_from_tools = {**data_from_tools, **parse_fasta_validate_folder()} data_from_tools = {**data_from_tools, **parse_ncbi_fcs_adaptor_folder()} data_from_tools = {**data_from_tools, **parse_ncbi_fcs_gx_folder()} data_from_tools = {**data_from_tools, **parse_assemblathon_stats_folder()} data_from_tools = {**data_from_tools, **parse_genometools_gt_stat_folder()} - data_from_tools = {**data_from_tools, **parse_biocode_gff3_stats_folder()} data_from_tools = {**data_from_tools, **parse_busco_folder()} data_from_tools = {**data_from_tools, **parse_tidk_folder()} data_from_tools = {**data_from_tools, **parse_lai_folder()} @@ -39,25 +48,21 @@ data_from_tools = {**data_from_tools, **parse_hic_folder()} data_from_tools = {**data_from_tools, **parse_circos_folder()} + with 
open("software_versions.yml", "r") as f: + versions_from_ch_versions = yaml.safe_load(f) + data_from_tools = { - **data_from_tools, - "VERSIONS": { - "SELF": "v1.3", - "NCBI_FCS_ADAPTOR": "0.4", - "NCBI_FCS_GX": "0.4", - "ASSEMBLATHON_STATS": "github/PlantandFoodResearch/assemblathon2-analysis/a93cba2", - "GENOMETOOLS_GT_STAT": "1.6.2", - "BIOCODE_GFF3_STATS": "0.10.0", - "BUSCO": "5.2.2", - "TIDK": "0.2.31", - "LAI": "beta3.2", - "KRAKEN2": "2.1.2", - "HIC": "2.4.3", - "CIRCOS": "0.23-1", - "MUMMER": "4.0.0", - }, "PARAMS_DICT": params_dict, "PARAMS_TABLE": params_table, + "PARAMS_SUMMARY_DICT": params_summary_dict, + "PARAMS_SUMMARY_TABLE": params_summary_table, + "TOOLS_DICT": tools_dict, + "TOOLS_TABLE": tools_table, + "VERSIONS": { + **versions_from_ch_versions, + "JUICEBOX_JS": "2.4.3", + }, + **data_from_tools, } report_printer = ReportPrinter() diff --git a/bin/check_gff3_fasta_corresp_3031aca.sh b/bin/check_gff3_fasta_corresp_3031aca.sh deleted file mode 100755 index f302be2b..00000000 --- a/bin/check_gff3_fasta_corresp_3031aca.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash - -## Inputs -fasta_file="$1" -gff3_file="$2" - -# Requires -# samtools faidx - -## STEP 1 -# Check that gff3 has no identifers that are not in fasta (fasta can -# have ids that are not in gff3 since not all assembly units have gff3 records - -# Extract identifiers from the GFF3 file -gff3_identifiers=$(grep -v '^#' "$gff3_file" | awk '{print $1}' | sort -u) - -# Extract identifiers from the FASTA file -fasta_identifiers=$(grep '^>' "$fasta_file" | awk '{print substr($1, 2)}' | sort -u) - -# Compare identifiers and find any that are present in the GFF3 but not in the FASTA -missing_identifiers=$(comm -23 <(echo "$gff3_identifiers") <(echo "$fasta_identifiers")) - -# Check if any missing identifiers were found -if [[ -n "$missing_identifiers" ]]; then - echo "Failed to validate gff3 file for: $tag_label" - echo "Fasta file: $fasta_file" - echo "Gff3 file: $gff3_file" - echo "GFF3 
file contains identifiers not present in FASTA:" - echo "$missing_identifiers" - exit 1 -fi - -## STEP 2 -# check that there are no coordiantes in gff3 for any seqid that are -# greater than the seq length of the paretn fasta entry - -# Compute sequence lengths using samtools faidx -samtools faidx "$fasta_file" | cut -f 1,2 > sequence_lengths.txt - -# Check GFF3 file for coordinates exceeding sequence lengths -while IFS=$'\t' read -r seqname source feature start end score strand frame attributes && \ - read -r seq seq_length <&3; do - if [[ $start -gt $seq_length || $end -gt $seq_length ]]; then - echo "Failed to validate gff3 file for: $tag_label" - echo "Fasta file: $fasta_file" - echo "Gff3 file: $gff3_file" - echo "Coordinates exceed sequence length in GFF3 file:" - echo "Sequence: $seqname" - echo "Sequence length: $seq_length" - echo "Start: $start" - echo "End: $end" - exit 1 - fi -done < "$gff3_file" 3< "sequence_lengths.txt" diff --git a/bin/color_circos_bundles_by_contig_943e0fb.py b/bin/colorbundlesbycontig.py similarity index 100% rename from bin/color_circos_bundles_by_contig_943e0fb.py rename to bin/colorbundlesbycontig.py diff --git a/bin/add_color_2_circos_bundle_file_943e0fb.pl b/bin/colorbundlesbysize.pl similarity index 100% rename from bin/add_color_2_circos_bundle_file_943e0fb.pl rename to bin/colorbundlesbysize.pl diff --git a/bin/count_t2t_complete_scaffolds_6fdcd87.py b/bin/count_t2t_complete_scaffolds_6fdcd87.py deleted file mode 100755 index d8810b68..00000000 --- a/bin/count_t2t_complete_scaffolds_6fdcd87.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python - -import pandas as pd -import sys - - -def get_combined_repeat_number(data): - data["combined_repeat_number"] = ( - data["forward_repeat_number"] + data["reverse_repeat_number"] - ) - return data - - -def check_edges(data): - largest_indices = data["combined_repeat_number"].nlargest(2).index - - if len(largest_indices) < 2: - return data["id"].iloc[0], False, data["window"].max() 
- - if largest_indices[0] == data.index[0] and largest_indices[1] == data.index[-1]: - return data["id"].iloc[0], True, data["window"].max() - elif largest_indices[0] == data.index[-1] and largest_indices[1] == data.index[0]: - return data["id"].iloc[0], True, data["window"].max() - else: - return data["id"].iloc[0], False, data["window"].max() - - -def count_t2t_complete_scaffolds(tidk_tsv_file_path): - tidk_tsv_as_pd = pd.read_csv(tidk_tsv_file_path, sep="\t") - - grouped_data = tidk_tsv_as_pd.groupby("id") - ids_with_checks_lens = [] - for _, group in grouped_data: - group = get_combined_repeat_number(group) - - ids_with_checks_lens.append(check_edges(group)) - - count_MB = sum( - [ - 1 if check and length > 1000_000 else 0 - for (_, check, length) in ids_with_checks_lens - ] - ) - count_KB = sum( - [ - 1 if check and length > 1000 else 0 - for (_, check, length) in ids_with_checks_lens - ] - ) - print( - f"Number of T2T complete scaffolds: {count_MB} (> 1 Mbp), {count_KB} (> 1 Kbp)" - ) - - -if __name__ == "__main__": - tidk_tsv_file_path = sys.argv[1] - count_t2t_complete_scaffolds(tidk_tsv_file_path) diff --git a/bin/hic_2_html_fc62f04.py b/bin/hic2html.py similarity index 100% rename from bin/hic_2_html_fc62f04.py rename to bin/hic2html.py diff --git a/bin/report_modules/parsers/biocode_gff3_stats_parser.py b/bin/report_modules/parsers/biocode_gff3_stats_parser.py deleted file mode 100644 index 01925375..00000000 --- a/bin/report_modules/parsers/biocode_gff3_stats_parser.py +++ /dev/null @@ -1,214 +0,0 @@ -import os -import re -import base64 -from pathlib import Path -from tabulate import tabulate -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt -import math - -from report_modules.parsers.parsing_commons import sort_list_of_results - - -def parse_gff3_statistics(file_lines): - general_stats = [] - cds_stats = [] - - read_lines = 0 - for line in file_lines: - read_lines += 1 - - if line.startswith("Skipping feature"): - continue - - 
if line == "\n" or len(line) < 1: - continue - - if line.startswith("# CDS fragment composition profile"): - break - - line_components = line.split("\t") - - # Return None as the parsing assumptions are not valid anymore. - # The file is not parsable. - if len(line_components) != 2: - return None - - key = line_components[0] - value = line_components[1] - - if key == "Assembly length": - continue - - general_stats.append((key, int(round(float(value))))) - - for line in file_lines[read_lines - 1 :]: - if line.startswith("# CDS fragment composition profile"): - continue - - if line == "\n" or len(line) < 1: - continue - - key, value, percentage = line.split("\t") - cds_stats.append((int(key.split(" ")[2]), int(value), float(percentage))) - - general_stats_table = pd.DataFrame(general_stats, columns=["Metric", "Value"]) - cds_stats_table = pd.DataFrame( - cds_stats, columns=["CDS Count", "mRNA Count", "Percentage"] - ) - - return general_stats_table, cds_stats_table - - -def create_bar_graph(cds_stats_table, file_name): - _, ax = plt.subplots() - ax.bar(cds_stats_table["CDS Count"], cds_stats_table["mRNA Count"]) - - ax.set_xlabel("CDS Count") - ax.set_ylabel("mRNA Count") - ax.set_title("CDS fragment composition profile") - - num_ticks = 16.0 - min_x = float(min(cds_stats_table["CDS Count"])) - max_x = float(max(cds_stats_table["CDS Count"])) - setp_x = math.ceil((max_x - min_x) / num_ticks) - plt.xticks(np.arange(int(min_x), int(max_x) + setp_x, setp_x)) - - plt.gca().spines["top"].set_visible(False) - plt.gca().spines["right"].set_visible(False) - - offset = 0.01 * max(cds_stats_table["mRNA Count"]) - - if len(cds_stats_table["CDS Count"]) <= 24: - plt.yticks([]) - plt.ylim(0, max(cds_stats_table["mRNA Count"]) * 1.2) - - for i, value in enumerate(cds_stats_table["mRNA Count"]): - plt.text( - cds_stats_table["CDS Count"].iloc[i], - value + offset, - str(value), - ha="center", - va="bottom", - rotation="vertical", - ) - - plt.gca().spines["left"].set_visible(False) 
- else: - num_ticks = 10.0 - min_y = float(min(cds_stats_table["mRNA Count"])) - max_y = float(max(cds_stats_table["mRNA Count"])) - setp_y = math.ceil((max_y - min_y) / num_ticks) - plt.yticks(np.arange(int(min_y), int(max_y) + setp_y, setp_y)) - - max_y = cds_stats_table["mRNA Count"].max() - max_y_i = cds_stats_table["mRNA Count"].idxmax() - x_for_max_of_y = cds_stats_table["CDS Count"].iloc[max_y_i] - - plt.text( - x_for_max_of_y, - max_y + offset, - f"Max: {str(max_y)}", - ha="left", - va="baseline", - rotation="horizontal", - ) - - plt.savefig(file_name, dpi=600) - - -def read_file_lines(file_path): - with open(file_path, "r") as f: - file_lines = f.readlines() - - return file_lines - - -def parse_biocode_gff3_stats_folder(folder_name="biocode_gff3_stats"): - dir = os.getcwdb().decode() - reports_folder_path = Path(f"{dir}/{folder_name}") - - if not os.path.exists(reports_folder_path): - return {} - - list_of_report_files = reports_folder_path.glob("*.csv") - - data = {"BIOCODE_GFF3_STATS": []} - - for report_path in list_of_report_files: - file_lines = read_file_lines(report_path) - - file_tag = re.findall( - r"([\w]+)_stats.csv", - os.path.basename(str(report_path)), - )[0] - - parsed_stats = parse_gff3_statistics(file_lines) - - if parsed_stats == None: - data["BIOCODE_GFF3_STATS"].append( - { - "hap": file_tag, - "general_stats_table": {}, - "cds_stats_table": {}, - "general_stats_table_html": '
'
-                    + "\n".join(
-                        ["Failed to parse the BIOCODE GFF3 STATS output:\n\n"]
-                        + file_lines
-                    )
-                    + "
", - "cds_plot": "", - } - ) - continue - - general_stats_table = parsed_stats[0] - cds_stats_table = parsed_stats[1] - - plot_path = f"./{folder_name}/{os.path.basename(report_path)}.png" - create_bar_graph(cds_stats_table, plot_path) - - general_stats_metric = general_stats_table.iloc[:, 0].values.tolist() - general_stats_values = general_stats_table.iloc[:, 1].values.tolist() - - cds_stats_metric = cds_stats_table.iloc[:, 0].values.tolist() - cds_stats_values = cds_stats_table.iloc[:, 1].values.tolist() - cds_stats_percentages = cds_stats_table.iloc[:, 2].values.tolist() - - general_stats_dict = { - f"{x}": f"{y}" for (x, y) in zip(general_stats_metric, general_stats_values) - } - cds_stats_dict = { - f"{x}": [f"{y}", f"{z}"] - for (x, y, z) in zip( - cds_stats_metric, cds_stats_values, cds_stats_percentages - ) - } - - with open(plot_path, "rb") as f: - binary_fc = f.read() - - base64_utf8_str = base64.b64encode(binary_fc).decode("utf-8") - ext = str(plot_path).split(".")[-1] - plot_url = f"data:image/{ext}+xml;base64,{base64_utf8_str}" - - data["BIOCODE_GFF3_STATS"].append( - { - "hap": file_tag, - "general_stats_table": general_stats_dict, - "cds_stats_table": cds_stats_dict, - "general_stats_table_html": tabulate( - general_stats_table, - headers=["Metric", "Value"], - tablefmt="html", - numalign="left", - showindex=False, - ), - "cds_plot": plot_url, - } - ) - - return { - "BIOCODE_GFF3_STATS": sort_list_of_results(data["BIOCODE_GFF3_STATS"], "hap") - } diff --git a/bin/report_modules/parsers/busco_parser.py b/bin/report_modules/parsers/busco_parser.py index 22dcff27..74c6fb05 100644 --- a/bin/report_modules/parsers/busco_parser.py +++ b/bin/report_modules/parsers/busco_parser.py @@ -118,13 +118,14 @@ def get_deps_and_versions_dict(self, file_data): def get_busco_result_table(self, file_data): list_of_lines = file_data.split("\n") for index, line in enumerate(list_of_lines): - if "Dependencies and versions" in line: - dev_dep_index = index + if "Assembly 
Statistics" in line: + stats_index = index + break results_dict = {} for index, line in enumerate(list_of_lines): if "C:" in line: - for i in range(index + 1, dev_dep_index - 1): + for i in range(index + 1, stats_index - 1): number = list_of_lines[i].split("\t")[1] descr = list_of_lines[i].split("\t")[2] @@ -140,13 +141,13 @@ def get_busco_result_table(self, file_data): def get_busco_result_dict(self, file_data): list_of_lines = file_data.split("\n") for index, line in enumerate(list_of_lines): - if "Dependencies and versions" in line: - dev_dep_index = index + if "Assembly Statistics" in line: + stats_index = index results_dict = {} for index, line in enumerate(list_of_lines): if "C:" in line: - for i in range(index + 1, dev_dep_index - 1): + for i in range(index + 1, stats_index - 1): number = list_of_lines[i].split("\t")[1] descr = list_of_lines[i].split("\t")[2] diff --git a/bin/report_modules/parsers/fasta_validate_parser.py b/bin/report_modules/parsers/fasta_validate_parser.py new file mode 100644 index 00000000..786978ec --- /dev/null +++ b/bin/report_modules/parsers/fasta_validate_parser.py @@ -0,0 +1,35 @@ +import os +from pathlib import Path +import re + +from report_modules.parsers.parsing_commons import sort_list_of_results + + +def parse_fasta_validate_folder(folder_name="fastavalidator_logs"): + dir = os.getcwdb().decode() + logs_folder_path = Path(f"{dir}/{folder_name}") + + if not os.path.exists(logs_folder_path): + return {} + + list_of_log_files = logs_folder_path.glob("*.log") + + data = {"FASTA_VALIDATE": []} + + for log_path in list_of_log_files: + with open(log_path, "r") as f: + log_lines = [f"

{l}

" for l in f.readlines()] + + file_tokens = re.findall( + r"([\w]+).error.log", + os.path.basename(str(log_path)), + )[0] + + data["FASTA_VALIDATE"].append( + { + "hap": file_tokens, + "validation_log": "".join(log_lines), + } + ) + + return {"FASTA_VALIDATE": sort_list_of_results(data["FASTA_VALIDATE"], "hap")} diff --git a/bin/report_modules/parsers/genometools_gt_stat_parser.py b/bin/report_modules/parsers/genometools_gt_stat_parser.py index 4fc80402..a4e4e6f4 100644 --- a/bin/report_modules/parsers/genometools_gt_stat_parser.py +++ b/bin/report_modules/parsers/genometools_gt_stat_parser.py @@ -3,6 +3,9 @@ import pandas as pd from tabulate import tabulate import re +import matplotlib.pyplot as plt +import numpy as np +import base64 from report_modules.parsers.parsing_commons import sort_list_of_results @@ -14,37 +17,328 @@ def parse_genometools_gt_stat_folder(folder_name="genometools_gt_stat"): if not os.path.exists(reports_folder_path): return {} - list_of_report_files = reports_folder_path.glob("*.csv") + list_of_report_files = reports_folder_path.glob("*.yml") data = {"GENOMETOOLS_GT_STAT": []} for report_path in list_of_report_files: - report_table = pd.read_csv(report_path) - stat_names = report_table.iloc[:, 0].values.tolist() - stat_values = report_table.iloc[:, 1].values.tolist() + NUM_GROUPS = -1 + ( + report_table_dict, + gene_length_distribution, + gene_score_distribution, + exon_length_distribution, + exon_number_distribution, + intron_length_distribution, + cds_length_distribution, + ) = extract_report_data(report_path, NUM_GROUPS) - report_table_dict = {f"{x}": f"{y}" for (x, y) in zip(stat_names, stat_values)} + gene_length_distribution_graph = "" + if gene_length_distribution != []: + gene_length_distribution_graph = create_dist_graph( + gene_length_distribution, + "Length", + "Gene Length Distribution", + f"./{folder_name}/{os.path.basename(report_path)}.gene.length.png", + ) - file_tokens = re.findall( - r"([\w]+)_stats.csv", + 
gene_score_distribution_graph = ""
+        if gene_score_distribution != []:
+            gene_score_distribution_graph = create_dist_graph(
+                gene_score_distribution,
+                "Score",
+                "Gene Score Distribution",
+                f"./{folder_name}/{os.path.basename(report_path)}.gene.score.png",
+            )
+
+        exon_length_distribution_graph = ""
+        if exon_length_distribution != []:
+            exon_length_distribution_graph = create_dist_graph(
+                exon_length_distribution,
+                "Length",
+                "Exon Length Distribution",
+                f"./{folder_name}/{os.path.basename(report_path)}.exon.length.png",
+            )
+
+        exon_number_distribution_graph = ""
+        if exon_number_distribution != []:
+            exon_number_distribution_graph = create_dist_graph(
+                exon_number_distribution,
+                "Number",
+                "Exon Number Distribution",
+                f"./{folder_name}/{os.path.basename(report_path)}.exon.number.png",
+            )
+
+        intron_length_distribution_graph = ""
+        if intron_length_distribution != []:
+            intron_length_distribution_graph = create_dist_graph(
+                intron_length_distribution,
+                "Length",
+                "Intron Length Distribution",
+                f"./{folder_name}/{os.path.basename(report_path)}.intron.length.png",
+            )
+
+        cds_length_distribution_graph = ""
+        if cds_length_distribution != []:
+            cds_length_distribution_graph = create_dist_graph(
+                cds_length_distribution,
+                "Length",
+                "CDS Length Distribution",
+                f"./{folder_name}/{os.path.basename(report_path)}.cds.length.png",
+            )
+
+        # The part of the file name before ".gt.stat.yml" becomes the "hap" key.
+        file_tag = re.findall(
+            r"([\w]+).gt.stat.yml",
             os.path.basename(str(report_path)),
         )[0]
 
         data["GENOMETOOLS_GT_STAT"].append(
             {
-                "hap": file_tokens,
+                "hap": file_tag,
                 "report_table": report_table_dict,
                 "report_table_html": tabulate(
-                    report_table,
+                    pd.DataFrame(
+                        report_table_dict.items(), columns=["Metric", "Value"]
+                    ),
                     headers=["Stat", "Value"],
                     tablefmt="html",
                     numalign="left",
                     showindex=False,
                 ),
+                "gene_length_plot": gene_length_distribution_graph,
+                "gene_score_plot": gene_score_distribution_graph,
+                "exon_length_plot": exon_length_distribution_graph,
+                "exon_number_plot":
exon_number_distribution_graph,
+                "intron_length_plot": intron_length_distribution_graph,
+                "cds_length_plot": cds_length_distribution_graph,
             }
         )
 
     return {
         "GENOMETOOLS_GT_STAT": sort_list_of_results(data["GENOMETOOLS_GT_STAT"], "hap")
     }
+
+
+def extract_report_data(report_path, num_groups):
+    """Parse one report file into a scalar table and six distributions.
+
+    The file is read as "key: value" lines. A key with an empty value opens
+    a section, and every later pair is stored under the most recently opened
+    section.
+
+    Returns a 7-tuple: a dict of the top-level scalar entries, then the gene
+    length, gene score, exon length, exon number, intron length and CDS
+    length distributions as built by create_frequency_groups(). A section
+    that is absent from the report yields an empty distribution.
+    """
+    yaml_data = {}
+    parent_key = ""
+    with open(report_path, "r") as stream:
+        for line in stream:
+            # Blank or colon-free lines hold no key/value pair; skip them
+            # instead of failing on the tuple unpacking below.
+            if ":" not in line:
+                continue
+
+            key, value = line.strip().split(":", 1)
+
+            if value == "":
+                parent_key = key
+                yaml_data[parent_key] = {}
+                continue
+
+            if parent_key == "":
+                yaml_data[key] = value.strip()
+                continue
+
+            yaml_data[parent_key][key] = value.strip()
+
+    report_table_dict = {
+        key: value for key, value in yaml_data.items() if not isinstance(value, dict)
+    }
+    # Each distribution value starts with the integer count; anything from
+    # the first "(" onwards is dropped before conversion.
+    gene_length_distribution = create_frequency_groups(
+        [
+            (int(key), int(value.split("(")[0].strip()))
+            for key, value in yaml_data.get("gene length distribution", {}).items()
+        ],
+        num_groups,
+    )
+    gene_score_distribution = create_frequency_groups(
+        [
+            (int(key), int(value.split("(")[0].strip()))
+            for key, value in yaml_data.get("gene score distribution", {}).items()
+        ],
+        num_groups,
+    )
+    exon_length_distribution = create_frequency_groups(
+        [
+            (int(key), int(value.split("(")[0].strip()))
+            for key, value in yaml_data.get("exon length distribution", {}).items()
+        ],
+        num_groups,
+    )
+    exon_number_distribution = create_frequency_groups(
+        [
+            (int(key), int(value.split("(")[0].strip()))
+            for key, value in yaml_data.get("exon number distribution", {}).items()
+        ],
+        num_groups,
+    )
+    intron_length_distribution = create_frequency_groups(
+        [
+            (int(key), int(value.split("(")[0].strip()))
+            for key, value in yaml_data.get("intron length distribution", {}).items()
+        ],
+        num_groups,
+    )
+    cds_length_distribution = create_frequency_groups(
+        [
+            (int(key), int(value.split("(")[0].strip()))
+            for key, value in yaml_data.get("CDS length distribution", {}).items()
+        ],
+        num_groups,
+    )
+
+    return (
+        report_table_dict,
+        gene_length_distribution,
+        gene_score_distribution,
+        exon_length_distribution,
+        exon_number_distribution,
+        intron_length_distribution,
+        cds_length_distribution,
+    )
+
+
+def create_frequency_groups(data, num_groups):
+    """Convert (value, frequency) pairs into ordered frequency buckets.
+
+    Each bucket is a dict with "start", "stop" and "freq" keys. When
+    num_groups is -1 every pair becomes its own bucket, sorted by value.
+    Otherwise values are binned with a step of (max - min) // num_groups and
+    the frequencies falling into each bin are summed.
+    """
+
+    if num_groups == -1:
+        sorted_data = sorted(data, key=lambda x: x[0])
+        return [
+            {
+                "start": x,
+                "stop": x,
+                "freq": freq,
+            }
+            for x, freq in sorted_data
+        ]
+
+    assert (
+        num_groups >= 1
+    ), f"num_groups should be larger than or equal to 1. It is {num_groups}"
+
+    if data == []:
+        return []
+
+    sorted_data = sorted(data, key=lambda x: x[0])
+
+    ordinal = [x for x, _ in sorted_data]
+
+    ordinal_max = max(ordinal)
+    ordinal_range = ordinal_max - min(ordinal)
+    ordinal_step = ordinal_range // num_groups
+
+    groups = []
+    current_group = {
+        "start": sorted_data[0][0],
+        "stop": [x for x in ordinal if x <= (sorted_data[0][0] + ordinal_step)][-1],
+        "freq": 0,
+    }
+
+    for num, freq in sorted_data:
+        if num <= current_group["stop"]:
+            current_group["freq"] += freq
+            continue
+
+        groups.append(current_group.copy())
+
+        current_group["start"] = num
+        current_group["stop"] = [x for x in ordinal if x <= (num + ordinal_step)][-1]
+        current_group["freq"] = freq
+
+    groups.append(current_group)
+
+    return groups
+
+
+def test_create_frequency_groups_multiple():
+    data = [(15, 4), (5, 1), (70, 10)]
+    num_groups = 2
+
+    expect = [
+        {"start": 5, "stop": 15, "freq": 5},
+        {"start": 70, "stop": 70, "freq": 10},
+    ]
+
+    assert expect == create_frequency_groups(data, num_groups)
+
+
+def test_create_frequency_groups_single():
+    data = [(15, 4)]
+    num_groups = 2
+
+    expect = [{"start": 15, "stop": 15, "freq": 4}]
+
+    assert expect == create_frequency_groups(data, num_groups)
+
+
+def test_create_frequency_groups_repeat():
+    data = [(15, 4), (15, 8)]
+    num_groups = 2
+
+    expect = [{"start": 15, "stop": 15, "freq": 12}]
+
+    assert expect == create_frequency_groups(data, num_groups)
+
+
+# test_create_frequency_groups_multiple()
+# test_create_frequency_groups_single()
+# test_create_frequency_groups_repeat()
+
+
+def create_dist_graph(groups_dict, x_label, title, file_name):
+    """Plot a cumulative-percentage curve and return it as a PNG data URI.
+
+    groups_dict is a list of buckets with "stop" and "freq" keys. The curve
+    is annotated at its lowest point and at the first points reaching 50%,
+    90% and 99.7%. The figure is written to file_name, and the saved file is
+    returned base64-encoded.
+    """
+
+    x_list = [i["stop"] for i in groups_dict]
+    y_list = [i["freq"] for i in groups_dict]
+    sum_y = float(sum(y_list))
+    cum_sum_y = np.cumsum(y_list)
+    y_list = [float(y) / sum_y * 100.0 for y in cum_sum_y]
+
+    fig, ax = plt.subplots()
+    ax.plot(x_list, y_list)
+
+    ax.set_xlabel(x_label)
+    ax.set_ylabel("Cumulative percentage (%)")
+    ax.set_title(title)
+
+    plt.gca().spines["top"].set_visible(False)
+    plt.gca().spines["right"].set_visible(False)
+
+    min_x, min_y = (min(x_list), min(y_list))
+    x_anno_step = int(float(max(x_list)) * 0.1)
+    ax.annotate(
+        f"(<={min_x}, {round(min_y, 2)}%)",
+        xy=(min_x, min_y),
+        xytext=(min_x + x_anno_step, min_y + 10),
+        arrowprops=dict(color="red", arrowstyle="->, head_width=.15"),
+    )
+
+    near_50 = min([y for y in y_list if y >= 50.0])
+    min_x, min_y = (x_list[y_list.index(near_50)], near_50)
+    ax.annotate(
+        f"(<={min_x}, {round(min_y, 2)}%)",
+        xy=(min_x, min_y),
+        xytext=(min_x + x_anno_step, min_y),
+        arrowprops=dict(color="red", arrowstyle="->, head_width=.15"),
+    )
+
+    near_90 = min([y for y in y_list if y >= 90.0])
+    min_x, min_y = (x_list[y_list.index(near_90)], near_90)
+    ax.annotate(
+        f"(<={min_x}, {round(min_y, 2)}%)",
+        xy=(min_x, min_y),
+        xytext=(min_x + x_anno_step, min_y - 10),
+        arrowprops=dict(color="red", arrowstyle="->, head_width=.15"),
+    )
+
+    near_3_sigma = min([y for y in y_list if y >= 99.7])
+    min_x, min_y = (x_list[y_list.index(near_3_sigma)], near_3_sigma)
+    ax.annotate(
+        f"(<={min_x}, {round(min_y, 2)}%)",
+        xy=(min_x, min_y),
+        xytext=(min_x + x_anno_step, min_y - 10),
+        arrowprops=dict(color="red", arrowstyle="->, head_width=.15"),
+    )
+
+    fig.savefig(file_name, dpi=600)
+    # Release the figure: one is created per plot and pyplot keeps it alive.
+    plt.close(fig)
+
+    with open(file_name, "rb") as f:
+        binary_fc = f.read()
+
+    base64_utf8_str = base64.b64encode(binary_fc).decode("utf-8")
+    return f"data:image/png;base64,{base64_utf8_str}"
diff --git a/bin/report_modules/parsers/gff3_validate_parser.py
b/bin/report_modules/parsers/gff3_validate_parser.py new file mode 100644 index 00000000..e9a7d1e7 --- /dev/null +++ b/bin/report_modules/parsers/gff3_validate_parser.py @@ -0,0 +1,35 @@ +import os +from pathlib import Path +import re + +from report_modules.parsers.parsing_commons import sort_list_of_results + + +def parse_gff3_validate_folder(folder_name="gff3_validate_logs"): + dir = os.getcwdb().decode() + logs_folder_path = Path(f"{dir}/{folder_name}") + + if not os.path.exists(logs_folder_path): + return {} + + list_of_log_files = logs_folder_path.glob("*.log") + + data = {"GFF3_VALIDATE": []} + + for log_path in list_of_log_files: + with open(log_path, "r") as f: + log_lines = [f"

{l}

" for l in f.readlines()] + + file_tokens = re.findall( + r"([\w]+).error.log", + os.path.basename(str(log_path)), + )[0] + + data["GFF3_VALIDATE"].append( + { + "hap": file_tokens, + "validation_log": "".join(log_lines), + } + ) + + return {"GFF3_VALIDATE": sort_list_of_results(data["GFF3_VALIDATE"], "hap")} diff --git a/bin/report_modules/parsers/params_parser.py b/bin/report_modules/parsers/params_parser.py index bf07dc5a..7806f18f 100644 --- a/bin/report_modules/parsers/params_parser.py +++ b/bin/report_modules/parsers/params_parser.py @@ -12,32 +12,9 @@ def highlight_json(json_string): return highlight(json_string, lexer, formatter) -def format_params_dict(json_dict): - formatted_dict = {} - for key, value in json_dict.items(): - if key in ["max_cpus", "max_memory", "max_time"]: - continue - - if not isinstance(value, dict): - formatted_dict[key] = value - continue - - if "skip" in value.keys(): - if value["skip"] == 1: - formatted_dict[key] = "Skipped" - continue - - formatted_dict[key] = value - formatted_dict[key].pop("skip", None) - - return formatted_dict - - -def parse_params_json(): - with open("params_json.json", "r") as f: +def parse_params_json(file_name): + with open(file_name, "r") as f: params_dict = json.load(f) - formatted_dict_json = highlight_json( - json.dumps(format_params_dict(params_dict), indent=4) - ) + formatted_dict_json = highlight_json(json.dumps(params_dict, indent=4)) return params_dict, formatted_dict_json diff --git a/bin/report_modules/parsers/tidk_parser.py b/bin/report_modules/parsers/tidk_parser.py index c12c5adf..556d93a4 100644 --- a/bin/report_modules/parsers/tidk_parser.py +++ b/bin/report_modules/parsers/tidk_parser.py @@ -30,35 +30,35 @@ def parse_tidk_folder(folder_name="tidk_outputs"): plot_url = f"data:image/{ext}+xml;base64,{base64_utf8_str}" file_tokens = re.findall( - r"([\w]+).tidk.plot(.empty)?.svg", + r"([\w]+).([\w]+).svg", os.path.basename(str(plot_path)), )[0] - if "_a_posteriori" in file_tokens[0]: - 
hap_str_literal = file_tokens[0].replace("_a_posteriori", "") - sequence_file_name = f"{hap_str_literal}.a_posteriori.sequence" + sample_tag = file_tokens[0].strip() + plot_type = file_tokens[1].strip() + if "aposteriori" in plot_type: + sequence_file_name = f"{sample_tag}.top.sequence.txt" with open(f"{dir}/{folder_name}/{sequence_file_name}", "r") as file: lines = file.readlines() sequence = "" if len(lines) < 1 else lines[0].strip() - display_name = f"{hap_str_literal}: a posteriori sequence" + display_name = f"{sample_tag}: a posteriori sequence" else: - hap_str_literal = file_tokens[0].replace("_a_priori", "") - display_name = f"{hap_str_literal}: a priori sequence" + display_name = f"{sample_tag}: a priori sequence" sequence = "" data["TIDK"].append( { - "hap": file_tokens[0], + "hap": f"{sample_tag}_{plot_type}", "hap_display": display_name, "sequence": sequence, "is_a_priori": "a priori" in display_name, "a_priori_sequence": a_priori_sequence, "has_sequence": sequence != "", "tidk_plot": plot_url, - "tidk_plot_empty": file_tokens[1] != "", + "tidk_plot_empty": False, } ) diff --git a/bin/report_modules/parsers/tools_parser.py b/bin/report_modules/parsers/tools_parser.py new file mode 100644 index 00000000..54746094 --- /dev/null +++ b/bin/report_modules/parsers/tools_parser.py @@ -0,0 +1,33 @@ +import yaml +import json + +from pygments import highlight +from pygments.lexers import JsonLexer +from pygments.formatters import HtmlFormatter + + +def parse_tools_yaml(): + with open("software_versions.yml", "r") as f: + tools_dict = yaml.safe_load(f) + formatted_tools_json = highlight_json( + json.dumps(format_tools_dict(tools_dict), indent=4) + ) + + return tools_dict, formatted_tools_json + + +def highlight_json(json_string): + lexer = JsonLexer() + formatter = HtmlFormatter() + + return highlight(json_string, lexer, formatter) + + +def format_tools_dict(input_dict): + output_list = [] + for _, top_level_value in input_dict.items(): + for key, value in 
top_level_value.items(): + if (key, value) not in output_list: + output_list.append((key, value)) + + return dict(sorted(output_list, key=lambda x: x[0])) diff --git a/bin/report_modules/templates/assemblathon_stats/assemblathon_stats.html b/bin/report_modules/templates/assemblathon_stats/assemblathon_stats.html index 108e8f13..56eda984 100644 --- a/bin/report_modules/templates/assemblathon_stats/assemblathon_stats.html +++ b/bin/report_modules/templates/assemblathon_stats/assemblathon_stats.html @@ -7,11 +7,13 @@ >https://github.com/KorfLab/Assemblathon

-

Version: {{ all_stats_dicts['VERSIONS']['ASSEMBLATHON_STATS'] }}

+

+ Version: {{ all_stats_dicts['VERSIONS']['ASSEMBLATHON_STATS']['assemblathon_stats'] }} +

Warning:

- Contig-related stats are based on the assumption that the n_limit ({{ - all_stats_dicts['PARAMS_DICT']['assemblathon_stats']['n_limit'] }}) parameter is specified correctly. If you + Contig-related stats are based on the assumption that the assemblathon_stats_n_limit ({{ + all_stats_dicts['PARAMS_DICT']['assemblathon_stats_n_limit'] }}) parameter is specified correctly. If you are not certain of the value of the n_limit parameter, please ignore the contig-related stats.

diff --git a/bin/report_modules/templates/base.html b/bin/report_modules/templates/base.html index f2b6e872..bb753a67 100644 --- a/bin/report_modules/templates/base.html +++ b/bin/report_modules/templates/base.html @@ -1,71 +1,57 @@ - {% include 'header.html' %} +{% include 'header.html' %} + + +
+
AssemblyQC {{ + all_stats_dicts['VERSIONS']['Workflow']['plant-food-research-open/assemblyqc'] }}
+
+
+ + + {% if 'FASTA_VALIDATE' in all_stats_dicts %} + + {% endif %} + {% if 'GFF3_VALIDATE' in all_stats_dicts %} + + {% endif %} + {% if 'NCBI_FCS_ADAPTOR' in all_stats_dicts %} + + {% endif %} {% if 'NCBI_FCS_GX' in all_stats_dicts %}{% endif %} {% if 'ASSEMBLATHON_STATS' in all_stats_dicts %}{% endif %} {% if 'GENOMETOOLS_GT_STAT' in all_stats_dicts %}{% endif %} {% if 'BUSCO' in all_stats_dicts %}{% endif %} {% if 'TIDK' in all_stats_dicts %}{% endif %} {% if 'LAI' in all_stats_dicts %}{% endif %} {% if 'KRAKEN2' in all_stats_dicts %}{% endif %} {% if 'HIC' in all_stats_dicts %}{% endif %} {% if 'CIRCOS' in all_stats_dicts %}{% endif %} +
+ {% include 'params/params.html' %} {% include 'tools/tools.html' %} {% if 'FASTA_VALIDATE' in all_stats_dicts %}{% + include 'fasta_validate/fasta_validate.html' %}{% endif %} + + {% if 'GFF3_VALIDATE' in all_stats_dicts %}{% include 'gff3_validate/gff3_validate.html' %}{% endif %} + + {% if 'NCBI_FCS_ADAPTOR' in all_stats_dicts %}{% + include 'ncbi_fcs_adaptor/ncbi_fcs_adaptor.html' %}{% endif %} {% if 'NCBI_FCS_GX' in all_stats_dicts %}{% + include 'ncbi_fcs_gx/ncbi_fcs_gx.html' %}{% endif %} {% if 'ASSEMBLATHON_STATS' in all_stats_dicts %}{% include + 'assemblathon_stats/assemblathon_stats.html' %}{% endif %} {% if 'GENOMETOOLS_GT_STAT' in all_stats_dicts %}{% + include 'genometools_gt_stat/genometools_gt_stat.html' %}{% endif %} {% if 'BUSCO' in all_stats_dicts %}{% + include 'busco/busco.html' %}{% endif %} {% if 'TIDK' in all_stats_dicts %}{% include 'tidk/tidk.html' %}{% + endif %} {% if 'LAI' in all_stats_dicts %}{% include 'lai/lai.html' %}{% endif %} {% if 'KRAKEN2' in + all_stats_dicts %}{% include 'kraken2/kraken2.html' %}{% endif %} {% if 'HIC' in all_stats_dicts %}{% include + 'hic/hic.html' %}{% endif %} {% if 'CIRCOS' in all_stats_dicts %}{% include 'circos/circos.html' %}{% endif %} + +{% include 'js.html' %} - -
-
AssemblyQC {{ all_stats_dicts['VERSIONS']['SELF'] }}
-
-
- - {% if 'NCBI_FCS_ADAPTOR' in all_stats_dicts %}{% endif %} {% if 'NCBI_FCS_GX' in all_stats_dicts %}{% endif %} {% if 'ASSEMBLATHON_STATS' in all_stats_dicts %}{% endif %} {% if 'GENOMETOOLS_GT_STAT' in all_stats_dicts %}{% endif %} {% if 'BIOCODE_GFF3_STATS' in all_stats_dicts %}{% endif %} {% if 'BUSCO' in all_stats_dicts %}{% endif %} {% if 'TIDK' in all_stats_dicts %}{% endif %} {% if 'LAI' in all_stats_dicts %}{% endif %} {% if 'KRAKEN2' in all_stats_dicts %}{% endif %} {% if 'HIC' in all_stats_dicts %}{% endif %} {% if 'CIRCOS' in all_stats_dicts %}{% endif %} -
- {% include 'params/params.html' %} {% if 'NCBI_FCS_ADAPTOR' in all_stats_dicts %}{% include - 'ncbi_fcs_adaptor/ncbi_fcs_adaptor.html' %}{% endif %} {% if 'NCBI_FCS_GX' in all_stats_dicts %}{% include - 'ncbi_fcs_gx/ncbi_fcs_gx.html' %}{% endif %} {% if 'ASSEMBLATHON_STATS' in all_stats_dicts %}{% include - 'assemblathon_stats/assemblathon_stats.html' %}{% endif %} {% if 'GENOMETOOLS_GT_STAT' in all_stats_dicts %}{% - include 'genometools_gt_stat/genometools_gt_stat.html' %}{% endif %} {% if 'BIOCODE_GFF3_STATS' in - all_stats_dicts %}{% include 'biocode_gff3_stats/biocode_gff3_stats.html' %}{% endif %} {% if 'BUSCO' in - all_stats_dicts %}{% include 'busco/busco.html' %}{% endif %} {% if 'TIDK' in all_stats_dicts %}{% include - 'tidk/tidk.html' %}{% endif %} {% if 'LAI' in all_stats_dicts %}{% include 'lai/lai.html' %}{% endif %} {% if - 'KRAKEN2' in all_stats_dicts %}{% include 'kraken2/kraken2.html' %}{% endif %} {% if 'HIC' in all_stats_dicts - %}{% include 'hic/hic.html' %}{% endif %} {% if 'CIRCOS' in all_stats_dicts %}{% include 'circos/circos.html' - %}{% endif %} - - {% include 'js.html' %} diff --git a/bin/report_modules/templates/biocode_gff3_stats/biocode_gff3_stats.html b/bin/report_modules/templates/biocode_gff3_stats/biocode_gff3_stats.html deleted file mode 100644 index 711b74ab..00000000 --- a/bin/report_modules/templates/biocode_gff3_stats/biocode_gff3_stats.html +++ /dev/null @@ -1,13 +0,0 @@ - diff --git a/bin/report_modules/templates/biocode_gff3_stats/dropdown.html b/bin/report_modules/templates/biocode_gff3_stats/dropdown.html deleted file mode 100644 index 33f97326..00000000 --- a/bin/report_modules/templates/biocode_gff3_stats/dropdown.html +++ /dev/null @@ -1,10 +0,0 @@ - diff --git a/bin/report_modules/templates/biocode_gff3_stats/report_contents.html b/bin/report_modules/templates/biocode_gff3_stats/report_contents.html deleted file mode 100644 index d5acfe56..00000000 --- 
a/bin/report_modules/templates/biocode_gff3_stats/report_contents.html +++ /dev/null @@ -1,20 +0,0 @@ -{% set vars = {'is_first': True} %} {% for item in range(all_stats_dicts["BIOCODE_GFF3_STATS"]|length) %} {% set -active_text = 'display: block' if vars.is_first else 'display: none' %} -
-
-
-
{{ all_stats_dicts['BIOCODE_GFF3_STATS'][item]['hap'] }}
-
-
-
-
{{ all_stats_dicts['BIOCODE_GFF3_STATS'][item]['general_stats_table_html'] }}
-
-
- -
-
-{% if vars.update({'is_first': False}) %} {% endif %} {% endfor %} diff --git a/bin/report_modules/templates/busco/busco.html b/bin/report_modules/templates/busco/busco.html index c8a180a1..4f6f9b3a 100644 --- a/bin/report_modules/templates/busco/busco.html +++ b/bin/report_modules/templates/busco/busco.html @@ -9,9 +9,9 @@ Manni M., Berkeley M.R., Seppey M., Simao F.A., Zdobnov E.M. 2021. BUSCO update: novel and streamlined workflows along with broader and deeper phylogenetic coverage for scoring of eukaryotic, prokaryotic, and viral genomes. arXiv:2106.11799 [q-bio] [Internet]. Available from: - http://arxiv.org/abs/2106.11799 + arxiv.org/abs/2106.11799

-

Version: {{ all_stats_dicts['VERSIONS']['BUSCO'] }}

+

Version: {{ all_stats_dicts['VERSIONS']['BUSCO']['busco'] }}

{% include 'busco/dropdown.html' %} {% include 'busco/summary_contents.html' %} {% include 'busco/report_contents.html' %} diff --git a/bin/report_modules/templates/circos/circos.html b/bin/report_modules/templates/circos/circos.html index 959b9841..4b9c8c1b 100644 --- a/bin/report_modules/templates/circos/circos.html +++ b/bin/report_modules/templates/circos/circos.html @@ -8,27 +8,32 @@

Krzywinski, M., Schein, J., Birol, I., Connors, J., Gascoyne, R., Horsman, D., ... & Marra, M. A. (2009). Circos: an information aesthetic for comparative genomics. Genome research, 19(9), 1639-1645. - https://doi.org/10.1101/gr.092759.109 + 10.1101/gr.092759.109

Marçais G, Delcher AL, Phillippy AM, Coston R, Salzberg SL, Zimin A. MUMmer4: A fast and versatile genome alignment system. PLoS Comput Biol. 2018 Jan 26;14(1):e1005944. - https://doi.org/10.1371/journal.pcbi.1005944 + 10.1371/journal.pcbi.1005944

Version: {{ all_stats_dicts['VERSIONS']['CIRCOS'] }} (CIRCOS), {{ all_stats_dicts['VERSIONS']['MUMMER'] - }} (MUMMER)Versions: {{ all_stats_dicts['VERSIONS']['CIRCOS']['circos'] }} (CIRCOS), {{ + all_stats_dicts['VERSIONS']['MUMMER']['nucmer'] }} (MUMMER)

Notes:

-

- Alignments within a distance of {{ all_stats_dicts['PARAMS_DICT']['synteny']['max_gap'] }}bp have been - bundled together. After bundling, any bundle smaller than {{ - all_stats_dicts['PARAMS_DICT']['synteny']['min_bundle_size'] }}bp has been filtered out. -

+
    +
  • + Alignments within a distance of {{ all_stats_dicts['PARAMS_DICT']['synteny_max_gap'] }}bp have been + bundled together. +
  • +
  • + After bundling, any bundle smaller than {{ all_stats_dicts['PARAMS_DICT']['synteny_min_bundle_size'] }}bp has been filtered out. +
  • +
+ The sequence labels shown on the plot are based on the labelling file provided to the pipeline. These labels may or may not be the same as the sequence IDs in the corresponding FASTA files.
  • +
{% include 'circos/dropdown.html' %} {% include 'circos/report_contents.html' %} diff --git a/bin/report_modules/templates/fasta_validate/dropdown.html b/bin/report_modules/templates/fasta_validate/dropdown.html new file mode 100644 index 00000000..56fb72ef --- /dev/null +++ b/bin/report_modules/templates/fasta_validate/dropdown.html @@ -0,0 +1,10 @@ + diff --git a/bin/report_modules/templates/fasta_validate/fasta_validate.html b/bin/report_modules/templates/fasta_validate/fasta_validate.html new file mode 100644 index 00000000..e0643542 --- /dev/null +++ b/bin/report_modules/templates/fasta_validate/fasta_validate.html @@ -0,0 +1,14 @@ + diff --git a/bin/report_modules/templates/fasta_validate/report_contents.html b/bin/report_modules/templates/fasta_validate/report_contents.html new file mode 100644 index 00000000..7abaee35 --- /dev/null +++ b/bin/report_modules/templates/fasta_validate/report_contents.html @@ -0,0 +1,18 @@ +{% set vars = {'is_first': True} %} {% for item in range(all_stats_dicts["FASTA_VALIDATE"]|length) %} {% set active_text += 'display: block' if vars.is_first else 'display: none' %} +
+
+
+
{{ all_stats_dicts['FASTA_VALIDATE'][item]['hap'] }}
+
+
+

Validation failed!

+
+
{{ all_stats_dicts['FASTA_VALIDATE'][item]['validation_log'] }}
+
+
+{% if vars.update({'is_first': False}) %} {% endif %} {% endfor %} diff --git a/bin/report_modules/templates/genometools_gt_stat/genometools_gt_stat.html b/bin/report_modules/templates/genometools_gt_stat/genometools_gt_stat.html index a181d093..c5a5e7a1 100644 --- a/bin/report_modules/templates/genometools_gt_stat/genometools_gt_stat.html +++ b/bin/report_modules/templates/genometools_gt_stat/genometools_gt_stat.html @@ -9,7 +9,7 @@ structured genome annotations. IEEE/ACM Trans Comput Biol Bioinform. 2013 May-Jun;10(3):645-56. doi: 10.1109/TCBB.2013.68. PMID: 24091398.

-

Version: {{ all_stats_dicts['VERSIONS']['GENOMETOOLS_GT_STAT'] }}

+

Version: {{ all_stats_dicts['VERSIONS']['GT_STAT']['genometools'] }}

{% include 'genometools_gt_stat/dropdown.html' %} {% include 'genometools_gt_stat/report_contents.html' %} diff --git a/bin/report_modules/templates/genometools_gt_stat/report_contents.html b/bin/report_modules/templates/genometools_gt_stat/report_contents.html index 0b7f1119..d2975ede 100644 --- a/bin/report_modules/templates/genometools_gt_stat/report_contents.html +++ b/bin/report_modules/templates/genometools_gt_stat/report_contents.html @@ -13,5 +13,23 @@
{{ all_stats_dicts['GENOMETOOLS_GT_STAT'][item]['report_table_html'] }}
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
{% if vars.update({'is_first': False}) %} {% endif %} {% endfor %} diff --git a/bin/report_modules/templates/gff3_validate/dropdown.html b/bin/report_modules/templates/gff3_validate/dropdown.html new file mode 100644 index 00000000..0bece7a0 --- /dev/null +++ b/bin/report_modules/templates/gff3_validate/dropdown.html @@ -0,0 +1,10 @@ + diff --git a/bin/report_modules/templates/gff3_validate/gff3_validate.html b/bin/report_modules/templates/gff3_validate/gff3_validate.html new file mode 100644 index 00000000..01fa2754 --- /dev/null +++ b/bin/report_modules/templates/gff3_validate/gff3_validate.html @@ -0,0 +1,22 @@ + diff --git a/bin/report_modules/templates/gff3_validate/report_contents.html b/bin/report_modules/templates/gff3_validate/report_contents.html new file mode 100644 index 00000000..d61358a2 --- /dev/null +++ b/bin/report_modules/templates/gff3_validate/report_contents.html @@ -0,0 +1,18 @@ +{% set vars = {'is_first': True} %} {% for item in range(all_stats_dicts["GFF3_VALIDATE"]|length) %} {% set active_text += 'display: block' if vars.is_first else 'display: none' %} +
+
+
+
{{ all_stats_dicts['GFF3_VALIDATE'][item]['hap'] }}
+
+
+

Validation failed!

+
+
{{ all_stats_dicts['GFF3_VALIDATE'][item]['validation_log'] }}
+
+
+{% if vars.update({'is_first': False}) %} {% endif %} {% endfor %} diff --git a/bin/report_modules/templates/hic/hic.html b/bin/report_modules/templates/hic/hic.html index 2937ce52..868dc089 100644 --- a/bin/report_modules/templates/hic/hic.html +++ b/bin/report_modules/templates/hic/hic.html @@ -9,11 +9,10 @@

Robinson JT, Turner D, Durand NC, Thorvaldsdóttir H, Mesirov JP, Aiden EL. Juicebox.js Provides a Cloud-Based Visualization System for Hi-C Data. Cell Syst. 2018 Feb 28;6(2):256-258.e1. - https://doi.org/10.1016/j.cels.2018.01.001. Epub 2018 Feb 7. PMID: 29428417; PMCID: PMC6047755. + 10.1016/j.cels.2018.01.001. Epub + 2018 Feb 7. PMID: 29428417; PMCID: PMC6047755.

-

Version: {{ all_stats_dicts['VERSIONS']['HIC'] }}

+

Version: {{ all_stats_dicts['VERSIONS']['JUICEBOX_JS'] }}

{% include 'hic/dropdown.html' %} {% include 'hic/report_contents.html' %} diff --git a/bin/report_modules/templates/kraken2/kraken2.html b/bin/report_modules/templates/kraken2/kraken2.html index ec12d3e8..250eebf3 100644 --- a/bin/report_modules/templates/kraken2/kraken2.html +++ b/bin/report_modules/templates/kraken2/kraken2.html @@ -1,17 +1,14 @@ diff --git a/bin/report_modules/templates/lai/lai.html b/bin/report_modules/templates/lai/lai.html index ebe389f9..50a98582 100644 --- a/bin/report_modules/templates/lai/lai.html +++ b/bin/report_modules/templates/lai/lai.html @@ -12,15 +12,9 @@

Shujun Ou, Jinfeng Chen, Ning Jiang, Assessing genome assembly quality using the LTR Assembly Index (LAI), Nucleic Acids Research, Volume 46, Issue 21, 30 November 2018, Page e126, - https://doi.org/10.1093/nar/gky730 + 10.1093/nar/gky730

-

Version: {{ all_stats_dicts['VERSIONS']['LAI'] }}

- {% if all_stats_dicts['PARAMS_DICT']['lai']['mode'] == '-qq' %} -

Warning:

-

- The LAI mode is set to '-qq' and, therefore, the RAW LAI can only be used for intra-specific comparisons. -

- {% endif %} +

Version: {{ all_stats_dicts['VERSIONS']['LTRRETRIEVER_LAI']['lai'] }}

{% include 'lai/dropdown.html' %} {% include 'lai/summary_contents.html' %} diff --git a/bin/report_modules/templates/ncbi_fcs_adaptor/ncbi_fcs_adaptor.html b/bin/report_modules/templates/ncbi_fcs_adaptor/ncbi_fcs_adaptor.html index 9246ccb3..e62678c9 100644 --- a/bin/report_modules/templates/ncbi_fcs_adaptor/ncbi_fcs_adaptor.html +++ b/bin/report_modules/templates/ncbi_fcs_adaptor/ncbi_fcs_adaptor.html @@ -5,7 +5,7 @@

https://github.com/ncbi/fcs

-

Version: {{ all_stats_dicts['VERSIONS']['NCBI_FCS_ADAPTOR'] }}

+

Version: {{ all_stats_dicts['VERSIONS']['NCBI_FCS_ADAPTOR']['av_screen_x'] }}

{% include 'ncbi_fcs_adaptor/dropdown.html' %} {% include 'ncbi_fcs_adaptor/report_contents.html' %} diff --git a/bin/report_modules/templates/ncbi_fcs_gx/ncbi_fcs_gx.html b/bin/report_modules/templates/ncbi_fcs_gx/ncbi_fcs_gx.html index 097f48c8..e2c2a338 100644 --- a/bin/report_modules/templates/ncbi_fcs_gx/ncbi_fcs_gx.html +++ b/bin/report_modules/templates/ncbi_fcs_gx/ncbi_fcs_gx.html @@ -7,11 +7,12 @@ Mozes, Pooja K Strope, Pape M Sylla, Lukas Wagner, Shelby L Bidwell, Karen Clark, Emily W Davis, Brian Smith-White, Wratko Hlavina, Kim D Pruitt, Valerie A Schneider, Terence D Murphy bioRxiv 2023.06.02.543519; doi: - https://doi.org/10.1101/2023.06.02.543519, GitHub: https://github.com/ncbi/fcs + 10.1101/2023.06.02.543519, GitHub: + https://github.com/ncbi/fcs +

+

+ Version: {{ all_stats_dicts['VERSIONS']['NCBI_FCS_GX_SCREEN_SAMPLES']['fcs_gx'] }}

-

Version: {{ all_stats_dicts['VERSIONS']['NCBI_FCS_GX'] }}

DB Version: {{ all_stats_dicts['NCBI_FCS_GX'][0]['report_meta_data'][1]['db']['build-date'] }}

diff --git a/bin/report_modules/templates/params/contents.html b/bin/report_modules/templates/params/contents.html index 7c10ad16..f89d27dd 100644 --- a/bin/report_modules/templates/params/contents.html +++ b/bin/report_modules/templates/params/contents.html @@ -3,6 +3,7 @@
Pipeline Parameters
- {{ all_stats_dicts['PARAMS_TABLE'] }} +

Only displaying parameters that differ from the pipeline defaults.

+ {{ all_stats_dicts['PARAMS_SUMMARY_TABLE'] }} diff --git a/bin/report_modules/templates/params/params.html b/bin/report_modules/templates/params/params.html index 6526ab03..70452246 100644 --- a/bin/report_modules/templates/params/params.html +++ b/bin/report_modules/templates/params/params.html @@ -7,10 +7,11 @@

Reference:

Rashid, U., Wu, C., Shiller, J., Smith, K., Crowhurst, R., Davy, M., Chen, T.-H., Thomson, S., & Deng, C. - (2024). AssemblyQC: A NextFlow pipeline for evaluating assembly quality (1.3). Zenodo. + (2024). AssemblyQC: A NextFlow pipeline for evaluating assembly quality ({{ + all_stats_dicts['VERSIONS']['Workflow']['plant-food-research-open/assemblyqc'] }}). Zenodo. 10.5281/zenodo.10647870. GitHub. - https://github.com/Plant-Food-Research-Open/assembly_qchttps://github.com/Plant-Food-Research-Open/assemblyqc

diff --git a/bin/report_modules/templates/tidk/tidk.html b/bin/report_modules/templates/tidk/tidk.html index 62180a61..0503dae9 100644 --- a/bin/report_modules/templates/tidk/tidk.html +++ b/bin/report_modules/templates/tidk/tidk.html @@ -9,7 +9,7 @@ >https://github.com/tolkit/telomeric-identifier

-

Version: {{ all_stats_dicts['VERSIONS']['TIDK'] }}

+

Version: {{ all_stats_dicts['VERSIONS']['TIDK_PLOT_APRIORI']['tidk'] }}

{% include 'tidk/dropdown.html' %} {% include 'tidk/report_contents.html' %} diff --git a/bin/report_modules/templates/tools/contents.html b/bin/report_modules/templates/tools/contents.html new file mode 100644 index 00000000..721721b6 --- /dev/null +++ b/bin/report_modules/templates/tools/contents.html @@ -0,0 +1,9 @@ +
+
+
+
Pipeline Tools
+

The following is a non-exhaustive list of tools used to generate this report.

+
+ {{ all_stats_dicts['TOOLS_TABLE'] }} +
+
diff --git a/bin/report_modules/templates/tools/tools.html b/bin/report_modules/templates/tools/tools.html new file mode 100644 index 00000000..4d78232c --- /dev/null +++ b/bin/report_modules/templates/tools/tools.html @@ -0,0 +1 @@ + diff --git a/bin/reverse_edta_naming_f1b7bce.py b/bin/reverse_edta_naming_f1b7bce.py deleted file mode 100755 index c0471008..00000000 --- a/bin/reverse_edta_naming_f1b7bce.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python - -import sys - -renamed_ids_tsv = sys.argv[1] -te_anno_gff3 = sys.argv[2] -intact_gff3 = sys.argv[3] -output_prefix = sys.argv[4] - - -def create_name_mapping_from_file(file_path): - dictionary = {} - - with open(file_path, "r") as tsv_file: - for line in tsv_file: - columns = line.strip().split("\t") - if len(columns) != 2: - raise ValueError(f"{file_path} should be a two column TSV file") - - orig_id, new_id = columns[0], columns[1] - dictionary[new_id] = orig_id - - return dictionary - - -def reverse_rename_gff3_file(new_to_orig_ids, file_path, output_file_name): - with open(file_path, "r") as input_gff3_file: - input_lines = input_gff3_file.readlines() - - with open(output_file_name, "w") as output_gff_file: - for line in input_lines: - if line.startswith("##"): - output_gff_file.write(line) - continue - - new_id = line.split("\t")[0] - orig_id = new_to_orig_ids[new_id] - output_gff_file.write(line.replace(new_id, orig_id)) - - -if __name__ == "__main__": - new_to_orig_ids = create_name_mapping_from_file(renamed_ids_tsv) - reverse_rename_gff3_file( - new_to_orig_ids, te_anno_gff3, f"{output_prefix}.EDTA.TEanno.gff3" - ) - reverse_rename_gff3_file( - new_to_orig_ids, intact_gff3, f"{output_prefix}.EDTA.intact.gff3" - ) diff --git a/bin/shorten_fasta_ids_c97537f.py b/bin/shorten_fasta_ids_c97537f.py deleted file mode 100755 index 1ae5076c..00000000 --- a/bin/shorten_fasta_ids_c97537f.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python - -import re -import sys - -from Bio import SeqIO - -# The input 
fasta file path -fasta_file_path = sys.argv[1] - -# The prefix for output files: prefix.renamed.ids.fa, prefix.renamed.ids.tsv -output_files_prefix = sys.argv[2] - -# In the case where IDs have acceptable character and no change is needed, the output is stdout: -# "IDs have acceptable length and character. No change required." - - -def extract_fasta_ids(fasta_file_path): - fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta") - - ids = [] - for record in fasta_file_obj: - ids.append(record.id) - return ids - - -def write_fasta_with_new_ids(fasta_file_path, id_mapping, file_prefix): - old_fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta") - id_map = dict(id_mapping) - - replaced_records = [] - for record in old_fasta_file_obj: - old_id = record.id - - new_id = id_map[old_id] - record.id = new_id - record.description = "" - - replaced_records.append(record) - - SeqIO.write(replaced_records, f"{file_prefix}.renamed.ids.fa", "fasta") - - -def write_fasta_without_comments(fasta_file_path, file_prefix): - old_fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta") - - replaced_records = [] - for record in old_fasta_file_obj: - record.description = "" - replaced_records.append(record) - - SeqIO.write(replaced_records, f"{file_prefix}.renamed.ids.fa", "fasta") - - -def do_id_need_to_change(id): - if len(id) > 13 or not re.match(r"^[a-zA-Z0-9_]+$", id): - return True - - return False - - -def do_ids_need_to_change(ids): - return any([do_id_need_to_change(id) for id in ids]) - - -def extract_common_patterns(ids): - pattern_counts = {} - for id in ids: - patterns = re.findall(r"[A-Za-z0_]{4,}", id) - for pattern in set(patterns): - pattern_counts[pattern] = pattern_counts.get(pattern, 0) + 1 - - common_patterns = [ - pattern for pattern, count in pattern_counts.items() if count >= 2 - ] - - if len(common_patterns) < 1: - return {} - - return {pattern: pattern[:3] for pattern in common_patterns} - - -def shorten_ids(ids, patterns_dict): - shortened_ids = [] - - for id in 
ids: - if not do_id_need_to_change(id): - shortened_ids.append(id) - continue - - shortened_id = shorten_id_by_pattern_replacement(patterns_dict, id) - - if not do_id_need_to_change(shortened_id): - shortened_ids.append(shortened_id) - continue - - shortened_id = f"Ctg{generate_hash(id)}" - - if not do_id_need_to_change(shortened_id): - shortened_ids.append(shortened_id) - continue - - raise ValueError(f"Failed to shorten id: {id} ({shortened_id})") - - return shortened_ids - - -def shorten_id_by_pattern_replacement(patterns_dict, id): - if patterns_dict == {}: - return id - - shortened_id = id - matches_for_id = match_substrings(patterns_dict.keys(), shortened_id) - - for pattern in matches_for_id: - shortened_id = re.sub( - r"({})".format(re.escape(pattern)), - patterns_dict[pattern], - shortened_id, - ) - return ( - shortened_id - if shortened_id[len(shortened_id) - 1] != "_" - else shortened_id[0 : (len(shortened_id) - 1)] - ) - - -def match_substrings(substrings, target_string): - pattern = "|".join(map(re.escape, substrings)) - matches = re.findall(pattern, target_string) - return matches - - -def generate_hash(string): - import hashlib - - hash_object = hashlib.sha1(string.encode()) - full_hash = hash_object.hexdigest() - short_hash = full_hash[:10] - return short_hash - - -def fail_if_new_ids_not_valid(ids): - if len(ids) != len(set(ids)): - raise ValueError("Th new IDs are not unique") - - -if __name__ == "__main__": - input_ids = extract_fasta_ids(fasta_file_path) - - if not do_ids_need_to_change(input_ids): - print("IDs have acceptable length and character. No change required.") - - with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f: - f.write("IDs have acceptable length and character. 
No change required.") - - write_fasta_without_comments(fasta_file_path, output_files_prefix) - - exit(0) - - new_ids = shorten_ids(input_ids, extract_common_patterns(input_ids)) - fail_if_new_ids_not_valid(new_ids) - - with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f: - for input_id, new_id in zip(input_ids, new_ids): - f.write(f"{input_id}\t{new_id}\n") - - write_fasta_with_new_ids( - fasta_file_path, zip(input_ids, new_ids), output_files_prefix - ) diff --git a/bin/validate_seq_lists_1d50376.sh b/bin/validateseqlists.sh similarity index 100% rename from bin/validate_seq_lists_1d50376.sh rename to bin/validateseqlists.sh diff --git a/conf/base.config b/conf/base.config index 3daf70ce..c7f00271 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,115 +1,78 @@ -profiles { - local { - process { - executor = 'local' - } - } - - pfr { - process { - executor = 'slurm' - } - - apptainer { - envWhitelist= "APPTAINER_BINDPATH,APPTAINER_BIND" - } - } - - apptainer { - apptainer.enabled = true - apptainer.autoMounts= true - } - - docker { - docker.enabled = true - docker.runOptions = '--platform=linux/amd64' - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + plant-food-research-open/assemblyqc Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. 
+---------------------------------------------------------------------------------------- +*/ -// Source: https://github.com/nf-core/rnaseq -// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [140,143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' // Process-specific resource requirements // NOTE - Please try and re-use the labels below as much as possible. // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. 
- // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } - withLabel:process_two_days_long { - time = { check_max( 48.h * task.attempt, 'time' ) } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } } - withLabel:process_week_long { - time = { check_max( 7.days * task.attempt, 'time' ) } + withLabel:error_ignore { + errorStrategy = 'ignore' } - withLabel:process_high_memory { - memory = { check_max( 
200.GB * task.attempt, 'memory' ) } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 } - withLabel:process_very_high_memory { - memory = { check_max( 512.GB * task.attempt, 'memory' ) } + withName:NCBI_FCS_GX_SCREEN_SAMPLES { + time = { check_max( 20.h * task.attempt, 'time' ) } + memory = { check_max( 512.GB * task.attempt, 'memory' ) } } -} - -// Source: https://github.com/nf-core/rnaseq -// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" - return obj - } + withName:KRAKEN2 { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withName:BWA_MEM { + time = { check_max( 2.day * task.attempt, 'time' ) } + } + withName:SAMBLASTER { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withName:DNADIFF { + time = { check_max( 7.day * task.attempt, 'time' ) } + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false } } diff --git a/conf/modules.config b/conf/modules.config index 71928d1a..0b1ba155 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,62 +1,300 @@ -if(!params.lai.skip) { - process { - - withName: CUSTOM_SHORTENFASTAIDS { - publishDir = [ - path: { "${params.outdir}/lai" }, - mode: "copy", - pattern: '*.short.ids.tsv' - ] - } - - withName: EDTA_LTRHARVEST { - ext.prefix = { "${meta.id}_edta_ltrharvest" } - } - - withName: LTRFINDER { - ext.args = '-harvest_out -size 1000000 -time 300' - } - - withName: CAT_CAT { - ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" } - } - - withName: LTRRETRIEVER { - publishDir = [ - path: { "${params.outdir}/lai" }, - mode: "copy", - pattern: '*.LTRlib.fa' - ] - } - - withName: CUSTOM_RESTOREGFFIDS { - publishDir = [ - path: { "${params.outdir}/lai" }, - mode: "copy", - saveAs: { filename -> filename.equals("versions.yml") ? null : filename } - ] - } - - withName: LAI { - publishDir = [ - path: { "${params.outdir}/lai" }, - mode: "copy", - saveAs: { filename -> filename.equals("versions.yml") ? null : filename } - ] - } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + + withName: ASSEMBLATHON_STATS { + publishDir = [ + path: { "${params.outdir}/assemblathon_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: NCBI_FCS_ADAPTOR { + publishDir = [ + path: { "${params.outdir}/ncbi_fcs_adaptor" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: NCBI_FCS_GX_SCREEN_SAMPLES { + publishDir = [ + path: { "${params.outdir}/ncbi_fcs_gx" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: NCBI_FCS_GX_KRONA_PLOT { + publishDir = [ + path: { "${params.outdir}/ncbi_fcs_gx" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: BUSCO { + publishDir = [ + path: { "${params.outdir}/busco" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: BUSCO_PLOT { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: KRAKEN2 { + publishDir = [ + path: { "${params.outdir}/kraken2" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? 
null : filename } + ] + } + + withName: KRAKEN2_KRONA_PLOT { + publishDir = [ + path: { "${params.outdir}/kraken2" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: CIRCOS { + publishDir = [ + path: { "${params.outdir}/synteny/${target_on_ref_seq}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: '.*:GFF3_VALIDATE:GT_GFF3' { + ext.args = '-tidy -retainids' + } + + withName: GT_STAT { + ext.args = [ + '-genelengthdistri', + '-genescoredistri', + '-exonlengthdistri', + '-exonnumberdistri', + '-intronlengthdistri', + '-cdslengthdistri', + '-addintrons' + ].join(' ').trim() + + publishDir = [ + path: { "${params.outdir}/genometools_gt_stat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: FILTER_BY_LENGTH { + ext.args = params.tidk_filter_by_size ? "-m ${params.tidk_filter_size_bp}" : '' + ext.prefix = { "${meta.id}.filtered" } + } + + withName: SORT_BY_LENGTH { + ext.args = '--quiet --reverse --by-length' + ext.prefix = { "${meta.id}.sorted" } + } + + withName: TIDK_EXPLORE { + ext.args = '--minimum 5 --maximum 30' + publishDir = [ + path: { "${params.outdir}/tidk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: TIDK_SEARCH_APRIORI { + ext.prefix = { "${meta.id}.apriori" } + ext.args = '--extension tsv' + publishDir = [ + path: { "${params.outdir}/tidk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: TIDK_SEARCH_APOSTERIORI { + ext.prefix = { "${meta.id}.aposteriori" } + ext.args = '--extension tsv' + publishDir = [ + path: { "${params.outdir}/tidk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? 
null : filename } + ] + } + + withName: TIDK_PLOT_APRIORI { + ext.prefix = { "${meta.id}.apriori" } + publishDir = [ + path: { "${params.outdir}/tidk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: TIDK_PLOT_APOSTERIORI { + ext.prefix = { "${meta.id}.aposteriori" } + publishDir = [ + path: { "${params.outdir}/tidk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: CUSTOM_SHORTENFASTAIDS { + publishDir = [ + path: { "${params.outdir}/lai" }, + mode: params.publish_dir_mode, + pattern: '*.short.ids.tsv' + ] + } + + withName: LTRHARVEST { + ext.prefix = { "${meta.id}_ltrharvest" } + } + + withName: LTRFINDER { + ext.args = '-harvest_out -size 1000000 -time 300' + } + + withName: CAT_CAT { + ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" } + } + + withName: LTRRETRIEVER_LTRRETRIEVER { + publishDir = [ + path: { "${params.outdir}/lai" }, + mode: params.publish_dir_mode, + pattern: '*.LTRlib.fa' + ] + } + + withName: CUSTOM_RESTOREGFFIDS { + publishDir = [ + path: { "${params.outdir}/lai" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: LTRRETRIEVER_LAI { + publishDir = [ + path: { "${params.outdir}/lai" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: FASTQC_RAW { + publishDir = [ + path: { "${params.outdir}/hic/fastqc_raw" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: FASTQC_TRIM { + publishDir = [ + path: { "${params.outdir}/hic/fastqc_trim" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? 
null : filename } + ] + } + + withName: FASTP { + ext.args = params.hic_fastp_ext_args + publishDir = [ + path: { "${params.outdir}/hic/fastp" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: BWA_MEM { + ext.prefix = { "${meta.id}.on.${meta.ref_id}.bwa.mem" } + ext.args = '-5SP' + } + + withName: SAMBLASTER { + ext.prefix = { "${meta.id}.on.${meta.ref_id}.samblaster" } + ext.args3 = '-h -F 2316' + } + + withName: AGP2ASSEMBLY { + publishDir = [ + path: { "${params.outdir}/hic/assembly" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: ASSEMBLY2BEDPE { + publishDir = [ + path: { "${params.outdir}/hic/bedpe" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: HIC2HTML { + publishDir = [ + path: { "${params.outdir}/hic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: HICQC { + publishDir = [ + path: { "${params.outdir}/hic/hicqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: RUNASSEMBLYVISUALIZER { + publishDir = [ + path: { "${params.outdir}/hic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] } -} -if(!params.hic.skip){ - process { - withName: BWA_MEM { - ext.prefix = { "${meta.id}.on.${meta.ref_id}.bwa.mem" } - ext.args = '-5SP' - } - - withName: SAMBLASTER { - ext.prefix = { "${meta.id}.on.${meta.ref_id}.samblaster" } - ext.args3 = '-h -F 2316' - } + withName: CREATEREPORT { + publishDir = [ + path: { "$params.outdir" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? 
null : filename } + ] } } diff --git a/conf/reporting_defaults.config b/conf/reporting_defaults.config deleted file mode 100644 index 33542f0a..00000000 --- a/conf/reporting_defaults.config +++ /dev/null @@ -1,15 +0,0 @@ -// Source: https://github.com/nf-core/rnaseq -// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -timeline { - enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" -} -report { - enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" -} -trace { - enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" -} diff --git a/conf/test.config b/conf/test.config new file mode 100644 index 00000000..3ba5bef2 --- /dev/null +++ b/conf/test.config @@ -0,0 +1,23 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run plant-food-research-open/assemblyqc -profile test,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + input = 'https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/assets/assemblysheet.csv' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' +} diff --git a/conf/test_bacterial.config b/conf/test_bacterial.config deleted file mode 100644 index 558e53f2..00000000 --- a/conf/test_bacterial.config +++ /dev/null @@ -1,64 +0,0 @@ -params { - - // Vibrio parahaemolyticus: https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_000196095.1/ - target_assemblies = [ - [ - "v_parahaemolyticus", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/196/095/GCF_000196095.1_ASM19609v1/GCF_000196095.1_ASM19609v1_genomic.fna.gz" - ], - ] - - assembly_gff3 = [ - [ - "v_parahaemolyticus", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/196/095/GCF_000196095.1_ASM19609v1/GCF_000196095.1_ASM19609v1_genomic.gff.gz" - ], - ] - - assemblathon_stats { - n_limit = 100 - } - - ncbi_fcs_adaptor { - empire = 'prok' - } - - ncbi_fcs_gx { - tax_id = "670" - db_path = "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24" - } - - busco { - mode = "geno" - lineage_datasets = ["bacteria_odb10", "gammaproteobacteria_odb10", "vibrionales_odb10"] - download_path = "/workspace/ComparativeDataSources/BUSCO/assembly_qc" - } - - tidk { - skip = 1 - } - - lai { - monoploid_seqs = [] - } - - kraken2 { - db_path = "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314" - } - - hic { - skip = 1 - } - - synteny { - skip = 1 - } - - outdir = "./results" - - max_time = 6.hours -} - -apptainer { - cacheDir = "/workspace/assembly_qc/singularity" -} diff --git a/conf/test_full.config
b/conf/test_full.config index 9db94272..4c4b5151 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,76 +1,42 @@ -params { - - target_assemblies = [ - [ - "FI1", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz" - ], - ] - - assembly_gff3 = [ - [ - "FI1", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz" - ], - ] - - assemblathon_stats { - n_limit = 100 - } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. - ncbi_fcs_adaptor { - empire = 'euk' - } + Use as follows: + nextflow run plant-food-research-open/assemblyqc -profile test_full,<docker/singularity> --outdir <OUTDIR> - ncbi_fcs_gx { - tax_id = "35717" - db_path = "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24" - } +---------------------------------------------------------------------------------------- +*/ - busco { - mode = "geno" - lineage_datasets = ["fungi_odb10", "hypocreales_odb10"] - download_path = "/workspace/ComparativeDataSources/BUSCO/assembly_qc" - } - - tidk { - repeat_seq = "TTAGGG" - } +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' - lai { - monoploid_seqs = [ - ["FI1", "./docs/test_files/FI1.monoploid.seqs.txt"] - ] - } + input = 'https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/assets/assemblysheet.csv' - kraken2 { - db_path = "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314" - } + ncbi_fcs_adaptor_skip = false + ncbi_fcs_adaptor_empire = 'euk' - hic { - paired_reads = "SRR8238190" - } + ncbi_fcs_gx_skip = true // Skipping this step as the dataset
is humongous (500 GB). Please download the dataset manually + // ncbi_fcs_gx_tax_id = 35717 + // ncbi_fcs_gx_db_path = 'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/r2023-01-24' - synteny { + busco_skip = false + busco_mode = 'geno' + busco_lineage_datasets = 'fungi_odb10 hypocreales_odb10' - assembly_seq_list = [ - ["FI1", "./docs/test_files/FI1.seq.list"] - ] + tidk_skip = false + tidk_repeat_seq = 'TTTGGG' - xref_assemblies = [ - [ - "TT_2021a", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/950/295/GCA_021950295.1_ASM2195029v1/GCA_021950295.1_ASM2195029v1_genomic.fna.gz", - "./docs/test_files/TT_2021a.seq.list" - ], - ] - } + lai_skip = false - outdir = "./results" + kraken2_skip = true // Skipping this step as the dataset is humongous (126 GB). Please download the dataset manually + // kraken2_db_path = 'https://genome-idx.s3.amazonaws.com/kraken/k2_pluspfp_20240112.tar.gz' - max_time = 6.hours -} + hic = 'SRR8238190' -apptainer { - cacheDir =
"/workspace/assembly_qc/singularity" -} diff --git a/conf/test_transcriptome.config b/conf/test_transcriptome.config deleted file mode 100644 index bb8d1d61..00000000 --- a/conf/test_transcriptome.config +++ /dev/null @@ -1,55 +0,0 @@ -params { - - // Mesorhabditis belari (nematodes) transcriptome: https://www.ncbi.nlm.nih.gov/assembly/GCA_900631935.1 - target_assemblies = [ - [ - "m_belari", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/900/631/935/GCA_900631935.1_M.Belari_Hybrid_transcriptome/GCA_900631935.1_M.Belari_Hybrid_transcriptome_genomic.fna.gz" - ], - ] - - assembly_gff3 = [] - - ncbi_fcs_adaptor { - empire = 'euk' - } - - ncbi_fcs_gx { - tax_id = "2138241" - db_path = "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24" - } - - busco { - mode = "tran" - lineage_datasets = ["eukaryota_odb10", "metazoa_odb10", "nematoda_odb10"] - download_path = "/workspace/ComparativeDataSources/BUSCO/assembly_qc" - } - - tidk { - skip = 1 - } - - lai { - skip = 1 - } - - kraken2 { - db_path = "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314" - } - - hic { - skip = 1 - } - - synteny { - skip = 1 - } - - outdir = "./results" - - max_time = 6.hours -} - -apptainer { - cacheDir = "/workspace/assembly_qc/singularity" -} diff --git a/conf/test_viral.config b/conf/test_viral.config deleted file mode 100644 index b02d3759..00000000 --- a/conf/test_viral.config +++ /dev/null @@ -1,58 +0,0 @@ -params { - - // HIV1: https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_000864765.1 - target_assemblies = [ - [ - "hiv", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/864/765/GCF_000864765.1_ViralProj15476/GCF_000864765.1_ViralProj15476_genomic.fna.gz" - ], - ] - - assembly_gff3 = [ - [ - "hiv", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/864/765/GCF_000864765.1_ViralProj15476/GCF_000864765.1_ViralProj15476_genomic.gff.gz" - ], - ] - - ncbi_fcs_adaptor { - empire = 'prok' - } - - ncbi_fcs_gx { - tax_id = "11676" - db_path = 
"/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24" - } - - busco { - skip = 1 - } - - tidk { - skip = 1 - } - - lai { - skip = 1 - } - - kraken2 { - db_path = "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314" - } - - hic { - skip = 1 - } - - synteny { - skip = 1 - } - - outdir = "./results" - - max_time = 6.hours -} - -apptainer { - cacheDir = "/workspace/assembly_qc/singularity" -} diff --git a/docs/README.md b/docs/README.md index 25dbf8f0..e761e2d5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,14 +1,10 @@ -# AssemblyQC Tutorials +# plant-food-research-open/assemblyqc: Documentation -1. [Quick Start: A Minimal Example](./minimal_example.md) -2. [Configuring the Pipeline for a Complete Run](./full_run.md) -3. [Execution with Amazon Genomics CLI](./aws_run.md) +The plant-food-research-open/assemblyqc documentation is split into the following pages: -## Test Configurations - -The following test configurations are included with the pipeline. Before going through these configurations, - -- [Test Configuration for a Fungus](../conf/test_full.config) -- [Test Configuration for a Bacterium](../conf/test_bacterial.config) -- [Test Configuration for a Virus](../conf/test_viral.config) -- [Test Configuration for a Transcriptome of a Nematode](../conf/test_viral.config) +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it. +- [Parameters](parameters.md) + - A description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. diff --git a/docs/aws_run.md b/docs/aws_run.md deleted file mode 100644 index 53a96b44..00000000 --- a/docs/aws_run.md +++ /dev/null @@ -1,18 +0,0 @@ -# Execution with Amazon Genomics CLI - -The pipeline can be executed on AWS Batch using the Amazon Genomics CLI (AGC). 
Please first go through the [AGC examples](https://catalog.workshops.aws/agc-pipelines/en-US/02-running-pipelines/02-nextflow) before continuing this tutorial. - -An AGC project configured for the pipeline is included with the source code, [agc-project.yaml](../agc-project.yaml). An example parameters file with test data is also included, [test_agc.json](../test_params/test_agc.json). Some of the parameters in this file are configured to take data from a private bucket. These parameters must be redirected to a bucket accessible to you. These parameters are: - -- ncbi_fcs_gx::db_path -- kraken2::db_path -- outdir - -> [!WARNING] -> The location specified by `outdir` should be changed when the dataset is changed. Otherwise, the pipeline will overwrite the existing files in this directory. - -After creating a valid parameters file, replace the path of the existing parameters file with your parameters file path in the `inputFileURLs` in [MANIFEST.json](../MANIFEST.json). Next, the pipeline can be submitted to AWS for execution. - -```bash -agc workflow run PFR_ASSEMBLY_QC -c CtxAssemblyQC -v -``` diff --git a/docs/full_run.md b/docs/full_run.md deleted file mode 100644 index b7938a08..00000000 --- a/docs/full_run.md +++ /dev/null @@ -1,234 +0,0 @@ -# Configuring the Pipeline for a Complete Run - -This document explains how to correctly configure the pipeline for a complete run. For a minimal example which documents basic configuration along with pipeline execution, refer to [Quick Start: A Minimal Example](./minimal_example.md). 
- -- [Configuring the Pipeline for a Complete Run](#configuring-the-pipeline-for-a-complete-run) - - [Complete Example Configuration File](#complete-example-configuration-file) - - [ASSEMBLATHON STATS](#assemblathon-stats) - - [NCBI FCS Adaptor](#ncbi-fcs-adaptor) - - [NCBI FCS GX](#ncbi-fcs-gx) - - [BUSCO](#busco) - - [TIDK](#tidk) - - [LAI](#lai) - - [KRAKEN2](#kraken2) - - [HIC](#hic) - - [SYNTENY](#synteny) - -## Complete Example Configuration File - -This document explains the pipeline configuration using an example configuration file packaged with the pipeline. Refer to 'conf/test_full.config'. This configuration is an expansion of the 'conf/test_minimal.config' covered in [Quick Start: A Minimal Example](./minimal_example.md). - -## ASSEMBLATHON STATS - -There is only one configurable parameter for this module: `n_limit`. This is the number of 'N's for the unknown gap size. This number is used to split the scaffolds into contigs to compute contig-related stats. NCBI's recommendation for unknown gap size is 100 . - -> ⚙️ From conf/test_full.config - -```groovy -assemblathon_stats { - n_limit = 100 -} -``` - -## NCBI FCS Adaptor - -This module has only one parameter: `empire`. The permissible values are: `euk` for Eukaryotes and `prok` for Prokaryotes. - -> ⚙️ From conf/test_full.config - -```groovy -ncbi_fcs_adaptor { - empire = 'euk' -} -``` - -## NCBI FCS GX - -Following parameters must be configured: - -- `tax_id`: The taxonomy ID for all the target assemblies listed in the `target_assemblies` parameter. A taxonomy ID can be obtained by searching a _Genus species_ at . A single ID for all assemblies implies that the pipeline is designed to be used for checking one or more assemblies of the same _species_ in one run. -- `db_path`: This is the path to the database files stored on a directory accessible to the pipeline. 
Before running the pipeline, the user must ensure that the database is correctly downloaded and placed in a directory accessible to the pipeline. Setup instructions are available at . The database directory should contain following files: - -```bash -all.assemblies.tsv -all.blast_div.tsv.gz -all.gxi -all.gxs -all.manifest -all.meta.jsonl -all.README.txt -all.seq_info.tsv.gz -all.taxa.tsv -``` - -> ⚙️ From conf/test_full.config - -```groovy -ncbi_fcs_gx { - tax_id = "35717" - db_path = "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24" -} -``` - -## BUSCO - -Following parameters must be configured: - -- `mode`: geno or genome, for genome assemblies (DNA), tran or transcriptome, for transcriptome assemblies (DNA); and prot or proteins, for annotated gene sets (protein). -- `lineage_datasets`: A list of BUSCO lineages. Any number of lineages can be specified. Each target assembly is assessed against each of the listed lineage. To select a lineage, refer to -- `download_path`: A directory where the BUSCO can download and cache its databases. BUSCO manages download and validation of the databases itself, therefore, the user does not have to manually setup these databases. If this is set to `null`, NextFLow will download the database in the process work directory. - -> ⚙️ From conf/test_full.config - -```groovy -busco { - mode = "geno" - lineage_datasets = ["fungi_odb10", "hypocreales_odb10"] - download_path = "/workspace/ComparativeDataSources/BUSCO/assembly_qc" -} -``` - -## TIDK - -Following parameter must be configured: - -- `repeat_seq`: The telomere search sequence. To select an appropriate sequence, see . Commonly used sequences are: TTTAGGG (Plant), TTAGGG (Fungus, Vertebrates), TTAGG (Insect). - -The following parameters are optional: - -- `filter_by_size`: Set this flag to 1 to filter out assembly sequences smaller than the size specified by the next parameter (default: 0). 
-- `filter_size_bp`: Minimum size of the assembly sequence processed by TIDK (default: 1000000 (1Mbp)). - -> ⚙️ From conf/test_full.config - -```groovy -tidk { - repeat_seq = "TTAGGG" -} -``` - -In the example configuration above, the `filter_by_size` and `filter_size_bp` are not set. The pipeline will pick up their default values from 'nextflow.config' file. - -## LAI - -Following parameter must be configured: - -- `monoploid_seqs`: A list of lists which specifies the `-mono` parameter-file for LAI when processing a polyploid assembly. The `-mono` parameter-file is a single column text file listing IDs of the monoploid sequences for a polyploid assembly. If this parameter is not needed, it can be set to `[]`. If only some of the assemblies listed in `target_assemblies` are polyploid, the `-mono` parameter-file can be specified only for those assemblies. Similar to the `pass_list` parameter, an assembly is identified by its tag. Here are the contents of an example `-mono` parameter-file: - -```TSV -CP031385.1 -CP031386.1 -CP031387.1 -CP031388.1 -CP031389.1 -CP031390.1 -CP031391.1 -``` - -> ⚙️ From conf/test_full.config - -```groovy -lai { - monoploid_seqs = [ - ["FI1", "./docs/test_files/FI1.monoploid.seqs.txt"] - ] -} -``` - -## KRAKEN2 - -Following parameters must be configured: - -- `db_path`: Path to a directory or a `*.tar.gz` file containing the Kraken2 database. - -> ⚙️ From conf/test_full.config - -```groovy -kraken2 { - db_path = "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314" -} -``` - -## HIC - -Following parameter must be configured: - -- `paired_reads`: A relative or absolute path to paired reads in fastq.gz format, or a SRA ID. The format for file path is `*R{1,2}*.(fasta|fq).gz`. An example is '/input/genomic/fungal/Neonectria/Genome/20190506_CAGRF19591_CGYCF_HiC/PG_PETUNIA_HiC_CGYCF_CACTCA_L001_R{1,2}.fastq.gz'. 
- -> ⚙️ From conf/test_full.config - -```groovy -hic { - paired_reads = "SRR8238190" -} -``` - -## SYNTENY - -Following parameters must be configured: - -- `assembly_seq_list`: This is a list of lists which specifies the `*.seq.list` file for each assembly declared in the `target_assemblies` parameter. A `*.seq.list` file is a two column tab-delimited txt file listing fasta sequence ids (first column) and labels for the synteny plots (second column). An example file is shown below: - -```TSV -CP031385.1 FI1_1 -CP031386.1 FI1_2 -CP031387.1 FI1_3 -CP031388.1 FI1_4 -CP031389.1 FI1_5 -CP031390.1 FI1_6 -CP031391.1 FI1_7 -``` - -This parameter is specified as a list of lists. Each sub-list has two elements. The first element identifies the assembly `target_assemblies` using the assembly tag. The second element is the path to the `*.seq.list` file. Here is an example: - -```groovy -target_assemblies = [ - ["hap1", "/workspace/assembly_qc/test_data/default/test_data1.fasta.gz"], - ["hap2", "/workspace/assembly_qc/test_data/default/test_data2.fasta"] -] - -assembly_seq_list = [ - ["hap1", "/workspace/assembly_qc/test_data/default/test_data1.seq.list"], - ["hap2", "/workspace/assembly_qc/test_data/default/test_data2.seq.list"] -] -``` - -- `xref_assemblies`: A list of lists which specifies reference assemblies against which the synteny should be performed. This parameter can be set to an empty list `[]` if reference assemblies are not available. To specify a reference assembly, three items must be declared. First a unique tag for the reference assembly, second a fasta file (fasta, fasta.gz) for the assembly, and, third, a `*.seq.list` file. 
Here is an example: - -```groovy -xref_assemblies = [ - [ - "GenomeA", - "/workspace/assembly_qc/test_data/default/test_data3.fasta", - "/workspace/assembly_qc/test_data/default/test_data3.seq.list" - ] -] -``` - -The following parameters are optional: - -- `between_target_asm`: Set it to 1 to create syntenic plots between each pair of target_assemblies. Default is 1. This parameter is useful if multiple assemblies are specified by the `target_assemblies` parameter and the user needs control over whether syntenic plots are created between each pair of assemblies or not. -- `many_to_many_align`: Set it to 1 to include alignment blocks with many-to-many mappings or set to 0 to only include 1-to-1 mappings. Default is 0. See the documentation of `dnadiff` for further details: -- `max_gap`: Alignments within this distance are bundled together. Default: 1000000 (1 Mbp). -- `min_bundle_size`: After bundling, any bundle smaller than this size is filtered out. Default: 1000 (1 Kbp). -- `plot_1_vs_all`: Set it to 1 to create a separate synteny plot for each contig of the target assembly versus all contigs of the reference assembly. Set it to 0 to create a single plot for each target assembly against each reference assembly. This joint plot is also created when `plot_1_vs_all` is set to 1. Default: 0. -- `color_by_contig`: Set it to 1 to color the synteny plot by contig. Set it to 0 to color the synteny plot by the number of links in a bundle. Default: 1. 
- -> ⚙️ From conf/test_full.config - -```groovy -synteny { - - assembly_seq_list = [ - ["FI1", "./docs/test_files/FI1.seq.list"] - ] - - xref_assemblies = [ - [ - "TT_2021a", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/950/295/GCA_021950295.1_ASM2195029v1/GCA_021950295.1_ASM2195029v1_genomic.fna.gz", - "./docs/test_files/TT_2021a.seq.list" - ], - ] -} -``` diff --git a/docs/images/FI1.gt.stat.yml.gene.length.png b/docs/images/FI1.gt.stat.yml.gene.length.png new file mode 100644 index 00000000..6d693b9d Binary files /dev/null and b/docs/images/FI1.gt.stat.yml.gene.length.png differ diff --git a/docs/images/busco_figure.png b/docs/images/busco_figure.png new file mode 100644 index 00000000..e108de18 Binary files /dev/null and b/docs/images/busco_figure.png differ diff --git a/docs/images/hic_map.png b/docs/images/hic_map.png new file mode 100644 index 00000000..e33868cd Binary files /dev/null and b/docs/images/hic_map.png differ diff --git a/docs/images/kraken2.jpg b/docs/images/kraken2.jpg new file mode 100644 index 00000000..5cba553c Binary files /dev/null and b/docs/images/kraken2.jpg differ diff --git a/docs/images/synteny.png b/docs/images/synteny.png new file mode 100644 index 00000000..edd8bc4a Binary files /dev/null and b/docs/images/synteny.png differ diff --git a/docs/images/tidk.png b/docs/images/tidk.png new file mode 100644 index 00000000..ca84135f Binary files /dev/null and b/docs/images/tidk.png differ diff --git a/docs/minimal_example.md b/docs/minimal_example.md deleted file mode 100644 index 44ae7796..00000000 --- a/docs/minimal_example.md +++ /dev/null @@ -1,191 +0,0 @@ -# Quick Start: A Minimal Example - -- [Quick Start: A Minimal Example](#quick-start-a-minimal-example) - - [Step 0: System Prerequisites](#step-0-system-prerequisites) - - [Step 1: Setting up the Data](#step-1-setting-up-the-data) - - [Fasta Files](#fasta-files) - - [Gene Annotations (Optional)](#gene-annotations-optional) - - [Step 2: Skipping Optional 
Modules](#step-2-skipping-optional-modules) - - [Step 3: Setting Max. Resources](#step-3-setting-max-resources) - - [Step 4a: Setting the Apptainer Cache Directory](#step-4a-setting-the-apptainer-cache-directory) - - [Step 4b: Setting up Docker](#step-4b-setting-up-docker) - - [Example Minimal Config File](#example-minimal-config-file) - - [Step 5: Running the Pipeline](#step-5-running-the-pipeline) - - [Running on Plant\&Food Research Slurm](#running-on-plantfood-research-slurm) - - [Running on a Single Machine](#running-on-a-single-machine) - - [Running on Executors other than Slurm](#running-on-executors-other-than-slurm) - - [AssemblyQC Report](#assemblyqc-report) - -## Step 0: System Prerequisites - -1. A single computer with linux or a linux-based compute cluster. -2. NextFlow >= 22.04.3 -3. Apptainer or Docker - -## Step 1: Setting up the Data - -The pipeline can QC multiple assemblies in parallel. All these assemblies should be in fasta format. - -### Fasta Files - -The pipeline configuration is stored in the 'nextflow.config' file. In this file, add the fasta files (fasta, fasta.gz) to the `target_assemblies` variable under the `params` score. Here is an example: - -```groovy -target_assemblies = [ - ["assembly1", "./test_data/test_data1.fasta.gz"], - ["assembly2", "/output/genomes/test_genome/all_genomic.fsa"], - ["assembly3", "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz"] -] -``` - -Notice that `target_assemblies` is a list of lists. Each sub-list represents an assembly. Each sub-list must have two members. First, a unique tag that represents the assembly. This tag is used by the pipeline to identify this assembly across QC modules. This tag should only consist of alphanumeric characters (A-Za-z0-9\_). Second, the path to the fasta file (fasta, fasta.gz). This path can be a relative, absolute storage path or a valid publicly accessible URL. 
- -### Gene Annotations (Optional) - -If one or more of these assemblies have gene annotation files, these files should be in gff3 format (gff3, gff3.gz). These files are specified in the `assembly_gff3` parameter. The rules for specifying the gff3 files are same as those for the fasta files. Here is an example: - -```groovy -assembly_gff3 = [ - ["assembly2", "/output/genomes/test_genome/all_genes.gff3"] -] -``` - -Notice that only one of assemblies have annotation. This is a perfectly valid specification. If none of the assemblies has any annotation, the correct configuration is: - -```groovy -assembly_gff3 = [] -``` - -## Step 2: Skipping Optional Modules - -Some of the modules in the QC pipeline are optional. These modules can be skipped by setting their `skip` flag to `1`. These skip flags are found under the modules' configuration scopes under the `params` scope. If a module is skipped, all its remaining parameters are ignored by the pipeline. - -This minimal example sets all the skip flags to 1. - -## Step 3: Setting Max. Resources - -The resources needed for the various modules in the pipeline are dynamically allocated using resource-classes defined in the 'conf/base.config' file. Instead of tweaking these classes, the user can conveniently cap the maximum allowed resources by changing the `max_cpus`, `max_memory` and `max_time` variables in the 'nextflow.config' file. This example caps the maximum time to 1 hour as each module in this example can be executed within an hour. - -```groovy -max_time = 1.hour -``` - -> [!NOTE] -> Maximum values defined by `max_cpus`, `max_memory` and `max_time` apply to each process in the pipeline. The pipeline executes multiple processes in parallel. Therefore, the total execution time is not equal to the sum of time taken by each process. Rather, the total time is determined by adding up the time taken by processes which run one after the other. 
An estimate of the total time maybe needed if the pipeline is submitted to an executor such as Slurm. This topic is covered later in this document. - -## Step 4a: Setting the Apptainer Cache Directory - -> See [Step 4b](#step-4b-setting-up-docker) if you are using docker. - -The pipeline uses version controlled apptainer containers so that its results are reproducible across systems. These apptainer containers are automatically downloaded by the pipeline when it runs for the first time. The containers are then stored for later runs in the folder specified by the `cacheDir` parameter under the `apptainer` scope inside the 'nextflow.config' file. - -When downloading these containers, the pipeline can fail due to connection issues. In such a case, the pipeline should be resumed with the `-resume` flag. For more on the resume capability, see the NextFlow [documentation](https://www.nextflow.io/docs/latest/getstarted.html?highlight=resume#modify-and-resume). It may be a good idea to test run the pipeline with a small dataset so that it can download the necessary containers. Moreover, the `cacheDir` should not be changed afterwards. Otherwise, the pipeline will have to download the required containers again. - -## Step 4b: Setting up Docker - -If you are using docker, the docker daemon should be up and running. That's all. NextFlow will automatically handle container download and setup. - -## Example Minimal Config File - -An example minimal config file based on publicly accessible data is provided with the pipeline. See the 'conf/test_minimal.config' file in the project directory. 
Its contents are pasted here: - -```groovy -params { - target_assemblies = [ - [ - "FI1", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz" - ], - ] - - assembly_gff3 = [ - [ - "FI1", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz" - ], - ] - - ncbi_fcs_adaptor { skip = 1 } - ncbi_fcs_gx { skip = 1 } - busco { skip = 1 } - tidk { skip = 1 } - lai { skip = 1 } - kraken2 { skip = 1 } - hic { skip = 1 } - synteny { skip = 1 } - - outdir = "./results" - - max_time = 1.hour -} - -apptainer { - cacheDir = "/workspace/assembly_qc/singularity" -} -``` - -## Step 5: Running the Pipeline - -The next sections explain how to run the pipeline on Plant&Food Research Slurm cluster, a single machine or other executors. - -### Running on Plant&Food Research Slurm - -To submit the pipeline to Slurm for execution, first create a submission script with the following bash commands: - -```bash -cat << EOF > pfr_assemblyqc -#!/bin/bash -e - -#SBATCH --job-name ASM_QC -#SBATCH --time=01:00:00 -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=1 -#SBATCH --output pfr_assemblyqc.stdout -#SBATCH --error pfr_assemblyqc.stderr -#SBATCH --mem=4G - -ml unload perl -ml apptainer/1.1 -ml nextflow/23.04.4 - -export TMPDIR="/workspace/$USER/tmp" -export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp" - -nextflow main.nf -profile pfr -resume -c ./conf/test_minimal.config -EOF -``` - -The overall time is specified as 1 hour. This is the time for which the NextFlow process is allowed to run by Slurm. This is an estimate of the total execution time and is based on the assumption that the parallel execution of all the processes in this minimal example can be completed within 1 hour. - -Similarly, the script specifies the number of CPUs and memory required for running NextFLow. 
These resources are only for running NextFlow and not the individual modules. Therefore, 1 CPU with 4 GBs of memory is adequate. - -The next 4 lines starting with ml specify the environment modules used by the pipeline. These names of these modules are system dependent. Refer to your system manuals to find out the modules which satisfy the requirements listed in [Step 0: System Prerequisites](#step-0-system-prerequisites). - -The `export TMPDIR` directory specifies the location of the temporary directory. This is system specific and should be specified by referring to the system manuals. - -The last line executes the pipeline implemented in the `main.nf` file with profile slurm and `-resume` flag. - -After creating the slurm submission script, submit to slurm as follows: - -```bash -sbatch ./pfr_assemblyqc -``` - -### Running on a Single Machine - -To run the pipeline on a single machine, make sure that the maximum resources specified by `max_cpus` and `max_memory` variables in the 'nextflow.config' file are suitable for your machine. Moreover, the minimum software required [Step 0: System Prerequisites](#step-0-system-prerequisites) should be available on the machine. Finally, the pipeline can be executed with the following command. - -```bash -nextflow main.nf -profile local,docker -resume -c ./conf/test_minimal.config -``` - -Notice that the `-profile` parameter is now set to `local,docker` in the NextFlow execution command. - -### Running on Executors other than Slurm - -To execute the pipeline on a executor other than Slurm, you will first have to create a profile for the target executor. See the existing profiles in the 'conf/base.config' file. Detailed documentation is available in NextFlow [docs](https://www.nextflow.io/docs/latest/executor.html) and nf-core [docs](https://nf-co.re/docs/usage/tutorials/step_by_step_institutional_profile). 
- -## AssemblyQC Report - -Once the pipeline has finished execution, the results folder specified in the config file should contain a file named 'report.html'. The 'report.html' is a standalone file for all the modules except HiC and Kraken2. Thus, if you move the report to another folder, make sure to also move the 'hic' folder and the 'kraken2' folder with it. diff --git a/docs/output.md b/docs/output.md new file mode 100644 index 00000000..b215f5c6 --- /dev/null +++ b/docs/output.md @@ -0,0 +1,216 @@ +# plant-food-research-open/assemblyqc: Output + +## Introduction + +This document describes the output produced by the pipeline. Most of the plots are taken from the AssemblyQC report which summarises results at the end of the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + +## Pipeline overview + +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + + + +- [FASTA and GFF3 validation](#fasta-and-gff3-validation) +- [Assemblathon stats](#assemblathon-stats) +- [Genometools gt stat](#genometools-gt-stat) +- [NCBI FCS adaptor](#ncbi-fcs-adaptor) +- [NCBI FCS GX](#ncbi-fcs-gx) +- [BUSCO](#busco) +- [TIDK](#tidk) +- [LAI](#lai) +- [Kraken2](#kraken2) +- [HiC contact map](#hic-contact-map) +- [Synteny](#synteny) +- [Pipeline information](#pipeline-information) + +### FASTA and GFF3 validation + +The pipeline prints a warning in the pipeline log if FASTA or GFF3 validation fails. The error log from the validator is reported in the `report.html`. The remaining QC tools are skipped for the assembly with invalid fasta file. + +### Assemblathon stats + +
+Output files + +- `assemblathon_stats/` + - `*_stats.csv`: Assembly stats in CSV format. + +
+ +`assemblathon_stats.pl` is a script which calculates a basic set of metrics from a genome assembly. + +> [!WARNING] +> Contig-related stats are based on the assumption that `assemblathon_stats_n_limit` is specified correctly. If you are not certain of the value of `assemblathon_stats_n_limit`, please ignore the contig-related stats. + +### Genometools gt stat + +
+Output files + +- `genometools_gt_stat/` + - `*.gt.stat.yml`: Assembly annotation stats in yaml format. + +
+ +GenomeTools `gt stat` tool calculates a basic set of statistics about features contained in GFF3 files. + +
AssemblyQC - GenomeTools gt stat gene length distribution
AssemblyQC - GenomeTools gt stat gene length distribution
+ +### NCBI FCS adaptor + +
+Output files + +- `ncbi_fcs_adaptor/` + - `*_fcs_adaptor_report.tsv`: NCBI FCS adaptor report in TSV format. + +
+ +[FCS-adaptor detects](https://github.com/ncbi/fcs/wiki/FCS-adaptor#rules-for-action-assignment) adaptor and vector contamination in genome sequences. + +### NCBI FCS GX + +
+Output files + +- `ncbi_fcs_gx/` + - `*.taxonomy.rpt`: [Taxonomy report](https://github.com/ncbi/fcs/wiki/FCS-GX-taxonomy-report#taxonomy-report-output-). + - `*.fcs_gx_report.txt`: A final report of [recommended actions](https://github.com/ncbi/fcs/wiki/FCS-GX#outputs). + - `*.inter.tax.rpt.tsv`: [Select columns](../modules/local/ncbi_fcs_gx_krona_plot.nf) from `*.taxonomy.rpt` used for generation of a Krona taxonomy plot. + - `*.fcs.gx.krona.cut`: Taxonomy file for Krona plot [created](../modules/local/ncbi_fcs_gx_krona_plot.nf) from `*.inter.tax.rpt.tsv`. + - `*.fcs.gx.krona.html`: Interactive Krona taxonomy plot. + +
+ +[FCS-GX detects](https://github.com/ncbi/fcs/wiki/FCS-GX#outputs) contamination from foreign organisms in genome sequences. + +### BUSCO + +
+Output files + +- `busco/` + - `busco_figure.png`: Summary figure created from all the BUSCO summaries. + - `tag` + - `short_summary.specific.*_odb10.tag_*.txt`: BUSCO summary for the assembly represented by `tag`. + +
+ +[BUSCO estimates](https://busco.ezlab.org/busco_userguide.html) the completeness and redundancy of processed genomic data based on universal single-copy orthologs. + +
AssemblyQC - BUSCO summary plot
AssemblyQC - BUSCO summary plot
+ +### TIDK + +
+Output files + +- `tidk/` + - `*.apriori.tsv`: Frequencies for successive windows in forward and reverse directions for the pre-specified telomere-repeat sequence. + - `*.apriori.svg`: Plot of `*.apriori.tsv`. + - `*.tidk.explore.tsv`: List of the most frequent repeat sequences. + - `*.top.sequence.txt`: The top sequence from `*.tidk.explore.tsv`. + - `*.aposteriori.tsv`: Frequencies for successive windows in forward and reverse directions for the top sequence from `*.top.sequence.txt`. + - `*.aposteriori.svg`: Plot of `*.aposteriori.tsv`. + +
+ +TIDK toolkit is designed to [identify and visualize](https://github.com/tolkit/telomeric-identifier) telomeric repeats for the Darwin Tree of Life genomes. + +
AssemblyQC - TIDK plot
AssemblyQC - TIDK plot
+ +### LAI + +
+Output files + +- `lai/` + - `*.LAI.log`: Log file from LAI + - `*.LAI.out`: Output file from LAI which lists assembly index by contig and for the whole genome. + - `*.LTRlib.fa`: Long terminal repeat library generated by LTR_retriever. + - `*.restored.ids.gff3`: Long terminal repeat annotation generated by LTR_retriever. + - `*.short.ids.tsv`: LTR_retriever and LAI require that the assembly sequence IDs are alphanumeric and not more than 13 characters long. If needed, the pipeline shortens these IDs. The new and original IDs are listed in this TSV file. + +
+ +LTR Assembly Index (LAI) is a reference-free genome metric that [evaluates assembly continuity](https://doi.org/10.1093/nar/gky730) using LTR-RTs. LTR retrotransposons (LTR-RTs) are the predominant interspersed repeat that is poorly assembled in draft genomes. Correcting for LTR-RT amplification dynamics, LAI is independent of genome size, genomic LTR-RT content, and gene space evaluation metrics such as BUSCO. LAI = Raw LAI + 2.8138 × (94 – whole genome LTR identity). The LAI is set to 0 when raw LAI = 0 or the adjustment produces a negative value. Raw LAI = (Intact LTR element length / Total LTR sequence length) \* 100 + +### Kraken2 + +
+Output files + +- `kraken2/` + - `*.kraken2.report`: [Kraken2 report](https://github.com/DerrickWood/kraken2/wiki/Manual#output-formats). + - `*.kraken2.cut`: [Kraken2 output](https://github.com/DerrickWood/kraken2/wiki/Manual#output-formats). + - `*.kraken2.krona.cut`: [Select columns](../modules/local/kraken2_krona_plot.nf) from `*.kraken2.cut` used for generation of a Krona taxonomy plot. + - `*.kraken2.krona.html`: Interactive Krona taxonomy plot. + +
+ +Kraken2 [assigns taxonomic labels](https://ccb.jhu.edu/software/kraken2/) to sequencing reads for metagenomics projects. Further reading regarding performance of Kraken2: + +
AssemblyQC - Interactive Krona plot from Kraken2 taxonomy
AssemblyQC - Interactive Krona plot from Kraken2 taxonomy
+ +### HiC contact map + +
+Output files + +- `hic/` + - `fastqc_raw/` + - `*_1_fastqc.html/*_2_fastqc.html`: FASTQC html report for the raw reads + - `*_1_fastqc.zip/*_2_fastqc.zip`: FASTQC stats for the raw reads + - `fastp/` + - `*.fastp.html`: FASTP HTML report + - `*.fastp.json`: FASTP statistics in JSON format + - `*.fastp.log`: FASTP log + - `*_1.fastp.fastq.gz/*_2.fastp.fastq.gz`: Reads passed by FASTP + - `*_1.fail.fastq.gz/*_2.fail.fastq.gz`: Reads failed by FASTP + - `fastqc_trim/` + - `*_1_fastqc.html/*_2_fastqc.html`: FASTQC html report for the reads passed by FASTP + - `*_1_fastqc.zip/*_2_fastqc.zip`: FASTQC stats for the reads passed by FASTP + - `hicqc` + - `*.on.*_qc_report.pdf`: HiC QC report for reads mapped to an assembly. + - `assembly/` + - `*.agp.assembly`: AGP assembly file listing the length of each contig in the assembly + - `bedpe/` - `*.assembly.bedpe`: `*.agp.assembly` file converted to BEDPE to highlight the contigs on the HiC contact map. +
+ +Hi-C contact mapping experiments measure the frequency of physical contact between loci in the genome. The resulting dataset, called a “contact map,” is represented using a [two-dimensional heatmap](https://github.com/igvteam/juicebox.js) where the intensity of each pixel indicates the frequency of contact between a pair of loci. + +
AssemblyQC - HiC interactive contact map
AssemblyQC - HiC interactive contact map
+ +### Synteny + +
+Output files + +- `synteny/` + - `*.*.all/`: Synteny files corresponding to all contigs of the target assembly with respect to all contigs of the reference assembly. + - `*.on.*.all.png/svg`: Synteny plot generated with CIRCOS. + - `bundled.links.tsv`: Bundled links file generated with MUMMER and `dnadiff.pl`. + - `circos.conf`: CIRCOS configuration file used to generate the synteny plot. + - `karyotype.tsv`: Karyotype TSV file used to generate the synteny plot. + - `*.on.*.*`: Synteny files corresponding to a single contig of the target assembly with respect to all contigs of the reference assembly. +
+ +Synteny plots are created with Circos which is a tool [facilitating](https://circos.ca) the identification and analysis of similarities and differences arising from comparisons of genomes. The genome-wide alignments are performed with [MUMMER](https://github.com/mummer4/mummer?tab=readme-ov-file) and bundled with [`dnadiff.pl`](https://github.com/mummer4/mummer/blob/master/scripts/dnadiff.pl). + +
AssemblyQC - Synteny plot
AssemblyQC - Synteny plot
+ +### Pipeline information + +
+Output files + +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.html`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. + - Parameters used by the pipeline run: `params.json`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/parameters.md b/docs/parameters.md new file mode 100644 index 00000000..5be39404 --- /dev/null +++ b/docs/parameters.md @@ -0,0 +1,121 @@ +# plant-food-research-open/assemblyqc pipeline parameters + +A NextFlow pipeline which evaluates assembly quality with multiple QC tools and presents the results in a unified html report. + +## Input/output options + +| Parameter | Description | Type | Default | Required | Hidden | +| --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | --------- | -------- | ------ | +| `input` | Input assembly sheet in CSV format
HelpFASTA and other associated files for input assemblies provided as a formatted CSV file
| +| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` | ./results | True | | +| `email` | Email address for completion summary.
HelpSet this parameter to your e-mail address to get a summary e-mail with details of the run sent to you | + +## General stats options + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- | ------- | -------- | ------ | +| `assemblathon_stats_n_limit` | The number of 'N's for the unknown gap size
HelpThis number is used to split the scaffolds into contigs to compute contig-r | + +## NCBI FCS options + +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `ncbi_fcs_adaptor_skip` | Skip NCBI FCS Adaptor checking | `boolean` | True | | | +| `ncbi_fcs_adaptor_empire` | Empire for NCBI FCS Adaptor checking
Help'euk' for eukaryotes, or 'prok' for prokaryotes
| `string` | | | +| `ncbi_fcs_gx_skip` | Skip NCBI FCS external organism contamination checking | `boolean` | True | | | +| `ncbi_fcs_gx_tax_id` | Tax ID for NCBI FCS GX
HelpGet correct tax ID from https://www.ncbi.nlm.nih.gov/taxonomy
| `number` | | | | +| `ncbi_fcs_gx_db_path` | Path to NCBI FCS GX database
HelpNCBI FCS GX DB path

Due to enormity of the DB size, the pipeline does NOT download the dat | + +## BUSCO options + +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------- | ------- | -------- | ------ | +| `busco_skip` | Skip BUSCO | `boolean` | True | | | +| `busco_mode` | BUSCO mode
Help'geno' or 'genome' for genome assemblies (DNA), 'tran' or 'transcriptome' for transcriptome assemblies (DNA), 'prot' or 'pro | +| `busco_lineage_datasets` | BUSCO lineages
HelpEach input assembly is assessed against each lineage. It should be provided as a space-separated list of lin | +| `busco_download_path` | Download path for BUSCO
HelpBUSCO DB download path

The pipeline automatically downloads the required DB if needed
HelpPlant: TTTAGGG, Fungus, Vertebrates: TTAGGG, Insect: TTAGG
| `string` | | | | +| `tidk_filter_by_size` | Filter size in base-pairs | `boolean` | | | | +| `tidk_filter_size_bp` | Filter size in base-pairs | `integer` | 1000000 | | | + +## LAI options + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------- | ------------------- | --------- | ------- | -------- | ------ | +| `lai_skip` | Skip LAI estimation | `boolean` | True | | | + +## Kraken2 options + +| Parameter | Description | Type | Default | Required | Hidden | +| ----------------- | --------------------- | --------- | ------- | -------- | ------ | +| `kraken2_skip` | Skip Kraken2 | `boolean` | True | | | +| `kraken2_db_path` | Kraken2 database path | `string` | | | | + +## HiC options + +| Parameter | Description | Type | Default | Required | Hidden | +| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------- | ------------------------------------------------- | -------- | ------ | +| `hic` | HiC reads
HelpPath to reads provided as a SRA ID or as a path to paired reads with pattern '\*{1,2}.(fastq | fq).gz'
| `string` | | +| `hic_skip_fastp` | Skip HiC read trimming | `boolean` | | | | +| `hic_skip_fastqc` | Skip HiC read QC | `boolean` | | | | +| `hic_fastp_ext_args` | Additional parameters for fastp trimming | `string` | --qualified_quality_phred 20 --length_required 50 | | | + +## Synteny options + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `synteny_skip` | Skip synteny analysis | `boolean` | True | | | +| `synteny_between_input_assemblies` | Create syntenic plots between each pair of input assemblies | `boolean` | True | | | +| `synteny_many_to_many_align` | Include alignment blocks with many-to-many mappings (dnadiff .mcoords file) | `boolean` | | | | +| `synteny_max_gap` | Alignments within this distance are bundled together | `integer` | 1000000 | | | +| `synteny_min_bundle_size` | After bundling, any bundle smaller than this size is filtered out | `integer` | 1000 | | | +| `synteny_plot_1_vs_all` | Create a separate synteny plot for each contig of the target assembly versus all contigs of the reference assembly | `boolean` | True | | | +| `synteny_color_by_contig` | Synteny plot is colored by contig | `boolean` | True | | | +| `synteny_xref_assemblies` | Reference assemblies for synteny analysis
HelpFASTA and synteny label tsv files should be provided in a formatted CSV file HelpUse to set an upper-limit for the CPU requirement for each process. Shou | +| `max_memory` | Maximum amount of memory that can be requested for any single job.
HelpUse to set an upper-limit for the memory requirement for each proces | +| `max_time` | Maximum amount of time that can be requested for any single job.
HelpUse to set an upper-limit for the time requirement for each process. Sho | + +## Institutional config options + +Parameters used to describe centralised config profiles. These should not be edited. + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | -------- | ------ | +| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | +| `custom_config_base` | Base directory for Institutional configs.
HelpIf you're running offline, Nextflow will not be able to fetch the institutional confi | +| `config_profile_name` | Institutional config name. | `string` | | | True | +| `config_profile_description` | Institutional config description. | `string` | | | True | +| `config_profile_contact` | Institutional config contact information. | `string` | | | True | +| `config_profile_url` | Institutional config URL link. | `string` | | | True | + +## Generic options + +Less common options for the pipeline, typically set in a config file. + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `help` | Display help text. | `boolean` | | | True | +| `version` | Display version and exit. | `boolean` | | | True | +| `publish_dir_mode` | Method used to save pipeline results to output directory.
HelpThe Nextflow `publishDir` option specifies which intermediate files sho | +| `email_on_fail` | Email address for completion summary, only when pipeline fails.
HelpAn email address to send a summary email to when the pipeline is com | +| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | +| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | +| `hook_url` | Incoming hook URL for messaging service
HelpIncoming hook URL for messaging service. Currently, MS Teams and Slack are supported.HelpBy default, parameters set as _hidden_ in the schema are not shown on the command l | +| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found.
HelpBy default, when an unrecognised paramete | +| `validationLenientMode` | Validation of parameters in lenient mode.
HelpAllows string values that are parseable as numbers or booleans. For further inform | diff --git a/docs/test_params/test_agc.json b/docs/test_params/test_agc.json deleted file mode 100644 index 451a82ad..00000000 --- a/docs/test_params/test_agc.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "target_assemblies": [ - [ - "FI1", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz" - ] - ], - "assembly_gff3": [ - [ - "FI1", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz" - ] - ], - "assemblathon_stats": { - "n_limit": 100 - }, - "ncbi_fcs_adaptor": { - "empire": "euk" - }, - "ncbi_fcs_gx": { - "tax_id": "35717", - "db_path": "s3://agc-246592587427-ap-southeast-2/ComparativeDataSources/ncbi/fcs/gxdb/r2023-01-24" - }, - "busco": { - "mode": "geno", - "lineage_datasets": ["fungi_odb10", "hypocreales_odb10"], - "download_path": null - }, - "tidk": { - "repeat_seq": "TTAGGG" - }, - "lai": { - "monoploid_seqs": [ - [ - "FI1", - "https://raw.githubusercontent.com/Plant-Food-Research-Open/assembly_qc/main/docs/test_files/FI1.monoploid.seqs.txt" - ] - ] - }, - "kraken2": { - "db_path": "s3://agc-246592587427-ap-southeast-2/ComparativeDataSources/kraken2db/k2_pluspfp_20231009" - }, - "hic": { - "paired_reads": "SRR8238190" - }, - "synteny": { - "assembly_seq_list": [ - [ - "FI1", - "https://raw.githubusercontent.com/Plant-Food-Research-Open/assembly_qc/main/docs/test_files/FI1.seq.list" - ] - ], - "xref_assemblies": [ - [ - "TT_2021a", - "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/950/295/GCA_021950295.1_ASM2195029v1/GCA_021950295.1_ASM2195029v1_genomic.fna.gz", - "https://raw.githubusercontent.com/Plant-Food-Research-Open/assembly_qc/main/docs/test_files/TT_2021a.seq.list" - ] - ] - }, - "outdir": "s3://agc-246592587427-ap-southeast-2/output/assembly_qc/tests/test-full-results", - "max_cpus": 96, - "max_memory": 
"512.GB", - "max_time": "6.hours" -} diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..1f2bd37c --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,247 @@ +# plant-food-research-open/assemblyqc: Usage + +## Assemblysheet input + +You will need to create an assemblysheet with information about the assemblies you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row. An [example assemblysheet](../assets/assemblysheet.csv) has been provided with the pipeline. Its fields are: + +- `tag:` A unique tag which represents the target assembly throughout the pipeline and in the final report +- `fasta:` FASTA file +- `gff3 [Optional]:` GFF3 annotation file if available +- `monoploid_ids [Optional]:` A txt file listing the IDs used to calculate LAI in monoploid mode if necessary +- `synteny_labels [Optional]:` A two column tsv file listing fasta sequence ids (first column) and their labels for the synteny plots (second column) when performing synteny analysis + +## External databases + +### NCBI FCS GX database + +If NCBI FCS GX foreign organism contamination check is executed by setting `ncbi_fcs_gx_skip` to `false`, the path to the GX database must be provided with option `ncbi_fcs_gx_db_path`. The user must ensure that the database is correctly downloaded and placed in a location accessible to the pipeline. Setup instructions are available at . The database path must contain following files: + +```bash +all.assemblies.tsv +all.blast_div.tsv.gz +all.gxi +all.gxs +all.manifest +all.meta.jsonl +all.README.txt +all.seq_info.tsv.gz +all.taxa.tsv +``` + +### Kraken2 + +Path to Kraken2 database is provided by the `kraken2_db_path` parameter. This can be a URL to a public `.tar.gz` file such as `https://genome-idx.s3.amazonaws.com/kraken/k2_pluspfp_20240112.tar.gz`. The pipeline can download and extract the database. 
This is not the recommended practice owing to the size of the database. Rather, the database should be downloaded, extracted and stored in a read-only location. The path to that location can be provided by the `kraken2_db_path` parameter such as `/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314`. + +### BUSCO + +BUSCO lineage databases are downloaded and updated by the BUSCO tool itself. A persistent location for the database can be provided by specifying `busco_download_path` parameter. + +## Other parameters + +### Assemblathon stats + +`assemblathon_stats_n_limit` is the number of 'N's for the unknown gap size. This number is used to split the scaffolds into contigs to compute contig-related stats. NCBI's recommendation for unknown gap size is 100 . + +### NCBI FCS adaptor + +`ncbi_fcs_adaptor_empire` should be set to `euk` for Eukaryotes and `prok` for Prokaryotes. + +### NCBI FCS GX + +`ncbi_fcs_gx_tax_id` is the taxonomy ID for all the assemblies listed in the assemblysheet. A taxonomy ID can be obtained by searching a _Genus species_ at . + +### BUSCO + +- `busco_mode`: `geno` or `genome` for genome assemblies (DNA), `tran` or `transcriptome` for transcriptome assemblies (DNA), and `prot` or `proteins` for annotated gene sets (protein). +- `busco_lineage_datasets`: A space-separated list of BUSCO lineages. Any number of lineages can be specified such as "fungi_odb10 hypocreales_odb10". Each assembly is assessed against each of the listed lineage. To select a lineage, refer to . + +### TIDK + +- `tidk_repeat_seq`: The telomere search sequence. To select an appropriate sequence, see . Commonly used sequences are TTTAGGG (Plant), TTAGGG (Fungus, Vertebrates) and TTAGG (Insect). Further reading: +- `tidk_filter_by_size`: Set this flag to `true` to filter out assembly sequences smaller than the size specified by the next parameter (default: `false`). 
+- `tidk_filter_size_bp`: Minimum size of the assembly sequence processed by TIDK (default: 1000000 (1Mbp)). + +### HiC + +- `hic`: Path to reads provided as a SRA ID or as a path to paired reads with pattern '\*{1,2}.(fastq|fq).gz' +- `hic_skip_fastp`: Skip fastp trimming +- `hic_skip_fastqc`: Skip QC by fastqc +- `hic_fastp_ext_args`: Additional arguments for fastp (default: '--qualified_quality_phred 20 --length_required 50') + +### Synteny analysis + +- `synteny_between_input_assemblies`: Set it to `true` to create synteny plots between each pair of input assemblies. Default is `true`. +- `synteny_many_to_many_align`: Set it to `true` to include alignment blocks with many-to-many mappings or set to `false` to only include 1-to-1 mappings. Default is `false`. See the documentation of `dnadiff` for further details: +- `synteny_max_gap`: Alignments within this distance are bundled together. Default: 1000000 (1 Mbp). +- `synteny_min_bundle_size`: After bundling, any bundle smaller than this size is filtered out. Default: 1000 (1 Kbp) +- `synteny_plot_1_vs_all`: Set it to `true` to create a separate synteny plot for each contig of the target assembly versus all contigs of the reference assembly. Set it to `false` to create a single plot for each target assembly against each reference assembly. This joint plot is also created when `synteny_plot_1_vs_all` is set to `true`. Default: `true` +- `synteny_color_by_contig`: Set it to `true` to color the synteny plot by contig. Set it to `false` to color the synteny plot by the number of links in a bundle. Default: `true` +- `synteny_xref_assemblies`: Similar to `--input`, this parameter also provides a CSV sheet listing external reference assemblies which are included in the synteny analysis but are not analysed by other QC tools. See the [example xrefsheet](../assets/xrefsheet.csv) included with the pipeline. 
Its fields are: + - `tag:` A unique tag which represents the reference assembly in the final report + - `fasta:` FASTA file + - `synteny_labels:` A two column tsv file listing fasta sequence ids (first column) and their labels for the synteny plots (second column) + +## Running the pipeline + +The typical command for running the pipeline is as follows: + +```bash +nextflow run plant-food-research-open/assemblyqc --input ./assemblysheet.csv --outdir ./results -profile docker +``` + +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. + +Note that the pipeline will create the following files in your working directory: + +```bash +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. +``` + +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +> [!WARNING] +> Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run plant-food-research-open/assemblyqc -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: "./assemblysheet.csv" +outdir: "./results/" +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). 
+ +### Updating the pipeline + +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: + +```bash +nextflow pull plant-food-research-open/assemblyqc +``` + +### Reproducibility + +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. + +First, go to the [plant-food-research-open/assemblyqc releases page](https://github.com/plant-food-research-open/assemblyqc/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. + +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +> [!TIP] +> If you wish to share such a parameter file (such as uploading it as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. 
+ +## Core Nextflow arguments + +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). + +### `-profile` + +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. + +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. + +> [!TIP] +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. + +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). + +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. + +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. 
+ +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. + +### `-resume` + +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). + +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. + +### `-c` + +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. 
+ +## Custom configuration + +### Resource requests + +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. + +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. + +### Custom Containers + +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version may be out of date. + +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. + +### Custom Tool Arguments + +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. 
+ +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. + +### nf-core/configs + +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. + +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). + +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). 
+ +## Running in the background + +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. + +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. + +Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted by your job scheduler (from where it submits more jobs). + +## Nextflow memory requirements + +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`): + +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy new file mode 100755 index 00000000..4ce590c0 --- /dev/null +++ b/lib/NfcoreTemplate.groovy @@ -0,0 +1,351 @@ +// +// This file holds several functions used within the nf-core pipeline template. +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +class NfcoreTemplate { + + // + // Check AWS Batch related parameters have been specified correctly + // + public static void awsBatch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + // Check params.awsqueue and params.awsregion have been set if running on AWSBatch + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" 
+ } + } + + // + // Warn if a -profile or Nextflow config has not been provided to run the pipeline + // + public static void checkConfigProvided(workflow, log) { + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + } + } + + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + + // + // Construct and send completion email + // + public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = NfcoreTemplate.version(workflow) + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = 
summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(params.monochrome_logs) + if (email_address) { + try { + if (params.plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Reached when sendmail fails or plaintext_email is set: fall back to the mail command + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); +
output_tf.delete() + } + + // + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack + // + public static void IM_notification(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = NfcoreTemplate.version(workflow) + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST the rendered JSON to the webhook; any response code other than 200 is logged as a warning + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + + // + // Dump pipeline parameters to a timestamped JSON file under <outdir>/pipeline_info + // + public static void dump_parameters(workflow, params) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() + } + + // + // Print pipeline summary on completion + // + public static void summary(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } + + // + // ANSI colour escape codes used for terminal logging (every entry is '' when monochrome_logs is set) + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? 
'' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? 
'' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes + } + + // + // Does what it says on the tin: returns a dim dashed separator line for log output + // + public static String dashedLine(monochrome_logs) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + // + // Pipeline banner: workflow name and version framed by two dashed lines + // + public static String logo(workflow, monochrome_logs) { + Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy new file mode 100644 index 00000000..8d030f4e --- /dev/null +++ b/lib/Utils.groovy @@ -0,0 +1,47 @@ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] 
+ try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + } +} diff --git a/lib/WorkflowAssemblyqc.groovy b/lib/WorkflowAssemblyqc.groovy new file mode 100755 index 00000000..5ec2cc32 --- /dev/null +++ b/lib/WorkflowAssemblyqc.groovy @@ -0,0 +1,100 @@ +// +// This file holds several functions specific to the workflow/assemblyqc.nf in the plant-food-research-open/assemblyqc pipeline +// + +import nextflow.Nextflow +import groovy.text.SimpleTemplateEngine +import groovy.json.JsonOutput + +class WorkflowAssemblyqc { + + // + // Check and validate parameters + // + public static void initialise(params, log) { + // Check for 
ncbi_fcs_adaptor_empire + if (!params.ncbi_fcs_adaptor_skip && !params.ncbi_fcs_adaptor_empire) { + Nextflow.error('ncbi_fcs_adaptor_empire must be provided when executing NCBI FCS Adaptor') + } + + // Check for ncbi_fcs_gx_tax_id + if (!params.ncbi_fcs_gx_skip && !params.ncbi_fcs_gx_tax_id) { + Nextflow.error('ncbi_fcs_gx_tax_id must be provided when executing NCBI FCS GX') + } + + // Check for ncbi_fcs_gx_db_path + if (!params.ncbi_fcs_gx_skip && !params.ncbi_fcs_gx_db_path) { + Nextflow.error('ncbi_fcs_gx_db_path must be provided when executing NCBI FCS GX') + } + + // Check for busco_mode + if (!params.busco_skip && !params.busco_mode) { + Nextflow.error("busco_mode must be provided when executing BUSCO") + } + + // Check for busco_lineage_datasets + if (!params.busco_skip && !params.busco_lineage_datasets) { + Nextflow.error('busco_lineage_datasets must be provided when executing BUSCO') + } + + // Check for tidk_repeat_seq + if (!params.tidk_skip && !params.tidk_repeat_seq) { + Nextflow.error('tidk_repeat_seq must be provided when executing TIDK') + } + + // Check for kraken2_db_path + if (!params.kraken2_skip && !params.kraken2_db_path) { + Nextflow.error('kraken2_db_path must be provided when executing Kraken2') + } + } + + public static ArrayList validateInput(input) { + def inputFields = 5 + def assemblyTags = input[(0..input.size()-1).step(inputFields)] + + def tagCounts = [:] + assemblyTags.each { tag -> + tagCounts[tag] = tagCounts.containsKey(tag) ? 
tagCounts[tag] + 1 : 1 + } + def repeatedTags = tagCounts.findAll { key, count -> count > 1 }.collect { key, count -> key } + + if (repeatedTags.size() > 0) { + Nextflow.error("Please check input assemblysheet -> Multiple assemblies have the same tags!: ${repeatedTags}") + } + + return input + } + + public static ArrayList validateXrefAssemblies(xref) { + def xrefFields = 3 + def xrefTags = xref[(0..xref.size()-1).step(xrefFields)] + + def tagCounts = [:] + xrefTags.each { tag -> + tagCounts[tag] = tagCounts.containsKey(tag) ? tagCounts[tag] + 1 : 1 + } + def repeatedTags = tagCounts.findAll { key, count -> count > 1 }.collect { key, count -> key } + + if (repeatedTags.size() > 0) { + Nextflow.error("Please check synteny_xref_assemblies -> Multiple xref assemblies have the same tags!: ${repeatedTags}") + } + + return xref + } + + public static String jsonifyParams(params) { + return JsonOutput.toJson(params).toString() + } + + public static String jsonifySummaryParams(params) { + + def summary = [:] + for (group in params.keySet()) { + for (subgroup in params[group].keySet()) { + if ( params[group][subgroup] ) { summary << [ "$subgroup": "${params[group][subgroup]}" ] } + } + } + + return JsonOutput.toJson(summary).toString() + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy new file mode 100755 index 00000000..abfc7acf --- /dev/null +++ b/lib/WorkflowMain.groovy @@ -0,0 +1,57 @@ +// +// This file holds several functions specific to the main.nf workflow in the plant-food-research-open/assemblyqc pipeline +// + +import nextflow.Nextflow + +class WorkflowMain { + + // + // Citation string for pipeline + // + public static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " https://doi.org/10.5281/zenodo.10647870\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " 
https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + + + // + // Validate parameters and print summary to screen + // + public static void initialise(workflow, params, log) { + + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) + } + + // Check that a -profile or Nextflow config has been provided to run the pipeline + NfcoreTemplate.checkConfigProvided(workflow, log) + + // Check that conda channels are set-up correctly + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + Utils.checkCondaChannels(log) + } + + // Check AWS batch settings + NfcoreTemplate.awsBatch(workflow, params) + } + // + // Get attribute from genome config file e.g. fasta + // + public static Object getGenomeAttribute(params, attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null + } +} diff --git a/main.nf b/main.nf index 6d7dc34c..34f2ebb3 100755 --- a/main.nf +++ b/main.nf @@ -1,9 +1,69 @@ #!/usr/bin/env nextflow +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + plant-food-research-open/assemblyqc +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/plant-food-research-open/assemblyqc +---------------------------------------------------------------------------------------- +*/ -nextflow.enable.dsl=2 +nextflow.enable.dsl = 2 -include { ASSEMBLY_QC } from './workflows/assembly_qc.nf' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE & PRINT PARAMETER SUMMARY 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input assemblysheet.csv --outdir ./results -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +WorkflowMain.initialise(workflow, params, log) + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOW FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { ASSEMBLYQC } from './workflows/assemblyqc' + +// +// WORKFLOW: Run main plant-food-research-open/assemblyqc analysis pipeline +// +workflow PLANTFOODRESEARCHOPEN_ASSEMBLYQC { + ASSEMBLYQC () +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN ALL WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// workflow { - ASSEMBLY_QC() + PLANTFOODRESEARCHOPEN_ASSEMBLYQC () } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json index da4cff8c..a25581b4 100644 --- a/modules.json +++ b/modules.json @@ -1,25 +1,30 @@ { - "name": "", - "homePage": "", + "name": "plant-food-research-open/assemblyqc", + "homePage": 
"https://github.com/plant-food-research-open/assemblyqc", "repos": { "git@github.com:PlantandFoodResearch/nxf-modules.git": { "modules": { "pfr": { "bwa/index": { "branch": "main", - "git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fastq_bwa_mem_samblaster"] }, "bwa/mem": { "branch": "main", - "git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fastq_bwa_mem_samblaster"] }, "cat/cat": { "branch": "main", - "git_sha": "a2c6aa54917a232ac543d88edc5d3c3d8bf71ef3", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fasta_ltrretriever_lai"] }, + "custom/checkgff3fastacorrespondence": { + "branch": "main", + "git_sha": "1a76f884082c786760559c462063a5d1de94ca83", + "installed_by": ["gff3_validate"] + }, "custom/restoregffids": { "branch": "main", "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f", @@ -30,29 +35,44 @@ "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c", "installed_by": ["fasta_ltrretriever_lai"] }, - "edta/ltrharvest": { + "gt/gff3": { + "branch": "main", + "git_sha": "bfa4874d3942bdff70cb8df17322834125cafb28", + "installed_by": ["gff3_validate"] + }, + "gt/gff3validator": { + "branch": "main", + "git_sha": "889b9b57b611dcb063594608c2a222c928327cba", + "installed_by": ["gff3_validate"] + }, + "gt/stat": { "branch": "main", - "git_sha": "5a3fa7e2643328a95ced3632b761edea710abe07", + "git_sha": "cb5fb0be78a98fd1e32b7c90d6adf8c3bf44133e", + "installed_by": ["modules"] + }, + "ltrfinder": { + "branch": "main", + "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060", "installed_by": ["fasta_ltrretriever_lai"] }, - "lai": { + "ltrharvest": { "branch": "main", - "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fasta_ltrretriever_lai"] }, - "ltrfinder": { + "ltrretriever/lai": { "branch": "main", - 
"git_sha": "529f900f88ac36fded55f89e92b9dc2ee2c2b813", + "git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d", "installed_by": ["fasta_ltrretriever_lai"] }, - "ltrretriever": { + "ltrretriever/ltrretriever": { "branch": "main", - "git_sha": "47555fcab8a3871c618e779729e109b47b7b5974", + "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060", "installed_by": ["fasta_ltrretriever_lai"] }, "samblaster": { "branch": "main", - "git_sha": "32ef0605651e46c3f5720ed55d21dc35ec371001", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fastq_bwa_mem_samblaster"] } } @@ -61,13 +81,18 @@ "pfr": { "fasta_ltrretriever_lai": { "branch": "main", - "git_sha": "957519612296ca52e6c070883557a25beeabca55", + "git_sha": "f148f5384395618fc706b6e2f059bd1ce037d06c", "installed_by": ["subworkflows"] }, "fastq_bwa_mem_samblaster": { "branch": "main", "git_sha": "9639ac9a556898d0f0e8592bff24585c33326458", "installed_by": ["subworkflows"] + }, + "gff3_validate": { + "branch": "main", + "git_sha": "f9b96bf8142a01f0649ff90570fb10aa973504b9", + "installed_by": ["subworkflows"] } } } @@ -75,12 +100,76 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", + "installed_by": ["modules"] + }, + "fastavalidator": { + "branch": "master", + "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56", + "installed_by": ["fastq_trim_fastp_fastqc"] + }, + "fastqc": { + "branch": "master", + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "installed_by": ["fastq_trim_fastp_fastqc"] + }, + "gunzip": { + "branch": "master", + "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", + "installed_by": ["modules"] + }, + "seqkit/seq": { + "branch": "master", + "git_sha": "687ad41c14008d3d55cf7c2ffacebe6a057211a4", + "installed_by": 
["fasta_explore_search_plot_tidk"] + }, + "seqkit/sort": { + "branch": "master", + "git_sha": "ffcdfb354f8c8e59bdccc1a4249f0f3b232c2a3d", + "installed_by": ["fasta_explore_search_plot_tidk"] + }, + "tidk/explore": { + "branch": "master", + "git_sha": "fad335b31b32d78f89a8340373d3ccfc0f2b18b5", + "installed_by": ["fasta_explore_search_plot_tidk"] + }, + "tidk/plot": { + "branch": "master", + "git_sha": "d54cd1d823f151854825da8ffe7289bcbf77206b", + "installed_by": ["fasta_explore_search_plot_tidk"] + }, + "tidk/search": { + "branch": "master", + "git_sha": "669a329f4aa37f5b7f03776c2ed1cd0ef122c626", + "installed_by": ["fasta_explore_search_plot_tidk"] + }, "untar": { "branch": "master", "git_sha": "e719354ba77df0a1bd310836aa2039b45c29d620", "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "fasta_explore_search_plot_tidk": { + "branch": "master", + "git_sha": "2b21fbeb20ad9f17612f4a3dd7b12971513f08d5", + "installed_by": ["subworkflows"] + }, + "fastq_trim_fastp_fastqc": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/agp2_assembly.nf b/modules/local/agp2_assembly.nf deleted file mode 100644 index b010c712..00000000 --- a/modules/local/agp2_assembly.nf +++ /dev/null @@ -1,21 +0,0 @@ -nextflow.enable.dsl=2 - -process AGP2_ASSEMBLY { - tag "$sample_id_on_tag" - label "process_single" - - container "docker.io/gallvp/juicebox_scripts:a7ae991_ps" - publishDir "${params.outdir}/hic/assembly", mode:'copy' - - input: - tuple val(sample_id_on_tag), path(agp_file) - - output: - tuple val(sample_id_on_tag), path("*.agp.assembly"), emit: agp_assembly_file - - script: - """ - assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') - agp2assembly.py $agp_file "\${assembly_tag}.agp.assembly" - """ -} diff --git a/modules/local/agp2assembly.nf b/modules/local/agp2assembly.nf new file mode 100644 index 00000000..ee615dd6 --- /dev/null +++ 
b/modules/local/agp2assembly.nf @@ -0,0 +1,28 @@ +process AGP2ASSEMBLY { + tag "$sample_id_on_tag" + label 'process_single' + + container "docker.io/gallvp/juicebox_scripts:a7ae991_ps" + + input: + tuple val(sample_id_on_tag), path(agp_file) + + output: + tuple val(sample_id_on_tag), path("*.agp.assembly"), emit: assembly + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def VERSION = '0.1.0' + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + agp2assembly.py $agp_file "\${assembly_tag}.agp.assembly" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + juicebox_scripts: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/assemblathon_stats.nf b/modules/local/assemblathon_stats.nf index 7acd55b9..3ef37de5 100644 --- a/modules/local/assemblathon_stats.nf +++ b/modules/local/assemblathon_stats.nf @@ -1,40 +1,59 @@ -nextflow.enable.dsl=2 - - process ASSEMBLATHON_STATS { - tag "${hap_name}" - label "process_single" + tag "${asm_tag}" + label 'process_single' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? + conda "conda-forge::perl" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - publishDir "${params.outdir}/assemblathon_stats", mode: 'copy' + 'nf-core/ubuntu:20.04' }" input: - tuple val(hap_name), path(fasta_file) + tuple val(asm_tag), path(fasta_file) + val n_limit output: - path "${hap_name}_stats.csv" + path "${asm_tag}_stats.csv" , emit: stats + path 'versions.yml' , emit: versions - script: - """ - paths_to_check=\$(printf "%s\\n" \$(echo \$PATH | tr ':' ' ') \ - | xargs -I {} find {} -maxdepth 0 -print 2>/dev/null \ - | grep -v '^\$' \ - | grep -v '/sbin' \ - | xargs) + when: + task.ext.when == null || task.ext.when - falite_path="\$(find \$paths_to_check -name FAlite_943e0fb.pm)" - - ln -s "\$falite_path" FAlite_943e0fb.pm - - PERL5LIB=./ assemblathon_stats_943e0fb.pl \ - -n ${params.assemblathon_stats.n_limit} \ - -csv \ + script: + def VERSION = "github/PlantandFoodResearch/assemblathon2-analysis/a93cba2" + """ + paths_to_check=\$(printf "%s\\n" \$(echo \$PATH | tr ':' ' ') \\ + | xargs -I {} find {} -maxdepth 0 -print 2>/dev/null \\ + | grep -v '^\$' \\ + | grep -v '/sbin' \\ + | xargs + ) + + falite_path="\$(find \$paths_to_check -name FAlite_a93cba2.pm)" + + ln -s "\$falite_path" FAlite_a93cba2.pm + + PERL5LIB=./ assemblathon_stats_a93cba2.pl \\ + -n $n_limit \\ + -csv \\ "${fasta_file}" - csv_file_name=\$(ls | grep "csv") - mv \$csv_file_name "${hap_name}_stats.csv" - """ + csv_file_name=\$(ls | grep "csv") + mv \$csv_file_name "${asm_tag}_stats.csv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + assemblathon_stats: $VERSION + END_VERSIONS + """ + + stub: + def VERSION = "github/PlantandFoodResearch/assemblathon2-analysis/a93cba2" + """ + touch "${asm_tag}_stats.csv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + assemblathon_stats: $VERSION + END_VERSIONS + """ } diff --git a/modules/local/assembly2_bedpe.nf b/modules/local/assembly2_bedpe.nf deleted file mode 100644 index d2c05113..00000000 --- 
a/modules/local/assembly2_bedpe.nf +++ /dev/null @@ -1,21 +0,0 @@ -nextflow.enable.dsl=2 - -process ASSEMBLY2_BEDPE { - tag "$sample_id_on_tag" - label "process_single" - - container "docker.io/gallvp/python3npkgs:v0.4" - publishDir "${params.outdir}/hic/bedpe", mode:'copy' - - input: - tuple val(sample_id_on_tag), path(agp_assembly_file) - - output: - tuple val(sample_id_on_tag), path("*.assembly.bedpe"), emit: agp_assembly_bedpe_file - - script: - """ - assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') - assembly_2_bedpe_943e0fb.py $agp_assembly_file > "\${assembly_tag}.assembly.bedpe" - """ -} diff --git a/modules/local/assembly2bedpe.nf b/modules/local/assembly2bedpe.nf new file mode 100644 index 00000000..48e99f6c --- /dev/null +++ b/modules/local/assembly2bedpe.nf @@ -0,0 +1,40 @@ +process ASSEMBLY2BEDPE { + tag "$sample_id_on_tag" + label 'process_single' + + container "docker.io/gallvp/python3npkgs:v0.6" + + input: + tuple val(sample_id_on_tag), path(agp_assembly_file) + + output: + tuple val(sample_id_on_tag), path("*.assembly.bedpe") , emit: bedpe + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + assembly2bedpe.py $agp_assembly_file > "\${assembly_tag}.assembly.bedpe" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | tr -d 'Python[:space:]') + pandas: \$(python -c "import pandas; print(pandas.__version__)") + END_VERSIONS + """ + + stub: + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + touch "\${assembly_tag}.assembly.bedpe" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | tr -d 'Python[:space:]') + pandas: \$(python -c "import pandas; print(pandas.__version__)") + END_VERSIONS + """ +} diff --git a/modules/local/biocode_gff3_stats.nf b/modules/local/biocode_gff3_stats.nf deleted file mode 100644 index 
9f52aba6..00000000 --- a/modules/local/biocode_gff3_stats.nf +++ /dev/null @@ -1,22 +0,0 @@ -nextflow.enable.dsl=2 - -process BIOCODE_GFF3_STATS { - tag "${tag_label}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/biocode:0.10.0--pyhdfd78af_0': - 'quay.io/biocontainers/biocode:0.10.0--pyhdfd78af_0' }" - publishDir "${params.outdir}/biocode_gff3_stats", mode: 'copy' - - input: - tuple val(tag_label), path(gff3_file) - - output: - path "${tag_label}_stats.csv" - - script: - """ - report_gff3_statistics.py --input_file "$gff3_file" &>> "${tag_label}_stats.csv" || true - """ -} diff --git a/modules/local/busco.nf b/modules/local/busco.nf new file mode 100644 index 00000000..1c8b3f46 --- /dev/null +++ b/modules/local/busco.nf @@ -0,0 +1,56 @@ +process BUSCO { + tag "${asm_tag}:${lineage_dataset}" + label 'process_high' + + conda "bioconda::busco=5.6.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/busco:5.6.1--pyhdfd78af_0': + 'biocontainers/busco:5.6.1--pyhdfd78af_0' }" + + input: + tuple val(asm_tag), path(fasta_file) + val lineage_dataset + val mode + val download_path + + output: + path "${asm_tag}/short_summary.specific.${lineage_dataset}.${asm_tag}_${lineage_initials}.txt" , emit: summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def lineages_path = download_path ? 
"--download_path ${download_path}" : '' + lineage_initials = "${lineage_dataset}".split("_")[0] + + """ + busco \\ + -m ${mode} \\ + -o ${asm_tag} \\ + -i $fasta_file \\ + -l ${lineage_dataset} \\ + $lineages_path \\ + -c ${task.cpus} + + mv $asm_tag/short_summary.specific.${lineage_dataset}.${asm_tag}.txt \\ + $asm_tag/short_summary.specific.${lineage_dataset}.${asm_tag}_${lineage_initials}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ + + stub: + lineage_initials = "${lineage_dataset}".split("_")[0] + """ + mkdir -p $asm_tag + touch $asm_tag/short_summary.specific.${lineage_dataset}.${asm_tag}_${lineage_initials}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ +} diff --git a/modules/local/busco_plot.nf b/modules/local/busco_plot.nf new file mode 100644 index 00000000..e8f70189 --- /dev/null +++ b/modules/local/busco_plot.nf @@ -0,0 +1,42 @@ +process BUSCO_PLOT { + tag 'all summaries' + label 'process_single' + + conda "bioconda::busco=5.6.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/busco:5.6.1--pyhdfd78af_0': + 'biocontainers/busco:5.6.1--pyhdfd78af_0' }" + + input: + path "short_summary.*", stageAs: 'busco/*' + + output: + path 'busco/*.png' , emit: png + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + generate_plot.py \\ + -wd ./busco + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ + + stub: + """ + mkdir -p busco + + touch busco/summary_plot.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ +} diff --git a/modules/local/circos.nf b/modules/local/circos.nf new file mode 100644 index 00000000..8311e694 --- /dev/null +++ b/modules/local/circos.nf @@ -0,0 +1,151 @@ +process CIRCOS { + tag "${target_on_ref_seq}" + label 'process_single' + + container "docker.io/gallvp/circos-tools:v0.23-1_ps" + + input: + tuple val(target_on_ref_seq), path(karyotype), path(bundle_file) + + output: + path "*.svg", emit: svg_file + path "*.png", emit: png_file + path "bundled.links.tsv", emit: bundled_links_tsv + path "circos.conf", emit: circos_conf + path "karyotype.tsv", emit: karyotype_tsv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + links_count=\$(wc -l < "$bundle_file") + max_links=20000 + if [ "\$links_count" -gt "\$max_links" ]; then + echo "Link count exceeded \$max_links for ${bundle_file}." + echo "Try to shrink the number of links by increasing the max_gap and min_bundle_size options in the config file." 
+ exit 1 + fi + + cat $karyotype > "karyotype.tsv" + cat $bundle_file | awk '{print \$1,\$2,\$3,\$4,\$5,\$6,\$7}' OFS="\\t" > bundled.links.tsv + + num_sequences=\$(cat $karyotype | wc -l) + if (( \$num_sequences <= 10 )); then + label_font_size=40 + elif (( \$num_sequences <= 30 )); then + label_font_size=30 + else + label_font_size=15 + fi + + if (( \$num_sequences <= 10 )); then + ticks_config="<ticks> + radius = dims(ideogram,radius_outer) + orientation = out + label_multiplier = 1e-6 + color = black + thickness = 5p + label_offset = 5p + <tick> + spacing = 0.5u + size = 10p + show_label = yes + label_size = 20p + format = %.1f + </tick> + <tick> + spacing = 1.0u + size = 15p + show_label = yes + label_size = 30p + format = %.1f + </tick> + </ticks>" + + label_offset=" + 120p" + else + ticks_config="" + + label_offset=" + 25p" + fi + + cat <<-END_CONF > circos.conf + # circos.conf + karyotype = $karyotype + + <ideogram> + <spacing> + default = 0.005r + </spacing> + + radius = 0.8r + thickness = 25p + fill = yes + stroke_thickness = 0 + + show_label = yes + label_font = default + label_radius = dims(ideogram,radius_outer)\$label_offset + label_size = \$label_font_size + label_parallel = yes + </ideogram> + + <links> + radius = 0.99r + crest = 1 + ribbon = yes + flat = yes + stroke_thickness = 0 + color = grey_a3 + + bezier_radius = 0r + bezier_radius_purity = 0.5 + <link> + file = bundled.links.tsv + </link> + </links> + + show_ticks = yes + show_tick_labels = yes + chromosomes_units = 1000000 + chromosomes_display_default = yes + + \$ticks_config + + <image> + <<include etc/image.conf>> + </image> + <<include etc/colors_fonts_patterns.conf>> + <<include etc/housekeeping.conf>> + END_CONF + + circos + + mv circos.svg "${target_on_ref_seq}.svg" + mv circos.png "${target_on_ref_seq}.png" + + cat <<-END_VERSIONS 
> versions.yml + "${task.process}": + circos: \$(circos -version | awk '{print \$2}' FS='|' | tr -d '[:space:]') + perl: \$(circos -version | awk '{print \$4}' FS='|' | tr -d '[:space:]Perl') + END_VERSIONS + """ +} diff --git a/modules/local/circos_bundlelinks.nf b/modules/local/circos_bundlelinks.nf new file mode 100644 index 00000000..d8f5d9f8 --- /dev/null +++ b/modules/local/circos_bundlelinks.nf @@ -0,0 +1,39 @@ +process CIRCOS_BUNDLELINKS { + tag "${target_on_ref}" + label 'process_single' + + container "docker.io/gallvp/circos-tools:v0.23-1_ps" + + input: + tuple val(target_on_ref), path(coords_file), path(report_file) + val max_gap + val min_bundle_size + + output: + tuple val(target_on_ref), path("*.xcoords.bundle.txt") , emit: links + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def VERSION='24Sep2013' + """ + cat \\ + $coords_file \\ + | awk '{print \$12,\$1,\$2,\$13,\$3,\$4}' OFS="\\t" \\ + > "\$(basename $coords_file).links.txt" + + /usr/share/circos/tools/bundlelinks/bin/bundlelinks \\ + -links "\$(basename $coords_file).links.txt" \\ + -max_gap $max_gap \\ + -min_bundle_size $min_bundle_size \\ + 1> "\$(basename $coords_file).bundle.txt" \\ + 2> bundlelinks.err + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bundlelinks: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/colourbundlelinks.nf b/modules/local/colourbundlelinks.nf new file mode 100644 index 00000000..6d753984 --- /dev/null +++ b/modules/local/colourbundlelinks.nf @@ -0,0 +1,48 @@ +process COLOURBUNDLELINKS { + tag "${target_on_ref}" + label 'process_single' + + container "docker.io/gallvp/python3npkgs:v0.6" + + input: + tuple val(target_on_ref), path(bundle_links) + val color_by_contig + + output: + tuple val(target_on_ref), path("*.xcoords.bundle.coloured.txt") , emit: coloured_links + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def 
color_by_contig_bash = color_by_contig ? '1' : '0' + """ + if [[ "$color_by_contig_bash" = "1" ]];then + colorbundlesbycontig.py \\ + "${bundle_links}" \\ + > "\$(basename $bundle_links .bundle.txt).bundle.coloured.txt" + else + colorbundlesbysize.pl \\ + -i="${bundle_links}" \\ + -o="\$(basename $bundle_links .bundle.txt).bundle.coloured.txt" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | tr -d 'Python[:space:]') + perl: \$(perl --version |& sed -n 's/.*v\\([0-9]\\+\\.[0-9]\\+\\.[0-9]\\+\\).*/\\1/p') + END_VERSIONS + """ + + stub: + """ + touch "\$(basename $bundle_links .bundle.txt).bundle.coloured.txt" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | tr -d 'Python[:space:]') + perl: \$(perl --version |& sed -n 's/.*v\\([0-9]\\+\\.[0-9]\\+\\.[0-9]\\+\\).*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/local/create_report.nf b/modules/local/create_report.nf deleted file mode 100644 index 703e375a..00000000 --- a/modules/local/create_report.nf +++ /dev/null @@ -1,33 +0,0 @@ -nextflow.enable.dsl=2 - -process CREATE_REPORT { - tag "all modules" - label "process_single" - - container "docker.io/gallvp/python3npkgs:v0.4" - publishDir params.outdir, mode: 'copy' - - input: - path ncbi_fcs_adaptor_reports, stageAs: 'ncbi_fcs_adaptor_reports/*' - path fcs_gx_reports, stageAs: 'fcs_gx_reports/*' - path assemblathon_stats, stageAs: 'assemblathon_stats/*' - path genometools_gt_stats, stageAs: 'genometools_gt_stat/*' - path biocode_gff3_stats, stageAs: 'biocode_gff3_stats/*' - path busco_outputs, stageAs: 'busco_outputs/*' - path tidk_plots, stageAs: 'tidk_outputs/*' - path lai_outputs, stageAs: 'lai_outputs/*' - path kraken2_outputs, stageAs: 'kraken2_outputs/*' - path hic_outputs, stageAs: 'hic_outputs/*' - path circos_outputs, stageAs: 'circos_outputs/*' - val params_json - - output: - path 'report.html' - path 'report.json' - - script: - """ - echo -n '$params_json' > 
params_json.json - assembly_qc_report_943e0fb.py > report.html - """ -} diff --git a/modules/local/createreport.nf b/modules/local/createreport.nf new file mode 100644 index 00000000..820f106e --- /dev/null +++ b/modules/local/createreport.nf @@ -0,0 +1,65 @@ +process CREATEREPORT { + tag "AssemblyQC" + label 'process_single' + + container "docker.io/gallvp/python3npkgs:v0.6" + + input: + path fastavalidator_logs, stageAs: 'fastavalidator_logs/*' + path gff3_validate_logs, stageAs: 'gff3_validate_logs/*' + path ncbi_fcs_adaptor_reports, stageAs: 'ncbi_fcs_adaptor_reports/*' + path fcs_gx_reports, stageAs: 'fcs_gx_reports/*' + path assemblathon_stats, stageAs: 'assemblathon_stats/*' + path genometools_gt_stats, stageAs: 'genometools_gt_stat/*' + path busco_outputs, stageAs: 'busco_outputs/*' + path tidk_plots, stageAs: 'tidk_outputs/*' + path lai_outputs, stageAs: 'lai_outputs/*' + path kraken2_outputs, stageAs: 'kraken2_outputs/*' + path hic_outputs, stageAs: 'hic_outputs/*' + path circos_outputs, stageAs: 'circos_outputs/*' + path versions + val params_json + val params_summary_json + + output: + path 'report.html' , emit: html + path 'report.json' , emit: json + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + echo \\ + -n \\ + '$params_json' \\ + > params_json.json + + echo \\ + -n \\ + '$params_summary_json' \\ + > params_summary_json.json + + assemblyqc.py \\ + > report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | tr -d 'Python[:space:]') + pandas: \$(python -c "import pandas; print(pandas.__version__)") + END_VERSIONS + """ + + stub: + """ + touch report.html + touch report.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | tr -d 'Python[:space:]') + pandas: \$(python -c "import pandas; print(pandas.__version__)") + END_VERSIONS + """ +} diff --git a/modules/local/dnadiff.nf b/modules/local/dnadiff.nf new 
file mode 100644 index 00000000..772244e6 --- /dev/null +++ b/modules/local/dnadiff.nf @@ -0,0 +1,53 @@ +process DNADIFF { + tag "${target_on_ref}" + label 'process_single' + + container "docker.io/staphb/mummer:4.0.0" + + input: + tuple val(target_on_ref), path(target_fasta), path(ref_fasta), path(dnadiff_file) + val many_to_many_align + + output: + tuple val(target_on_ref), path("*.xcoords"), path("*.report") , emit: coords + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def inter_extension = many_to_many_align ? 'mcoords' : '1coords' + def out_extension = many_to_many_align ? 'm.xcoords' : '1.xcoords' + """ + cat \\ + $dnadiff_file \\ + | sed '1s/.*/${ref_fasta} ${target_fasta}/' \\ + > ${target_on_ref}.sed.delta + + dnadiff \\ + -p $target_on_ref \\ + -d ${target_on_ref}.sed.delta + + cat \\ + "${target_on_ref}.${inter_extension}" \\ + > "${target_on_ref}.${out_extension}" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dnadiff: \$(dnadiff -v |& sed -n '/DNAdiff version/ s/DNAdiff version //p') + END_VERSIONS + """ + + stub: + def inter_extension = many_to_many_align ? 'mcoords' : '1coords' + def out_extension = many_to_many_align ? 'm.xcoords' : '1.xcoords' + """ + touch "${target_on_ref}.${out_extension}" + touch "${target_on_ref}.report" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dnadiff: \$(dnadiff -v |& sed -n '/DNAdiff version/ s/DNAdiff version //p') + END_VERSIONS + """ +} diff --git a/modules/local/filtersortfasta.nf b/modules/local/filtersortfasta.nf new file mode 100644 index 00000000..4b184abf --- /dev/null +++ b/modules/local/filtersortfasta.nf @@ -0,0 +1,53 @@ +process FILTERSORTFASTA { + tag "${target}.on.${reference}" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1': + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(target), path(target_fasta), path(target_txt), val(reference), path(ref_fasta), path(ref_txt) + + output: + tuple val(target), val(reference), path("filtered.ordered.target.fasta"), path("filtered.ordered.ref.fasta"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + validateseqlists.sh \\ + "$target_txt" \\ + "$ref_txt" + + samtools \\ + faidx \\ + $target_fasta \\ + \$(awk '{print \$1}' $target_txt) \\ + > filtered.ordered.target.fasta + + samtools \\ + faidx \\ + $ref_fasta \\ + \$(awk '{print \$1}' $ref_txt) \\ + > filtered.ordered.ref.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(samtools --version | sed -n '1s/samtools//p') + END_VERSIONS + """ + + stub: + """ + touch filtered.ordered.target.fasta + touch filtered.ordered.ref.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(samtools --version | sed -n '1s/samtools//p') + END_VERSIONS + """ +} diff --git a/modules/local/generatekaryotype.nf b/modules/local/generatekaryotype.nf new file mode 100644 index 00000000..0135b8a1 --- /dev/null +++ b/modules/local/generatekaryotype.nf @@ -0,0 +1,59 @@ +process GENERATEKARYOTYPE { + tag "${target_on_ref}.${seq_tag}" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(target_on_ref), val(seq_tag), path(split_bundle_file), path(target_seq_len), path(ref_seq_len) + + output: + tuple val("${target_on_ref}.${seq_tag}"), path("*.karyotype") , emit: karyotype + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(awk -W version | sed -n 's/mawk //p') + grep: \$(grep --version | sed -n '/grep (GNU grep) /s/grep //p') + sed: \$(sed --version | sed -n 's/^sed //p') + END_VERSIONS + + + ref_seqs=(\$(awk '{print \$1}' $split_bundle_file | sort | uniq)) + + if [ \${#ref_seqs[@]} -eq 0 ]; then + touch "${target_on_ref}.${seq_tag}.karyotype" + exit 0 + fi + + tmp_file=\$(mktemp) + printf '%s\\n' "\${ref_seqs[@]}" > "\$tmp_file" + + if [[ $seq_tag = "all" ]];then + cat $target_seq_len > filtered.target.seq.len + else + grep -w "$seq_tag" $target_seq_len > filtered.target.seq.len + fi + cat filtered.target.seq.len | awk '{print \$1,\$2,"grey"}' OFS="\\t" > colored.filtered.target.seq.len + + grep -w -f "\$tmp_file" $ref_seq_len > filtered.ref.seq.len + cat filtered.ref.seq.len | awk '{print \$1,\$2,"black"}' OFS="\\t" > colored.filtered.ref.seq.len + + cat colored.filtered.ref.seq.len | sort -k1V > merged.seq.lengths + cat colored.filtered.target.seq.len | sort -k1Vr >> merged.seq.lengths + sed -i '/^\$/d' merged.seq.lengths + + cat merged.seq.lengths \\ + | awk '{print "chr -",\$1,\$1,"0",\$2-1,\$3}' OFS="\\t" \\ + > "${target_on_ref}.${seq_tag}.karyotype" + + rm "\$tmp_file" + """ +} diff --git a/modules/local/genometools_gt_stat.nf b/modules/local/genometools_gt_stat.nf deleted file mode 100644 index e7bd29c1..00000000 --- a/modules/local/genometools_gt_stat.nf +++ /dev/null @@ -1,24 +0,0 @@ -nextflow.enable.dsl=2 - - -process GENOMETOOLS_GT_STAT { - tag "${hap_name}" - label "process_single" - - container "${ 
workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/genometools-genometools:1.6.2--py310he7ef181_3': - 'quay.io/biocontainers/genometools-genometools:1.6.2--py310he7ef181_3' }" - - publishDir "${params.outdir}/genometools_gt_stat", mode: 'copy' - - input: - tuple val(hap_name), path(gff3_file) - - output: - path "${hap_name}_stats.csv" - - script: - """ - gt stat "${gff3_file}" | sed 's/:/,/1' > "${hap_name}_stats.csv" - """ -} diff --git a/modules/local/getfastalength.nf b/modules/local/getfastalength.nf new file mode 100644 index 00000000..fc12b2ef --- /dev/null +++ b/modules/local/getfastalength.nf @@ -0,0 +1,55 @@ +process GETFASTALENGTH { + tag "${target}.on.${reference}" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1': + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(target), val(reference), path(filtered_ordered_target_fasta), path(filtered_ordered_ref_fasta) + + output: + tuple val("${target}.on.${reference}"), path("target.seq.lengths"), path("ref.seq.lengths") , emit: length + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + samtools \\ + faidx \\ + $filtered_ordered_target_fasta + + samtools \\ + faidx \\ + $filtered_ordered_ref_fasta + + cat \\ + "${filtered_ordered_target_fasta}.fai"\\ + | awk '{print \$1, \$2}' OFS="\\t" \\ + > target.seq.lengths + + cat \\ + "${filtered_ordered_ref_fasta}.fai" \\ + | awk '{print \$1, \$2}' OFS="\\t" \\ + > ref.seq.lengths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(samtools --version | sed -n '1s/samtools//p') + END_VERSIONS + """ + + stub: + """ + touch target.seq.lengths + touch ref.seq.lengths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: 
\$(samtools --version | sed -n '1s/samtools//p') + END_VERSIONS + """ +} diff --git a/modules/local/gzip_fasta.nf b/modules/local/gzip_fasta.nf deleted file mode 100644 index 747a0ce1..00000000 --- a/modules/local/gzip_fasta.nf +++ /dev/null @@ -1,24 +0,0 @@ -nextflow.enable.dsl=2 - -process GZIP_FASTA { - tag "${tag_label}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - input: - tuple val(tag_label), path(fasta_file) - - output: - tuple val(tag_label), path("*.gzip.fa") - - script: - """ - input_file_name_var="\$(basename $fasta_file .gz)" - output_file_name="\${input_file_name_var%.*}.gzip.fa" - - gzip -cdf $fasta_file > \$output_file_name - """ -} diff --git a/modules/local/hic2html.nf b/modules/local/hic2html.nf new file mode 100644 index 00000000..0b907cde --- /dev/null +++ b/modules/local/hic2html.nf @@ -0,0 +1,27 @@ +process HIC2HTML { + tag "$sample_id_on_tag" + label 'process_single' + + container "docker.io/gallvp/python3npkgs:v0.6" + + input: + tuple val(sample_id_on_tag), path(hic_file) + + output: + path "*.html" , emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + file_name="$hic_file" + hic2html.py "$hic_file" > "\${file_name%.*}.html" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | tr -d 'Python[:space:]') + END_VERSIONS + """ +} diff --git a/modules/local/hic_qc.nf b/modules/local/hic_qc.nf deleted file mode 100644 index b2f63f31..00000000 --- a/modules/local/hic_qc.nf +++ /dev/null @@ -1,23 +0,0 @@ -nextflow.enable.dsl=2 - -process HIC_QC { - tag "$sample_id_on_tag" - label "process_single" - - publishDir "${params.outdir}/hic/hic_qc", mode:'copy' - container "docker.io/gallvp/hic_qc:6881c33_ps" - - input: - tuple val(sample_id_on_tag), path(dedup_bam) - 
- output: - tuple val(sample_id_on_tag), path("*.pdf") - - script: - """ - hic_qc.py \ - -n 10000000 \ - -b "${dedup_bam}" \ - --outfile_prefix "$sample_id_on_tag" - """ -} diff --git a/modules/local/hicqc.nf b/modules/local/hicqc.nf new file mode 100644 index 00000000..102ff099 --- /dev/null +++ b/modules/local/hicqc.nf @@ -0,0 +1,39 @@ +process HICQC { + tag "$meta.id" + label 'process_single' + + container "docker.io/gallvp/hic_qc:6881c33_ps" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.pdf") , emit: pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + hic_qc.py \\ + -n 10000000 \\ + -b $bam \\ + --outfile_prefix "$meta.id" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hic_qc.py: \$(hic_qc.py --version) + END_VERSIONS + """ + + stub: + """ + touch "${meta.id}.pdf" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hic_qc.py: \$(hic_qc.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/juicer_sort.nf b/modules/local/juicer_sort.nf index 4d7c15d1..4b0645bd 100644 --- a/modules/local/juicer_sort.nf +++ b/modules/local/juicer_sort.nf @@ -1,24 +1,31 @@ -nextflow.enable.dsl=2 - process JUICER_SORT { tag "$sample_id_on_tag" - label "process_high" + label 'process_high' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: - tuple val(sample_id_on_tag), path(out_links_txt) + tuple val(sample_id_on_tag), path(out_links_txt) output: - tuple val(sample_id_on_tag), path("*sorted.links.txt"), emit: sorted_links_txt_file + tuple val(sample_id_on_tag), path("*sorted.links.txt") , emit: links + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - """ - sort --parallel=${task.cpus} \ - -k2,2 -k6,6 \ - $out_links_txt \ + """ + sort --parallel=${task.cpus} \\ + -k2,2 -k6,6 \\ + $out_links_txt \\ > out.sorted.links.txt - """ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sort: \$(sort --version | sed -n '/sort (GNU coreutils) / s/sort (GNU coreutils) //p') + END_VERSIONS + """ } diff --git a/modules/local/kraken2.nf b/modules/local/kraken2.nf new file mode 100644 index 00000000..53f722ac --- /dev/null +++ b/modules/local/kraken2.nf @@ -0,0 +1,46 @@ +process KRAKEN2 { + tag "${asm_tag}" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/kraken2:2.1.2--pl5321h9f5acd7_2': + 'biocontainers/kraken2:2.1.2--pl5321h9f5acd7_2' }" + + input: + tuple val(asm_tag), path(fasta_file) + path db_path + + output: + tuple val(asm_tag), path("*.kraken2.cut"), path("*.kraken2.report") , emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + kraken2 \\ + --output "${asm_tag}.kraken2.cut" \\ + --report "${asm_tag}.kraken2.report" \\ + --use-names \\ + --db $db_path \\ + --threads ${task.cpus} \\ + $fasta_file > kraken2.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch "${asm_tag}.kraken2.cut" + touch "${asm_tag}.kraken2.report" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/kraken2_krona_plot.nf b/modules/local/kraken2_krona_plot.nf new file mode 100644 index 00000000..c4d73275 --- /dev/null +++ b/modules/local/kraken2_krona_plot.nf @@ -0,0 +1,27 @@ +process KRAKEN2_KRONA_PLOT { + tag "${hap_name}" + label 'process_single' + + container "docker.io/nanozoo/krona:2.7.1--e7615f7" + + input: + tuple val(hap_name), path(kraken2_cut), path(kraken2_report) + + output: + tuple path("*.kraken2.krona.cut"), path("*.kraken2.krona.html") , emit: plot + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + perl -lane '@a=split /\\t/; if (\$a[2] =~ /taxid\\s+(\\d+)/) {print "\$a[1]\\t\$1\\t1\\t\$a[3]";}' $kraken2_cut > "${hap_name}.kraken2.krona.cut" + ktImportTaxonomy -i -o "${hap_name}.kraken2.krona.html" -m "4" "${hap_name}.kraken2.krona.cut" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + KronaTools: \$(ktImportTaxonomy | sed -n 
'/KronaTools/s/KronaTools//p' | tr -d ' _/[:space:]' | sed 's/-ktImportTaxonomy\\\\//1') + END_VERSIONS + """ +} diff --git a/modules/local/make_agp_from_fasta.nf b/modules/local/make_agp_from_fasta.nf deleted file mode 100644 index 7734eed7..00000000 --- a/modules/local/make_agp_from_fasta.nf +++ /dev/null @@ -1,20 +0,0 @@ -nextflow.enable.dsl=2 - -process MAKE_AGP_FROM_FASTA { - tag "$sample_id_on_tag" - label "process_single" - - container "docker.io/gallvp/juicebox_scripts:a7ae991_ps" - - input: - tuple val(sample_id_on_tag), path(assembly_fasta) - - output: - tuple val(sample_id_on_tag), path("*.agp"), emit: agp_file - - script: - """ - file_name="$assembly_fasta" - makeAgpFromFasta.py $assembly_fasta "\${file_name%%.*}.agp" - """ -} diff --git a/modules/local/makeagpfromfasta.nf b/modules/local/makeagpfromfasta.nf new file mode 100644 index 00000000..3b6a4e79 --- /dev/null +++ b/modules/local/makeagpfromfasta.nf @@ -0,0 +1,28 @@ +process MAKEAGPFROMFASTA { + tag "$sample_id_on_tag" + label 'process_single' + + container "docker.io/gallvp/juicebox_scripts:a7ae991_ps" + + input: + tuple val(sample_id_on_tag), path(assembly_fasta) + + output: + tuple val(sample_id_on_tag), path("*.agp") , emit: agp + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def VERSION = '0.1.0' + """ + file_name="$assembly_fasta" + makeAgpFromFasta.py $assembly_fasta "\${file_name%%.*}.agp" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + juicebox_scripts: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/matlock_bam2_juicer.nf b/modules/local/matlock_bam2_juicer.nf index a37f7bd1..dadb03d2 100644 --- a/modules/local/matlock_bam2_juicer.nf +++ b/modules/local/matlock_bam2_juicer.nf @@ -1,21 +1,29 @@ -nextflow.enable.dsl=2 - process MATLOCK_BAM2_JUICER { tag "$sample_id_on_tag" - label "process_single" + label 'process_single' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine 
== 'apptainer' ? + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/matlock:20181227--h4b03ef3_3': - 'quay.io/biocontainers/matlock:20181227--h4b03ef3_3' }" + 'biocontainers/matlock:20181227--h4b03ef3_3' }" input: - tuple val(sample_id_on_tag), path(hic_bam_scaffolds) + tuple val(sample_id_on_tag), path(hic_bam_scaffolds) output: - tuple val(sample_id_on_tag), path("out.links.txt") + tuple val(sample_id_on_tag), path("out.links.txt") , emit: links + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - """ - matlock bam2 juicer $hic_bam_scaffolds out.links.txt - """ + def VERSION = '20181227' + """ + matlock bam2 juicer $hic_bam_scaffolds out.links.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + matlock: $VERSION + END_VERSIONS + """ } diff --git a/modules/local/mummer.nf b/modules/local/mummer.nf new file mode 100644 index 00000000..401dbaa1 --- /dev/null +++ b/modules/local/mummer.nf @@ -0,0 +1,41 @@ +process MUMMER { + tag "${target}.on.${reference}" + label 'process_high' + + container "docker.io/staphb/mummer:4.0.0" + + input: + tuple val(target), val(reference), path(target_fasta), path(ref_fasta) + + output: + tuple val("${target}.on.${reference}"), path("*.delta") , emit: delta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + nucmer \\ + --mum \\ + -t ${task.cpus} \\ + -p "${target}.on.${reference}" \\ + $ref_fasta \\ + $target_fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nucmer: \$(nucmer -V) + END_VERSIONS + """ + + stub: + """ + touch "${target}.on.${reference}.delta" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nucmer: \$(nucmer -V) + END_VERSIONS + """ +} diff --git a/modules/local/ncbi_fcs_adaptor.nf b/modules/local/ncbi_fcs_adaptor.nf new file mode 100644 index 00000000..ec1b3dd0 --- 
/dev/null +++ b/modules/local/ncbi_fcs_adaptor.nf @@ -0,0 +1,58 @@ +process NCBI_FCS_ADAPTOR { + tag "${asm_tag}" + label 'process_single' + + // Warning: manually update version in script and stub + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.4.0/fcs-adaptor.sif': + 'docker.io/ncbi/fcs-adaptor:0.4.0' }" + + input: + tuple val(asm_tag), path(fasta_file) + val empire + + output: + tuple val(asm_tag), path("${asm_tag}_fcs_adaptor_report.tsv") , emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "NCBI_FCS_ADAPTOR module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def VERSION = '0.4.0' // Quoted so the full container tag version is reported; keep in sync with the container above + """ + mkdir "${asm_tag}_outputdir" + + /app/fcs/bin/av_screen_x \\ + -o "${asm_tag}_outputdir" \\ + --${empire} \\ + "${fasta_file}" + + mv "${asm_tag}_outputdir/fcs_adaptor_report.txt" \\ + "./${asm_tag}_fcs_adaptor_report.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + av_screen_x: $VERSION + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "NCBI_FCS_ADAPTOR module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def VERSION = '0.4.0' // Quoted so the full container tag version is reported; keep in sync with the container above + """ + touch "${asm_tag}_fcs_adaptor_report.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + av_screen_x: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/ncbi_fcs_gx_krona_plot.nf b/modules/local/ncbi_fcs_gx_krona_plot.nf new file mode 100644 index 00000000..853689dc --- /dev/null +++ b/modules/local/ncbi_fcs_gx_krona_plot.nf @@ -0,0 +1,42 @@ +process NCBI_FCS_GX_KRONA_PLOT { + tag "${asm_tag}" + label 'process_single' + + container 'docker.io/nanozoo/krona:2.7.1--e7615f7' + + input: + tuple val(asm_tag), path(fcs_gx_taxonomy) + + output: + tuple path("${asm_tag}.inter.tax.rpt.tsv"), path("${asm_tag}.fcs.gx.krona.cut"), path("${asm_tag}.fcs.gx.krona.html") , emit: plot + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "NCBI_FCS_GX_KRONA_PLOT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + """ + cat $fcs_gx_taxonomy \\ + | awk 'NR>1 {print \$1,\$2,\$6,\$7,\$11,\$32}' FS="\\t" OFS="\\t" \\ + > "${asm_tag}.inter.tax.rpt.tsv" + + cat "${asm_tag}.inter.tax.rpt.tsv" \\ + | awk '\$6 !~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,\$4,\$5,\$2}' FS="\\t" OFS="\\t" \\ + > "${asm_tag}.fcs.gx.krona.cut" + + cat "${asm_tag}.inter.tax.rpt.tsv" \\ + | awk 'NR>1 && \$6 ~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,"0",\$5,\$2}' FS="\\t" OFS="\\t" \\ + >> "${asm_tag}.fcs.gx.krona.cut" + + ktImportTaxonomy -i -o "${asm_tag}.fcs.gx.krona.html" -m "4" "${asm_tag}.fcs.gx.krona.cut" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + KronaTools: \$(ktImportTaxonomy | sed -n '/KronaTools/s/KronaTools//p' | tr -d ' _/[:space:]' | sed 's/-ktImportTaxonomy\\\\//1') + END_VERSIONS + """ +} diff --git a/modules/local/ncbi_fcs_gx_screen_samples.nf b/modules/local/ncbi_fcs_gx_screen_samples.nf new file mode 100644 index 00000000..f9ed5cc8 --- /dev/null +++ b/modules/local/ncbi_fcs_gx_screen_samples.nf @@ -0,0 +1,57 @@ +process NCBI_FCS_GX_SCREEN_SAMPLES { + tag 'all samples' + label 'process_high' + + conda "bioconda::ncbi-fcs-gx=0.5.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ncbi-fcs-gx:0.5.0--h4ac6f70_3': + 'biocontainers/ncbi-fcs-gx:0.5.0--h4ac6f70_3' }" + + input: + path samples + path db_path + val tax_id + + output: + path "*.fcs_gx_report.txt" , emit: fcs_gx_reports + path "*.taxonomy.rpt" , emit: fcs_gx_taxonomies + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def VERSION = 0.5 + """ + for sample_fasta in $samples; + do + sample_tag=\$(echo "\$sample_fasta" | sed 's/fasta.file.for.//g' | sed 's/.fasta//g') + run_gx.py --fasta ./\$sample_fasta --out-dir ./ --gx-db $db_path --tax-id "${tax_id}" + + mv "\${sample_fasta%.fasta}.${tax_id}.fcs_gx_report.txt" "\${sample_tag}.fcs_gx_report.txt" + mv "\${sample_fasta%.fasta}.${tax_id}.taxonomy.rpt" "\${sample_tag}.taxonomy.rpt" + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fcs_gx: $VERSION + END_VERSIONS + """ + + stub: + def VERSION = 0.5 + """ + for sample_fasta in $samples; + do + sample_tag=\$(echo "\$sample_fasta" | sed 's/fasta.file.for.//g' | sed 's/.fasta//g') + + touch "\${sample_tag}.fcs_gx_report.txt" + touch "\${sample_tag}.taxonomy.rpt" + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fcs_gx: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/ncbi_fcs_gx_setup_sample.nf b/modules/local/ncbi_fcs_gx_setup_sample.nf new file mode 100644 index 00000000..8b36ad8f --- /dev/null +++ b/modules/local/ncbi_fcs_gx_setup_sample.nf @@ -0,0 +1,32 @@ +process NCBI_FCS_GX_SETUP_SAMPLE { + tag "${asm_tag}" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(asm_tag), path(fasta_file) + + output: + path 'fasta.file.for.*.fasta' , emit: fsata + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "NCBI_FCS_GX_SETUP_SAMPLE module does not support Conda. Please use Docker / Singularity / Podman instead." + } + """ + ln -s $fasta_file "fasta.file.for.${asm_tag}.fasta" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ubuntu: \$(cat /etc/issue | tr -d 'Ubuntu LTS[:space:]\\\\') + END_VERSIONS + """ +} diff --git a/modules/local/relabelbundlelinks.nf b/modules/local/relabelbundlelinks.nf new file mode 100644 index 00000000..0b95fb05 --- /dev/null +++ b/modules/local/relabelbundlelinks.nf @@ -0,0 +1,51 @@ +process RELABELBUNDLELINKS { + tag "${target_on_ref}" + label 'process_single' + + container "docker.io/gallvp/python3npkgs:v0.6" + + input: + tuple val(target_on_ref), path(coloured_bundle_links), path(target_seq_list), path(ref_seq_list) + + output: + tuple val(target_on_ref), path("*.xcoords.bundle.coloured.relabeled.txt") , emit: relabeled_links + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + #!/usr/bin/env python + + import sys + import os + import pandas as pd + from platform import python_version + + # Write versions + with open(f"versions.yml", "w") as f_versions: + f_versions.write('"${task.process}":\\n') + f_versions.write(f" python: {python_version()}\\n") + + output_file_name = ".".join("$coloured_bundle_links".split(".")[0:-1]) + ".relabeled.txt" + + subs_target_seq = pd.read_csv('$target_seq_list', sep='\\t', header=None) + subs_target_seq_dict = dict(zip(subs_target_seq.iloc[:, 0], subs_target_seq.iloc[:, 1])) + + 
subs_ref_seq = pd.read_csv('$ref_seq_list', sep='\\t', header=None) + subs_ref_seq_dict = dict(zip(subs_ref_seq.iloc[:, 0], subs_ref_seq.iloc[:, 1])) + + if os.path.getsize('$coloured_bundle_links') == 0: + with open(output_file_name, 'w') as f: + f.write('') + sys.exit(0) + else: + df = pd.read_csv('$coloured_bundle_links', sep=' ', header=None) + + df.iloc[:, 3] = df.iloc[:, 3].replace(subs_target_seq_dict, regex=False) + df.iloc[:, 0] = df.iloc[:, 0].replace(subs_ref_seq_dict, regex=False) + + df.to_csv(output_file_name, sep=' ', index=False, header=None) + """ +} diff --git a/modules/local/relabelfastalength.nf b/modules/local/relabelfastalength.nf new file mode 100644 index 00000000..6c9615ab --- /dev/null +++ b/modules/local/relabelfastalength.nf @@ -0,0 +1,54 @@ +process RELABELFASTALENGTH { + tag "${target_on_ref}" + label 'process_single' + + container "docker.io/gallvp/python3npkgs:v0.6" + + input: + tuple val(target_on_ref), path(target_seq_lengths), path(ref_seq_lengths), path(target_seq_list), path(ref_seq_list) + + output: + tuple val(target_on_ref), path("relabeld.target.seq.lengths"), path("relabeld.ref.seq.lengths") , emit: relabeled_seq_lengths + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + #!/usr/bin/env python + + import pandas as pd + from platform import python_version + + subs_target_seq = pd.read_csv('$target_seq_list', sep='\\t', header=None) + subs_target_seq_dict = dict(zip(subs_target_seq.iloc[:, 0], subs_target_seq.iloc[:, 1])) + + subs_ref_seq = pd.read_csv('$ref_seq_list', sep='\\t', header=None) + subs_ref_seq_dict = dict(zip(subs_ref_seq.iloc[:, 0], subs_ref_seq.iloc[:, 1])) + + df_target_seq_lengths = pd.read_csv('$target_seq_lengths', sep='\\t', header=None) + df_target_seq_lengths.iloc[:, 0] = df_target_seq_lengths.iloc[:, 0].replace(subs_target_seq_dict, regex=False) + df_target_seq_lengths.to_csv("relabeld.target.seq.lengths", sep='\\t', index=False, header=None) + 
+ df_ref_seq_lengths = pd.read_csv('$ref_seq_lengths', sep='\\t', header=None) + df_ref_seq_lengths.iloc[:, 0] = df_ref_seq_lengths.iloc[:, 0].replace(subs_ref_seq_dict, regex=False) + df_ref_seq_lengths.to_csv("relabeld.ref.seq.lengths", sep='\\t', index=False, header=None) + + # Write versions + with open(f"versions.yml", "w") as f_versions: + f_versions.write('"${task.process}":\\n') + f_versions.write(f" python: {python_version()}\\n") + """ + + stub: + """ + touch relabeld.target.seq.lengths + touch relabeld.ref.seq.lengths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | tr -d 'Python[:space:]') + END_VERSIONS + """ +} diff --git a/modules/local/run_assembly_visualizer.nf b/modules/local/run_assembly_visualizer.nf deleted file mode 100644 index e0cfac8e..00000000 --- a/modules/local/run_assembly_visualizer.nf +++ /dev/null @@ -1,24 +0,0 @@ -nextflow.enable.dsl=2 - -process RUN_ASSEMBLY_VISUALIZER { - tag "$sample_id_on_tag" - label "process_medium" - - publishDir "${params.outdir}/hic", mode:'copy' - container "gallvp/3d-dna:63029aa" - - input: - tuple val(sample_id_on_tag), path(agp_assembly_file), path(sorted_links_txt_file) - - output: - tuple val(sample_id_on_tag), path("*.hic"), emit: hic_file - - script: - // -p true/false Use GNU Parallel to speed up computation (default is true). 
- """ - assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') - file_name="${agp_assembly_file}" - /usr/src/3d-dna/visualize/run-assembly-visualizer.sh $agp_assembly_file $sorted_links_txt_file - mv "\${file_name%.*}.hic" "\${assembly_tag}.hic" - """ -} diff --git a/modules/local/runassemblyvisualizer.nf b/modules/local/runassemblyvisualizer.nf new file mode 100644 index 00000000..7b628d08 --- /dev/null +++ b/modules/local/runassemblyvisualizer.nf @@ -0,0 +1,44 @@ +process RUNASSEMBLYVISUALIZER { + tag "$sample_id_on_tag" + label 'process_medium' + + container "docker.io/gallvp/3d-dna:63029aa" + + input: + tuple val(sample_id_on_tag), path(agp_assembly_file), path(sorted_links_txt_file) + + output: + tuple val(sample_id_on_tag), path("*.hic") , emit: hic + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // -p true/false Use GNU Parallel to speed up computation (default is true). + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + file_name="${agp_assembly_file}" + + /usr/src/3d-dna/visualize/run-assembly-visualizer.sh \\ + $agp_assembly_file $sorted_links_txt_file + + mv "\${file_name%.*}.hic" "\${assembly_tag}.hic" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + run-assembly-visualizer.sh: \$(/usr/src/3d-dna/visualize/run-assembly-visualizer.sh -h | sed -n '/Visualizing draft genomes in juicebox:/ s/Visualizing draft genomes in juicebox: //p') + END_VERSIONS + """ + + stub: + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + touch "\${assembly_tag}.hic" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + run-assembly-visualizer.sh: \$(/usr/src/3d-dna/visualize/run-assembly-visualizer.sh -h | sed -n '/Visualizing draft genomes in juicebox:/ s/Visualizing draft genomes in juicebox: //p') + END_VERSIONS + """ +} diff --git a/modules/local/splitbundlefile.nf b/modules/local/splitbundlefile.nf new file mode 100644 index 00000000..093c21f4 
--- /dev/null +++ b/modules/local/splitbundlefile.nf @@ -0,0 +1,42 @@ +process SPLITBUNDLEFILE { + tag "${target_on_ref}" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(target_on_ref), path(coloured_bundle_links) + val plot_1_vs_all + + output: + tuple val(target_on_ref), path("*.split.bundle.txt"), emit: split_file + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def plot_1_vs_all_bash = plot_1_vs_all ? '1' : '0' + """ + if [[ "$plot_1_vs_all_bash" = "1" ]];then + target_seqs=(\$(awk '{print \$4}' $coloured_bundle_links | sort | uniq)) + + for i in "\${!target_seqs[@]}" + do + target_seq=\${target_seqs[\$i]} + awk -v seq="\$target_seq" '\$4==seq {print \$0}' $coloured_bundle_links > "${target_on_ref}.\${target_seq}.split.bundle.txt" + done + fi + + cat \\ + $coloured_bundle_links \\ + > "${target_on_ref}.all.split.bundle.txt" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(awk -W version | sed -n 's/mawk //p') + END_VERSIONS + """ +} diff --git a/modules/local/utils.nf b/modules/local/utils.nf deleted file mode 100644 index 9075beb0..00000000 --- a/modules/local/utils.nf +++ /dev/null @@ -1,230 +0,0 @@ -nextflow.enable.dsl=2 - -import groovy.json.JsonOutput - -def jsonifyParams(params) { - return JsonOutput.toJson(params) -} - -def validateParams(params) { - validateFastaTags(params) - validateGff3Tags(params) - validateGff3FastaCorrespondence(params) - - validateBUSCOParameters(params) - validateLAIParameters(params) - validateSyntenyParameters(params) -} - -def validateFastaTags(params) { - def listOfFastaTuples = params["target_assemblies"] - - if (isNotListOfLists(listOfFastaTuples, 2)) { - error 'Error: target_assemblies must be a list of sublists, with each sublist containing 2 
elements' - } - - def fastaTags = listOfFastaTuples.collect { it[0] } - - fastaTags.each { - if (!(it =~ /^\w+$/)) { - error "Error: $it is not a valid tag in target_assemblies" - } - } - - if (fastaTags.size() != (fastaTags as Set).size()) { - error "All the tags in target_assemblies should be unique" - } -} - -def validateGff3Tags(params) { - def listOfGff3Tuples = params["assembly_gff3"] - - if (listOfGff3Tuples.isEmpty()) { - return - } - - if (isNotListOfLists(listOfGff3Tuples, 2)) { - error 'Error: assembly_gff3 must be a list of sublists, with each sublist containing 2 elements' - } - - def gff3Tags = listOfGff3Tuples.collect { it[0] } - - gff3Tags.each { - if (!(it =~ /^\w+$/)) { - error "Error: $it is not a valid tag in assembly_gff3" - } - } -} - -def validateGff3FastaCorrespondence(params) { - - def listOfGff3Tuples = params["assembly_gff3"] - def listOfFastaTuples = params["target_assemblies"] - - def fastaTags = listOfFastaTuples.collect { it[0] } - def gff3Tags = listOfGff3Tuples.collect { it[0] } - - gff3Tags.each { - if(!fastaTags.contains(it)) { - error "Error: $it in assembly_gff3 does not have a corresponding tag in target_assemblies" - } - } -} - -def validateBUSCOParameters(params) { - - if (params["busco"]["skip"] == 1) { - return - } - - listOfBUSCOLineages = params["busco"]["lineage_datasets"] - - if (!(listOfBUSCOLineages instanceof List)) { - error 'Error: busco::lineage_datasets must be a list of lineage(s)' - } -} - -def validateLAIParameters(params) { - - if (params["lai"]["skip"] == 1) { - return - } - - validateLAIMonoploidSeqs(params) -} - -def validateLAIMonoploidSeqs(params) { - - def listOfMonoploidSeqs = params["lai"]["monoploid_seqs"] - def listOfFastaTuples = params["target_assemblies"] - - if (listOfMonoploidSeqs.isEmpty()) { - return - } - - if (isNotListOfLists(listOfMonoploidSeqs, 2)) { - error 'Error: lai::monoploid_seqs must be a list of sublists, with each sublist containing 2 elements' - } - - def fastaTags = 
listOfFastaTuples.collect { it[0] } - def monoSeqTags = listOfFastaTuples.collect { it[0] } - - monoSeqTags.each { - if(!fastaTags.contains(it)) { - error "Error: $it in lai::monoploid_seqs does not have a corresponding tag in target_assemblies" - } - } - - listOfMonoploidSeqs.each { - validateMonoSeqs(it[1]) - } -} - -def validateSyntenyParameters(params) { - if (params["synteny"]["skip"] == 1) { - return - } - - def listOfFastaTuples = params["target_assemblies"] - def listOfTargetSeqLists = params["synteny"]["assembly_seq_list"] - - if (isNotListOfLists(listOfTargetSeqLists, 2)) { - error 'Error: synteny::assembly_seq_list must be a list of sublists, with each sublist containing 2 elements' - } - - if (listOfTargetSeqLists.size() != listOfFastaTuples.size()) { - error "Error: The number of elements in synteny::assembly_seq_list and target_assemblies should be equal" - } - - def fastaTags = listOfFastaTuples.collect { it[0] } - def seqListTags = listOfTargetSeqLists.collect { it[0] } - - if (!seqListTags.containsAll(fastaTags)) { - error "Error: The tags in synteny::assembly_seq_list should match the tags in target_assemblies" - } - - listOfTargetSeqLists.each { - validateSeqList(it[1]) - } - - def listOfXRefAssemblies = params["synteny"]["xref_assemblies"] - - if (listOfXRefAssemblies.isEmpty()) { - return - } - - if (isNotListOfLists(listOfXRefAssemblies, 3)) { - error 'Error: synteny::xref_assemblies must be a list of sublists, with each sublist containing 3 elements' - } - - def xrefTags = listOfXRefAssemblies.collect { it[0] } - - xrefTags.each { - if (!(it =~ /^\w+$/)) { - error "Error: $it is not a valid tag in synteny::xref_assemblies" - } - } - - if (xrefTags.size() != (xrefTags as Set).size()) { - error "All the tags in synteny::xref_assemblies should be unique" - } - - xrefTags.each { - if (fastaTags.contains(it)) { - error "Error: Tag $it in synteny::xref_assemblies is already included in target_assemblies" - } - } - - listOfXRefAssemblies.each { - 
validateSeqList(it[2]) - } -} - -def isNotListOfLists(thisOne, subListSize) { - return (!(thisOne instanceof List) || thisOne.isEmpty() || thisOne.any { !(it instanceof List) || it.size() != subListSize }) -} - -def validateSeqList(seqListPath) { - def seqListFile = file(seqListPath, checkIfExists: true) - - def lines = seqListFile.readLines() - if (lines.isEmpty()) { - error "${seqListPath} is empty. It should be a 2 column tab-delimited file" - } - - lines.each { line -> - def columns = line.split("\t") - if (columns.size() != 2) { - error "${seqListPath} should be a 2 column tab-delimited file" - } - } - - def column1Set = new HashSet<>() - def column2Set = new HashSet<>() - def hasUniqueElements = lines.every { line -> - def columns = line.split("\t") - def element1 = columns[0] - def element2 = columns[1] - column1Set.add(element1) && column2Set.add(element2) - } - - if (!hasUniqueElements) { - error "${seqListPath} contains duplicate elements in one or both columns" - } -} - -def validateMonoSeqs(monoSeqsPath) { - def monoSeqsFile = file(monoSeqsPath, checkIfExists: true) - - def lines = monoSeqsFile.readLines() - if (lines.isEmpty()) { - error "${monoSeqsPath} is empty. 
It should be a single column text file" - } - - lines.each { line -> - def literals = line.split() - if (literals.size() != 1) { - error "${monoSeqsPath} should be a single column text file" - } - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..b48ced26 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 00000000..105f9265 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 00000000..5f15a5fd --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,37 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline template +keywords: + - custom + - dump + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] +input: + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" +output: + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 00000000..da033408 --- /dev/null 
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + + +import yaml +import platform +from textwrap import dedent + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" + html = [ + dedent( + """\\ + + + + + + + + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + + + + + """ + ) + ) + html.append("") + html.append("
Process Name Software Version
{process if (i == 0) else ''}{tool}{version}
") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..b1e1630b --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..5f59a936 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastavalidator/environment.yml b/modules/nf-core/fastavalidator/environment.yml new file mode 100644 index 00000000..70f346ef --- /dev/null +++ b/modules/nf-core/fastavalidator/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "fastavalidator" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::py_fasta_validator=0.6" diff --git a/modules/nf-core/fastavalidator/main.nf b/modules/nf-core/fastavalidator/main.nf new file mode 100644 index 00000000..ac5470fb --- /dev/null +++ b/modules/nf-core/fastavalidator/main.nf @@ -0,0 +1,62 @@ +process FASTAVALIDATOR { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/py_fasta_validator:0.6--py37h595c7a6_0': + 'biocontainers/py_fasta_validator:0.6--py37h595c7a6_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.success.log') , emit: success_log , optional: true + tuple val(meta), path('*.error.log') , emit: error_log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + py_fasta_validator \\ + -f $fasta \\ + 2> "${prefix}.error.log" \\ + || echo "Errors from fasta_validate printed to ${prefix}.error.log" + + if [ \$(cat "${prefix}.error.log" | wc -l) -gt 0 ]; then + echo "Validation failed..." + + cat \\ + "${prefix}.error.log" + else + echo "Validation successful..." + + mv \\ + "${prefix}.error.log" \\ + fasta_validate.stderr + + echo "Validation successful..." 
\\ + > "${prefix}.success.log" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "Validation successful..." \\ + > "${prefix}.success.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastavalidator/meta.yml b/modules/nf-core/fastavalidator/meta.yml new file mode 100644 index 00000000..c5c4371c --- /dev/null +++ b/modules/nf-core/fastavalidator/meta.yml @@ -0,0 +1,53 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fastavalidator" +description: | + "Python C-extension for a simple validator for fasta files. The module emits the validated file or an + error log upon validation failure." +keywords: + - fasta + - validation + - genome +tools: + - fasta_validate: + description: | + "Python C-extension for a simple C code to validate a fasta file. It only checks a few things, + and by default only sets its response via the return code, + so you will need to check that!" + homepage: "https://github.com/linsalrob/py_fasta_validator" + documentation: "https://github.com/linsalrob/py_fasta_validator" + tool_dev_url: "https://github.com/linsalrob/py_fasta_validator" + doi: "10.5281/zenodo.5002710" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.fasta" +output: + - meta: + type: map + description: | + Groovy Map containing file information + e.g. 
[ id:'test' ] + - success_log: + type: file + description: Log file for successful validation + pattern: "*.success.log" + - error_log: + type: file + description: Log file for failed validation + pattern: "*.error.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@gallvp" +maintainers: + - "@gallvp" diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test b/modules/nf-core/fastavalidator/tests/main.nf.test new file mode 100644 index 00000000..bb8c22cf --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process FASTAVALIDATOR" + script "../main.nf" + process "FASTAVALIDATOR" + + tag "modules" + tag "modules_nfcore" + tag "fastavalidator" + + test("sarscov2-fasta-valid") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log != null }, + { assert process.out.error_log == [] }, + { assert path(process.out.success_log.get(0).get(1)).getText().contains("Validation successful...") } + ) + } + + } + + test("sarscov2-gff3-invalid") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log == [] }, + { assert process.out.error_log != null }, + { assert path(process.out.error_log.get(0).get(1)).getText().contains("genome.gff3 does not start with a >") } + ) + } + + } +} diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test.snap b/modules/nf-core/fastavalidator/tests/main.nf.test.snap new 
file mode 100644 index 00000000..382dee72 --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "sarscov2-fasta-valid": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ], + "error_log": [ + + ], + "success_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c" + ] + ], + "versions": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ] + } + ], + "timestamp": "2023-11-28T11:23:25.106872" + }, + "sarscov2-gff3-invalid": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.error.log:md5,531d520c0e7767176f743f197f1f87b3" + ] + ], + "2": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ], + "error_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.error.log:md5,531d520c0e7767176f743f197f1f87b3" + ] + ], + "success_log": [ + + ], + "versions": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ] + } + ], + "timestamp": "2023-11-28T11:23:29.40324" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastavalidator/tests/tags.yml b/modules/nf-core/fastavalidator/tests/tags.yml new file mode 100644 index 00000000..c3c77576 --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/tags.yml @@ -0,0 +1,2 @@ +fastavalidator: + - "modules/nf-core/fastavalidator/**" diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 00000000..70389e66 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..2a3b679e 
--- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,120 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = is_single_output ? 
"${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : "" + """ + touch $touch_reads + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + $touch_merged + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..c22a16ab --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,75 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. 
+ pattern: "*.{fasta,fna,fas,fa}" + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + type: file + description: Results in JSON format + pattern: "*.json" + - html: + type: file + description: Results in HTML format + pattern: "*.html" + - log: + type: file + description: fastp log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads_fail: + type: file + description: Reads that failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + type: file + description: Reads that were successfully merged + pattern: "*.merged.fastq.gz" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..9b3f9a38 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,723 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3]
= save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: 
true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert path(process.out.html[0][1][3]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + 
input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..86f7c311 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "fastqc_versions_interleaved": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + 
"timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..25910b34 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 00000000..468a6f28 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,48 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..231034f2 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,39 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..6406008e --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..720fd9ff --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "timestamp": "2023-10-17T15:35:37.690477896" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/tags.yml 
b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/pfr/lai/environment.yml b/modules/nf-core/seqkit/seq/environment.yml similarity index 79% rename from modules/pfr/lai/environment.yml rename to modules/nf-core/seqkit/seq/environment.yml index 94fadbd2..9019d269 100644 --- a/modules/pfr/lai/environment.yml +++ b/modules/nf-core/seqkit/seq/environment.yml @@ -1,9 +1,9 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "lai" +name: "seqkit_seq" channels: - conda-forge - bioconda - defaults dependencies: - - "bioconda::LTR_retriever=2.9.0" + - "bioconda::seqkit=2.6.1" diff --git a/modules/nf-core/seqkit/seq/main.nf b/modules/nf-core/seqkit/seq/main.nf new file mode 100644 index 00000000..7fc742b3 --- /dev/null +++ b/modules/nf-core/seqkit/seq/main.nf @@ -0,0 +1,63 @@ +process SEQKIT_SEQ { + tag "$meta.id" + label 'process_low' + // File IO can be a bottleneck. See: https://bioinf.shenwei.me/seqkit/usage/#parallelization-of-cpu-intensive-jobs + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.6.1--h9ee0642_0': + 'biocontainers/seqkit:2.6.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("${prefix}.*") , emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + def call_gzip = extension.endsWith('.gz') ? "| gzip -c $args2" : '' + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + seqkit \\ + seq \\ + --threads $task.cpus \\ + $args \\ + $fastx \\ + $call_gzip \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqkit/seq/meta.yml b/modules/nf-core/seqkit/seq/meta.yml new file mode 100644 index 00000000..8d4e2b16 --- /dev/null +++ b/modules/nf-core/seqkit/seq/meta.yml @@ -0,0 +1,48 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "seqkit_seq" +description: Transforms sequences (extract ID, filter by length, remove gaps, reverse complement...) +keywords: + - genomics + - fasta + - fastq + - transform + - filter + - gaps + - complement +tools: + - "seqkit": + description: "A cross-platform and ultrafast toolkit for FASTA/Q file manipulation" + homepage: "https://bioinf.shenwei.me/seqkit/" + documentation: "https://bioinf.shenwei.me/seqkit/usage/" + tool_dev_url: "https://github.com/shenwei356/seqkit" + doi: "10.1371/journal.pone.0163962" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fastx: + type: file + description: Input fasta/fastq file + pattern: "*.{fsa,fas,fa,fasta,fastq,fq,fsa.gz,fas.gz,fa.gz,fasta.gz,fastq.gz,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - fastx: + type: file + description: Output fasta/fastq file + pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/seqkit/seq/tests/main.nf.test b/modules/nf-core/seqkit/seq/tests/main.nf.test new file mode 100644 index 00000000..aa9c283e --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/main.nf.test @@ -0,0 +1,149 @@ +nextflow_process { + + name "Test Process SEQKIT_SEQ" + script "../main.nf" + process "SEQKIT_SEQ" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "seqkit" + tag "seqkit/seq" + + test("sarscov2-genome_fasta") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2-genome_fasta_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2-test_1_fastq_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("file_name_conflict-fail_with_error") { + when { + process { + """ + input[0] = [ + [ id:'test_1' ], // 
meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + + test("sarscov2-genome_fasta-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("file_name_conflict-fail_with_error-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'genome' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/nf-core/seqkit/seq/tests/main.nf.test.snap b/modules/nf-core/seqkit/seq/tests/main.nf.test.snap new file mode 100644 index 00000000..91b4d17a --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/main.nf.test.snap @@ -0,0 +1,126 @@ +{ + "sarscov2-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ] + } + ], + "timestamp": "2023-12-18T10:34:00.37449" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ] + ], + "timestamp": "2023-12-17T13:56:53.318962" + }, + "sarscov2-test_1_fastq_gz": { + "content": [ + { 
+ "0": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ] + } + ], + "timestamp": "2023-12-18T10:33:53.528342" + }, + "sarscov2-genome_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ] + } + ], + "timestamp": "2023-12-18T10:33:44.757686" + }, + "sarscov2-genome_fasta_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f" + ] + } + ], + "timestamp": "2023-12-18T10:33:49.115171" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqkit/seq/tests/nextflow.config b/modules/nf-core/seqkit/seq/tests/nextflow.config new file mode 100644 index 00000000..d8e3c66a --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args2 = '-n' +} diff --git a/modules/nf-core/seqkit/seq/tests/tags.yml b/modules/nf-core/seqkit/seq/tests/tags.yml new file mode 100644 index 00000000..5eeca7e3 --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/seq: + - "modules/nf-core/seqkit/seq/**" diff --git a/modules/pfr/edta/ltrharvest/environment.yml b/modules/nf-core/seqkit/sort/environment.yml 
similarity index 78% rename from modules/pfr/edta/ltrharvest/environment.yml rename to modules/nf-core/seqkit/sort/environment.yml index ac836d2a..820c5707 100644 --- a/modules/pfr/edta/ltrharvest/environment.yml +++ b/modules/nf-core/seqkit/sort/environment.yml @@ -1,9 +1,9 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "edta_ltrharvest" +name: "seqkit_sort" channels: - conda-forge - bioconda - defaults dependencies: - - "bioconda::edta=2.1.0" + - "bioconda::seqkit=2.6.1" diff --git a/modules/nf-core/seqkit/sort/main.nf b/modules/nf-core/seqkit/sort/main.nf new file mode 100644 index 00000000..08f86eb1 --- /dev/null +++ b/modules/nf-core/seqkit/sort/main.nf @@ -0,0 +1,63 @@ +process SEQKIT_SORT { + tag "$meta.id" + label 'process_low' + // File IO can be a bottleneck. See: https://bioinf.shenwei.me/seqkit/usage/#parallelization-of-cpu-intensive-jobs + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqkit:2.6.1--h9ee0642_0': + 'biocontainers/seqkit:2.6.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("${prefix}.*") , emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + def call_gzip = extension.endsWith('.gz') ? "| gzip -c $args2 " : '' + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + seqkit \\ + sort \\ + --threads $task.cpus \\ + $args \\ + $fastx \\ + $call_gzip \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqkit/sort/meta.yml b/modules/nf-core/seqkit/sort/meta.yml new file mode 100644 index 00000000..2e61ce15 --- /dev/null +++ b/modules/nf-core/seqkit/sort/meta.yml @@ -0,0 +1,45 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "seqkit_sort" +description: Sorts sequences by id/name/sequence/length +keywords: + - genomics + - fasta + - fastq + - sort +tools: + - "seqkit": + description: "A cross-platform and ultrafast toolkit for FASTA/Q file manipulation" + homepage: "https://bioinf.shenwei.me/seqkit/" + documentation: "https://bioinf.shenwei.me/seqkit/usage/" + tool_dev_url: "https://github.com/shenwei356/seqkit" + doi: "10.1371/journal.pone.0163962" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fastx: + type: file + description: Input fasta/fastq file + pattern: "*.{fsa,fas,fa,fasta,fastq,fq,fsa.gz,fas.gz,fa.gz,fasta.gz,fastq.gz,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
+ `[ id:'sample1' ]` + - fastx: + type: file + description: Output fasta/fastq file + pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/seqkit/sort/tests/main.nf.test b/modules/nf-core/seqkit/sort/tests/main.nf.test new file mode 100644 index 00000000..0c2f4e2d --- /dev/null +++ b/modules/nf-core/seqkit/sort/tests/main.nf.test @@ -0,0 +1,149 @@ +nextflow_process { + + name "Test Process SEQKIT_SORT" + script "../main.nf" + process "SEQKIT_SORT" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "seqkit" + tag "seqkit/sort" + + test("sarscov2-genome_fasta") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2-genome_fasta_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2-test_1_fastq_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("file_name_conflict-fail_with_error") { + when { + process { + """ + input[0] = [ + [ id:'test_1' ], // meta
map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same,") } + ) + } + + } + + test("sarscov2-genome_fasta-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("file_name_conflict-fail_with_error-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'genome' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/nf-core/seqkit/sort/tests/main.nf.test.snap b/modules/nf-core/seqkit/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..94e2cb35 --- /dev/null +++ b/modules/nf-core/seqkit/sort/tests/main.nf.test.snap @@ -0,0 +1,126 @@ +{ + "sarscov2-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ] + } + ], + "timestamp": "2023-12-18T10:07:19.28815" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ] + ], + "timestamp": "2023-12-18T10:07:15.341516" + }, + "sarscov2-test_1_fastq_gz": { + "content": [ + { + 
"0": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,80d9fd1976648214f44e42bc3d705972" + ] + ], + "1": [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,80d9fd1976648214f44e42bc3d705972" + ] + ], + "versions": [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ] + } + ], + "timestamp": "2023-12-18T10:12:25.704893" + }, + "sarscov2-genome_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ] + } + ], + "timestamp": "2023-12-18T10:07:15.293713" + }, + "sarscov2-genome_fasta_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998" + ] + } + ], + "timestamp": "2023-12-18T10:09:20.306713" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqkit/sort/tests/nextflow.config b/modules/nf-core/seqkit/sort/tests/nextflow.config new file mode 100644 index 00000000..d8e3c66a --- /dev/null +++ b/modules/nf-core/seqkit/sort/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args2 = '-n' +} diff --git a/modules/nf-core/seqkit/sort/tests/tags.yml b/modules/nf-core/seqkit/sort/tests/tags.yml new file mode 100644 index 00000000..c839b58c --- /dev/null +++ b/modules/nf-core/seqkit/sort/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/sort: + - "modules/nf-core/seqkit/sort/**" diff --git a/modules/pfr/ltrretriever/environment.yml b/modules/nf-core/tidk/explore/environment.yml 
similarity index 77% rename from modules/pfr/ltrretriever/environment.yml rename to modules/nf-core/tidk/explore/environment.yml index 8e870fdc..4fe55407 100644 --- a/modules/pfr/ltrretriever/environment.yml +++ b/modules/nf-core/tidk/explore/environment.yml @@ -1,9 +1,9 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "ltrretriever" +name: "tidk_explore" channels: - conda-forge - bioconda - defaults dependencies: - - "bioconda::LTR_retriever=2.9.0" + - "bioconda::tidk=0.2.41" diff --git a/modules/nf-core/tidk/explore/main.nf b/modules/nf-core/tidk/explore/main.nf new file mode 100644 index 00000000..3de67531 --- /dev/null +++ b/modules/nf-core/tidk/explore/main.nf @@ -0,0 +1,57 @@ +process TIDK_EXPLORE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/tidk:0.2.41--hdbdd923_0': + 'biocontainers/tidk:0.2.41--hdbdd923_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.tidk.explore.tsv") , emit: explore_tsv + tuple val(meta), path("*.top.sequence.txt") , emit: top_sequence, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + tidk \\ + explore \\ + $args \\ + $fasta \\ + > ${prefix}.tidk.explore.tsv + + [[ \$(cat ${prefix}.tidk.explore.tsv | wc -l) -gt 1 ]] \\ + && cat \\ + ${prefix}.tidk.explore.tsv \\ + | sed -n 2p \\ + | awk '{print \$1;}' \\ + > ${prefix}.top.sequence.txt \\ + || echo "No sequence identified" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tidk: \$(tidk --version | sed 's/tidk //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: 
"${meta.id}" + """ + touch ${prefix}.tidk.explore.tsv + touch ${prefix}.top.sequence.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tidk: \$(tidk --version | sed 's/tidk //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tidk/explore/meta.yml b/modules/nf-core/tidk/explore/meta.yml new file mode 100644 index 00000000..582aaf56 --- /dev/null +++ b/modules/nf-core/tidk/explore/meta.yml @@ -0,0 +1,52 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "tidk_explore" +description: | + `tidk explore` attempts to find the simple telomeric repeat unit in the genome provided. + It will report this repeat in its canonical form (e.g. TTAGG -> AACCT). +keywords: + - genomics + - telomere + - search +tools: + - "tidk": + description: tidk is a toolkit to identify and visualise telomeric repeats in genomes + homepage: "https://github.com/tolkit/telomeric-identifier" + documentation: "https://github.com/tolkit/telomeric-identifier" + tool_dev_url: "https://github.com/tolkit/telomeric-identifier" + doi: "10.5281/zenodo.10091385" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: The input fasta file + pattern: "*.{fsa,fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - explore_tsv: + type: file + description: Telomeres and their frequencies in TSV format + pattern: "*.tidk.explore.tsv" + - top_sequence: + type: file + description: | + The most frequent telomere sequence if one or more + sequences are identified by the toolkit + pattern: "*.top.sequence.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/tidk/explore/tests/main.nf.test b/modules/nf-core/tidk/explore/tests/main.nf.test new file mode 100644 index 00000000..a04fee61 --- /dev/null +++ b/modules/nf-core/tidk/explore/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_process { + + name "Test Process TIDK_EXPLORE" + script "../main.nf" + process "TIDK_EXPLORE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "tidk" + tag "tidk/explore" + + test("homo_sapiens-genome_21_fasta-success") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-genome_fasta-no_top_sequence") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.explore_tsv).match("no_top_sequence_explore_tsv") }, + { assert process.out.top_sequence == [] }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + 
file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.explore_tsv != null }, + { assert process.out.top_sequence != null }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/tidk/explore/tests/main.nf.test.snap b/modules/nf-core/tidk/explore/tests/main.nf.test.snap new file mode 100644 index 00000000..c46d31c1 --- /dev/null +++ b/modules/nf-core/tidk/explore/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "homo_sapiens-genome_21_fasta-success": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tidk.explore.tsv:md5,89de91ef36eb0925aefca61757f5275f" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.top.sequence.txt:md5,bd63900958df06516e45b887072d788f" + ] + ], + "2": [ + "versions.yml:md5,8de27958aee1d1fc9075e8046114bdb5" + ], + "explore_tsv": [ + [ + { + "id": "test" + }, + "test.tidk.explore.tsv:md5,89de91ef36eb0925aefca61757f5275f" + ] + ], + "top_sequence": [ + [ + { + "id": "test" + }, + "test.top.sequence.txt:md5,bd63900958df06516e45b887072d788f" + ] + ], + "versions": [ + "versions.yml:md5,8de27958aee1d1fc9075e8046114bdb5" + ] + } + ], + "timestamp": "2023-12-05T14:13:24.14906" + }, + "no_top_sequence_explore_tsv": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tidk.explore.tsv:md5,27b8513be5478ea2b846a407b3639c01" + ] + ] + ], + "timestamp": "2023-12-05T14:16:18.982423" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,8de27958aee1d1fc9075e8046114bdb5" + ] + ], + "timestamp": "2023-12-05T14:13:24.198311" + } +} \ No newline at end of file diff --git a/modules/nf-core/tidk/explore/tests/nextflow.config b/modules/nf-core/tidk/explore/tests/nextflow.config new file mode 100644 index 00000000..d8f33fe0 --- /dev/null +++ b/modules/nf-core/tidk/explore/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--minimum 5 --maximum 30' 
+} diff --git a/modules/nf-core/tidk/explore/tests/tags.yml b/modules/nf-core/tidk/explore/tests/tags.yml new file mode 100644 index 00000000..af666655 --- /dev/null +++ b/modules/nf-core/tidk/explore/tests/tags.yml @@ -0,0 +1,2 @@ +tidk/explore: + - "modules/nf-core/tidk/explore/**" diff --git a/modules/nf-core/tidk/plot/environment.yml b/modules/nf-core/tidk/plot/environment.yml new file mode 100644 index 00000000..ed1706e3 --- /dev/null +++ b/modules/nf-core/tidk/plot/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "tidk_plot" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::tidk=0.2.41" diff --git a/modules/nf-core/tidk/plot/main.nf b/modules/nf-core/tidk/plot/main.nf new file mode 100644 index 00000000..ef56f567 --- /dev/null +++ b/modules/nf-core/tidk/plot/main.nf @@ -0,0 +1,47 @@ +process TIDK_PLOT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/tidk:0.2.41--hdbdd923_0': + 'biocontainers/tidk:0.2.41--hdbdd923_0' }" + + input: + tuple val(meta), path(tsv) + + output: + tuple val(meta), path("*.svg"), emit: svg + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + tidk \\ + plot \\ + --output $prefix \\ + $args \\ + --tsv "$tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tidk: \$(tidk --version | sed 's/tidk //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.svg + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tidk: \$(tidk --version | sed 's/tidk //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tidk/plot/meta.yml b/modules/nf-core/tidk/plot/meta.yml new file mode 100644 index 00000000..451195c8 --- /dev/null +++ b/modules/nf-core/tidk/plot/meta.yml @@ -0,0 +1,47 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "tidk_plot" +description: | + Plots telomeric repeat frequency against sliding window location + using data produced by `tidk/search` +keywords: + - genomics + - telomere + - search + - plot +tools: + - "tidk": + description: tidk is a toolkit to identify and visualise telomeric repeats in genomes + homepage: "https://github.com/tolkit/telomeric-identifier" + documentation: "https://github.com/tolkit/telomeric-identifier" + tool_dev_url: "https://github.com/tolkit/telomeric-identifier" + doi: "10.5281/zenodo.10091385" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - tsv: + type: file + description: Search results in TSV format from `tidk search` + pattern: "*.tsv" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - svg: + type: file + description: Telomere search plot + pattern: "*.svg" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/tidk/plot/tests/main.nf.test b/modules/nf-core/tidk/plot/tests/main.nf.test new file mode 100644 index 00000000..e267c157 --- /dev/null +++ b/modules/nf-core/tidk/plot/tests/main.nf.test @@ -0,0 +1,74 @@ +nextflow_process { + + name "Test Process TIDK_PLOT" + script "../main.nf" + process "TIDK_PLOT" + + tag "modules" + tag "modules_nfcore" + tag "tidk" + tag "tidk/search" + tag "tidk/plot" + + test("homo_sapiens-genome_21_fasta-success") { + + setup { + run("TIDK_SEARCH") { + script "../../../../nf-core/tidk/search" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[1] = 'TTAGGG' + """ + } + } + } + + when { + process { + """ + input[0] = TIDK_SEARCH.out.tsv + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/tidk/plot/tests/main.nf.test.snap b/modules/nf-core/tidk/plot/tests/main.nf.test.snap new file mode 100644 index 00000000..5de9533e --- /dev/null +++ b/modules/nf-core/tidk/plot/tests/main.nf.test.snap @@ 
-0,0 +1,68 @@ +{ + "homo_sapiens-genome_21_fasta-success": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.svg:md5,e76985fdc220867a05967984c2c4675d" + ] + ], + "1": [ + "versions.yml:md5,b850b0d8b9add80d7863cc62e3f32c99" + ], + "svg": [ + [ + { + "id": "test" + }, + "test.svg:md5,e76985fdc220867a05967984c2c4675d" + ] + ], + "versions": [ + "versions.yml:md5,b850b0d8b9add80d7863cc62e3f32c99" + ] + } + ], + "timestamp": "2023-12-05T14:29:24.721706" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,b850b0d8b9add80d7863cc62e3f32c99" + ] + ], + "timestamp": "2023-12-05T14:29:55.412674" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b850b0d8b9add80d7863cc62e3f32c99" + ], + "svg": [ + [ + { + "id": "test" + }, + "test.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b850b0d8b9add80d7863cc62e3f32c99" + ] + } + ], + "timestamp": "2023-12-16T18:50:29.344319" + } +} \ No newline at end of file diff --git a/modules/nf-core/tidk/plot/tests/tags.yml b/modules/nf-core/tidk/plot/tests/tags.yml new file mode 100644 index 00000000..91228695 --- /dev/null +++ b/modules/nf-core/tidk/plot/tests/tags.yml @@ -0,0 +1,2 @@ +tidk/plot: + - "modules/nf-core/tidk/plot/**" diff --git a/modules/nf-core/tidk/search/environment.yml b/modules/nf-core/tidk/search/environment.yml new file mode 100644 index 00000000..eaf92acc --- /dev/null +++ b/modules/nf-core/tidk/search/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "tidk_search" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::tidk=0.2.41" diff --git a/modules/nf-core/tidk/search/main.nf b/modules/nf-core/tidk/search/main.nf new file mode 100644 index 00000000..820f8fda --- /dev/null +++ 
b/modules/nf-core/tidk/search/main.nf @@ -0,0 +1,62 @@ +process TIDK_SEARCH { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/tidk:0.2.41--hdbdd923_0': + 'biocontainers/tidk:0.2.41--hdbdd923_0' }" + + input: + tuple val(meta), path(fasta) + val string + + output: + tuple val(meta), path("*.tsv") , emit: tsv , optional: true + tuple val(meta), path("*.bedgraph") , emit: bedgraph , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + tidk \\ + search \\ + --string $string \\ + --output $prefix \\ + --dir tidk \\ + $args \\ + $fasta + + mv \\ + tidk/${prefix}_telomeric_repeat_windows.tsv \\ + ${prefix}.tsv \\ + || echo "TSV file was not produced" + + mv \\ + tidk/${prefix}_telomeric_repeat_windows.bedgraph \\ + ${prefix}.bedgraph \\ + || echo "BEDGRAPH file was not produced" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tidk: \$(tidk --version | sed 's/tidk //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--extension bedgraph") ? 
'bedgraph' : 'tsv' + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tidk: \$(tidk --version | sed 's/tidk //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tidk/search/meta.yml b/modules/nf-core/tidk/search/meta.yml new file mode 100644 index 00000000..8ba07350 --- /dev/null +++ b/modules/nf-core/tidk/search/meta.yml @@ -0,0 +1,51 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "tidk_search" +description: Searches a genome for a telomere string such as TTAGGG +keywords: + - genomics + - telomere + - search +tools: + - "tidk": + description: tidk is a toolkit to identify and visualise telomeric repeats in genomes + homepage: "https://github.com/tolkit/telomeric-identifier" + documentation: "https://github.com/tolkit/telomeric-identifier" + tool_dev_url: "https://github.com/tolkit/telomeric-identifier" + doi: "10.5281/zenodo.10091385" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: The input fasta file + pattern: "*.{fsa,fa,fasta}" + - string: + type: string + description: Search string such as TTAGGG +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - tsv: + type: file + description: Search results in TSV format + pattern: "*.tsv" + - bedgraph: + type: file + description: Search results in BEDGRAPH format + pattern: "*.bedgraph" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/tidk/search/tests/main.nf.test b/modules/nf-core/tidk/search/tests/main.nf.test new file mode 100644 index 00000000..6dcbe577 --- /dev/null +++ b/modules/nf-core/tidk/search/tests/main.nf.test @@ -0,0 +1,119 @@ +nextflow_process { + + name "Test Process TIDK_SEARCH" + script "../main.nf" + process "TIDK_SEARCH" + + tag "modules" + tag "modules_nfcore" + tag "tidk" + tag "tidk/search" + + test("homo_sapiens-genome_fasta-bedgraph") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = 'TTAGGG' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bedgraph).match("bedgraph") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.tsv == [] } + ) + } + + } + + test("homo_sapiens-genome_fasta-tsv") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = 'TTAGGG' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv).match("tsv") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.bedgraph == [] } + ) + } + + } + + test("stub-bedgraph") { + + options '-stub' + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + 
input[1] = 'TTAGGG' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.bedgraph != null }, + { assert process.out.tsv == [] }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("stub-tsv") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = 'TTAGGG' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.bedgraph == [] }, + { assert process.out.tsv != null }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/tidk/search/tests/main.nf.test.snap b/modules/nf-core/tidk/search/tests/main.nf.test.snap new file mode 100644 index 00000000..15aedd25 --- /dev/null +++ b/modules/nf-core/tidk/search/tests/main.nf.test.snap @@ -0,0 +1,36 @@ +{ + "bedgraph": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.bedgraph:md5,8277cc74bf083f51584ba6d2b8a8013f" + ] + ] + ], + "timestamp": "2023-12-05T09:53:52.893139" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,6baeeb0e50c9bea6975173a329179f8e" + ] + ], + "timestamp": "2023-12-05T09:53:52.89726" + }, + "tsv": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tsv:md5,5b44a89396f412eb571ea240ef9deedd" + ] + ] + ], + "timestamp": "2023-12-05T09:53:56.455271" + } +} \ No newline at end of file diff --git a/modules/nf-core/tidk/search/tests/nextflow.config b/modules/nf-core/tidk/search/tests/nextflow.config new file mode 100644 index 00000000..5d36acfb --- /dev/null +++ b/modules/nf-core/tidk/search/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--extension bedgraph' +} diff --git a/modules/nf-core/tidk/search/tests/tags.yml b/modules/nf-core/tidk/search/tests/tags.yml new file mode 100644 index 00000000..6d27fc6d --- /dev/null +++ b/modules/nf-core/tidk/search/tests/tags.yml @@ -0,0 
+1,2 @@ +tidk/search: + - "modules/nf-core/tidk/search/**" diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 250172e1..8a75bb95 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -2,9 +2,10 @@ process UNTAR { tag "$archive" label 'process_single' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'quay.io/nf-core/ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test index 1106fa01..679e83c7 100644 --- a/modules/nf-core/untar/tests/main.nf.test +++ b/modules/nf-core/untar/tests/main.nf.test @@ -52,4 +52,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/pfr/bwa/index/main.nf b/modules/pfr/bwa/index/main.nf index 3a3131ec..24b5a2ea 100644 --- a/modules/pfr/bwa/index/main.nf +++ b/modules/pfr/bwa/index/main.nf @@ -2,9 +2,10 @@ process BWA_INDEX { tag "$fasta" label 'process_single' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + 'biocontainers/bwa:0.7.17--hed695b0_7' }" input: tuple val(meta), path(fasta) diff --git a/modules/pfr/bwa/index/tests/main.nf.test b/modules/pfr/bwa/index/tests/main.nf.test index 2a3a1608..af33e73c 100644 --- a/modules/pfr/bwa/index/tests/main.nf.test +++ b/modules/pfr/bwa/index/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -30,4 +30,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/pfr/bwa/mem/environment.yml b/modules/pfr/bwa/mem/environment.yml index 1818cea3..3f136d0a 100644 --- a/modules/pfr/bwa/mem/environment.yml +++ b/modules/pfr/bwa/mem/environment.yml @@ -6,5 +6,5 @@ channels: dependencies: - bwa=0.7.17 # renovate: datasource=conda depName=bioconda/samtools - - samtools=1.18 - - htslib=1.18 + - samtools=1.19.2 + - htslib=1.19.1 diff --git a/modules/pfr/bwa/mem/main.nf b/modules/pfr/bwa/mem/main.nf index bc60783f..54ec0f16 100644 --- a/modules/pfr/bwa/mem/main.nf +++ b/modules/pfr/bwa/mem/main.nf @@ -1,11 +1,11 @@ process BWA_MEM { tag "$meta.id" label 'process_high' - label 'process_two_days_long' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:9c0128851101dafef65cef649826d2dbe6bedd7e-0' : - 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:9c0128851101dafef65cef649826d2dbe6bedd7e-0' }" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' }" input: tuple val(meta), path(reads) diff --git a/modules/pfr/bwa/mem/tests/main.nf.test b/modules/pfr/bwa/mem/tests/main.nf.test index 20cb7e95..2696e4bf 100644 --- a/modules/pfr/bwa/mem/tests/main.nf.test +++ b/modules/pfr/bwa/mem/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -31,7 +31,7 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index @@ -58,7 +58,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -71,7 +71,7 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index @@ -98,7 +98,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -111,8 +111,8 @@ 
nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index @@ -139,7 +139,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -152,8 +152,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index @@ -170,4 +170,4 @@ nextflow_process { } } -} \ No newline at end of file +} diff --git a/modules/pfr/bwa/mem/tests/main.nf.test.snap b/modules/pfr/bwa/mem/tests/main.nf.test.snap index bfb55fc7..e4fd8cc0 100644 --- a/modules/pfr/bwa/mem/tests/main.nf.test.snap +++ b/modules/pfr/bwa/mem/tests/main.nf.test.snap @@ -8,11 +8,11 @@ "id": "test", "single_end": true }, - "test.bam:md5,df203d7c7e8fef351408a909570c7952" + "test.bam:md5,a74710a0345b4717bb4431bf9c257120" ] ], "1": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" ], "bam": [ [ @@ -20,15 +20,19 @@ "id": "test", "single_end": true }, - 
"test.bam:md5,df203d7c7e8fef351408a909570c7952" + "test.bam:md5,a74710a0345b4717bb4431bf9c257120" ] ], "versions": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" ] } ], - "timestamp": "2023-12-04T11:01:22.483594641" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:11:48.440661587" }, "Single-End Sort": { "content": [ @@ -39,11 +43,11 @@ "id": "test", "single_end": true }, - "test.bam:md5,8a52bd78fdcecb994c1f63897d5b431c" + "test.bam:md5,cb1e038bc4d990683fa485d632550b54" ] ], "1": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" ], "bam": [ [ @@ -51,15 +55,19 @@ "id": "test", "single_end": true }, - "test.bam:md5,8a52bd78fdcecb994c1f63897d5b431c" + "test.bam:md5,cb1e038bc4d990683fa485d632550b54" ] ], "versions": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" ] } ], - "timestamp": "2023-12-04T11:01:30.180783483" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:11:56.086493265" }, "Paired-End": { "content": [ @@ -70,11 +78,11 @@ "id": "test", "single_end": false }, - "test.bam:md5,9815aef9ec763a60c53c3879be2d73ae" + "test.bam:md5,aea123a3828a99da1906126355f15a12" ] ], "1": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" ], "bam": [ [ @@ -82,15 +90,19 @@ "id": "test", "single_end": false }, - "test.bam:md5,9815aef9ec763a60c53c3879be2d73ae" + "test.bam:md5,aea123a3828a99da1906126355f15a12" ] ], "versions": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" ] } ], - "timestamp": "2023-12-04T11:01:38.761983007" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:12:03.474974773" }, "Paired-End Sort": { "content": [ @@ -101,11 +113,11 @@ "id": 
"test", "single_end": false }, - "test.bam:md5,0f0cda73704c4f7ba08af482edcbbe88" + "test.bam:md5,4682087bcdc3617384b375093fecd8dd" ] ], "1": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" ], "bam": [ [ @@ -113,14 +125,18 @@ "id": "test", "single_end": false }, - "test.bam:md5,0f0cda73704c4f7ba08af482edcbbe88" + "test.bam:md5,4682087bcdc3617384b375093fecd8dd" ] ], "versions": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" ] } ], - "timestamp": "2023-12-04T11:01:46.284587802" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:12:10.721510817" } } \ No newline at end of file diff --git a/modules/pfr/cat/cat/main.nf b/modules/pfr/cat/cat/main.nf index 99ac1985..adbdbd7b 100644 --- a/modules/pfr/cat/cat/main.nf +++ b/modules/pfr/cat/cat/main.nf @@ -2,9 +2,10 @@ process CAT_CAT { tag "$meta.id" label 'process_low' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/pigz:2.3.4': - 'quay.io/biocontainers/pigz:2.3.4' }" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" input: tuple val(meta), path(files_in) @@ -21,6 +22,8 @@ process CAT_CAT { def args2 = task.ext.args2 ?: '' def file_list = files_in.collect { it.toString() } + // choose appropriate concatenation tool depending on input and output format + // | input | output | command1 | command2 | // |-----------|------------|----------|----------| // | gzipped | gzipped | cat | | @@ -29,7 +32,7 @@ process CAT_CAT { // | ungzipped | gzipped | cat | pigz | // Use input file ending as default - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" out_zip = prefix.endsWith('.gz') in_zip = file_list[0].endsWith('.gz') command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' @@ -67,3 +70,10 @@ process CAT_CAT { END_VERSIONS """ } + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? 
match[0][1] : filename.substring(filename.lastIndexOf('.')) +} + diff --git a/modules/pfr/cat/cat/tests/main.nf.test b/modules/pfr/cat/cat/tests/main.nf.test index b4056a81..fcee2d19 100644 --- a/modules/pfr/cat/cat/tests/main.nf.test +++ b/modules/pfr/cat/cat/tests/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { [ [ id:'genome', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -45,8 +45,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -72,8 +72,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] ] """ @@ -102,8 +102,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] ] """ @@ -131,8 +131,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -160,7 +160,7 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] """ @@ -175,4 +175,4 @@ nextflow_process { ) } } -} \ No newline at end of file +} diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/environment.yml b/modules/pfr/custom/checkgff3fastacorrespondence/environment.yml new file mode 100644 index 00000000..ec0e86d1 --- /dev/null +++ b/modules/pfr/custom/checkgff3fastacorrespondence/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "custom_checkgff3fastacorrespondence" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::samtools=1.18" diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/main.nf b/modules/pfr/custom/checkgff3fastacorrespondence/main.nf new file mode 100644 index 00000000..c1abb6f4 --- /dev/null +++ b/modules/pfr/custom/checkgff3fastacorrespondence/main.nf @@ -0,0 +1,25 @@ +process CUSTOM_CHECKGFF3FASTACORRESPONDENCE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1': + 'biocontainers/samtools:1.18--h50ea8bc_1' }" + + input: + tuple val(meta), path(gff3) + path(fasta) + + output: + tuple val(meta), path('*.success.log') , emit: success_log , optional: true + tuple val(meta), path('*.error.log') , emit: error_log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + shell: + prefix = task.ext.prefix ?: "${meta.id}" + template 'check_gff3_fasta_correspondence.sh' +} diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/meta.yml b/modules/pfr/custom/checkgff3fastacorrespondence/meta.yml new file mode 100644 index 00000000..69bbd053 --- /dev/null +++ b/modules/pfr/custom/checkgff3fastacorrespondence/meta.yml @@ -0,0 +1,56 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_checkgff3fastacorrespondence" +description: "A custom bash script which checks the correspondence of a gff3 file with a fasta file" +keywords: + - genome + - gff3 + - annotation + - validation +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - gff3: + type: file + description: Input gff3 file + pattern: "*.{gff,gff3}" + - fasta: + type: file + description: Input fasta file + pattern: "*.{fsa,fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - success_log: + type: file + description: Log file for successful validation + pattern: "*.success.log" + - error_log: + type: file + description: Log file for failed validation + pattern: "*.error.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/templates/check_gff3_fasta_correspondence.sh b/modules/pfr/custom/checkgff3fastacorrespondence/templates/check_gff3_fasta_correspondence.sh new file mode 100755 index 00000000..611c64b3 --- /dev/null +++ b/modules/pfr/custom/checkgff3fastacorrespondence/templates/check_gff3_fasta_correspondence.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash + +# Bump VERSION on edit +VERSION="v1" + +gff3_file="!{gff3}" +fasta_file="!{fasta}" +out_prefix="!{prefix}" +task_process="!{task.process}" + +# Record versions +cat <<-END_VERSIONS > versions.yml +"${task_process}": + samtools: $(echo $(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*$//' ) +END_VERSIONS + +# Requires +# samtools faidx + +## STEP 1 +# Check that gff3 has no identifiers that are not in fasta (fasta can +# have ids that are not in gff3 since not all assembly units have gff3 records) + +# Extract identifiers from the GFF3 file +gff3_identifiers=$(grep -v '^#' "$gff3_file" | awk '{print $1}' | sort -u) + +# Extract identifiers from the FASTA file +fasta_identifiers=$(grep '^>' "$fasta_file" | awk '{print substr($1, 2)}' | sort -u) + +# Compare identifiers and find any that are present in the GFF3 but not in the FASTA +missing_identifiers=$(comm -23 <(echo "$gff3_identifiers") <(echo "$fasta_identifiers")) + 
+# Check if any missing identifiers were found +if [[ -n "$missing_identifiers" ]]; then + touch "${out_prefix}.error.log" + echo "Failed to validate gff3 file for: $tag_label" >> "${out_prefix}.error.log" + echo "Fasta file: $fasta_file" >> "${out_prefix}.error.log" + echo "Gff3 file: $gff3_file" >> "${out_prefix}.error.log" + echo "GFF3 file contains identifiers not present in FASTA:" >> "${out_prefix}.error.log" + echo "$missing_identifiers" >> "${out_prefix}.error.log" + exit 0 +fi + +## STEP 2 +# check that there are no coordinates in gff3 for any seqid that are +# greater than the seq length of the parent fasta entry + +# Compute sequence lengths using samtools faidx +samtools faidx "$fasta_file" | cut -f 1,2 > sequence_lengths.txt + +# Check GFF3 file for coordinates exceeding sequence lengths +while IFS=$'\t' read -r seqname source feature start end score strand frame attributes && \ + read -r seq seq_length <&3; do + if [[ $start -gt $seq_length || $end -gt $seq_length ]]; then + touch "${out_prefix}.error.log" + echo "Failed to validate gff3 file for: $tag_label" >> "${out_prefix}.error.log" + echo "Fasta file: $fasta_file" >> "${out_prefix}.error.log" + echo "Gff3 file: $gff3_file" >> "${out_prefix}.error.log" + echo "Coordinates exceed sequence length in GFF3 file:" >> "${out_prefix}.error.log" + echo "Sequence: $seqname" >> "${out_prefix}.error.log" + echo "Sequence length: $seq_length" >> "${out_prefix}.error.log" + echo "Start: $start" >> "${out_prefix}.error.log" + echo "End: $end" >> "${out_prefix}.error.log" + exit 0 + fi +done < "$gff3_file" 3< "sequence_lengths.txt" + +touch "${out_prefix}.success.log" +echo "All tests passed..." 
>> "${out_prefix}.success.log" +exit 0 diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test b/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test new file mode 100644 index 00000000..91578e5b --- /dev/null +++ b/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test @@ -0,0 +1,70 @@ +nextflow_process { + + name "Test Process CUSTOM_CHECKGFF3FASTACORRESPONDENCE" + script "../main.nf" + process "CUSTOM_CHECKGFF3FASTACORRESPONDENCE" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/checkgff3fastacorrespondence" + + test("sarscov2-fasta-gff3-success") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ] + + input[1] = [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.error_log == [] }, + { assert process.out.success_log != null }, + { assert path(process.out.success_log.get(0).get(1)).getText().contains("All tests passed...")}, + ) + } + + } + + test("sarscov2-gff3-homo_sapiens-fasta-error") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ] + + input[1] = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log == [] }, + { assert process.out.error_log != null }, + { assert path(process.out.error_log.get(0).get(1)).getText().contains("GFF3 file contains identifiers not present in FASTA")}, + ) + } + + } + +} diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test.snap 
b/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test.snap new file mode 100644 index 00000000..261e0dc3 --- /dev/null +++ b/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2-gff3-homo_sapiens-fasta-error": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.error.log:md5,8a119170625dc95fb2faa6843fad2c3f" + ] + ], + "2": [ + "versions.yml:md5,c8e0bb60f7422aa6c15db35013620802" + ], + "error_log": [ + [ + { + "id": "test" + }, + "test.error.log:md5,8a119170625dc95fb2faa6843fad2c3f" + ] + ], + "success_log": [ + + ], + "versions": [ + "versions.yml:md5,c8e0bb60f7422aa6c15db35013620802" + ] + } + ], + "timestamp": "2023-11-29T12:24:08.677505" + }, + "sarscov2-fasta-gff3-success": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.success.log:md5,5cad27984e6af4889f7dcf12264fe47b" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,c8e0bb60f7422aa6c15db35013620802" + ], + "error_log": [ + + ], + "success_log": [ + [ + { + "id": "test" + }, + "test.success.log:md5,5cad27984e6af4889f7dcf12264fe47b" + ] + ], + "versions": [ + "versions.yml:md5,c8e0bb60f7422aa6c15db35013620802" + ] + } + ], + "timestamp": "2023-11-29T12:24:04.530428" + } +} \ No newline at end of file diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/tests/tags.yml b/modules/pfr/custom/checkgff3fastacorrespondence/tests/tags.yml new file mode 100644 index 00000000..708130d8 --- /dev/null +++ b/modules/pfr/custom/checkgff3fastacorrespondence/tests/tags.yml @@ -0,0 +1,2 @@ +custom/checkgff3fastacorrespondence: + - "modules/pfr/custom/checkgff3fastacorrespondence/**" diff --git a/modules/pfr/custom/restoregffids/main.nf b/modules/pfr/custom/restoregffids/main.nf index b215f439..14e2c077 100644 --- a/modules/pfr/custom/restoregffids/main.nf +++ b/modules/pfr/custom/restoregffids/main.nf @@ -2,9 +2,10 @@ process CUSTOM_RESTOREGFFIDS { tag "$meta.id" label 'process_single' - container "${ 
workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.10.2': - 'quay.io/biocontainers/python:3.10.2' }" + 'biocontainers/python:3.10.2' }" input: tuple val(meta), path(gff3) diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test b/modules/pfr/custom/restoregffids/tests/main.nf.test index cc374b76..521b9248 100644 --- a/modules/pfr/custom/restoregffids/tests/main.nf.test +++ b/modules/pfr/custom/restoregffids/tests/main.nf.test @@ -60,4 +60,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/pfr/custom/shortenfastaids/main.nf b/modules/pfr/custom/shortenfastaids/main.nf index f268d777..92762ef6 100644 --- a/modules/pfr/custom/shortenfastaids/main.nf +++ b/modules/pfr/custom/shortenfastaids/main.nf @@ -2,9 +2,10 @@ process CUSTOM_SHORTENFASTAIDS { tag "$meta.id" label 'process_single' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/biopython:1.75': - 'quay.io/biocontainers/biopython:1.75' }" + 'biocontainers/biopython:1.75' }" input: tuple val(meta), path(fasta) diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test b/modules/pfr/custom/shortenfastaids/tests/main.nf.test index efff639e..dc46bae5 100644 --- a/modules/pfr/custom/shortenfastaids/tests/main.nf.test +++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test @@ -128,4 +128,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/pfr/edta/ltrharvest/tests/tags.yml b/modules/pfr/edta/ltrharvest/tests/tags.yml deleted file mode 100644 index a625c6e8..00000000 --- a/modules/pfr/edta/ltrharvest/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -edta/ltrharvest: - - "modules/pfr/edta/ltrharvest/**" diff --git a/modules/pfr/gt/gff3/environment.yml b/modules/pfr/gt/gff3/environment.yml new file mode 100644 index 00000000..8289fb31 --- /dev/null +++ b/modules/pfr/gt/gff3/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "gt_gff3" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::genometools-genometools=1.6.5" diff --git a/modules/pfr/gt/gff3/main.nf b/modules/pfr/gt/gff3/main.nf new file mode 100644 index 00000000..d27e2bb9 --- /dev/null +++ b/modules/pfr/gt/gff3/main.nf @@ -0,0 +1,51 @@ +process GT_GFF3 { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/genometools-genometools:1.6.5--py310h3db02ab_0': + 'biocontainers/genometools-genometools:1.6.5--py310h3db02ab_0' }" + + input: + tuple val(meta), path(gff3) + + output: + tuple val(meta), path("*.gt.gff3") , emit: gt_gff3 , optional: true + tuple val(meta), path("*.error.log"), emit: error_log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + gt \\ + gff3 \\ + $args \\ + "$gff3" \\ + > "${prefix}.gt.gff3" \\ + 2> "${prefix}.error.log" \\ + || echo "Errors from gt-gff3 printed to ${prefix}.error.log" + + if grep -q "gt gff3: error:" "${prefix}.error.log"; then + echo "gt-gff3 failed to parse $gff3" + + rm \\ + "${prefix}.gt.gff3" + else + echo "gt-gff3 successfully parsed $gff3" + + mv \\ + "${prefix}.error.log" \\ + gt_gff3.stderr + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genometools: \$(gt --version | head -1 | sed 's/gt (GenomeTools) //') + END_VERSIONS + """ +} diff --git a/modules/pfr/gt/gff3/meta.yml b/modules/pfr/gt/gff3/meta.yml new file mode 100644 index 00000000..5cecd8d0 --- /dev/null +++ b/modules/pfr/gt/gff3/meta.yml @@ -0,0 +1,48 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gt_gff3" +description: "GenomeTools gt-gff3 utility to parse, possibly transform, and output GFF3 files" +keywords: + - genome + - gff3 + - annotation +tools: + - "gt": + description: "The GenomeTools genome analysis system" + homepage: "https://genometools.org/index.html" + documentation: "https://genometools.org/documentation.html" + tool_dev_url: "https://github.com/genometools/genometools" + doi: "10.1109/TCBB.2013.68" + licence: ["ISC"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - gff3: + type: file + description: Input gff3 file + pattern: "*.{gff,gff3}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - gt_gff3: + type: file + description: Parsed gff3 file produced only if there is no parsing error + pattern: "*.gt.gff3" + - error_log: + type: file + description: Error log if gt-gff3 failed to parse the input gff3 file + pattern: "*.error.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@gallvp" +maintainers: + - "@gallvp" diff --git a/modules/pfr/gt/gff3/tests/main.nf.test b/modules/pfr/gt/gff3/tests/main.nf.test new file mode 100644 index 00000000..cb44bc8f --- /dev/null +++ b/modules/pfr/gt/gff3/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process GT_GFF3" + script "../main.nf" + process "GT_GFF3" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "gt" + tag "gt/gff3" + + test("sarscov2-gff3-valid") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gt_gff3 != null }, + { assert process.out.error_log == [] } + ) + } + + } + + test("sarscov2-gff3-invalid") { + when { + process { + """ + input[0] = Channel.of( + '##gff-version 3', + 'chr22\tID=gene:ENSG00000233995;Name=AP000547.1' + ) + .collectFile(name: 'sample.gff3', newLine: true) + .map { file -> [ [ id:'test' ], file ] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gt_gff3 == [] }, + { assert process.out.error_log != null }, + { assert path(process.out.error_log.get(0).get(1)).getText().contains("gt gff3: error:") } + ) + } + } + +} diff --git 
a/modules/pfr/gt/gff3/tests/main.nf.test.snap b/modules/pfr/gt/gff3/tests/main.nf.test.snap new file mode 100644 index 00000000..f31e8d1c --- /dev/null +++ b/modules/pfr/gt/gff3/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2-gff3-invalid": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.error.log:md5,31e6117c516f936ec403f792c732bc76" + ] + ], + "2": [ + "versions.yml:md5,9753770dd19a2a306dcf16d4aaf049eb" + ], + "error_log": [ + [ + { + "id": "test" + }, + "test.error.log:md5,31e6117c516f936ec403f792c732bc76" + ] + ], + "gt_gff3": [ + + ], + "versions": [ + "versions.yml:md5,9753770dd19a2a306dcf16d4aaf049eb" + ] + } + ], + "timestamp": "2023-11-28T13:43:34.620429" + }, + "sarscov2-gff3-valid": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,9753770dd19a2a306dcf16d4aaf049eb" + ], + "error_log": [ + + ], + "gt_gff3": [ + [ + { + "id": "test" + }, + "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85" + ] + ], + "versions": [ + "versions.yml:md5,9753770dd19a2a306dcf16d4aaf049eb" + ] + } + ], + "timestamp": "2023-11-28T13:43:31.065832" + } +} \ No newline at end of file diff --git a/modules/pfr/gt/gff3/tests/nextflow.config b/modules/pfr/gt/gff3/tests/nextflow.config new file mode 100644 index 00000000..af562267 --- /dev/null +++ b/modules/pfr/gt/gff3/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '-tidy -retainids' +} diff --git a/modules/pfr/gt/gff3/tests/tags.yml b/modules/pfr/gt/gff3/tests/tags.yml new file mode 100644 index 00000000..ae040309 --- /dev/null +++ b/modules/pfr/gt/gff3/tests/tags.yml @@ -0,0 +1,2 @@ +gt/gff3: + - "modules/pfr/gt/gff3/**" diff --git a/modules/pfr/gt/gff3validator/environment.yml b/modules/pfr/gt/gff3validator/environment.yml new file mode 100644 index 00000000..ea57ebe0 --- /dev/null +++ b/modules/pfr/gt/gff3validator/environment.yml @@ -0,0 +1,9 @@ +--- +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "gt_gff3validator" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::genometools-genometools=1.6.5" diff --git a/modules/pfr/gt/gff3validator/main.nf b/modules/pfr/gt/gff3validator/main.nf new file mode 100644 index 00000000..ae7ec9e7 --- /dev/null +++ b/modules/pfr/gt/gff3validator/main.nf @@ -0,0 +1,50 @@ +process GT_GFF3VALIDATOR { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/genometools-genometools:1.6.5--py310h3db02ab_0': + 'biocontainers/genometools-genometools:1.6.5--py310h3db02ab_0' }" + + input: + tuple val(meta), path(gff3) + + output: + tuple val(meta), path('*.success.log') , emit: success_log , optional: true + tuple val(meta), path('*.error.log') , emit: error_log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + gt \\ + gff3validator \\ + "$gff3" \\ + > "${prefix}.success.log" \\ + 2> "${prefix}.error.log" \\ + || echo "Errors from gt-gff3validator printed to ${prefix}.error.log" + + if grep -q "input is valid GFF3" "${prefix}.success.log"; then + echo "Validation successful..." + + mv \\ + "${prefix}.error.log" \\ + gt_gff3validator.stderr + else + echo "Validation failed..." 
+ + mv \\ + "${prefix}.success.log" \\ + gt_gff3validator.stdout + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genometools: \$(gt --version | head -1 | sed 's/gt (GenomeTools) //') + END_VERSIONS + """ +} diff --git a/modules/pfr/gt/gff3validator/meta.yml b/modules/pfr/gt/gff3validator/meta.yml new file mode 100644 index 00000000..3322faf9 --- /dev/null +++ b/modules/pfr/gt/gff3validator/meta.yml @@ -0,0 +1,49 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gt_gff3validator" +description: "GenomeTools gt-gff3validator utility to strictly validate a GFF3 file" +keywords: + - genome + - gff3 + - annotation + - validation +tools: + - "gt": + description: "The GenomeTools genome analysis system" + homepage: "https://genometools.org/index.html" + documentation: "https://genometools.org/documentation.html" + tool_dev_url: "https://github.com/genometools/genometools" + doi: "10.1109/TCBB.2013.68" + licence: ["ISC"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - gff3: + type: file + description: Input gff3 file + pattern: "*.{gff,gff3}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - success_log: + type: file + description: Log file for successful validation + pattern: "*.success.log" + - error_log: + type: file + description: Log file for failed validation + pattern: "*.error.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/gt/gff3validator/tests/main.nf.test b/modules/pfr/gt/gff3validator/tests/main.nf.test new file mode 100644 index 00000000..1b99e551 --- /dev/null +++ b/modules/pfr/gt/gff3validator/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process GT_GFF3VALIDATOR" + script "../main.nf" + process "GT_GFF3VALIDATOR" + + tag "modules" + tag "modules_nfcore" + tag "gt" + tag "gt/gff3validator" + + test("custom-gff3-valid") { + + when { + process { + """ + input[0] = Channel.of( + '##gff-version 3', + 'chr22\thavana\tpseudogene\t16572027\t16574637\t.\t+\t.\tID=gene:ENSG00000233995;Name=AP000547.1' + ) + .collectFile(name: 'sample.gff3', newLine: true) + .map { file -> [ [ id:'test' ], file ] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.error_log == [] }, + { assert process.out.success_log != null }, + { assert path(process.out.success_log.get(0).get(1)).getText().contains("input is valid GFF3") } + ) + } + + } + + test("sarscov2-gff3-invalid") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_gff3'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log == [] }, + { assert process.out.error_log != null }, + { assert path(process.out.error_log.get(0).get(1)).getText().contains("gt gff3validator: error:") } + ) + } + + } +} diff --git 
a/modules/pfr/gt/gff3validator/tests/main.nf.test.snap b/modules/pfr/gt/gff3validator/tests/main.nf.test.snap new file mode 100644 index 00000000..0b6f065a --- /dev/null +++ b/modules/pfr/gt/gff3validator/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2-gff3-invalid": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.error.log:md5,c5d16b263a87072a13cca44fd811b8e2" + ] + ], + "2": [ + "versions.yml:md5,5927673eb73a8c22408643d224414215" + ], + "error_log": [ + [ + { + "id": "test" + }, + "test.error.log:md5,c5d16b263a87072a13cca44fd811b8e2" + ] + ], + "success_log": [ + + ], + "versions": [ + "versions.yml:md5,5927673eb73a8c22408643d224414215" + ] + } + ], + "timestamp": "2023-11-29T11:09:23.708792" + }, + "custom-gff3-valid": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.success.log:md5,b11ca5c18c865fc808ea0fef0b07da30" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,5927673eb73a8c22408643d224414215" + ], + "error_log": [ + + ], + "success_log": [ + [ + { + "id": "test" + }, + "test.success.log:md5,b11ca5c18c865fc808ea0fef0b07da30" + ] + ], + "versions": [ + "versions.yml:md5,5927673eb73a8c22408643d224414215" + ] + } + ], + "timestamp": "2023-11-29T11:09:19.530068" + } +} \ No newline at end of file diff --git a/modules/pfr/gt/gff3validator/tests/tags.yml b/modules/pfr/gt/gff3validator/tests/tags.yml new file mode 100644 index 00000000..e247d55e --- /dev/null +++ b/modules/pfr/gt/gff3validator/tests/tags.yml @@ -0,0 +1,2 @@ +gt/gff3validator: + - "modules/pfr/gt/gff3validator/**" diff --git a/modules/pfr/gt/stat/environment.yml b/modules/pfr/gt/stat/environment.yml new file mode 100644 index 00000000..dca959b3 --- /dev/null +++ b/modules/pfr/gt/stat/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "gt_stat" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - 
"bioconda::genometools-genometools=1.6.5" diff --git a/modules/pfr/gt/stat/main.nf b/modules/pfr/gt/stat/main.nf new file mode 100644 index 00000000..3308b562 --- /dev/null +++ b/modules/pfr/gt/stat/main.nf @@ -0,0 +1,35 @@ +process GT_STAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/genometools-genometools:1.6.5--py310h3db02ab_0': + 'biocontainers/genometools-genometools:1.6.5--py310h3db02ab_0' }" + + input: + tuple val(meta), path(gff3) + + output: + tuple val(meta), path("*.gt.stat.yml") , emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + gt \\ + stat \\ + $args \\ + "$gff3" \\ + > "${prefix}.gt.stat.yml" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genometools: \$(gt --version | head -1 | sed 's/gt (GenomeTools) //') + END_VERSIONS + """ +} diff --git a/modules/pfr/gt/stat/meta.yml b/modules/pfr/gt/stat/meta.yml new file mode 100644 index 00000000..203059a6 --- /dev/null +++ b/modules/pfr/gt/stat/meta.yml @@ -0,0 +1,46 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gt_stat" +description: "GenomeTools gt-stat utility to show statistics about features contained in GFF3 files" +keywords: + - genome + - gff3 + - annotation + - statistics + - stats +tools: + - "gt": + description: "The GenomeTools genome analysis system" + homepage: "https://genometools.org/index.html" + documentation: "https://genometools.org/documentation.html" + tool_dev_url: "https://github.com/genometools/genometools" + doi: "10.1109/TCBB.2013.68" + licence: ["ISC"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information 
+ e.g. `[ id:'test' ]` + - gff3: + type: file + description: Input gff3 file + pattern: "*.{gff,gff3}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - stats: + type: file + description: Stats file in yaml format + pattern: "*.gt.stat.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/gt/stat/tests/main.nf.test b/modules/pfr/gt/stat/tests/main.nf.test new file mode 100644 index 00000000..57f5992c --- /dev/null +++ b/modules/pfr/gt/stat/tests/main.nf.test @@ -0,0 +1,37 @@ +nextflow_process { + + name "Test Process GT_STAT" + script "../main.nf" + process "GT_STAT" + + tag "modules" + tag "modules_nfcore" + tag "gt" + tag "gt/stat" + + test("sarscov2-gff3") { + + when { + process { + """ + input[0] = Channel.of( + "##gff-version 3" + + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true).getText().toLowerCase() + ) + .collectFile(name: 'sample.gff3', newLine: true) + .map { file -> [ [ id:'test' ], file ] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert path(process.out.stats.get(0).get(1)).getText().contains("cdss: 12") } + ) + } + + } + +} diff --git a/modules/pfr/gt/stat/tests/main.nf.test.snap b/modules/pfr/gt/stat/tests/main.nf.test.snap new file mode 100644 index 00000000..2fcfb8a8 --- /dev/null +++ b/modules/pfr/gt/stat/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "sarscov2-gff3": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gt.stat.yml:md5,ebba7831ddbf916b8bbea675ba8693b5" + ] + ], + "1": [ + "versions.yml:md5,a184b50afb2ad6dd2d3d37b0a211dd71" + ], + "stats": [ + [ + { + "id": "test" + }, + "test.gt.stat.yml:md5,ebba7831ddbf916b8bbea675ba8693b5" + ] + ], + "versions": [ + "versions.yml:md5,a184b50afb2ad6dd2d3d37b0a211dd71" + ] + } + ], + 
"timestamp": "2023-11-29T11:34:48.057277" + } +} \ No newline at end of file diff --git a/modules/pfr/gt/stat/tests/tags.yml b/modules/pfr/gt/stat/tests/tags.yml new file mode 100644 index 00000000..46be6341 --- /dev/null +++ b/modules/pfr/gt/stat/tests/tags.yml @@ -0,0 +1,2 @@ +gt/stat: + - "modules/pfr/gt/stat/**" diff --git a/modules/pfr/lai/main.nf b/modules/pfr/lai/main.nf deleted file mode 100644 index 7d2cf3be..00000000 --- a/modules/pfr/lai/main.nf +++ /dev/null @@ -1,68 +0,0 @@ -process LAI { - tag "$meta.id" - label 'process_high' - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.0--hdfd78af_2': - 'quay.io/biocontainers/ltr_retriever:2.9.0--hdfd78af_2' }" - - input: - tuple val(meta), path(fasta) - path pass_list - path annotation_out - path monoploid_seqs - - output: - tuple val(meta), path("*.LAI.log") , emit: log - tuple val(meta), path("*.LAI.out") , emit: lai_out , optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def monoploid_param = monoploid_seqs ? "-mono $monoploid_seqs" : '' - def lai_output_name = monoploid_seqs ? 
"${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI" - """ - # Remove comments from genome fasta, - # otherwise LAI triggers its sequence name change logic - - sed \\ - '/^>/ s/\\s.*\$//' \\ - $fasta \\ - > for_lai_no_comments.fsa - - LAI \\ - -genome for_lai_no_comments.fsa \\ - -intact $pass_list \\ - -all $annotation_out \\ - -t $task.cpus \\ - $monoploid_param \\ - $args \\ - > "${prefix}.LAI.log" - - mv \\ - $lai_output_name \\ - "${prefix}.LAI.out" \\ - || echo "LAI did not produce the output file" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.LAI.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//') - END_VERSIONS - """ -} diff --git a/modules/pfr/lai/tests/main.nf.test b/modules/pfr/lai/tests/main.nf.test deleted file mode 100644 index 0787d6b6..00000000 --- a/modules/pfr/lai/tests/main.nf.test +++ /dev/null @@ -1,120 +0,0 @@ -nextflow_process { - - name "Test Process LAI" - script "../main.nf" - process "LAI" - config "./nextflow.config" - - tag "modules" - tag "modules_nfcore" - tag "lai" - tag "gt/suffixerator" - tag "nf-core/gunzip" - tag "gt/ltrharvest" - tag "ltrretriever" - - test("homo_sapiens-genome_21_fasta-success") { - - setup { - run("GUNZIP") { - script "../../../nf-core/gunzip" - - process { - """ - input[0] = [ - [ id:'test' ], - file('/Users/hrauxr/Projects/nxf-modules/tests/data/chr1.fa.gz', checkIfExists: true) - ] - """ - } - } - - run("GT_SUFFIXERATOR") { - script "../../../pfr/gt/suffixerator" - - process { - """ - input[0] = GUNZIP.out.gunzip - """ - } - } - - run("GT_LTRHARVEST") { - script "../../../pfr/gt/ltrharvest" - - process { - """ - 
input[0] = GT_SUFFIXERATOR.out.index - """ - } - } - - run("LTRRETRIEVER") { - script "../../../pfr/ltrretriever" - - process { - """ - input[0] = GUNZIP.out.gunzip - input[1] = GT_LTRHARVEST.out.tabout.map { meta, tabout -> tabout } - input[2] = [] - input[3] = [] - input[4] = [] - """ - } - } - } - - when { - process { - """ - input[0] = GUNZIP.out.gunzip - input[1] = LTRRETRIEVER.out.pass_list.map { meta, pass_list -> pass_list } - input[2] = LTRRETRIEVER.out.annotation_out.map { meta, annotation_out -> annotation_out } - input[3] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert path(process.out.log.get(0).get(1)).getText().contains("Dependency checking: Passed!") }, - { assert path(process.out.log.get(0).get(1)).getText().contains("Calculate LAI:") }, - { assert path(process.out.log.get(0).get(1)).getText().contains("Total LTR sequence content (0%) is too low for accurate LAI calculation") }, - { assert path(process.out.log.get(0).get(1)).getText().contains("Sorry, LAI is not applicable on the current genome assembly.") }, - { assert process.out.lai_out == [] }, - { assert snapshot(process.out.versions).match("versions") } - ) - } - - } - - test("stub") { - - options '-stub' - - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - ] - input[1] = [] - input[2] = [] - input[3] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(process.out.versions).match("versions") } - ) - } - - } - -} \ No newline at end of file diff --git a/modules/pfr/lai/tests/main.nf.test.snap b/modules/pfr/lai/tests/main.nf.test.snap deleted file mode 100644 index 751ddb60..00000000 --- a/modules/pfr/lai/tests/main.nf.test.snap +++ /dev/null @@ -1,10 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,2ac93e1e6324236af6f9a794bbac2099" - ] - ], - "timestamp": 
"2023-12-05T12:15:32.969684" - } -} \ No newline at end of file diff --git a/modules/pfr/lai/tests/nextflow.config b/modules/pfr/lai/tests/nextflow.config deleted file mode 100644 index 516a3e27..00000000 --- a/modules/pfr/lai/tests/nextflow.config +++ /dev/null @@ -1,10 +0,0 @@ -process { - - withName: GT_SUFFIXERATOR { - ext.args = '-tis -suf -lcp -des -ssp -sds -dna' - } - - withName: GT_LTRHARVEST { - ext.args = '-minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 -motif TGCA -motifmis 1 -similar 85 -vic 10 -seed 20 -seqids yes' - } -} diff --git a/modules/pfr/lai/tests/tags.yml b/modules/pfr/lai/tests/tags.yml deleted file mode 100644 index 252295d7..00000000 --- a/modules/pfr/lai/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -lai: - - "modules/pfr/lai/**" diff --git a/modules/pfr/ltrfinder/environment.yml b/modules/pfr/ltrfinder/environment.yml index 3ffa7c33..7f354597 100644 --- a/modules/pfr/ltrfinder/environment.yml +++ b/modules/pfr/ltrfinder/environment.yml @@ -6,4 +6,4 @@ channels: - bioconda - defaults dependencies: - - "bioconda::edta=2.1.0" + - "bioconda::ltr_finder_parallel=1.1" diff --git a/modules/pfr/ltrfinder/main.nf b/modules/pfr/ltrfinder/main.nf index 6555c235..3e59e3c7 100644 --- a/modules/pfr/ltrfinder/main.nf +++ b/modules/pfr/ltrfinder/main.nf @@ -2,9 +2,10 @@ process LTRFINDER { tag "$meta.id" label 'process_high' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1': - 'quay.io/biocontainers/edta:2.1.0--hdfd78af_1' }" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ltr_finder_parallel:1.1--hdfd78af_0': + 'biocontainers/ltr_finder_parallel:1.1--hdfd78af_0' }" input: tuple val(meta), path(fasta) @@ -18,10 +19,10 @@ process LTRFINDER { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - /usr/local/share/EDTA/bin/LTR_FINDER_parallel/LTR_FINDER_parallel \\ + LTR_FINDER_parallel \\ -seq $fasta \\ -threads $task.cpus \\ $args @@ -31,21 +32,21 @@ process LTRFINDER { cat <<-END_VERSIONS > versions.yml "${task.process}": - LTR_FINDER_parallel: \$(/usr/local/share/EDTA/bin/LTR_FINDER_parallel/LTR_FINDER_parallel -h | grep 'Version:' | sed 's/Version: //') + LTR_FINDER_parallel: \$(LTR_FINDER_parallel -h | grep 'Version:' | sed 's/Version: //') ltr_finder: \$(ltr_finder -h 2>&1 | grep 'ltr_finder' | sed 's/ltr_finder //') END_VERSIONS """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ touch "${prefix}.scn" touch "${prefix}.gff3" cat <<-END_VERSIONS > versions.yml "${task.process}": - LTR_FINDER_parallel: \$(/usr/local/share/EDTA/bin/LTR_FINDER_parallel/LTR_FINDER_parallel -h | grep 'Version:' | sed 's/Version: //') + LTR_FINDER_parallel: \$(LTR_FINDER_parallel -h | grep 'Version:' | sed 's/Version: //') ltr_finder: \$(ltr_finder -h 2>&1 | grep 'ltr_finder' | sed 's/ltr_finder //') END_VERSIONS """ diff --git a/modules/pfr/ltrfinder/meta.yml b/modules/pfr/ltrfinder/meta.yml index 7fdfa5f9..e3c672b9 100644 --- a/modules/pfr/ltrfinder/meta.yml +++ b/modules/pfr/ltrfinder/meta.yml @@ -13,7 +13,7 @@ keywords: - retrotransposon tools: - "LTR_FINDER_parallel": - description: A Perl wrapper for LTR_FINEDR + description: A Perl wrapper for LTR_FINDER homepage: "https://github.com/oushujun/LTR_FINDER_parallel" 
documentation: "https://github.com/oushujun/LTR_FINDER_parallel" tool_dev_url: "https://github.com/oushujun/LTR_FINDER_parallel" diff --git a/modules/pfr/ltrfinder/tests/main.nf.test b/modules/pfr/ltrfinder/tests/main.nf.test index 9b2fbb0b..447ce34d 100644 --- a/modules/pfr/ltrfinder/tests/main.nf.test +++ b/modules/pfr/ltrfinder/tests/main.nf.test @@ -7,16 +7,29 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "ltrfinder" + tag "gunzip/main" - test("homo_sapiens-genome_fasta-success") { + test("actinidia_chinensis-genome_21_fasta_gz-success") { + + setup { + run('GUNZIP') { + script "../../gunzip/main" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + """ + } + } + } when { process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + input[0] = GUNZIP.out.gunzip """ } } @@ -25,7 +38,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out).match() }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(path(process.out.versions[0]).text).match("versions") } ) } @@ -40,7 +53,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) ] """ } @@ -49,12 +62,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert process.out.scn != null }, - { assert process.out.gff != null }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out).match() }, + { assert snapshot(path(process.out.versions[0]).text).match("stub_versions") } ) } } -} \ No newline at end of file +} diff --git a/modules/pfr/ltrfinder/tests/main.nf.test.snap 
b/modules/pfr/ltrfinder/tests/main.nf.test.snap index c0b7a205..54a2cee1 100644 --- a/modules/pfr/ltrfinder/tests/main.nf.test.snap +++ b/modules/pfr/ltrfinder/tests/main.nf.test.snap @@ -1,13 +1,64 @@ { + "actinidia_chinensis-genome_21_fasta_gz-success": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.scn:md5,006193c9eaf3f552ccb0369f159e7660" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,96e5305163939e4381e1b94b660dc0a2" + ] + ], + "2": [ + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,96e5305163939e4381e1b94b660dc0a2" + ] + ], + "scn": [ + [ + { + "id": "test" + }, + "test.scn:md5,006193c9eaf3f552ccb0369f159e7660" + ] + ], + "versions": [ + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T09:14:38.509965" + }, "versions": { "content": [ - [ - "versions.yml:md5,2cff73621bfc5c4abc40613c33bd92b8" - ] + "\"LTRFINDER\":\n LTR_FINDER_parallel: v1.1\n ltr_finder: v1.07\n" ], - "timestamp": "2023-12-05T09:09:31.335554" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T09:16:55.301422" }, - "homo_sapiens-genome_fasta-success": { + "stub": { "content": [ { "0": [ @@ -15,7 +66,7 @@ { "id": "test" }, - "test.scn:md5,2ce449dff751e59dbc292b6888491954" + "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -23,18 +74,18 @@ { "id": "test" }, - "test.gff3:md5,a91c388a54d7694bd14a4b085935759c" + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "2": [ - "versions.yml:md5,2cff73621bfc5c4abc40613c33bd92b8" + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" ], "gff": [ [ { "id": "test" }, - "test.gff3:md5,a91c388a54d7694bd14a4b085935759c" + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "scn": [ @@ -42,14 +93,28 @@ { "id": "test" }, - "test.scn:md5,2ce449dff751e59dbc292b6888491954" + 
"test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ - "versions.yml:md5,2cff73621bfc5c4abc40613c33bd92b8" + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" ] } ], - "timestamp": "2023-12-05T09:09:31.32397" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T09:14:43.054758" + }, + "stub_versions": { + "content": [ + "\"LTRFINDER\":\n LTR_FINDER_parallel: v1.1\n ltr_finder: v1.07\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T09:16:59.800724" } } \ No newline at end of file diff --git a/modules/pfr/ltrfinder/tests/tags.yml b/modules/pfr/ltrfinder/tests/tags.yml index 4f613338..006ded2c 100644 --- a/modules/pfr/ltrfinder/tests/tags.yml +++ b/modules/pfr/ltrfinder/tests/tags.yml @@ -1,2 +1,2 @@ ltrfinder: - - "modules/pfr/ltrfinder/**" + - "modules/nf-core/ltrfinder/**" diff --git a/modules/pfr/ltrharvest/environment.yml b/modules/pfr/ltrharvest/environment.yml new file mode 100644 index 00000000..9337fe23 --- /dev/null +++ b/modules/pfr/ltrharvest/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "ltrharvest" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::ltr_harvest_parallel=1.1" diff --git a/modules/pfr/edta/ltrharvest/main.nf b/modules/pfr/ltrharvest/main.nf similarity index 63% rename from modules/pfr/edta/ltrharvest/main.nf rename to modules/pfr/ltrharvest/main.nf index 06887f1c..1e5e06d2 100644 --- a/modules/pfr/edta/ltrharvest/main.nf +++ b/modules/pfr/ltrharvest/main.nf @@ -1,10 +1,11 @@ -process EDTA_LTRHARVEST { +process LTRHARVEST { tag "$meta.id" label 'process_medium' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1': - 'quay.io/biocontainers/edta:2.1.0--hdfd78af_1' }" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ltr_harvest_parallel:1.1--hdfd78af_0': + 'biocontainers/ltr_harvest_parallel:1.1--hdfd78af_0' }" input: tuple val(meta), path(fasta) @@ -21,7 +22,7 @@ process EDTA_LTRHARVEST { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - /usr/local/share/EDTA/bin/LTR_HARVEST_parallel/LTR_HARVEST_parallel \\ + LTR_HARVEST_parallel \\ -seq $fasta \\ $args \\ -threads $task.cpus @@ -34,7 +35,7 @@ process EDTA_LTRHARVEST { cat <<-END_VERSIONS > versions.yml "${task.process}": - LTR_HARVEST_parallel: \$(/usr/local/share/EDTA/bin/LTR_HARVEST_parallel/LTR_HARVEST_parallel -h | sed -n '/Version/s/Version: //p') + LTR_HARVEST_parallel: \$(LTR_HARVEST_parallel -h | sed -n '/Version/s/Version: //p') genometools: \$(gt --version | sed '1!d ; s/gt (GenomeTools) //') END_VERSIONS """ @@ -48,7 +49,7 @@ process EDTA_LTRHARVEST { cat <<-END_VERSIONS > versions.yml "${task.process}": - LTR_HARVEST_parallel: \$(/usr/local/share/EDTA/bin/LTR_HARVEST_parallel/LTR_HARVEST_parallel -h | sed -n '/Version/s/Version: //p') + LTR_HARVEST_parallel: \$(LTR_HARVEST_parallel -h | sed -n '/Version/s/Version: //p') genometools: \$(gt --version | sed '1!d ; s/gt (GenomeTools) //') END_VERSIONS """ diff --git a/modules/pfr/edta/ltrharvest/meta.yml b/modules/pfr/ltrharvest/meta.yml similarity index 78% rename from modules/pfr/edta/ltrharvest/meta.yml rename to modules/pfr/ltrharvest/meta.yml index 1b6a969c..256b3ce5 100644 --- a/modules/pfr/edta/ltrharvest/meta.yml +++ b/modules/pfr/ltrharvest/meta.yml @@ -1,6 +1,6 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "edta_ltrharvest" +name: 
"ltrharvest" description: | Predicts LTR retrotransposons using the parallel version of GenomeTools gt-ltrharvest utility included in the EDTA toolchain @@ -12,13 +12,12 @@ keywords: - transposons - retrotransposons tools: - - "edta": - description: Extensive de-novo TE Annotator (EDTA) - homepage: "https://github.com/oushujun/EDTA" - documentation: "https://github.com/oushujun/EDTA" - tool_dev_url: "https://github.com/oushujun/EDTA" - doi: "10.1186/s13059-019-1905-y" - licence: ["GPL v3"] + - "LTR_HARVEST_parallel": + description: A Perl wrapper for LTR_harvest + homepage: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + documentation: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + tool_dev_url: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + licence: ["MIT"] - "gt": description: "The GenomeTools genome analysis system" homepage: "https://genometools.org/index.html" diff --git a/modules/pfr/edta/ltrharvest/tests/main.nf.test b/modules/pfr/ltrharvest/tests/main.nf.test similarity index 80% rename from modules/pfr/edta/ltrharvest/tests/main.nf.test rename to modules/pfr/ltrharvest/tests/main.nf.test index 5f181873..9226bc0c 100644 --- a/modules/pfr/edta/ltrharvest/tests/main.nf.test +++ b/modules/pfr/ltrharvest/tests/main.nf.test @@ -1,13 +1,12 @@ nextflow_process { - name "Test Process EDTA_LTRHARVEST" + name "Test Process LTRHARVEST" script "../main.nf" - process "EDTA_LTRHARVEST" + process "LTRHARVEST" tag "modules" tag "modules_nfcore" - tag "edta" - tag "edta/ltrharvest" + tag "ltrharvest" test("homo_sapiens-genome_21_fasta") { @@ -25,7 +24,8 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() }, + { assert snapshot(process.out.gff3).match("gff3") }, + { assert path(process.out.scn[0][1]).text.contains("46510803 46520182 9380 46510803 46510940 138 46520042 46520182 141 86.52 0 chr21") }, { assert 
snapshot(path(process.out.versions[0]).text).match("script_versions") } ) } @@ -57,4 +57,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/pfr/ltrharvest/tests/main.nf.test.snap b/modules/pfr/ltrharvest/tests/main.nf.test.snap new file mode 100644 index 00000000..ad47c4ae --- /dev/null +++ b/modules/pfr/ltrharvest/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "homo_sapiens-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,51e82185b713482d1d48b6f15abe7fcc" + ], + "gff3": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scn": [ + [ + { + "id": "test" + }, + "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,51e82185b713482d1d48b6f15abe7fcc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-22T14:44:30.682167" + }, + "script_versions": { + "content": [ + "\"LTRHARVEST\":\n LTR_HARVEST_parallel: v1.1\n genometools: 1.6.5\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-22T14:44:26.672478" + }, + "gff3": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gff3:md5,da13c4ba22e44ef944ddec38aa72c468" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-22T19:29:33.962761" + }, + "stub_versions": { + "content": [ + "\"LTRHARVEST\":\n LTR_HARVEST_parallel: v1.1\n genometools: 1.6.5\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-22T14:44:30.729166" + } +} \ No newline at end of file diff --git a/modules/pfr/ltrharvest/tests/tags.yml b/modules/pfr/ltrharvest/tests/tags.yml new file mode 100644 index 00000000..92de225e --- /dev/null +++ 
b/modules/pfr/ltrharvest/tests/tags.yml @@ -0,0 +1,2 @@ +ltrharvest: + - "modules/nf-core/ltrharvest/**" diff --git a/modules/pfr/ltrretriever/lai/environment.yml b/modules/pfr/ltrretriever/lai/environment.yml new file mode 100644 index 00000000..e0e49682 --- /dev/null +++ b/modules/pfr/ltrretriever/lai/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "ltrretriever_lai" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::LTR_retriever=2.9.9" diff --git a/modules/pfr/ltrretriever/lai/main.nf b/modules/pfr/ltrretriever/lai/main.nf new file mode 100644 index 00000000..464b215b --- /dev/null +++ b/modules/pfr/ltrretriever/lai/main.nf @@ -0,0 +1,71 @@ +process LTRRETRIEVER_LAI { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.9--hdfd78af_0': + 'biocontainers/ltr_retriever:2.9.9--hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + path pass_list + path annotation_out + path monoploid_seqs + + output: + tuple val(meta), path("*.LAI.log") , emit: log + tuple val(meta), path("*.LAI.out") , emit: lai_out , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def monoploid_param = monoploid_seqs ? "-mono $monoploid_seqs" : '' + def lai_output_name = monoploid_seqs ? "${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI" + def VERSION = 'beta3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + LAI \\ + -genome $fasta \\ + -intact $pass_list \\ + -all $annotation_out \\ + -t $task.cpus \\ + $monoploid_param \\ + $args \\ + > >(tee "${prefix}.LAI.log") \\ + || echo "LAI failed! See ${prefix}.LAI.log" + + mv \\ + $lai_output_name \\ + "${prefix}.LAI.out" \\ + || echo "LAI failed to estimate assembly index. See ${prefix}.LAI.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lai: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def monoploid_param = monoploid_seqs ? "-mono $monoploid_seqs" : '' + def lai_output_name = monoploid_seqs ? "${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI" + def VERSION = 'beta3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch "${prefix}.LAI.log" + touch "$lai_output_name" + + mv \\ + $lai_output_name \\ + "${prefix}.LAI.out" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lai: $VERSION + END_VERSIONS + """ +} diff --git a/modules/pfr/lai/meta.yml b/modules/pfr/ltrretriever/lai/meta.yml similarity index 91% rename from modules/pfr/lai/meta.yml rename to modules/pfr/ltrretriever/lai/meta.yml index 6fd7aef6..f84cf6ca 100644 --- a/modules/pfr/lai/meta.yml +++ b/modules/pfr/ltrretriever/lai/meta.yml @@ -1,7 +1,9 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "lai" -description: Estimates the mean LTR sequence identity in the genome +name: "ltrretriever_lai" +description: | + Estimates the mean LTR sequence identity in the genome. 
The input genome fasta should + have short alphanumeric IDs without comments keywords: - genomics - annotation diff --git a/modules/pfr/ltrretriever/lai/tests/main.nf.test b/modules/pfr/ltrretriever/lai/tests/main.nf.test new file mode 100644 index 00000000..df7db2cf --- /dev/null +++ b/modules/pfr/ltrretriever/lai/tests/main.nf.test @@ -0,0 +1,166 @@ +nextflow_process { + + name "Test Process LTRRETRIEVER_LAI" + script "../main.nf" + process "LTRRETRIEVER_LAI" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "gunzip" + tag "ltrretriever" + tag "ltrretriever/ltrretriever" + tag "ltrretriever/lai" + tag "ltrharvest" + tag "ltrfinder" + tag "cat/cat" + + test("actinidia_chinensis-genome_21_fasta_gz-success") { + + setup { + + run("GUNZIP") { + script "../../../gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + """ + } + } + + run("LTRHARVEST") { + script "../../../ltrharvest" + + process { + """ + input[0] = GUNZIP.out.gunzip + """ + } + } + + run("LTRFINDER") { + script "../../../ltrfinder" + + process { + """ + input[0] = GUNZIP.out.gunzip + """ + } + } + + run("CAT_CAT") { + script "../../../cat/cat" + + process { + """ + input[0] = LTRHARVEST.out.scn.mix(LTRFINDER.out.scn).groupTuple() + """ + } + } + + run("LTRRETRIEVER_LTRRETRIEVER") { + script "../../ltrretriever" + + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = CAT_CAT.out.file_out.map { meta, tabout -> tabout } + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + } + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = LTRRETRIEVER_LTRRETRIEVER.out.pass_list.map { meta, pass_list -> pass_list } + input[2] = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out.map { meta, annotation_out -> annotation_out } + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
path(process.out.log[0][1]).text.contains("Dependency checking: Passed!") }, + { assert path(process.out.log[0][1]).text.contains("Calculate LAI:") }, + { assert path(process.out.log[0][1]).text.contains("Done!") }, + { assert path(process.out.log[0][1]).text.contains("Result file:") }, + { assert Math.abs(Float.parseFloat(path(process.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 } + ) + } + + } + + test("stub") { + + options '-stub' + + when { + process { + """ + def pass_list = new File('test.pass.list') + def out_file = new File('test.out') + def monoploid_seqs = new File('some_seqs.list.txt') + + input[0] = [ + [ id:'test' ], + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + input[1] = pass_list.toPath() + input[2] = out_file.toPath() + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("stub_with_monoploid_seqs") { + + options '-stub' + + when { + process { + """ + def pass_list = new File('test.pass.list') + def out_file = new File('test.out') + def monoploid_seqs = new File('some_seqs.list.txt') + + input[0] = [ + [ id:'test' ], + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + input[1] = pass_list.toPath() + input[2] = out_file.toPath() + input[3] = monoploid_seqs.toPath() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/pfr/edta/ltrharvest/tests/main.nf.test.snap b/modules/pfr/ltrretriever/lai/tests/main.nf.test.snap similarity index 51% rename from modules/pfr/edta/ltrharvest/tests/main.nf.test.snap rename to modules/pfr/ltrretriever/lai/tests/main.nf.test.snap index 273db9a6..e1c8086b 100644 --- a/modules/pfr/edta/ltrharvest/tests/main.nf.test.snap +++ b/modules/pfr/ltrretriever/lai/tests/main.nf.test.snap @@ -1,5 +1,5 @@ 
{ - "homo_sapiens-genome_21_fasta": { + "stub": { "content": [ { "0": [ @@ -7,7 +7,7 @@ { "id": "test" }, - "test.gff3:md5,da13c4ba22e44ef944ddec38aa72c468" + "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -15,30 +15,30 @@ { "id": "test" }, - "test.scn:md5,65a6b80823a3f058142aed623a028a22" + "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "2": [ - "versions.yml:md5,da7912721842adc402c15740be57751f" + "versions.yml:md5,e04e27f9408e771795cd44d96518b7cd" ], - "gff3": [ + "lai_out": [ [ { "id": "test" }, - "test.gff3:md5,da13c4ba22e44ef944ddec38aa72c468" + "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "scn": [ + "log": [ [ { "id": "test" }, - "test.scn:md5,65a6b80823a3f058142aed623a028a22" + "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ - "versions.yml:md5,da7912721842adc402c15740be57751f" + "versions.yml:md5,e04e27f9408e771795cd44d96518b7cd" ] } ], @@ -46,9 +46,9 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-02T21:08:07.746204" + "timestamp": "2024-02-22T20:09:00.558021" }, - "homo_sapiens-genome_fasta-stub": { + "stub_with_monoploid_seqs": { "content": [ { "0": [ @@ -56,7 +56,7 @@ { "id": "test" }, - "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -64,30 +64,30 @@ { "id": "test" }, - "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "2": [ - "versions.yml:md5,da7912721842adc402c15740be57751f" + "versions.yml:md5,e04e27f9408e771795cd44d96518b7cd" ], - "gff3": [ + "lai_out": [ [ { "id": "test" }, - "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "scn": [ + "log": [ [ { "id": "test" }, - "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ - "versions.yml:md5,da7912721842adc402c15740be57751f" + 
"versions.yml:md5,e04e27f9408e771795cd44d96518b7cd" ] } ], @@ -95,26 +95,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-02T21:08:13.587479" - }, - "script_versions": { - "content": [ - "\"EDTA_LTRHARVEST\":\n LTR_HARVEST_parallel: v1.1\n genometools: 1.6.2\n" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-02T21:08:07.763375" - }, - "stub_versions": { - "content": [ - "\"EDTA_LTRHARVEST\":\n LTR_HARVEST_parallel: v1.1\n genometools: 1.6.2\n" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-02T21:08:13.601529" + "timestamp": "2024-02-22T20:10:08.213842" } } \ No newline at end of file diff --git a/modules/pfr/ltrretriever/lai/tests/nextflow.config b/modules/pfr/ltrretriever/lai/tests/nextflow.config new file mode 100644 index 00000000..75edf1a9 --- /dev/null +++ b/modules/pfr/ltrretriever/lai/tests/nextflow.config @@ -0,0 +1,15 @@ +process { + + withName: LTRHARVEST { + ext.prefix = { "${meta.id}_ltrharvest" } + } + + withName: LTRFINDER { + ext.args = '-harvest_out -size 1000000 -time 300' + // recommended parameters: https://github.com/oushujun/LTR_retriever#usage + } + + withName: CAT_CAT { + ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" } + } +} diff --git a/modules/pfr/ltrretriever/lai/tests/tags.yml b/modules/pfr/ltrretriever/lai/tests/tags.yml new file mode 100644 index 00000000..470f4687 --- /dev/null +++ b/modules/pfr/ltrretriever/lai/tests/tags.yml @@ -0,0 +1,2 @@ +ltrretriever/lai: + - "modules/nf-core/ltrretriever/lai/**" diff --git a/modules/pfr/ltrretriever/ltrretriever/environment.yml b/modules/pfr/ltrretriever/ltrretriever/environment.yml new file mode 100644 index 00000000..f49f07e7 --- /dev/null +++ b/modules/pfr/ltrretriever/ltrretriever/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: 
"ltrretriever_ltrretriever" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::LTR_retriever=2.9.9" diff --git a/modules/pfr/ltrretriever/main.nf b/modules/pfr/ltrretriever/ltrretriever/main.nf similarity index 91% rename from modules/pfr/ltrretriever/main.nf rename to modules/pfr/ltrretriever/ltrretriever/main.nf index 1e673dd5..f4577920 100644 --- a/modules/pfr/ltrretriever/main.nf +++ b/modules/pfr/ltrretriever/ltrretriever/main.nf @@ -1,10 +1,11 @@ -process LTRRETRIEVER { +process LTRRETRIEVER_LTRRETRIEVER { tag "$meta.id" label 'process_high' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.0--hdfd78af_2': - 'quay.io/biocontainers/ltr_retriever:2.9.0--hdfd78af_2' }" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.9--hdfd78af_0': + 'biocontainers/ltr_retriever:2.9.9--hdfd78af_0' }" input: tuple val(meta), path(genome) diff --git a/modules/pfr/ltrretriever/meta.yml b/modules/pfr/ltrretriever/ltrretriever/meta.yml similarity index 96% rename from modules/pfr/ltrretriever/meta.yml rename to modules/pfr/ltrretriever/ltrretriever/meta.yml index c0bfc240..a310b04a 100644 --- a/modules/pfr/ltrretriever/meta.yml +++ b/modules/pfr/ltrretriever/ltrretriever/meta.yml @@ -1,7 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "ltrretriever" -description: write your description here +name: "ltrretriever_ltrretriever" +description: Identifies LTR retrotransposons using LTR_retriever keywords: - genomics - annotation diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test new file mode 100644 index 
00000000..f6ab43db --- /dev/null +++ b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test @@ -0,0 +1,133 @@ +nextflow_process { + + name "Test Process LTRRETRIEVER_LTRRETRIEVER" + script "../main.nf" + process "LTRRETRIEVER_LTRRETRIEVER" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ltrretriever" + tag "ltrretriever/ltrretriever" + tag "gunzip/main" + tag "gt/ltrharvest" + tag "gt/suffixerator" + tag "ltrfinder" + tag "cat/cat" + + test("actinidia_chinensis-genome_21_fasta_gz-success") { + + setup { + + run('GUNZIP') { + script "../../../gunzip/main" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + """ + } + } + + run("GT_SUFFIXERATOR") { + script "../../../gt/suffixerator" + + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'dna' + """ + } + } + + run("GT_LTRHARVEST") { + script "../../../gt/ltrharvest" + + process { + """ + input[0] = GT_SUFFIXERATOR.out.index + """ + } + } + + run("LTRFINDER") { + script "../../../ltrfinder" + + process { + """ + input[0] = GUNZIP.out.gunzip + """ + } + } + + run("CAT_CAT") { + script "../../../cat/cat" + + process { + """ + input[0] = GT_LTRHARVEST.out.tabout.mix(LTRFINDER.out.scn).groupTuple() + """ + } + } + } + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = CAT_CAT.out.file_out.map { meta, tabout -> tabout } + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log[0][1]).text.contains("####### Result files #########") }, + { assert snapshot(process.out.pass_list).match("pass_list") }, + { assert path(process.out.pass_list_gff[0][1]).text.contains("chr1\tLTR_retriever\ttarget_site_duplication") }, + { assert path(process.out.ltrlib[0][1]).text.contains("LTR#LTR/Copia") }, + { assert snapshot(process.out.annotation_out).match("annotation_out") }, 
+ { assert path(process.out.annotation_gff[0][1]).text.contains("Classification=LTR/Copia") }, + { assert snapshot(path(process.out.versions[0]).text).match("versions") } + ) + } + + } + + test("stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(path(process.out.versions[0]).text).match("versions_stub") } + ) + } + + } + +} diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test.snap b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test.snap new file mode 100644 index 00000000..bcf98638 --- /dev/null +++ b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test.snap @@ -0,0 +1,169 @@ +{ + "versions_stub": { + "content": [ + "\"LTRRETRIEVER_LTRRETRIEVER\":\n LTR_retriever: v2.9.9\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T11:04:16.007262" + }, + "pass_list": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.pass.list:md5,0c96ee3b48691e65da2235786a926160" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T11:17:50.087449" + }, + "versions": { + "content": [ + "\"LTRRETRIEVER_LTRRETRIEVER\":\n LTR_retriever: v2.9.9\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T11:17:50.208819" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.pass.list:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.pass.list.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": 
"test" + }, + "test.LTRlib.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.out.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,3ab159acaee06b342b56e2d35e5e669b" + ], + "annotation_gff": [ + [ + { + "id": "test" + }, + "test.out.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "annotation_out": [ + [ + { + "id": "test" + }, + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ltrlib": [ + [ + { + "id": "test" + }, + "test.LTRlib.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pass_list": [ + [ + { + "id": "test" + }, + "test.pass.list:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pass_list_gff": [ + [ + { + "id": "test" + }, + "test.pass.list.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ab159acaee06b342b56e2d35e5e669b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T11:04:15.954424" + }, + "annotation_out": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.out:md5,4ecf9226cbd7a3aaf7cf5cfa575fcc6a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T11:17:50.150622" + } +} \ No newline at end of file diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/nextflow.config b/modules/pfr/ltrretriever/ltrretriever/tests/nextflow.config new file mode 100644 index 00000000..11499594 --- /dev/null +++ b/modules/pfr/ltrretriever/ltrretriever/tests/nextflow.config @@ -0,0 +1,21 @@ +process { + + withName: GT_SUFFIXERATOR { + ext.args = '-suf -lcp' + // GT_LTRHARVEST requires -suf, -lcp + } + + withName: LTRFINDER { + ext.args = '-harvest_out' + // LTRRETRIEVER requires -harvest_out + } + + withName: GT_LTRHARVEST { + ext.args = 
'-minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 -motif TGCA -motifmis 1 -similar 85 -vic 10 -seed 20 -seqids yes' + // recommended parameters: https://github.com/oushujun/LTR_retriever#usage + } + + withName: CAT_CAT { + ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" } + } +} diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/tags.yml b/modules/pfr/ltrretriever/ltrretriever/tests/tags.yml new file mode 100644 index 00000000..67241ccb --- /dev/null +++ b/modules/pfr/ltrretriever/ltrretriever/tests/tags.yml @@ -0,0 +1,2 @@ +ltrretriever/ltrretriever: + - "modules/nf-core/ltrretriever/ltrretriever/**" diff --git a/modules/pfr/ltrretriever/tests/main.nf.test b/modules/pfr/ltrretriever/tests/main.nf.test deleted file mode 100644 index 2121e7f1..00000000 --- a/modules/pfr/ltrretriever/tests/main.nf.test +++ /dev/null @@ -1,104 +0,0 @@ -nextflow_process { - - name "Test Process LTRRETRIEVER" - script "../main.nf" - process "LTRRETRIEVER" - config "./nextflow.config" - - tag "modules" - tag "modules_nfcore" - tag "ltrretriever" - tag "gt/ltrharvest" - tag "gt/suffixerator" - - test("homo_sapiens-genome_21_fasta-success") { - - setup { - run("GT_SUFFIXERATOR") { - script "../../../pfr/gt/suffixerator" - - process { - """ - input[0] = [ - [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - ] - """ - } - } - - run("GT_LTRHARVEST") { - script "../../../pfr/gt/ltrharvest" - - process { - """ - input[0] = GT_SUFFIXERATOR.out.index - """ - } - } - } - - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - ] - input[1] = GT_LTRHARVEST.out.tabout.map { meta, tabout -> tabout } - input[2] = [] - input[3] = [] - input[4] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.pass_list).match("pass_list") }, - { assert 
path(process.out.pass_list_gff.get(0).get(1)).getText().contains("chr21\tLTR_retriever\ttarget_site_duplication\t40960698\t40960702") }, - { assert process.out.ltrlib != null }, - { assert process.out.annotation_out == [] }, - { assert process.out.annotation_gff == [] }, - { assert snapshot(process.out.versions).match("versions") } - ) - } - - } - - test("stub") { - - options '-stub' - - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - ] - input[1] = [] - input[2] = [] - input[3] = [] - input[4] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.log != null }, - { assert process.out.pass_list != null }, - { assert process.out.pass_list_gff != null }, - { assert process.out.ltrlib != null }, - { assert process.out.annotation_out == [] }, - { assert process.out.annotation_gff == [] } - ) - } - - } - -} \ No newline at end of file diff --git a/modules/pfr/ltrretriever/tests/main.nf.test.snap b/modules/pfr/ltrretriever/tests/main.nf.test.snap deleted file mode 100644 index a6e196bc..00000000 --- a/modules/pfr/ltrretriever/tests/main.nf.test.snap +++ /dev/null @@ -1,23 +0,0 @@ -{ - "pass_list": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.pass.list:md5,4e99412b54fd99cac2ae533a51cbd4e1" - ] - ] - ], - "timestamp": "2023-12-04T15:45:22.007912" - }, - "versions": { - "content": [ - [ - "versions.yml:md5,bd6cf2de800197f0d726ba1dfa1d6be4" - ] - ], - "timestamp": "2023-12-04T15:45:22.026825" - } -} \ No newline at end of file diff --git a/modules/pfr/ltrretriever/tests/nextflow.config b/modules/pfr/ltrretriever/tests/nextflow.config deleted file mode 100644 index 22183f52..00000000 --- a/modules/pfr/ltrretriever/tests/nextflow.config +++ /dev/null @@ -1,14 +0,0 @@ -process { - - withName: GT_SUFFIXERATOR { - ext.args = '-tis -suf -lcp -des -ssp -sds -dna' - } 
- - withName: GT_LTRHARVEST { - ext.args = '-minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 -motif TGCA -motifmis 1 -similar 85 -vic 10 -seed 20 -seqids yes' - } - - withName: LTRRETRIEVER { - ext.args = '-noanno' - } -} diff --git a/modules/pfr/ltrretriever/tests/tags.yml b/modules/pfr/ltrretriever/tests/tags.yml deleted file mode 100644 index 1837f0db..00000000 --- a/modules/pfr/ltrretriever/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -ltrretriever: - - "modules/pfr/ltrretriever/**" diff --git a/modules/pfr/samblaster/environment.yml b/modules/pfr/samblaster/environment.yml index f956283e..ac838241 100644 --- a/modules/pfr/samblaster/environment.yml +++ b/modules/pfr/samblaster/environment.yml @@ -1,8 +1,11 @@ name: samblaster + channels: - conda-forge - bioconda - defaults + dependencies: + - bioconda::htslib=1.19.1 - bioconda::samblaster=0.1.26 - - bioconda::samtools=1.16.1 + - bioconda::samtools=1.19.2 diff --git a/modules/pfr/samblaster/main.nf b/modules/pfr/samblaster/main.nf index 160d1dd2..c9e89af0 100644 --- a/modules/pfr/samblaster/main.nf +++ b/modules/pfr/samblaster/main.nf @@ -1,11 +1,11 @@ process SAMBLASTER { tag "$meta.id" label 'process_low' - label "process_long" - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:cee56b506ceb753d4bbef7e05b81e1bfc25d937f-0' : - 'quay.io/biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:cee56b506ceb753d4bbef7e05b81e1bfc25d937f-0' }" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:60ebac4ad9c6530c0d7bf6844f52ec6916e1e0b1-0' : + 'biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:60ebac4ad9c6530c0d7bf6844f52ec6916e1e0b1-0' }" input: tuple val(meta), path(bam) @@ -34,4 +34,17 @@ process SAMBLASTER { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch "${prefix}.bam" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samblaster: \$( samblaster -h 2>&1 | head -n 1 | sed 's/^samblaster: Version //' ) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/pfr/samblaster/meta.yml b/modules/pfr/samblaster/meta.yml index ccb48320..5c1e5a97 100644 --- a/modules/pfr/samblaster/meta.yml +++ b/modules/pfr/samblaster/meta.yml @@ -51,3 +51,4 @@ authors: - "@lescai" maintainers: - "@lescai" + - "@gallvp" diff --git a/modules/pfr/samblaster/tests/main.nf.test b/modules/pfr/samblaster/tests/main.nf.test new file mode 100644 index 00000000..01794307 --- /dev/null +++ b/modules/pfr/samblaster/tests/main.nf.test @@ -0,0 +1,57 @@ +nextflow_process { + + name "Test Process SAMBLASTER" + script "../main.nf" + process "SAMBLASTER" + + tag "modules" + tag "modules_nfcore" + tag "samblaster" + + test("homo_sapiens-test_paired_end_umi_unsorted_bam") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process 
{ + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/pfr/samblaster/tests/main.nf.test.snap b/modules/pfr/samblaster/tests/main.nf.test.snap new file mode 100644 index 00000000..917c8f1f --- /dev/null +++ b/modules/pfr/samblaster/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,8a70467f2dfc2e0d8e81787223d2fc77" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,8a70467f2dfc2e0d8e81787223d2fc77" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:04:42.510824" + }, + "homo_sapiens-test_paired_end_umi_unsorted_bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,634a6bd541478e970f0a4c279f399889" + ] + ], + "1": [ + "versions.yml:md5,8a70467f2dfc2e0d8e81787223d2fc77" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,634a6bd541478e970f0a4c279f399889" + ] + ], + "versions": [ + "versions.yml:md5,8a70467f2dfc2e0d8e81787223d2fc77" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:04:38.118875" + } +} \ No newline at end of file diff --git a/modules/pfr/samblaster/tests/nextflow.config b/modules/pfr/samblaster/tests/nextflow.config new file mode 100644 index 00000000..605e74eb --- /dev/null +++ b/modules/pfr/samblaster/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: SAMBLASTER { + ext.args = '-M --addMateTags' + 
ext.prefix = { "${meta.id}.processed" } + } +} diff --git a/modules/pfr/samblaster/tests/tags.yml b/modules/pfr/samblaster/tests/tags.yml new file mode 100644 index 00000000..3882ee54 --- /dev/null +++ b/modules/pfr/samblaster/tests/tags.yml @@ -0,0 +1,2 @@ +samblaster: + - "modules/nf-core/samblaster/**" diff --git a/nextflow.config b/nextflow.config index 3450387b..f0ee1b5b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,231 +1,303 @@ -includeConfig './conf/base.config' - +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + plant-food-research-open/assemblyqc Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ + +// Global default params, used in configs params { - // FASTA files (fasta, fasta.gz) for the assemblies to QC - // - // Pattern: [["tag", "file path"]] - // Permissible tags: tag, tag_1, tag_tag2_3, tag_tag2_tag3; - // Any name with alphanumeric characters including "_". - // "." is not allowed in the tag name - // Unique, short tags are recommended. - // Otherwise, some of the plots in the report may not display correctly. - // Examples: - // target_assemblies = [["tag1", "./a/relative/path/to/the/fasta/file.fasta"], - // ["tag2", "./a/relative/path/to/the/fasta/file2.fasta"], - // ["tag3", "https://ftp.ncbi.nlm.nih.gov/genomes/test_genome.fna"], ...] 
- // target_assemblies = [["tair10", "/an/absolute/path/to/the/fasta/file.fasta"]] - target_assemblies = [ - ["hap1", "/workspace/assembly_qc/test_data/default/test_data1.fasta.gz"], - ["hap2", "/workspace/assembly_qc/test_data/default/test_data2.fasta"] - ] - - // GFF3 files (gff3, gff3.gz) for the assemblies to QC - // - // Optional Set to [] if not needed such as assembly_gff3 = [] - // - // Not all assembly gff3 files have to be provided such as: - // assembly_gff3 = [["hap1", "/workspace/assembly_qc/test_data/default/test_data1.gff3"]] - // - // Each gff3 file should have an associated (by tag) fasta file in target_assemblies. - // - // Multiple gff3 files can be associated (by tag) with a single fasta file in target_assemblies. - assembly_gff3 = [ - ["hap1", "/workspace/assembly_qc/test_data/default/test_data1.gff3.gz"], - ["hap2", "/workspace/assembly_qc/test_data/default/test_data2.gff3"] - ] - - assemblathon_stats { - // The number of 'N's for the unknown gap size. - // This number is used to split the scaffolds into contigs - // to compute contig-related stats such as the number of contigs, N50, etc. 
- // NCBI recommendation(https://www.ncbi.nlm.nih.gov/genbank/wgs_gapped/) is 100 - n_limit = 100 - } - - ncbi_fcs_adaptor { - skip = 0 // 1: Skip, 0: Don't + // Input options + input = null + + // Assemblathon stats options + assemblathon_stats_n_limit = 100 + + // NCBI FCS options + ncbi_fcs_adaptor_skip = true + ncbi_fcs_adaptor_empire = null + + ncbi_fcs_gx_skip = true + ncbi_fcs_gx_tax_id = null + ncbi_fcs_gx_db_path = null + + // BUSCO options + busco_skip = true + busco_mode = null + busco_lineage_datasets = null + busco_download_path = null + + // TIDK options + tidk_skip = true + tidk_repeat_seq = null + tidk_filter_by_size = false + tidk_filter_size_bp = 1000000 + + // LAI options + lai_skip = true + + // kraken2 options + kraken2_skip = true + kraken2_db_path = null + + // HiC options + hic = null + hic_skip_fastp = false + hic_skip_fastqc = false + hic_fastp_ext_args = '--qualified_quality_phred 20 --length_required 50' + + // Synteny options + synteny_skip = true + synteny_between_input_assemblies = true + synteny_many_to_many_align = false + synteny_max_gap = 1000000 + synteny_min_bundle_size = 1000 + synteny_plot_1_vs_all = true + synteny_color_by_contig = true + synteny_xref_assemblies = null + + // Output options + outdir = './results' + email = null + + // Max resource options + max_memory = '512.GB' + max_cpus = 16 + max_time = '7.day' + + // Boilerplate options + publish_dir_mode = 'copy' + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + + // Config options + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = '' 
+ validationShowHiddenParams = false + validate_params = true - empire = 'euk' // euk: Eukaryote, prok: Prokaryote - - // For interpretation of results, see: - // https://github.com/ncbi/fcs/wiki/FCS-adaptor#rules-for-action-assignment - // - // If adaptors are found, the pipeline stops with a report of adaptor contamination - } - - ncbi_fcs_gx { - // This is a very slow module. Skip it unless you really need it. - skip = 0 +} - // Get tax ID from https://www.ncbi.nlm.nih.gov/taxonomy - // Example assembly is from Neonectria ditissima - tax_id = "78410" +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' - // NCBI FCS GX DB path - // Due to enormity of the DB size, the pipeline does NOT download the data. - // It must be setup by the user manually before running the pipeline. - // See instructions for DB setup: https://github.com/ncbi/fcs/wiki/FCS-GX - db_path = "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24" +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} - // For interpretation of results, see: - // https://github.com/ncbi/fcs/wiki/FCS-GX#fcs-gx-report-output +// Load plant-food-research-open/assemblyqc custom profiles from different institutions. +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
+// try { +// includeConfig "${params.custom_config_base}/pipeline/assemblyqc.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/assemblyqc profiles: ${params.custom_config_base}/pipeline/assemblyqc.config") +// } +profiles { + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true } - - busco { - skip = 0 - - // geno or genome, for genome assemblies (DNA) - // tran or transcriptome, for transcriptome assemblies (DNA) - // prot or proteins, for annotated gene sets (protein) - mode = "geno" - - // BUSCO runs for each assembly in combination with each lineage - // Full cartesian product: Assembly x Lineage - // - // To select a lineage, see https://busco.ezlab.org/list_of_lineages.html - lineage_datasets = ["fungi_odb10", "microsporidia_odb10"] - - // BUSCO DB download path - // The pipeline automatically downloads the required DB if needed - download_path = "/workspace/ComparativeDataSources/BUSCO/assembly_qc" + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } - - tidk { - skip = 0 - - // To select an appropriate sequence, see http://telomerase.asu.edu/sequences_telomere.html - // Plant: TTTAGGG, Fungus, Vertebrates: TTAGGG, Insect: TTAGG - repeat_seq = "TTAGGG" - - // 1: Filter assembly sequences by size, 0: Include all assembly sequences in the TIDK plot - filter_by_size = 0 - - // Filter size in base-pairs. 
Any sequence smaller than this size is filtered if filter_by_size=1 - // Default: 1000000 (1Mbp) - filter_size_bp = 1000000 + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } - - lai { - skip = 0 - - // In calculation of LAI, this module assumes that each assembly specified in - // the target_assemblies is monoploid (1x) or only contains monoploid sequences. - // If this is not the case, use the following parameter to supply a single column - // text file listing IDs of the monoploid sequences for a polyploid assembly included - // in target_assemblies. The pipeline will only uses these sequences to compute LAI. - // - // This parameter is optional and can be set to [] if not needed. - // - // Pattern: - // monoploid_seqs = [["tag", "file_path1"], ["tag2", "file_path2"]] - monoploid_seqs = [] + docker { + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + // docker.runOptions = '-u $(id -u):$(id -g)' } - - kraken2 { - skip = 0 - - // To select a DB, see https://benlangmead.github.io/aws-indexes/k2 - // Due to enormity of the DB size, the pipeline does NOT download the data. 
- // - // Recommended PlusPFP: archaea, viral, plasmid, human, UniVec_Core, protozoa, fungi & plant - db_path = "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314" + arm { + // docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '--platform=linux/amd64' } - - hic { - skip = 0 - - // A relative or absolute path to paired reads: *_R{1,2}.(fastq|fq).gz - // OR SRA: SRR8238190 - paired_reads = "/input/genomic/fungal/Neonectria/Genome/20190506_CAGRF19591_CGYCF_HiC/PG_PETUNIA_HiC_CGYCF_CACTCA_L001_R{1,2}.fastq.gz" + singularity { + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } - - synteny { - skip = 0 - - between_target_asm = 1 // 1: Create syntenic plots between each pair of target_assemblies, 0: Don't - - // Assembly sequence list - // A two column tab-delimited txt file listing fasta sequence ids (first column) and - // labels for the synteny plots (second column). - // - // Filter sequences: If a sequence is missing from this file, it is excluded from the synteny plot even if - // it is present in the fasta file. - // - // Note: The sequence labels (second column) must be unique across the synteny assemblies. - // Otherwise, the pipeline fails with an error. - // - // Example tab-delimited file contents are listed here: - // Nd324_chr_1 ND_1 - // Nd324_chr_2 ND_2 - // - // Pattern: [["tag", "path to assembly sequence list"]] - assembly_seq_list = [ - ["hap1", "/workspace/assembly_qc/test_data/default/test_data1.seq.list"], - ["hap2", "/workspace/assembly_qc/test_data/default/test_data2.seq.list"] - ] - - // A list of reference assemblies. - // Syntenic plots are created between each assembly from target_assemblies and xref_assemblies. 
- // - // Optional Can be set to [] if not needed such as xref_assemblies = [] - // In this case, the synteny is only performed between target assemblies if between_target_asm is 1. - // - // Pattern: [["tag", "fasta file (fasta, fasta.gz) path", "seq.list file path"]] - xref_assemblies = [ - ["GenomeA", "/workspace/assembly_qc/test_data/default/test_data3.fasta", "/workspace/assembly_qc/test_data/default/test_data3.seq.list"], - ["GenomeB", "/workspace/assembly_qc/test_data/default/test_data4.fasta.gz", "/workspace/assembly_qc/test_data/default/test_data4.seq.list"] - ] - - // 0: Only include alignment blocks with 1-to-1 mappings (dnadiff .1coords file) - // 1: Include alignment blocks with many-to-many mappings (dnadiff .mcoords file) - many_to_many_align = 0 - - max_gap = 1000000 // 1 Mbp; Alignments within this distance are bundled together - min_bundle_size = 1000 // 1 Kbp; After bundling, any bundle smaller than this size is filtered out - - // 1: Create a separate synteny plot for each contig of the target assembly versus all contigs of the reference assembly - // 0: Only plot synteny for all contigs of target assembly versus all contigs of reference assembly - plot_1_vs_all = 1 - - // 1: Synteny plot is colored by contig - // 0: Synteny plot is colored by the number of links in a bundle - color_by_contig = 1 + podman { + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } + charliecloud { + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + 
apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } +} - outdir = "./results" +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" +} - // The resources are capped at their following maximum values. - // The pipeline does not use additional resources even if - // the following values are increased. These values only set the - // upper bound. - // - // To truly increase the max resources above the following values, - // change the process classes defined in './conf/base.config'. 
- max_cpus = 12 +// Capture exit codes from upstream processes when piping +process.shell = ['/bin/bash', '-euo', 'pipefail'] - // NCBI_FCS_GX requires at least 512 GBs - max_memory = 512.GB +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false - // SYNTENY::DNADIFF can take up to a week - // - // Target and Reference assemblies: 2.5 GB - // MUMMER .delta file: 1 GB - // Execution time: 134 hours - max_time = 7.days +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +timeline { + enabled = true + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } - -apptainer { - // This is where the pipeline looks for and stores the singularity - // containers. - cacheDir = "/workspace/assembly_qc/singularity" +report { + enabled = true + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" +} +trace { + enabled = true + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" +} +dag { + enabled = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { - name = "AssemblyQC" - nextflowVersion = '!>=22.04.3' + name = 'plant-food-research-open/assemblyqc' + author = """Usman Rashid, Ken Smith, Ross Crowhurst, Chen Wu, Marcus Davy""" + homePage = 'https://github.com/plant-food-research-open/assemblyqc' + description = """A NextFlow pipeline which evaluates assembly quality with multiple QC tools and presents the results in a unified html report.""" + mainScript = 'main.nf' + nextflowVersion = '!>=23.04.0' + version = '1.4' + doi = '10.5281/zenodo.10647870' } -includeConfig './conf/modules.config' -includeConfig './conf/reporting_defaults.config' +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// Function to ensure that resource requirements don't go beyond +// a maximum limit +def check_max(obj, type) { 
+ if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} diff --git a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 00000000..f16a18ba --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,463 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/master/nextflow_schema.json", + "title": "plant-food-research-open/assemblyqc pipeline parameters", + "description": "A NextFlow pipeline which evaluates assembly quality with multiple QC tools and presents the results in a unified html report.", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "description": "", + "default": "", + "properties": { + "input": { + "type": "string", + "format": "file-path", + "mimetype": "csv", + "schema": "assets/schema_input.json", + "help_text": "FASTA and other associated files for input assemblies provided as a formatted CSV file", + "description": "Input assembly sheet in CSV format", + "fa_icon": "fas fa-file-csv" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory 
where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open", + "default": "./results" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + } + }, + "required": ["input", "outdir"] + }, + "general_stats_options": { + "title": "General stats options", + "type": "object", + "description": "", + "default": "", + "properties": { + "assemblathon_stats_n_limit": { + "type": "integer", + "default": 100, + "help_text": "This number is used to split the scaffolds into contigs to compute contig-related stats such as the number of contigs, N50, etc. 
NCBI recommendation is 100.", + "description": "The number of 'N's for the unknown gap size" + } + } + }, + "ncbi_fcs_options": { + "title": "NCBI FCS options", + "type": "object", + "description": "", + "default": "", + "properties": { + "ncbi_fcs_adaptor_skip": { + "type": "boolean", + "description": "Skip NCBI FCS Adaptor checking", + "default": true + }, + "ncbi_fcs_adaptor_empire": { + "type": "string", + "enum": ["euk", "prok"], + "description": "Empire for NCBI FCS Adaptor checking", + "help_text": "'euk' for eukaryotes, or 'prok' for prokaryotes" + }, + "ncbi_fcs_gx_skip": { + "type": "boolean", + "description": "Skip NCBI FCS external organism contamination checking", + "default": true + }, + "ncbi_fcs_gx_tax_id": { + "type": "number", + "help_text": "Get correct tax ID from https://www.ncbi.nlm.nih.gov/taxonomy", + "description": "Tax ID for NCBI FCS GX" + }, + "ncbi_fcs_gx_db_path": { + "type": "string", + "format": "directory-path", + "help_text": "NCBI FCS GX DB path\n\nDue to enormity of the DB size, the pipeline does NOT download the data. It must be setup by the user manually before running the pipeline. See instructions for DB setup: https://github.com/ncbi/fcs/wiki/FCS-GX", + "description": "Path to NCBI FCS GX database" + } + } + }, + "busco_options": { + "title": "BUSCO options", + "type": "object", + "description": "", + "default": "", + "properties": { + "busco_skip": { + "type": "boolean", + "description": "Skip BUSCO", + "default": true + }, + "busco_mode": { + "type": "string", + "enum": ["geno", "tran", "prot", "genome", "transcriptome", "proteins"], + "help_text": "'geno' or 'genome' for genome assemblies (DNA), 'tran' or 'transcriptome' for transcriptome assemblies (DNA), 'prot' or 'proteins' for annotated gene sets (protein)", + "description": "BUSCO mode" + }, + "busco_lineage_datasets": { + "type": "string", + "help_text": "Each input assembly is assessed against each lineage. 
It should be provided as a space-separated list of lineages: 'fungi_odb10 microsporidia_odb10' ", + "pattern": "^(\\w+_odb10\\s)*\\w+_odb10$", + "description": "BUSCO lineages" + }, + "busco_download_path": { + "type": "string", + "help_text": "BUSCO DB download path\n\nThe pipeline automatically downloads the required DB if needed", + "description": "Download path for BUSCO", + "format": "directory-path" + } + } + }, + "tidk_options": { + "title": "TIDK options", + "type": "object", + "description": "", + "default": "", + "properties": { + "tidk_skip": { + "type": "boolean", + "description": "Skip telomere identification", + "default": true + }, + "tidk_repeat_seq": { + "type": "string", + "description": "Telomere repeat sequence", + "pattern": "^[ACGT]+$", + "help_text": "Plant: TTTAGGG, Fungus, Vertebrates: TTAGGG, Insect: TTAGG" + }, + "tidk_filter_by_size": { + "type": "boolean", + "description": "Filter out assembly sequences smaller than tidk_filter_size_bp" + }, + "tidk_filter_size_bp": { + "type": "integer", + "default": 1000000, + "description": "Filter size in base-pairs" + } + } + }, + "lai_options": { + "title": "LAI options", + "type": "object", + "description": "", + "default": "", + "properties": { + "lai_skip": { + "type": "boolean", + "default": true, + "description": "Skip LAI estimation" + } + } + }, + "kraken2_options": { + "title": "Kraken2 options", + "type": "object", + "description": "", + "default": "", + "properties": { + "kraken2_skip": { + "type": "boolean", + "default": true, + "description": "Skip Kraken2" + }, + "kraken2_db_path": { + "type": "string", + "description": "Kraken2 database path", + "format": "path" + } + } + }, + "hic_options": { + "title": "HiC options", + "type": "object", + "description": "", + "default": "", + "properties": { + "hic": { + "type": "string", + "description": "HiC reads", + "help_text": "Path to reads provided as a SRA ID or as a path to paired reads with pattern '*{1,2}.(fastq|fq).gz'", + "pattern": 
"^SR\\w+$|^\\S+\\{1,2\\}\\.f(ast)?q\\.gz$" + }, + "hic_skip_fastp": { + "type": "boolean", + "description": "Skip HiC read trimming" + }, + "hic_skip_fastqc": { + "type": "boolean", + "description": "Skip HiC read QC" + }, + "hic_fastp_ext_args": { + "type": "string", + "default": "--qualified_quality_phred 20 --length_required 50", + "description": "Additional parameters for fastp trimming" + } + } + }, + "synteny_options": { + "title": "Synteny options", + "type": "object", + "description": "", + "default": "", + "properties": { + "synteny_skip": { + "type": "boolean", + "default": true, + "description": "Skip synteny analysis" + }, + "synteny_between_input_assemblies": { + "type": "boolean", + "description": "Create syntenic plots between each pair of input assemblies", + "default": true + }, + "synteny_many_to_many_align": { + "type": "boolean", + "description": "Include alignment blocks with many-to-many mappings (dnadiff .mcoords file)" + }, + "synteny_max_gap": { + "type": "integer", + "default": 1000000, + "description": "Alignments within this distance are bundled together" + }, + "synteny_min_bundle_size": { + "type": "integer", + "default": 1000, + "description": "After bundling, any bundle smaller than this size is filtered out" + }, + "synteny_plot_1_vs_all": { + "type": "boolean", + "default": true, + "description": "Create a separate synteny plot for each contig of the target assembly versus all contigs of the reference assembly" + }, + "synteny_color_by_contig": { + "type": "boolean", + "default": true, + "description": "Synteny plot is colored by contig" + }, + "synteny_xref_assemblies": { + "type": "string", + "description": "Reference assemblies for synteny analysis", + "help_text": "FASTA and synteny label tsv files should be provided in a formatted CSV file ", + "format": "file-path", + "mimetype": "csv", + "schema": "assets/schema_xref_assemblies.json" + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": 
"object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "512.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "7.day", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "validationShowHiddenParams": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
+ }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/general_stats_options" + }, + { + "$ref": "#/definitions/ncbi_fcs_options" + }, + { + "$ref": "#/definitions/busco_options" + }, + { + "$ref": "#/definitions/tidk_options" + }, + { + "$ref": "#/definitions/lai_options" + }, + { + "$ref": "#/definitions/kraken2_options" + }, + { + "$ref": "#/definitions/hic_options" + }, + { + "$ref": "#/definitions/synteny_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/pfr/params.json b/pfr/params.json new file mode 100644 index 00000000..ab1ea360 --- /dev/null +++ b/pfr/params.json @@ -0,0 +1,29 @@ +{ + "input": "/workspace/assemblyqc/testdata/default/assemblysheet.csv", + "ncbi_fcs_adaptor_skip": false, + "ncbi_fcs_adaptor_empire": "euk", + "ncbi_fcs_gx_skip": false, + "ncbi_fcs_gx_tax_id": 3750, + "ncbi_fcs_gx_db_path": "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24", + "busco_skip": false, + "busco_mode": "geno", + "busco_lineage_datasets": "embryophyta_odb10 eudicots_odb10", + "busco_download_path": "/workspace/ComparativeDataSources/BUSCO/assemblyqc", + "tidk_skip": false, + "tidk_repeat_seq": 
"TTTAGGG", + "tidk_filter_by_size": true, + "tidk_filter_size_bp": 1000000, + "lai_skip": false, + "kraken2_skip": false, + "kraken2_db_path": "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314", + "hic": null, + "synteny_skip": false, + "synteny_between_input_assemblies": true, + "synteny_many_to_many_align": false, + "synteny_max_gap": 1000000, + "synteny_min_bundle_size": 1000, + "synteny_plot_1_vs_all": true, + "synteny_color_by_contig": true, + "synteny_xref_assemblies": "/workspace/assemblyqc/testdata/default/xrefsheet.csv", + "outdir": "./results" +} diff --git a/pfr/profile.config b/pfr/profile.config new file mode 100644 index 00000000..3a3c18c3 --- /dev/null +++ b/pfr/profile.config @@ -0,0 +1,17 @@ +profiles { + pfr { + process { + executor = 'slurm' + } + + apptainer { + envWhitelist = "APPTAINER_BINDPATH,APPTAINER_BIND" + cacheDir = "/workspace/assemblyqc/singularity" + } + } +} + +params { + config_profile_name = 'Plant&Food profile' + config_profile_description = 'Plant&Food profile using SLURM in combination with Apptainer' +} diff --git a/pfr_assemblyqc b/pfr_assemblyqc index eb668f21..f92ae0d2 100644 --- a/pfr_assemblyqc +++ b/pfr_assemblyqc @@ -1,6 +1,6 @@ #!/bin/bash -e -#SBATCH --job-name ASM_QC +#SBATCH --job-name ASMQC #SBATCH --time=7-00:00:00 #SBATCH --nodes=1 #SBATCH --ntasks=1 @@ -9,6 +9,22 @@ #SBATCH --error pfr_assemblyqc.stderr #SBATCH --mem=4G +full_test_flag=0 + +# Parse command line options +while getopts "t" opt; do + case ${opt} in + t ) + full_test_flag=1 + ;; + \? 
) + echo "Invalid option: $OPTARG" 1>&2 + exit 1 + ;; + esac +done +shift $((OPTIND -1)) + ml unload perl ml apptainer/1.1 ml nextflow/23.04.4 @@ -16,4 +32,22 @@ ml nextflow/23.04.4 export TMPDIR="/workspace/$USER/tmp" export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp" -nextflow main.nf -profile pfr,apptainer -resume +if [ $full_test_flag -eq 1 ]; then + nextflow \ + main.nf \ + -c pfr/profile.config \ + -profile pfr,apptainer,test_full \ + --ncbi_fcs_gx_skip false \ + --ncbi_fcs_gx_tax_id 35717 \ + --ncbi_fcs_gx_db_path "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24" \ + --kraken2_skip false \ + --kraken2_db_path "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314" \ + -resume +else + nextflow \ + main.nf \ + -c pfr/profile.config \ + -profile pfr,apptainer \ + -params-file pfr/params.json \ + -resume +fi diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..7d08e1c8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,13 @@ +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
+[tool.ruff] +line-length = 120 +target-version = "py38" +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] +cache-dir = "~/.cache/ruff" + +[tool.ruff.isort] +known-first-party = ["nf_core"] + +[tool.ruff.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/busco.nf b/subworkflows/local/busco.nf deleted file mode 100644 index 10519bc5..00000000 --- a/subworkflows/local/busco.nf +++ /dev/null @@ -1,84 +0,0 @@ -nextflow.enable.dsl=2 - -workflow BUSCO { - take: - tuple_of_hap_file_lineage - - main: - if (!params.busco.skip) { - RUN_BUSCO(tuple_of_hap_file_lineage) - | collect - | set {ch_busco_summaries} - - CREATE_PLOT(ch_busco_summaries) - .set { ch_busco_plot } - - ch_busco_summaries - .mix(ch_busco_plot) - .collect() - .set { ch_outputs } - } else { - ch_outputs = Channel.of([]) - } - - emit: - list_of_outputs = ch_outputs -} - -process RUN_BUSCO { - tag "${hap_name}:${lineage_dataset}" - label "process_high" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/busco:5.2.2--pyhdfd78af_0': - 'quay.io/biocontainers/busco:5.2.2--pyhdfd78af_0' }" - - publishDir "${params.outdir}/busco", mode: 'copy' - - input: - tuple val(hap_name), path(fasta_file), val(lineage_dataset) - - output: - path "${hap_name}/short_summary.specific.${lineage_dataset}.${hap_name}_${lineage_split}.txt" - - script: - def lineages_path = params.busco.download_path ? 
"--download_path ${params.busco.download_path}" : '' - def lineage_to_split = "${lineage_dataset}"; - def parts = lineage_to_split.split("_"); - lineage_split = parts[0]; - - """ - busco \ - -m ${params.busco.mode} \ - -o ${hap_name} \ - -i $fasta_file \ - -l ${lineage_dataset} \ - --update-data \ - $lineages_path \ - -c ${task.cpus} - - mv "${hap_name}/short_summary.specific.${lineage_dataset}.${hap_name}.txt" "${hap_name}/short_summary.specific.${lineage_dataset}.${hap_name}_${lineage_split}.txt" - """ -} - -process CREATE_PLOT { - tag "all summaries" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/busco:5.2.2--pyhdfd78af_0': - 'quay.io/biocontainers/busco:5.2.2--pyhdfd78af_0' }" - - publishDir params.outdir, mode: 'copy' - - input: - path "short_summary.*", stageAs: 'busco/*' - - output: - path 'busco/*.png' - - script: - """ - generate_plot.py -wd ./busco - """ -} diff --git a/subworkflows/local/create_hic_file.nf b/subworkflows/local/create_hic_file.nf deleted file mode 100644 index 2c5af32d..00000000 --- a/subworkflows/local/create_hic_file.nf +++ /dev/null @@ -1,36 +0,0 @@ -nextflow.enable.dsl=2 - -include { MAKE_AGP_FROM_FASTA } from '../../modules/local/make_agp_from_fasta.nf' -include { AGP2_ASSEMBLY } from '../../modules/local/agp2_assembly.nf' -include { ASSEMBLY2_BEDPE } from '../../modules/local/assembly2_bedpe.nf' -include { MATLOCK_BAM2_JUICER } from '../../modules/local/matlock_bam2_juicer.nf' -include { JUICER_SORT } from '../../modules/local/juicer_sort.nf' -include { RUN_ASSEMBLY_VISUALIZER } from '../../modules/local/run_assembly_visualizer.nf' - -workflow CREATE_HIC_FILE { - take: - create_hic_file_inputs // [sample_id.on.tag, assembly_fasta, alignment_bam] - - main: - create_hic_file_inputs - | map { - [it[0], it[1]] // [sample_id.on.tag, assembly_fasta] - } - | MAKE_AGP_FROM_FASTA - | AGP2_ASSEMBLY - | 
ASSEMBLY2_BEDPE - - create_hic_file_inputs - | map { - [it[0], it[2]] // [sample_id.on.tag, alignment_bam] - } - | MATLOCK_BAM2_JUICER - | JUICER_SORT - - AGP2_ASSEMBLY.out.agp_assembly_file - | join(JUICER_SORT.out.sorted_links_txt_file) // [sample_id.on.tag, agp_assembly_file, sorted_links_txt_file] - | RUN_ASSEMBLY_VISUALIZER - - emit: - hic_file = RUN_ASSEMBLY_VISUALIZER.out.hic_file // [sample_id_on_tag, hic_file] -} diff --git a/subworkflows/local/fasta_busco_plot.nf b/subworkflows/local/fasta_busco_plot.nf new file mode 100644 index 00000000..f6cc5dc7 --- /dev/null +++ b/subworkflows/local/fasta_busco_plot.nf @@ -0,0 +1,37 @@ +include { BUSCO } from '../../modules/local/busco' +include { BUSCO_PLOT } from '../../modules/local/busco_plot' + +workflow FASTA_BUSCO_PLOT { + take: + tuple_of_hap_file // Channel + lineage // val + mode // val + download_path // val; Use [] to use work directory. Useful on AWS + + main: + ch_versions = Channel.empty() + + // MODULE: BUSCO + BUSCO( + tuple_of_hap_file, + lineage, + mode, + download_path + ) + + ch_busco_summaries = BUSCO.out.summary + | collect + + ch_versions = ch_versions.mix(BUSCO.out.versions.first()) + + // MODULE: BUSCO_PLOT + BUSCO_PLOT ( ch_busco_summaries ) + + ch_busco_plot = BUSCO_PLOT.out.png + ch_versions = ch_versions.mix(BUSCO_PLOT.out.versions.first()) + + emit: + summary = BUSCO.out.summary + plot = ch_busco_plot + versions = ch_versions +} diff --git a/subworkflows/local/fasta_kraken2.nf b/subworkflows/local/fasta_kraken2.nf new file mode 100644 index 00000000..fa399a21 --- /dev/null +++ b/subworkflows/local/fasta_kraken2.nf @@ -0,0 +1,44 @@ +include { UNTAR } from '../../modules/nf-core/untar/main' +include { KRAKEN2 } from '../../modules/local/kraken2' +include { KRAKEN2_KRONA_PLOT } from '../../modules/local/kraken2_krona_plot' + +workflow FASTA_KRAKEN2 { + take: + tuple_of_hap_file + db_path // channel: path + + main: + ch_tar_db = db_path + | filter { db -> "$db".endsWith('.tar.gz') } + + 
ch_untar_db = db_path + | filter { db -> !( "$db".endsWith('.tar.gz') ) } + + // MODULE: UNTAR + UNTAR ( ch_tar_db.map { tar -> [ [ id: "kraken2_db" ], tar ] } ) + + ch_kraken2_inputs = UNTAR.out.untar + | map { meta, untar -> untar } + | mix( + ch_untar_db + ) + | combine(tuple_of_hap_file) + + // MODULE: KRAKEN2 + KRAKEN2( + ch_kraken2_inputs.map { db, tag, fasta -> [ tag, fasta ] }, + ch_kraken2_inputs.map { db, tag, fasta -> db } + ) + + // MODULE: KRAKEN2_KRONA_PLOT + KRAKEN2_KRONA_PLOT ( KRAKEN2.out.report ) + + ch_versions = Channel.empty() + | mix(KRAKEN2.out.versions.first()) + | mix(UNTAR.out.versions.first()) + | mix(KRAKEN2_KRONA_PLOT.out.versions.first()) + + emit: + plot = KRAKEN2_KRONA_PLOT.out.plot + versions = ch_versions +} diff --git a/subworkflows/local/fasta_synteny.nf b/subworkflows/local/fasta_synteny.nf new file mode 100644 index 00000000..219b3da7 --- /dev/null +++ b/subworkflows/local/fasta_synteny.nf @@ -0,0 +1,253 @@ +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' +include { FILTERSORTFASTA } from '../../modules/local/filtersortfasta' +include { MUMMER } from '../../modules/local/mummer' +include { GETFASTALENGTH } from '../../modules/local/getfastalength' +include { DNADIFF } from '../../modules/local/dnadiff' +include { CIRCOS_BUNDLELINKS } from '../../modules/local/circos_bundlelinks' +include { COLOURBUNDLELINKS } from '../../modules/local/colourbundlelinks' +include { RELABELBUNDLELINKS } from '../../modules/local/relabelbundlelinks' +include { SPLITBUNDLEFILE } from '../../modules/local/splitbundlefile' +include { RELABELFASTALENGTH } from '../../modules/local/relabelfastalength' +include { GENERATEKARYOTYPE } from '../../modules/local/generatekaryotype' +include { CIRCOS } from '../../modules/local/circos' + +workflow FASTA_SYNTENY { + take: + ch_fasta // Channel: [ tag, fa ] + ch_labels // Channel: [ tag, txt ] + ch_xref_fasta_labels // Channel: [ tag2, fa, txt ] + between_input_assemblies // 
val(true|false) + many_to_many_align // val(true|false) + max_gap // val(Integer) + min_bundle_size // val(Integer) + plot_1_vs_all // val(true|false) + color_by_contig // val(true|false) + + main: + ch_versions = Channel.empty() + + ch_fasta_labels = ch_fasta + | join( + ch_labels + ) + + ch_input_combinations = ! between_input_assemblies + ? Channel.empty() + : ch_fasta_labels + | map { [it] } + | collect + | map { getUniqueWithinCombinations(it) } + | flatten + | buffer(size:6) + + ch_xref_fa_branch = ch_xref_fasta_labels + | map { tag, fa, txt -> + [ [ id: tag ], fa ] + } + | branch { meta, fa -> + gz: "$fa".endsWith(".gz") + rest: !"$fa".endsWith(".gz") + } + + // MODULE: GUNZIP_FASTA + GUNZIP_FASTA ( ch_xref_fa_branch.gz ) + + ch_xref_ungz_fa_labels = GUNZIP_FASTA.out.gunzip + | mix( + ch_xref_fa_branch.rest + ) + | map { meta, fa -> [ meta.id, fa ] } + | join( + ch_xref_fasta_labels + ) + | map { tag, fa, input_fa, seq_list -> + [ tag, fa, seq_list ] + } + + ch_all_combinations = ch_input_combinations + | mix( + ch_fasta_labels + | combine( + ch_xref_ungz_fa_labels + ) + ) + + ch_all_combination_labels = ch_all_combinations + | map { target_tag, target_fa, target_txt, xref_tag, xref_fa, xref_txt -> + [ "${target_tag}.on.${xref_tag}", target_txt, xref_txt ] + } + + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions.first()) + + // MODULE: FILTERSORTFASTA + FILTERSORTFASTA ( ch_all_combinations ) + + ch_versions = ch_versions.mix(FILTERSORTFASTA.out.versions.first()) + + // MODULE: MUMMER + MUMMER ( FILTERSORTFASTA.out.fasta ) + + ch_versions = ch_versions.mix(MUMMER.out.versions.first()) + + // MODULE: GETFASTALENGTH + GETFASTALENGTH ( FILTERSORTFASTA.out.fasta ) + + ch_versions = ch_versions.mix(GETFASTALENGTH.out.versions.first()) + + // MODULE: DNADIFF + ch_dnadiff_inputs = FILTERSORTFASTA.out.fasta + | map { target, reference, target_fasta, ref_fasta -> + [ "${target}.on.${reference}", target_fasta, ref_fasta ] + } + | join( + MUMMER.out.delta + ) + 
DNADIFF( + ch_dnadiff_inputs, + many_to_many_align + ) + + ch_versions = ch_versions.mix(DNADIFF.out.versions.first()) + + // MODULE: CIRCOS_BUNDLELINKS + CIRCOS_BUNDLELINKS( + DNADIFF.out.coords, + max_gap, + min_bundle_size + ) + + ch_versions = ch_versions.mix(CIRCOS_BUNDLELINKS.out.versions.first()) + + // MODULE: COLOURBUNDLELINKS + COLOURBUNDLELINKS( + CIRCOS_BUNDLELINKS.out.links, + color_by_contig + ) + + ch_coloured_links = COLOURBUNDLELINKS.out.coloured_links + ch_versions = ch_versions.mix(COLOURBUNDLELINKS.out.versions.first()) + + // MODULE: RELABELBUNDLELINKS + ch_relabellinks_inputs = ch_coloured_links + | join(ch_all_combination_labels) + + RELABELBUNDLELINKS ( ch_relabellinks_inputs ) + + ch_versions = ch_versions.mix(RELABELBUNDLELINKS.out.versions.first()) + + // MODULE: SPLITBUNDLEFILE + SPLITBUNDLEFILE( + RELABELBUNDLELINKS.out.relabeled_links, + plot_1_vs_all + ) + + ch_split_links = SPLITBUNDLEFILE.out.split_file + | map { flattenSplitBundles(it) } + | flatten + | buffer(size:3) + + ch_versions = ch_versions.mix(SPLITBUNDLEFILE.out.versions.first()) + + // MODULE: RELABELFASTALENGTH + ch_relabelfastalength_inputs = GETFASTALENGTH.out.length + | join(ch_all_combination_labels) + + RELABELFASTALENGTH ( ch_relabelfastalength_inputs ) + + ch_versions = ch_versions.mix(RELABELFASTALENGTH.out.versions.first()) + + // MODULE: GENERATEKARYOTYPE + ch_generate_karyotype_inputs = RELABELFASTALENGTH.out.relabeled_seq_lengths + | cross( + ch_split_links + ) + | map { seq_len_tuple, split_bundle_tuple -> + + def target_on_xref = seq_len_tuple[0] + def seq_tag = split_bundle_tuple[1] + def split_bundle_file = split_bundle_tuple[2] + def target_seq_len = seq_len_tuple[1] + def ref_seq_len = seq_len_tuple[2] + + [ target_on_xref, seq_tag, split_bundle_file, target_seq_len, ref_seq_len ] + } + GENERATEKARYOTYPE ( ch_generate_karyotype_inputs ) + + ch_versions = ch_versions.mix(GENERATEKARYOTYPE.out.versions.first()) + + // MODULE: CIRCOS + ch_circos_inputs = 
GENERATEKARYOTYPE.out.karyotype + | join( + ch_split_links + | map { target_on_xref, seq_tag, txt -> + [ "${target_on_xref}.${seq_tag}", txt ] + } + ) + CIRCOS ( ch_circos_inputs ) + + ch_versions = ch_versions.mix(CIRCOS.out.versions.first()) + + emit: + plot = CIRCOS.out.png_file + versions = ch_versions +} + +def getUniqueWithinCombinations(inputArray) { + if (inputArray.size() <= 1) { + return [] + } + + inputArray.sort { a, b -> a[0].compareTo(b[0]) } + + def outputList = [] + + for (int i = 0; i < inputArray.size() - 1; i++) { + for (int j = i + 1; j < inputArray.size(); j++) { + def combination = [ + inputArray[i][0], + inputArray[i][1], + inputArray[i][2], + inputArray[j][0], + inputArray[j][1], + inputArray[j][2] + ] + outputList.add(combination) + } + } + return outputList +} + +def appendTags(tag, valuesArray) { + if (valuesArray.size() <= 1) { + return [] + } + + def outputList = [] + + for (int i = 0; i < valuesArray.size(); i++) { + outputList.add([tag, valuesArray[i]]) + } + return outputList +} + +def flattenSplitBundles(inputArray) { + def target_on_ref = inputArray[0] + def files = inputArray[1] + + if(files in ArrayList) { + return files.collect { [target_on_ref, extractBundleTag(it), it] } + } else { + return [files].collect { [target_on_ref, extractBundleTag(it), it] } + } +} + +def extractBundleTag(filePath) { + def regex = /.*\.(\w+)\.split\.bundle\.txt/ + def matcher = filePath =~ regex + if (matcher.matches()) { + return matcher.group(1) + } else { + // This branch should not execute unless the upstream logic is flawed + error "Error: Failed to parse the sequence tag from file name: ${filePath.getName()}" + } +} diff --git a/subworkflows/local/fq2hic.nf b/subworkflows/local/fq2hic.nf new file mode 100644 index 00000000..f8a3eaa9 --- /dev/null +++ b/subworkflows/local/fq2hic.nf @@ -0,0 +1,90 @@ +include { FASTQ_TRIM_FASTP_FASTQC } from '../nf-core/fastq_trim_fastp_fastqc/main' +include { FASTQ_BWA_MEM_SAMBLASTER } from 
'../pfr/fastq_bwa_mem_samblaster/main' +include { HICQC } from '../../modules/local/hicqc' +include { MAKEAGPFROMFASTA } from '../../modules/local/makeagpfromfasta' +include { AGP2ASSEMBLY } from '../../modules/local/agp2assembly' +include { ASSEMBLY2BEDPE } from '../../modules/local/assembly2bedpe' +include { MATLOCK_BAM2_JUICER } from '../../modules/local/matlock_bam2_juicer' +include { JUICER_SORT } from '../../modules/local/juicer_sort' +include { RUNASSEMBLYVISUALIZER } from '../../modules/local/runassemblyvisualizer' +include { HIC2HTML } from '../../modules/local/hic2html' + +workflow FQ2HIC { + take: + reads // [ val(meta), [ fq ] ] + ref // [ val(meta2), fa ] + hic_skip_fastp // val: true|false + hic_skip_fastqc // val: true|false + + main: + ch_versions = Channel.empty() + + // SUBWORKFLOW: FASTQ_TRIM_FASTP_FASTQC + FASTQ_TRIM_FASTP_FASTQC( + reads, + [], + true, // val_save_trimmed_fail + false, // val_save_merged + hic_skip_fastp, + hic_skip_fastqc + ) + + ch_trim_reads = FASTQ_TRIM_FASTP_FASTQC.out.reads + ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions) + + // SUBWORKFLOW: FASTQ_BWA_MEM_SAMBLASTER + FASTQ_BWA_MEM_SAMBLASTER( + ch_trim_reads, + ref.map { meta2, fa -> [ meta2, fa, [] ] } + ) + + ch_bam = FASTQ_BWA_MEM_SAMBLASTER.out.bam + ch_versions = ch_versions.mix(FASTQ_BWA_MEM_SAMBLASTER.out.versions) + + // MODULE: HICQC + ch_bam_and_ref = ch_bam + | map { meta, bam -> [ meta.ref_id, meta, bam ] } + | join( + ref.map { meta2, fa -> [ meta2.id, fa ] } + ) + | map { ref_id, meta, bam, fa -> + [ [ id: "${meta.id}.on.${meta.ref_id}" ], bam, fa ] + } + + HICQC ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3, bam ] } ) + + ch_versions = ch_versions.mix(HICQC.out.versions) + + // MODULE: MAKEAGPFROMFASTA | AGP2ASSEMBLY | ASSEMBLY2BEDPE + MAKEAGPFROMFASTA ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3.id, fa ] } ) + AGP2ASSEMBLY ( MAKEAGPFROMFASTA.out.agp ) + ASSEMBLY2BEDPE ( AGP2ASSEMBLY.out.assembly ) + + ch_versions = 
ch_versions.mix(MAKEAGPFROMFASTA.out.versions.first()) + | mix(AGP2ASSEMBLY.out.versions.first()) + | mix(ASSEMBLY2BEDPE.out.versions.first()) + + // MODULE: MATLOCK_BAM2_JUICER | JUICER_SORT + MATLOCK_BAM2_JUICER ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3.id, bam ] } ) + + JUICER_SORT ( MATLOCK_BAM2_JUICER.out.links ) + + ch_versions = ch_versions.mix(MATLOCK_BAM2_JUICER.out.versions.first()) + | mix(JUICER_SORT.out.versions.first()) + + // MODULE: RUNASSEMBLYVISUALIZER + RUNASSEMBLYVISUALIZER ( AGP2ASSEMBLY.out.assembly.join(JUICER_SORT.out.links) ) + + ch_hic = RUNASSEMBLYVISUALIZER.out.hic + ch_versions = ch_versions.mix(RUNASSEMBLYVISUALIZER.out.versions.first()) + + // MODULE: HIC2HTML + HIC2HTML ( ch_hic ) + + ch_versions = ch_versions.mix(HIC2HTML.out.versions.first()) + + emit: + hic = ch_hic + html = HIC2HTML.out.html + versions = ch_versions +} diff --git a/subworkflows/local/hic_contact_map.nf b/subworkflows/local/hic_contact_map.nf deleted file mode 100644 index c2629ee5..00000000 --- a/subworkflows/local/hic_contact_map.nf +++ /dev/null @@ -1,63 +0,0 @@ -nextflow.enable.dsl=2 - -include { FASTQ_BWA_MEM_SAMBLASTER } from '../pfr/fastq_bwa_mem_samblaster/main' -include { CREATE_HIC_FILE } from './create_hic_file.nf' - -include { HIC_QC } from '../../modules/local/hic_qc.nf' - -workflow HIC_CONTACT_MAP { - take: - reads // [ val(id), [ fq ] ] - fasta // [ val(tag), fasta ] - - main: - if (!params.hic.skip) { - - FASTQ_BWA_MEM_SAMBLASTER( - reads.map { id, fq -> [ [ id: id ], fq ]}, - fasta.map { tag, fasta -> [ [ id: tag ], fasta, [] ] } - ) - .bam - | map { meta, bam -> [ meta.ref_id, meta, bam ] } - | join( - fasta - ) - | map { ref_id, meta, bam, fasta -> - [ "${meta.id}.on.${meta.ref_id}", fasta, bam ] - } - | set { ch_fasta_bam } - - HIC_QC ( ch_fasta_bam.map { id, fasta, bam -> [ id, bam ] } ) - - ch_fasta_bam - | CREATE_HIC_FILE - | HIC2_HTML - | collect - | set { ch_list_of_html_files } - } else { - ch_list_of_html_files = Channel.of([]) - 
} - - emit: - list_of_html_files = ch_list_of_html_files -} - -process HIC2_HTML { - tag "$sample_id_on_tag" - label "process_single" - - container "docker.io/gallvp/python3npkgs:v0.4" - publishDir "${params.outdir}/hic", mode: 'copy' - - input: - tuple val(sample_id_on_tag), path(hic_file) - - output: - path "*.html" - - script: - """ - file_name="$hic_file" - hic_2_html_fc62f04.py "$hic_file" > "\${file_name%.*}.html" - """ -} diff --git a/subworkflows/local/hic_preprocess.nf b/subworkflows/local/hic_preprocess.nf deleted file mode 100644 index 59607e8f..00000000 --- a/subworkflows/local/hic_preprocess.nf +++ /dev/null @@ -1,73 +0,0 @@ -nextflow.enable.dsl=2 - -workflow HIC_PREPROCESS { - take: - paired_reads - - main: - if (!params.hic.skip) { - FASTP(paired_reads) - | set { ch_cleaned_paired_reads } - - paired_reads - .join(ch_cleaned_paired_reads, remainder: true) - | FAST_QC - } else { - ch_cleaned_paired_reads = Channel.of([]) - } - - emit: - cleaned_paired_reads = ch_cleaned_paired_reads -} - -process FASTP { - tag "$sample_id" - label "process_medium" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h5f740d0_3': - 'quay.io/biocontainers/fastp:0.23.2--h5f740d0_3' }" - - input: - tuple val(sample_id), path(reads) - - output: - tuple val(sample_id), path('*.fastp.fastq.gz') - - script: - """ - fastp \ - -i ${reads[0]} \ - -o "\$(basename ${reads[0]} .fastq.gz).fastp.fastq.gz" \ - -I ${reads[1]} \ - -O "\$(basename ${reads[1]} .fastq.gz).fastp.fastq.gz" \ - --qualified_quality_phred 20 \ - --length_required 50 \ - --thread ${task.cpus} - """ -} - - -process FAST_QC { - tag "$sample_id" - label "process_medium" - - publishDir "${params.outdir}/hic/fastqc", mode:'copy' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--hdfd78af_1': - 'quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1' }" - - input: - tuple val(sample_id), path(raw_reads), path(clean_reads) - - output: - path '*.html' - path '*.zip' - - script: - """ - fastqc ${raw_reads} ${clean_reads} \ - -t ${task.cpus} \ - --nogroup - """ -} diff --git a/subworkflows/local/kraken2.nf b/subworkflows/local/kraken2.nf deleted file mode 100644 index dc79884f..00000000 --- a/subworkflows/local/kraken2.nf +++ /dev/null @@ -1,96 +0,0 @@ -nextflow.enable.dsl=2 - -include { UNTAR } from '../../modules/nf-core/untar/main.nf' - -workflow KRAKEN2 { - take: - tuple_of_hap_file - db_path // val - - main: - if (!params.kraken2.skip) { - - ch_tar_db = "$db_path".endsWith('.tar.gz') - ? Channel.of(file(db_path, checkIfExists:true)) - : Channel.empty() - - ch_untar_db = "$db_path".endsWith('.tar.gz') - ? Channel.empty() - : Channel.of(file(db_path, checkIfExists:true)) - - ch_tar_db - | map { tar -> [ [ id: "kraken2_db" ], tar ] } - | UNTAR - - UNTAR.out.untar - | map { meta, untar -> untar } - | mix( - ch_untar_db - ) - | combine(tuple_of_hap_file) - | set { ch_kraken2_inputs } - - RUN_KRAKEN2( - ch_kraken2_inputs.map { db, tag, fasta -> [ tag, fasta ] }, - ch_kraken2_inputs.map { db, tag, fasta -> db } - ) - | KRONA_PLOT - | collect - | set { ch_list_of_kraken2_outputs } - } else { - ch_list_of_kraken2_outputs = Channel.of([]) - } - - emit: - list_of_outputs = ch_list_of_kraken2_outputs -} - -process RUN_KRAKEN2 { - tag "${hap_name}" - label "process_single" - label "process_high_memory" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/kraken2:2.1.2--pl5321h9f5acd7_2': - 'quay.io/biocontainers/kraken2:2.1.2--pl5321h9f5acd7_2' }" - - publishDir "${params.outdir}/kraken2", mode: 'copy' - - input: - tuple val(hap_name), path(fasta_file) - path db_path - - output: - tuple val(hap_name), path("*.kraken2.cut"), path("*.kraken2.report") - - script: - """ - kraken2 \ - --output "${hap_name}.kraken2.cut" \ - --report "${hap_name}.kraken2.report" \ - --use-names \ - --db $db_path \ - --threads ${task.cpus} \ - $fasta_file > kraken2.log - """ -} - -process KRONA_PLOT { - tag "${hap_name}" - label "process_single" - - container "docker.io/nanozoo/krona:2.7.1--e7615f7" - publishDir "${params.outdir}/kraken2", mode: 'copy' - - input: - tuple val(hap_name), path(kraken2_cut), path(kraken2_report) - - output: - tuple path("*.kraken2.krona.cut"), path("*.kraken2.krona.html") - - script: - """ - perl -lane '@a=split /\\t/; if (\$a[2] =~ /taxid\\s+(\\d+)/) {print "\$a[1]\\t\$1\\t1\\t\$a[3]";}' $kraken2_cut > "${hap_name}.kraken2.krona.cut" - ktImportTaxonomy -i -o "${hap_name}.kraken2.krona.html" -m "4" "${hap_name}.kraken2.krona.cut" - """ -} diff --git a/subworkflows/local/ncbi_fcs_adaptor.nf b/subworkflows/local/ncbi_fcs_adaptor.nf deleted file mode 100644 index 48de75c9..00000000 --- a/subworkflows/local/ncbi_fcs_adaptor.nf +++ /dev/null @@ -1,107 +0,0 @@ -nextflow.enable.dsl=2 - -workflow NCBI_FCS_ADAPTOR { - take: - tuple_of_tag_file - - main: - if (!params.ncbi_fcs_adaptor.skip) { - SCREEN_SAMPLE(tuple_of_tag_file) - | set { ch_report } - - ch_report - .map { - it[1] // report file path - } - .collect() - .set { ch_all_reports } - - ch_report - | CHECK_CONTAMINATION - | map { - def itTokes = "$it".tokenize(':') - def status = itTokes[1] - def tag = itTokes[2] - - def isClean = status == "CLEAN" - - [tag, isClean] - } - | set { ch_tuple_tag_is_clean } // [tag, is_clean flag] - - ch_tuple_tag_is_clean - | map { - def tag = it[0] - def isClean = it[1] - - if 
(!isClean) { - log.warn(""" - Adaptor contamination detected in ${tag}. - See the report for further details. - """.stripIndent()) - } - } - } else { - tuple_of_tag_file - .map { - [it[0], true] // [tag, true] - } - .set { ch_tuple_tag_is_clean } - - ch_all_reports = Channel.of([]) - } - - emit: - is_clean_status = ch_tuple_tag_is_clean - reports = ch_all_reports -} - -process SCREEN_SAMPLE { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.4.0/fcs-adaptor.sif': - 'docker.io/ncbi/fcs-adaptor:0.4.0' }" - - publishDir "${params.outdir}/ncbi_fcs_adaptor", mode: 'copy' - - input: - tuple val(hap_name), path(fasta_file) - - output: - tuple val(hap_name), path("${hap_name}_fcs_adaptor_report.tsv") - - script: - """ - mkdir "${hap_name}_outputdir" - - /app/fcs/bin/av_screen_x \ - -o "${hap_name}_outputdir" \ - --${params.ncbi_fcs_adaptor.empire} \ - "${fasta_file}" - - mv "${hap_name}_outputdir/fcs_adaptor_report.txt" "./${hap_name}_fcs_adaptor_report.tsv" - """ -} - -process CHECK_CONTAMINATION { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - input: - tuple val(hap_name), path(report_tsv) - - output: - stdout - - script: - """ - num_lines=\$(cat $report_tsv | wc -l) - [[ \$num_lines -gt 1 ]] && echo -n "CHECK_ADAPTOR_CONTAMINATION:CONTAMINATED:$hap_name" || echo -n "CHECK_ADAPTOR_CONTAMINATION:CLEAN:$hap_name" - """ -} diff --git a/subworkflows/local/ncbi_fcs_gx.nf b/subworkflows/local/ncbi_fcs_gx.nf index 421cdb58..cc54292f 100644 --- a/subworkflows/local/ncbi_fcs_gx.nf +++ b/subworkflows/local/ncbi_fcs_gx.nf @@ -1,190 +1,62 @@ -nextflow.enable.dsl=2 +include { NCBI_FCS_GX_SETUP_SAMPLE } from '../../modules/local/ncbi_fcs_gx_setup_sample' +include { NCBI_FCS_GX_SCREEN_SAMPLES } from '../../modules/local/ncbi_fcs_gx_screen_samples' +include { NCBI_FCS_GX_KRONA_PLOT } from '../../modules/local/ncbi_fcs_gx_krona_plot' workflow NCBI_FCS_GX { take: - tuple_of_tag_file - db_path // val + tuple_of_tag_file + db_path // val: String + tax_id // val: Integer main: - if (!params.ncbi_fcs_gx.skip) { + ch_versions = Channel.empty() - tuple_of_tag_file - | SETUP_SAMPLE - | collect - | set {ch_all_samples} + // MODULE: NCBI_FCS_GX_SETUP_SAMPLE + NCBI_FCS_GX_SETUP_SAMPLE ( tuple_of_tag_file ) - SCREEN_SAMPLES(ch_all_samples, file(db_path, checkIfExists:true)) + ch_all_samples = NCBI_FCS_GX_SETUP_SAMPLE.out.fsata + | collect - // Clean/contaminated branching - SCREEN_SAMPLES - .out - .fcs_gx_reports - | flatten - | map { - def parts = it.getName().split("\\.") - def tag = parts[0] - [tag, it] - } - | CHECK_CONTAMINATION - | map { - def itTokes = "$it".tokenize(':') - def status = itTokes[1] - def tag = itTokes[2] + ch_versions = ch_versions.mix(NCBI_FCS_GX_SETUP_SAMPLE.out.versions.first()) - def isClean = status == "CLEAN" + // MODULE: NCBI_FCS_GX_SCREEN_SAMPLES + ch_db = ! db_path + ? 
Channel.empty() + : Channel.of( file(db_path, checkIfExists:true) ) - [tag, isClean] - } - | set { ch_tuple_tag_is_clean } // [tag, is_clean flag] + NCBI_FCS_GX_SCREEN_SAMPLES( + ch_all_samples, + ch_db, + tax_id + ) - ch_tuple_tag_is_clean - | map { - def tag = it[0] - def isClean = it[1] + ch_gx_report = NCBI_FCS_GX_SCREEN_SAMPLES.out.fcs_gx_reports + | flatten + | map { + def parts = it.getName().split("\\.") + def tag = parts[0] + [tag, it] + } - if (!isClean) { - log.warn(""" - Foreign organism contamination detected in ${tag}. - See the report for further details. - """.stripIndent()) - } - } + ch_gx_taxonomy = NCBI_FCS_GX_SCREEN_SAMPLES.out.fcs_gx_taxonomies + | flatten + | map { + def parts = it.getName().split("\\.") + def tag = parts[0] + [tag, it] + } - // Taxonomy Krona plot - SCREEN_SAMPLES - .out - .fcs_gx_taxonomies - | flatten - | map { - def parts = it.getName().split("\\.") - def tag = parts[0] - [tag, it] - } - | FCS_GX_KRONA_PLOT - | flatten - | mix( - SCREEN_SAMPLES.out.fcs_gx_reports.flatten() - ) - | collect - | set { ch_fcs_gx_reports } - } else { - tuple_of_tag_file - .map { - [it[0], true] // [tag, true] - } - .set { ch_tuple_tag_is_clean } + ch_versions = ch_versions.mix(NCBI_FCS_GX_SCREEN_SAMPLES.out.versions) - ch_fcs_gx_reports = Channel.of([]) - } + // MODULE: NCBI_FCS_GX_KRONA_PLOT + NCBI_FCS_GX_KRONA_PLOT ( ch_gx_taxonomy ) - emit: - is_clean_status = ch_tuple_tag_is_clean - fcs_gx_reports = ch_fcs_gx_reports -} - -process SETUP_SAMPLE { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - input: - tuple val(hap_name), path(fasta_file) - - output: - path 'fasta.file.for.*.fasta' - - script: - """ - ln -s $fasta_file "fasta.file.for.${hap_name}.fasta" - """ -} + ch_gx_taxonomy_plot = NCBI_FCS_GX_KRONA_PLOT.out.plot + ch_versions = ch_versions.mix(NCBI_FCS_GX_KRONA_PLOT.out.versions.first()) - -process SCREEN_SAMPLES { - tag "all samples" - label "process_high" - label "process_long" - label "process_very_high_memory" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.4.0/fcs-gx.sif': - 'docker.io/ncbi/fcs-gx:0.4.0' }" - - publishDir "${params.outdir}/ncbi_fcs_gx", mode: 'copy' - - input: - path samples - path db_path - - output: - path "*.fcs_gx_report.txt", emit: fcs_gx_reports - path "*.taxonomy.rpt", emit: fcs_gx_taxonomies - - script: - """ - for sample_fasta in $samples; - do - sample_tag=\$(echo "\$sample_fasta" | sed 's/fasta.file.for.//g' | sed 's/.fasta//g') - python3 /app/bin/run_gx --fasta ./\$sample_fasta --out-dir ./ --gx-db $db_path --tax-id "${params.ncbi_fcs_gx.tax_id}" - - mv "\${sample_fasta%.fasta}.${params.ncbi_fcs_gx.tax_id}.fcs_gx_report.txt" "\${sample_tag}.fcs_gx_report.txt" - mv "\${sample_fasta%.fasta}.${params.ncbi_fcs_gx.tax_id}.taxonomy.rpt" "\${sample_tag}.taxonomy.rpt" - done - """ -} - -process CHECK_CONTAMINATION { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - input: - tuple val(hap_name), path(report_file) - - output: - stdout - - script: - """ - hap_name=\$(echo "$report_file" | sed 's/.fcs_gx_report.txt//g') - num_lines=\$(cat $report_file | wc -l) - [[ \$num_lines -gt 2 ]] && echo -n "CHECK_GX_CONTAMINATION:CONTAMINATED:\$hap_name" || echo -n "CHECK_GX_CONTAMINATION:CLEAN:\$hap_name" - """ -} - -process FCS_GX_KRONA_PLOT { - tag "${tag_name}" - label "process_single" - - container "docker.io/nanozoo/krona:2.7.1--e7615f7" - publishDir "${params.outdir}/ncbi_fcs_gx", mode: 'copy' - - input: - tuple val(tag_name), path(fcs_gx_taxonomy) - - output: - tuple path("${tag_name}.inter.tax.rpt.tsv"), path("${tag_name}.fcs.gx.krona.cut"), path("${tag_name}.fcs.gx.krona.html") - - script: - """ - cat $fcs_gx_taxonomy \ - | awk 'NR>1 {print \$1,\$2,\$6,\$7,\$11,\$32}' FS="\\t" OFS="\\t" \ - > "${tag_name}.inter.tax.rpt.tsv" - - cat "${tag_name}.inter.tax.rpt.tsv" \ - | awk '\$6 !~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,\$4,\$5,\$2}' FS="\\t" OFS="\\t" \ - > "${tag_name}.fcs.gx.krona.cut" - - cat "${tag_name}.inter.tax.rpt.tsv" \ - | awk 'NR>1 && \$6 ~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,"0",\$5,\$2}' FS="\\t" OFS="\\t" \ - >> "${tag_name}.fcs.gx.krona.cut" - - ktImportTaxonomy -i -o "${tag_name}.fcs.gx.krona.html" -m "4" "${tag_name}.fcs.gx.krona.cut" - """ + emit: + gx_report = ch_gx_report + gx_taxonomy = ch_gx_taxonomy + gx_taxonomy_plot = ch_gx_taxonomy_plot + versions = ch_versions } diff --git a/subworkflows/local/synteny.nf b/subworkflows/local/synteny.nf deleted file mode 100644 index ce4742fe..00000000 --- a/subworkflows/local/synteny.nf +++ /dev/null @@ -1,613 +0,0 @@ -nextflow.enable.dsl=2 - -include { GZIP_FASTA } from '../../modules/local/gzip_fasta' - -workflow SYNTENY { - take: - tuple_of_tag_fasta_seq_list - tuple_of_tag_xref_fasta_seq_list - - main: - if(!params.synteny.skip) { - 
- if(params.synteny.between_target_asm) { - tuple_of_tag_fasta_seq_list - | map { - [it] - } - | collect - | map { - getUniqueWithinCombinations(it) - } - | flatten - | buffer(size:6) - | set { ch_between_target_asm_combinations } - } else { - ch_between_target_asm_combinations = Channel.empty() - } - - tuple_of_tag_xref_fasta_seq_list - | map { - [it[0], it[1]] // [tag, xref fasta file path] - } - | GZIP_FASTA - | join( - tuple_of_tag_xref_fasta_seq_list - ) - | map { - [it[0], it[1], it[3]] // [tag, uncompressed xref fasta file path, seq list] - } - | set { ch_tuple_tag_xref_uncompressed_fasta_seq_list } - - ch_between_target_asm_combinations - .mix( - tuple_of_tag_fasta_seq_list - | combine( - ch_tuple_tag_xref_uncompressed_fasta_seq_list - ) - ) - .tap { ch_full_tap_from_all_combinations } - .map { - ["${it[0]}.on.${it[3]}", it[2], it[5]] // [target.on.reference, target_seq_list, ref_seq_list] - } - .set { ch_seq_lists } - - - ch_full_tap_from_all_combinations - | FILTER_SORT_FASTA_AND_VALIDATE_SEQ_LISTS - | (MUMMER & GET_FASTA_LEN) - - - FILTER_SORT_FASTA_AND_VALIDATE_SEQ_LISTS.out.tags_fasta_files - .map { target, reference, target_fasta, ref_fasta -> - [ "${target}.on.${reference}", target_fasta, ref_fasta ] - } - .join( - MUMMER.out.tag_delta_file - ) - | DNADIFF - | CIRCOS_BUNDLE_LINKS - | ADD_COLOUR_TO_BUNDLE_LINKS - | join(ch_seq_lists) - | RELABEL_BUNDLE_LINKS - | SPLIT_BUNDLE_FILE_BY_TARGET_SEQS - | map { - flattenSplitBundles(it) - } - | flatten - | buffer(size:3) - | set { ch_circos_split_bundle_links } - - GET_FASTA_LEN - .out - .tag_len_files - | join(ch_seq_lists) - | RELABEL_FASTA_LEN - | cross( - ch_circos_split_bundle_links - ) - | map { - [it[0][0], it[1][1], it[1][2], it[0][1], it[0][2]] // [target.on.reference, seq_tag, split_bundle_file, target_seq_len, ref_seq_len] - } - | GENERATE_KARYOTYPE - | join( - ch_circos_split_bundle_links - | map { - ["${it[0]}.${it[1]}", it[2]] // [target.on.reference.seq_tag, split_bundle_file] - } - ) - | 
CIRCOS - - CIRCOS - .out - .png_file - | collect - | set{ ch_list_of_circos_plots } - } - else { - ch_list_of_circos_plots = Channel.of([]) - } - - emit: - list_of_circos_plots = ch_list_of_circos_plots -} - -def getUniqueWithinCombinations(inputArray) { - if (inputArray.size() <= 1) { - return [] - } - - inputArray.sort { a, b -> a[0].compareTo(b[0]) } - - def outputList = [] - - for (int i = 0; i < inputArray.size() - 1; i++) { - for (int j = i + 1; j < inputArray.size(); j++) { - def combination = [ - inputArray[i][0], - file(inputArray[i][1], checkIfExists: true), - file(inputArray[i][2], checkIfExists: true), - inputArray[j][0], - file(inputArray[j][1], checkIfExists: true), - file(inputArray[j][2], checkIfExists: true) - ] - outputList.add(combination) - } - } - return outputList -} - -def appendTags(tag, valuesArray) { - if (valuesArray.size() <= 1) { - return [] - } - - def outputList = [] - - for (int i = 0; i < valuesArray.size(); i++) { - outputList.add([tag, valuesArray[i]]) - } - return outputList -} - -def flattenSplitBundles(inputArray) { - def target_on_ref = inputArray[0] - def files = inputArray[1] - - if(files in ArrayList) { - return files.collect { [target_on_ref, extractBundleTag(it), it] } - } else { - return [files].collect { [target_on_ref, extractBundleTag(it), it] } - } -} - -def extractBundleTag(filePath) { - def regex = /.*\.(\w+)\.split\.bundle\.txt/ - def matcher = filePath =~ regex - if (matcher.matches()) { - return matcher.group(1) - } else { - // This branch should never be executed if all the upstream logic is implemented correctly. - error "Error: Failed to parse the sequence tag from file name: ${filePath.getName()}" - } -} - -process FILTER_SORT_FASTA_AND_VALIDATE_SEQ_LISTS { - tag "${target}.on.${reference}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1': - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(target), path(target_fasta), path(target_seq_list), val(reference), path(ref_fasta), path(ref_seq_list) - - output: - tuple val(target), val(reference), path("filtered.ordered.target.fasta"), path("filtered.ordered.ref.fasta"), emit: tags_fasta_files - - script: - """ - validate_seq_lists_1d50376.sh "$target_seq_list" "$ref_seq_list" - samtools faidx $target_fasta \$(awk '{print \$1}' $target_seq_list) > filtered.ordered.target.fasta - samtools faidx $ref_fasta \$(awk '{print \$1}' $ref_seq_list) > filtered.ordered.ref.fasta - """ -} - -process GET_FASTA_LEN { - tag "${target}.on.${reference}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1': - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(target), val(reference), path(filtered_ordered_target_fasta), path(filtered_ordered_ref_fasta) - - output: - tuple val("${target}.on.${reference}"), path("target.seq.lengths"), path("ref.seq.lengths"), emit: tag_len_files - - script: - """ - samtools faidx $filtered_ordered_target_fasta - samtools faidx $filtered_ordered_ref_fasta - - cat "${filtered_ordered_target_fasta}.fai" | awk '{print \$1, \$2}' OFS="\\t" > target.seq.lengths - cat "${filtered_ordered_ref_fasta}.fai" | awk '{print \$1, \$2}' OFS="\\t" > ref.seq.lengths - """ -} - -process MUMMER { - tag "${target}.on.${reference}" - label "process_high" - - container "docker.io/staphb/mummer:4.0.0" - - input: - tuple val(target), val(reference), path(target_fasta), path(ref_fasta) - - output: - tuple val("${target}.on.${reference}"), path("*.delta"), emit: tag_delta_file - - script: - """ - nucmer \ - --mum \ - -t ${task.cpus} \ - -p "${target}.on.${reference}" \ - $ref_fasta \ - 
$target_fasta - """ -} - -process DNADIFF { - tag "${target_on_ref}" - label "process_single" - label "process_week_long" - - container "docker.io/staphb/mummer:4.0.0" - - input: - tuple val(target_on_ref), path(target_fasta), path(ref_fasta), path(dnadiff_file) - - output: - tuple val(target_on_ref), path("*.xcoords"), path("*.report") - - script: - def inter_extension = params.synteny.many_to_many_align == 1 ? 'mcoords' : '1coords' - def out_extension = params.synteny.many_to_many_align == 1 ? 'm.xcoords' : '1.xcoords' - """ - cat \\ - $dnadiff_file \\ - | sed '1s/.*/${ref_fasta} ${target_fasta}/' \\ - > ${target_on_ref}.sed.delta - - dnadiff \\ - -p $target_on_ref \\ - -d ${target_on_ref}.sed.delta - - cat \\ - "${target_on_ref}.${inter_extension}" \\ - > "${target_on_ref}.${out_extension}" - """ -} - -process CIRCOS_BUNDLE_LINKS { - tag "${target_on_ref}" - label "process_single" - - container "docker.io/gallvp/circos-tools:v0.23-1_ps" - - input: - tuple val(target_on_ref), path(coords_file), path(report_file) - - output: - tuple val(target_on_ref), path("*.xcoords.bundle.txt") - - script: - """ - cat $coords_file | awk '{print \$12,\$1,\$2,\$13,\$3,\$4}' OFS="\\t" > "\$(basename $coords_file).links.txt" - - /usr/share/circos/tools/bundlelinks/bin/bundlelinks \ - -links "\$(basename $coords_file).links.txt" \ - -max_gap "${params.synteny.max_gap}" \ - -min_bundle_size "${params.synteny.min_bundle_size}" \ - 1>"\$(basename $coords_file).bundle.txt" \ - 2>bundlelinks.err - """ -} - -process ADD_COLOUR_TO_BUNDLE_LINKS { - tag "${target_on_ref}" - label "process_single" - - container "docker.io/gallvp/python3npkgs:v0.4" - - input: - tuple val(target_on_ref), path(bundle_links) - - output: - tuple val(target_on_ref), path("*.xcoords.bundle.coloured.txt"), emit: coloured_bundle_links - - script: - """ - if [[ "${params.synteny.color_by_contig}" = "1" ]];then - color_circos_bundles_by_contig_943e0fb.py \ - "${bundle_links}" \ - > "\$(basename $bundle_links 
.bundle.txt).bundle.coloured.txt" - else - add_color_2_circos_bundle_file_943e0fb.pl \ - -i="${bundle_links}" \ - -o="\$(basename $bundle_links .bundle.txt).bundle.coloured.txt" - fi - """ -} - -process RELABEL_BUNDLE_LINKS { - tag "${target_on_ref}" - label "process_single" - - container "docker.io/gallvp/python3npkgs:v0.4" - - input: - tuple val(target_on_ref), path(coloured_bundle_links), path(target_seq_list), path(ref_seq_list) - - output: - tuple val(target_on_ref), path("*.xcoords.bundle.coloured.relabeled.txt"), emit: relabeled_coloured_bundle_links - - script: - """ - #!/usr/bin/env python - - import pandas as pd - import sys - import os - - output_file_name = ".".join("$coloured_bundle_links".split(".")[0:-1]) + ".relabeled.txt" - - subs_target_seq = pd.read_csv('$target_seq_list', sep='\\t', header=None) - subs_target_seq_dict = dict(zip(subs_target_seq.iloc[:, 0], subs_target_seq.iloc[:, 1])) - - subs_ref_seq = pd.read_csv('$ref_seq_list', sep='\\t', header=None) - subs_ref_seq_dict = dict(zip(subs_ref_seq.iloc[:, 0], subs_ref_seq.iloc[:, 1])) - - if os.path.getsize('$coloured_bundle_links') == 0: - with open(output_file_name, 'w') as f: - f.write('') - sys.exit(0) - else: - df = pd.read_csv('$coloured_bundle_links', sep=' ', header=None) - - df.iloc[:, 3] = df.iloc[:, 3].replace(subs_target_seq_dict, regex=False) - df.iloc[:, 0] = df.iloc[:, 0].replace(subs_ref_seq_dict, regex=False) - - df.to_csv(output_file_name, sep=' ', index=False, header=None) - """ -} - -process RELABEL_FASTA_LEN { - tag "${target_on_ref}" - label "process_single" - - container "docker.io/gallvp/python3npkgs:v0.4" - - input: - tuple val(target_on_ref), path(target_seq_lengths), path(ref_seq_lengths), path(target_seq_list), path(ref_seq_list) - - output: - tuple val(target_on_ref), path("relabeld.target.seq.lengths"), path("relabeld.ref.seq.lengths"), emit: relabeled_seq_lengths - - script: - """ - #!/usr/bin/env python - - import pandas as pd - - subs_target_seq = 
pd.read_csv('$target_seq_list', sep='\\t', header=None) - subs_target_seq_dict = dict(zip(subs_target_seq.iloc[:, 0], subs_target_seq.iloc[:, 1])) - - subs_ref_seq = pd.read_csv('$ref_seq_list', sep='\\t', header=None) - subs_ref_seq_dict = dict(zip(subs_ref_seq.iloc[:, 0], subs_ref_seq.iloc[:, 1])) - - df_target_seq_lengths = pd.read_csv('$target_seq_lengths', sep='\\t', header=None) - df_target_seq_lengths.iloc[:, 0] = df_target_seq_lengths.iloc[:, 0].replace(subs_target_seq_dict, regex=False) - df_target_seq_lengths.to_csv("relabeld.target.seq.lengths", sep='\\t', index=False, header=None) - - df_ref_seq_lengths = pd.read_csv('$ref_seq_lengths', sep='\\t', header=None) - df_ref_seq_lengths.iloc[:, 0] = df_ref_seq_lengths.iloc[:, 0].replace(subs_ref_seq_dict, regex=False) - df_ref_seq_lengths.to_csv("relabeld.ref.seq.lengths", sep='\\t', index=False, header=None) - """ -} - -process SPLIT_BUNDLE_FILE_BY_TARGET_SEQS { - tag "${target_on_ref}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - input: - tuple val(target_on_ref), path(coloured_bundle_links) - - output: - tuple val(target_on_ref), path("*.split.bundle.txt") - - script: - """ - if [[ "${params.synteny.plot_1_vs_all}" = "1" ]];then - target_seqs=(\$(awk '{print \$4}' $coloured_bundle_links | sort | uniq)) - - for i in "\${!target_seqs[@]}" - do - target_seq=\${target_seqs[\$i]} - awk -v seq="\$target_seq" '\$4==seq {print \$0}' $coloured_bundle_links > "${target_on_ref}.\${target_seq}.split.bundle.txt" - done - fi - - cat $coloured_bundle_links > "${target_on_ref}.all.split.bundle.txt" - """ -} - -process GENERATE_KARYOTYPE { - tag "${target_on_ref}.${seq_tag}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - input: - tuple val(target_on_ref), val(seq_tag), path(split_bundle_file), path(target_seq_len), path(ref_seq_len) - - output: - tuple val("${target_on_ref}.${seq_tag}"), path("*.karyotype") - - script: - """ - ref_seqs=(\$(awk '{print \$1}' $split_bundle_file | sort | uniq)) - - if [ \${#ref_seqs[@]} -eq 0 ]; then - touch "${target_on_ref}.${seq_tag}.karyotype" - exit 0 - fi - - tmp_file=\$(mktemp) - printf '%s\\n' "\${ref_seqs[@]}" > "\$tmp_file" - - if [[ $seq_tag = "all" ]];then - cat $target_seq_len > filtered.target.seq.len - else - grep -w "$seq_tag" $target_seq_len > filtered.target.seq.len - fi - cat filtered.target.seq.len | awk '{print \$1,\$2,"grey"}' OFS="\\t" > colored.filtered.target.seq.len - - grep -w -f "\$tmp_file" $ref_seq_len > filtered.ref.seq.len - cat filtered.ref.seq.len | awk '{print \$1,\$2,"black"}' OFS="\\t" > colored.filtered.ref.seq.len - - cat colored.filtered.ref.seq.len | sort -k1V > merged.seq.lengths - cat colored.filtered.target.seq.len | sort -k1Vr >> merged.seq.lengths - sed -i '/^\$/d' merged.seq.lengths - - cat merged.seq.lengths \ - | awk '{print "chr -",\$1,\$1,"0",\$2-1,\$3}' OFS="\\t" \ - > "${target_on_ref}.${seq_tag}.karyotype" - - rm "\$tmp_file" - """ -} - -process CIRCOS { - tag "${target_on_ref_seq}" - label "process_single" - - container "docker.io/gallvp/circos-tools:v0.23-1_ps" - publishDir "${params.outdir}/synteny/${target_on_ref_seq}", mode: 'copy' - - input: - tuple val(target_on_ref_seq), path(karyotype), path(bundle_file) - - output: - path "*.svg", emit: svg_file - path "*.png", emit: png_file - path "bundled.links.tsv", emit: bundled_links_tsv - path "circos.conf", emit: circos_conf - path "karyotype.tsv", emit: karyotype_tsv - - script: - """ - - links_count=\$(wc -l < "$bundle_file") - max_links=20000 - if [ "\$links_count" -gt "\$max_links" ]; then - echo "Link count exceeded \$max_links for 
${bundle_file}." - echo "Try to shrink the number of links by increasing the max_gap and min_bundle_size options in the config file." - exit 1 - fi - - cat $karyotype > "karyotype.tsv" - cat $bundle_file | awk '{print \$1,\$2,\$3,\$4,\$5,\$6,\$7}' OFS="\\t" > bundled.links.tsv - - num_sequences=\$(cat $karyotype | wc -l) - if (( \$num_sequences <= 10 )); then - label_font_size=40 - elif (( \$num_sequences <= 30 )); then - label_font_size=30 - else - label_font_size=15 - fi - - if (( \$num_sequences <= 10 )); then - ticks_config=" - radius = dims(ideogram,radius_outer) - orientation = out - label_multiplier = 1e-6 - color = black - thickness = 5p - label_offset = 5p - - spacing = 0.5u - size = 10p - show_label = yes - label_size = 20p - format = %.1f - - - spacing = 1.0u - size = 15p - show_label = yes - label_size = 30p - format = %.1f - - " - - label_offset=" + 120p" - else - ticks_config="" - - label_offset=" + 25p" - fi - - cat <<- EOF > circos.conf - # circos.conf - karyotype = $karyotype - - - - default = 0.005r - - - radius = 0.8r - thickness = 25p - fill = yes - stroke_thickness = 0 - - show_label = yes - label_font = default - label_radius = dims(ideogram,radius_outer)\$label_offset - label_size = \$label_font_size - label_parallel = yes - - - - radius = 0.99r - crest = 1 - ribbon = yes - flat = yes - stroke_thickness = 0 - color = grey_a3 - - bezier_radius = 0r - bezier_radius_purity = 0.5 - - file = bundled.links.tsv - - - - show_ticks = yes - show_tick_labels = yes - chromosomes_units = 1000000 - chromosomes_display_default = yes - - \$ticks_config - - - <> - - <> - <> -EOF - - if [ ! 
-s $karyotype ]; then - touch "${target_on_ref_seq}.svg" - touch "${target_on_ref_seq}.png" - exit 0 - fi - - circos - - mv circos.svg "${target_on_ref_seq}.svg" - mv circos.png "${target_on_ref_seq}.png" - """ -} diff --git a/subworkflows/local/tidk.nf b/subworkflows/local/tidk.nf deleted file mode 100644 index dd5728ce..00000000 --- a/subworkflows/local/tidk.nf +++ /dev/null @@ -1,212 +0,0 @@ -nextflow.enable.dsl=2 - -workflow TIDK { - take: - tuple_of_hap_file - - main: - if (!params.tidk.skip) { - GET_APRIORI_SEQUENCE() - .set { ch_apriori_sequence } - - SORT_AND_FILTER_BY_SEQ_LENGTH(tuple_of_hap_file) - .set { ch_sorted_hap_file } - - EXPLORE_REPEAT_SEQ(tuple_of_hap_file) - .set { ch_explored_repeat_seq } - - ch_explored_repeat_seq - .join( - ch_sorted_hap_file - ) - | SEARCH_A_POSTERIORI_REPEAT_SEQ - | PLOT_A_POSTERIORI_REPEAT_SEQ - | collect - | set { ch_list_of_a_posteriori_tidk_plots } - - SEARCH_A_PRIORI_REPEAT_SEQ(ch_sorted_hap_file) - | PLOT_A_PRIORI_REPEAT_SEQ - | collect - | set { ch_list_of_a_priori_tidk_plots } - - - ch_list_of_a_posteriori_tidk_plots - .mix(ch_list_of_a_priori_tidk_plots) - .mix( - ch_explored_repeat_seq - .map { - it[1] // a_posteriori sequence - } - ) - .mix(ch_apriori_sequence) - .collect() - .set { ch_list_of_tidk_plots } - } - else { - ch_list_of_tidk_plots = Channel.of([]) - } - - emit: - list_of_plots = ch_list_of_tidk_plots -} - -process GET_APRIORI_SEQUENCE { - tag "setup" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - output: - path("a_priori.sequence") - - script: - """ - echo "${params.tidk.repeat_seq}" >> a_priori.sequence - """ -} - -process SORT_AND_FILTER_BY_SEQ_LENGTH { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/seqkit:2.3.1--h9ee0642_0': - 'quay.io/biocontainers/seqkit:2.3.1--h9ee0642_0' }" - - input: - tuple val(hap_name), path(fasta_file) - - output: - tuple val(hap_name), path("${hap_name}.seqkit.sort.fasta") - - script: - """ - if [[ "${params.tidk.filter_by_size}" = "1" ]];then - seqkit seq -m ${params.tidk.filter_size_bp} $fasta_file > filtered.file.fasta - else - cat $fasta_file > filtered.file.fasta - fi - - cat filtered.file.fasta \ - | seqkit sort --quiet --reverse --by-length \ - > "${hap_name}.seqkit.sort.fasta" - """ -} - -process SEARCH_A_PRIORI_REPEAT_SEQ { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/tidk:0.2.31--h87f3376_0': - 'quay.io/biocontainers/tidk:0.2.31--h87f3376_0' }" - publishDir params.outdir, mode: 'copy' - - input: - tuple val(hap_name), path(fasta_file) - - output: - tuple val(hap_name), path("tidk/${hap_name}.a_priori.tidk.search*.tsv") - - script: - """ - tidk search --string "${params.tidk.repeat_seq}" --output "${hap_name}.a_priori.tidk.search" --dir tidk --extension "tsv" "${fasta_file}" - """ -} - -process EXPLORE_REPEAT_SEQ { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/tidk:0.2.31--h87f3376_0': - 'quay.io/biocontainers/tidk:0.2.31--h87f3376_0' }" - publishDir "${params.outdir}/tidk", mode: 'copy' - - input: - tuple val(hap_name), path(fasta_file) - - output: - tuple val(hap_name), path("${hap_name}.a_posteriori.sequence") - - script: - """ - tidk explore --minimum 5 --maximum 30 "${fasta_file}" > ${hap_name}.tidk.explore.txt - cat ${hap_name}.tidk.explore.txt | sed -n 2p | awk '{print \$1;}' > "${hap_name}.a_posteriori.sequence" - """ -} - -process SEARCH_A_POSTERIORI_REPEAT_SEQ { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/tidk:0.2.31--h87f3376_0': - 'quay.io/biocontainers/tidk:0.2.31--h87f3376_0' }" - publishDir params.outdir, mode: 'copy' - - input: - tuple val(hap_name), path(hap_explored_sequence), path(fasta_file) - - output: - tuple val(hap_name), path("tidk/${hap_name}.a_posteriori.tidk.search*.tsv") - - script: - """ - if [ -s ${hap_name}.a_posteriori.sequence ]; then - xyz=`cat ${hap_name}.a_posteriori.sequence` - tidk search --string "\${xyz}" --output "${hap_name}.a_posteriori.tidk.search" --dir tidk --extension "tsv" "${fasta_file}" - else - mkdir tidk - touch tidk/${hap_name}.a_posteriori.tidk.search.empty.tsv - fi - """ -} - -process PLOT_A_PRIORI_REPEAT_SEQ { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/tidk:0.2.31--h87f3376_0': - 'quay.io/biocontainers/tidk:0.2.31--h87f3376_0' }" - publishDir "${params.outdir}/tidk", mode: 'copy' - - input: - tuple val(hap_name), path(tsv_file) - - output: - path "${hap_name}_a_priori.tidk.plot*.svg" - - script: - """ - tidk plot --tsv "$tsv_file" --output "${hap_name}_a_priori.tidk.plot" - """ -} - -process PLOT_A_POSTERIORI_REPEAT_SEQ { - tag "${hap_name}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/tidk:0.2.31--h87f3376_0': - 'quay.io/biocontainers/tidk:0.2.31--h87f3376_0' }" - publishDir "${params.outdir}/tidk", mode: 'copy' - - input: - tuple val(hap_name), path(tsv_file) - - output: - path "${hap_name}_a_posteriori.tidk.plot*.svg" - - script: - """ - if [ -s ${tsv_file} ]; then - tidk plot --tsv "$tsv_file" --output "${hap_name}_a_posteriori.tidk.plot" - else - touch ${hap_name}_a_posteriori.tidk.plot.empty.svg - fi - """ -} diff --git a/subworkflows/local/validate_fasta.nf b/subworkflows/local/validate_fasta.nf deleted file mode 100644 index 796911b8..00000000 --- a/subworkflows/local/validate_fasta.nf +++ /dev/null @@ -1,52 +0,0 @@ -nextflow.enable.dsl=2 - -include { GZIP_FASTA } from '../../modules/local/gzip_fasta' - -workflow VALIDATE_FASTA { - take: - tuple_of_tag_file - - main: - tuple_of_tag_file - | GZIP_FASTA - | set { ch_tuple_tag_extracted_file } - - ch_tuple_tag_extracted_file - | RUN_VALIDATOR - | map { - def literals = it.split(":") - - [literals[1]] // [tag] - } - | join( - ch_tuple_tag_extracted_file - ) - | set { ch_tuple_tag_valid_fasta } - - emit: - tuple_tag_valid_fasta = ch_tuple_tag_valid_fasta -} - -process RUN_VALIDATOR { - tag "${tag_label}" - label "process_single" - - container "docker.io/gallvp/fasta_validator:a6a2ec1_ps" - - input: - tuple val(tag_label), path(fasta_file) - - output: - stdout - - script: - """ - 
fasta_validate -v $fasta_file >/dev/null - - # If invalid, the above command will fail and - # the NXF error startegy will kick in. - # Otherwise, pass the is_valid status to stdout - - echo -n "VALIDATE_FASTA:$tag_label:VALID" - """ -} diff --git a/subworkflows/local/validate_gff3.nf b/subworkflows/local/validate_gff3.nf deleted file mode 100644 index d29ca495..00000000 --- a/subworkflows/local/validate_gff3.nf +++ /dev/null @@ -1,143 +0,0 @@ -nextflow.enable.dsl=2 - -workflow VALIDATE_GFF3 { - take: - tuple_of_tag_gff3_file - tuple_of_tag_fasta_file - - main: - tuple_of_tag_gff3_file - | GZIP_GFF3 - | FORMAT_GFF3 - | set { ch_tuple_tag_extracted_file } - - ch_tuple_tag_extracted_file - | RUN_VALIDATOR - | map { - def literals = it.split(":") - - [literals[1]] // [tag] - } - | join( - ch_tuple_tag_extracted_file - ) - | set { ch_tuple_tag_after_validator } - - - tuple_of_tag_fasta_file - | cross(ch_tuple_tag_after_validator) - | map { - [it[0][0], it[1][1], it[0][1]] // [tag, gff3, fasta] - } - | CHECK_FASTA_GFF3_CORRESPONDENCE - | map { - def literals = it.split(":") - - [literals[1]] // [tag] - } - | join( - ch_tuple_tag_extracted_file - ) - | set { ch_tuple_tag_valid_gff3 } - - emit: - tuple_tag_valid_gff3 = ch_tuple_tag_valid_gff3 -} - -process GZIP_GFF3 { - tag "${tag_label}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" - - input: - tuple val(tag_label), path(gff3_file) - - output: - tuple val(tag_label), path("*.gzip.gff3") - - script: - """ - input_file_name_var="\$(basename $gff3_file .gz)" - output_file_name="\${input_file_name_var%.*}.gzip.gff3" - - gzip -cdf "$gff3_file" > "\$output_file_name" - """ -} - -process FORMAT_GFF3 { - tag "${tag_label}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/genometools-genometools:1.6.2--py310he7ef181_3': - 'quay.io/biocontainers/genometools-genometools:1.6.2--py310he7ef181_3' }" - - input: - tuple val(tag_label), path(gff3_file) - - output: - tuple val(tag_label), path("*.gt.gff3") - - script: - """ - output_file_name="\$(basename $gff3_file .gzip.gff3).gt.gff3" - - gt gff3 -tidy -retainids "$gff3_file" \ - > "\$output_file_name" - """ -} - -process RUN_VALIDATOR { - tag "${tag_label}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? - 'https://depot.galaxyproject.org/singularity/genometools-genometools:1.6.2--py310he7ef181_3': - 'quay.io/biocontainers/genometools-genometools:1.6.2--py310he7ef181_3' }" - - input: - tuple val(tag_label), path(gff3_file) - - output: - stdout - - script: - """ - gt gff3validator "$gff3_file" >/dev/null - - # If invalid, the above command will fail and - # the NXF error startegy will kick in. - # Otherwise, pass the is_valid status to stdout - - echo -n "VALIDATE_GFF3:$tag_label:VALID" - """ -} - -process CHECK_FASTA_GFF3_CORRESPONDENCE { - tag "${tag_label}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1': - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(tag_label), path(gff3_file), path(fasta_file) - - output: - stdout - - script: - """ - check_gff3_fasta_corresp_3031aca.sh "$fasta_file" "$gff3_file" - - # If invalid, the above command will fail and - # the NXF error startegy will kick in. - # Otherwise, pass the is_valid status to stdout - - echo -n "CHECK_FASTA_GFF3_CORRESPONDENCE:$tag_label:VALID" - """ -} diff --git a/subworkflows/nf-core/fasta_explore_search_plot_tidk/main.nf b/subworkflows/nf-core/fasta_explore_search_plot_tidk/main.nf new file mode 100644 index 00000000..091f0c2c --- /dev/null +++ b/subworkflows/nf-core/fasta_explore_search_plot_tidk/main.nf @@ -0,0 +1,90 @@ +include { SEQKIT_SEQ as FILTER_BY_LENGTH } from '../../../modules/nf-core/seqkit/seq' +include { SEQKIT_SORT as SORT_BY_LENGTH } from '../../../modules/nf-core/seqkit/sort' +include { TIDK_EXPLORE } from '../../../modules/nf-core/tidk/explore' +include { TIDK_SEARCH as TIDK_SEARCH_APRIORI } from '../../../modules/nf-core/tidk/search' +include { TIDK_SEARCH as TIDK_SEARCH_APOSTERIORI } from '../../../modules/nf-core/tidk/search' +include { TIDK_PLOT as TIDK_PLOT_APRIORI } from '../../../modules/nf-core/tidk/plot' +include { TIDK_PLOT as TIDK_PLOT_APOSTERIORI } from '../../../modules/nf-core/tidk/plot' + + +workflow FASTA_EXPLORE_SEARCH_PLOT_TIDK { + + take: + ch_fasta // channel: [ val(meta), [ fasta ] ] + ch_apriori_sequence // channel: [ val(meta), val(sequence) ]; Optional: Set to [] if not needed + // val(meta) from ch_fasta and ch_apriori_sequence are only required to have + // the same `id` + + main: + ch_versions = Channel.empty() + + // MODULE: SEQKIT_SEQ as FILTER_BY_LENGTH + FILTER_BY_LENGTH ( ch_fasta ) + + ch_filtered_fasta = FILTER_BY_LENGTH.out.fastx + ch_versions = ch_versions.mix(FILTER_BY_LENGTH.out.versions.first()) + + // MODULE: SEQKIT_SORT as SORT_BY_LENGTH + 
SORT_BY_LENGTH ( ch_filtered_fasta ) + + ch_sorted_fasta = SORT_BY_LENGTH.out.fastx + ch_versions = ch_versions.mix(SORT_BY_LENGTH.out.versions.first()) + + // TIDK_EXPLORE + TIDK_EXPLORE ( ch_filtered_fasta ) + + ch_top_sequence = TIDK_EXPLORE.out.top_sequence + ch_versions = ch_versions.mix(TIDK_EXPLORE.out.versions.first()) + + // TIDK_SEARCH as TIDK_SEARCH_APRIORI + ch_apriori_inputs = ch_sorted_fasta + | map { meta, fasta -> [ meta.id, meta, fasta ] } + | join( + ( ch_apriori_sequence ?: Channel.empty() ) + | map { meta, seq -> [ meta.id, seq ] } + ) + | map { id, meta, fasta, seq -> [ meta, fasta, seq ] } + + TIDK_SEARCH_APRIORI ( + ch_apriori_inputs.map { meta, fasta, seq -> [ meta, fasta ] }, + ch_apriori_inputs.map { meta, fasta, seq -> seq } + ) + + ch_apriori_tsv = TIDK_SEARCH_APRIORI.out.tsv + ch_versions = ch_versions.mix(TIDK_SEARCH_APRIORI.out.versions.first()) + + // TIDK_SEARCH as TIDK_SEARCH_APOSTERIORI + ch_aposteriori_inputs = ch_sorted_fasta + | join(ch_top_sequence) + | map { meta, fasta, txt -> + [ meta, fasta, txt.getText().strip() ] + } + + TIDK_SEARCH_APOSTERIORI ( + ch_aposteriori_inputs.map { meta, fasta, seq -> [ meta, fasta ] }, + ch_aposteriori_inputs.map { meta, fasta, seq -> seq } + ) + + ch_aposteriori_tsv = TIDK_SEARCH_APOSTERIORI.out.tsv + ch_versions = ch_versions.mix(TIDK_SEARCH_APOSTERIORI.out.versions.first()) + + // TIDK_PLOT as TIDK_PLOT_APRIORI + TIDK_PLOT_APRIORI ( ch_apriori_tsv ) + + ch_apriori_svg = TIDK_PLOT_APRIORI.out.svg + ch_versions = ch_versions.mix(TIDK_PLOT_APRIORI.out.versions.first()) + + // TIDK_PLOT as TIDK_PLOT_APOSTERIORI + TIDK_PLOT_APOSTERIORI ( ch_aposteriori_tsv ) + + ch_aposteriori_svg = TIDK_PLOT_APOSTERIORI.out.svg + ch_versions = ch_versions.mix(TIDK_PLOT_APOSTERIORI.out.versions.first()) + + emit: + apriori_tsv = ch_apriori_tsv // channel: [ val(meta), tsv ] + apriori_svg = ch_apriori_svg // channel: [ val(meta), svg ] + aposteriori_sequence = ch_top_sequence // channel: [ val(meta), txt ] + 
aposteriori_tsv = ch_aposteriori_tsv // channel: [ val(meta), tsv ] + aposteriori_svg = ch_aposteriori_svg // channel: [ val(meta), svg ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fasta_explore_search_plot_tidk/meta.yml b/subworkflows/nf-core/fasta_explore_search_plot_tidk/meta.yml new file mode 100644 index 00000000..80bd15fd --- /dev/null +++ b/subworkflows/nf-core/fasta_explore_search_plot_tidk/meta.yml @@ -0,0 +1,74 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fasta_explore_search_plot_tidk" +description: | + Uses Telomere Identification toolKit (TIDK) to identify the frequency of telomeric repeats + along a sliding window for each sequence in the input fasta file. Results are presented in + TSV and SVG formats. The user can specify an a priori sequence for identification. + Possible a posteriori sequences are also explored and the most frequent sequence is + used for identification similar to the a priori sequence. seqkit/seq and seqkit/sort modules are + also included to filter out small sequences and sort sequences by length. 
+keywords: + - genomics + - telomere + - repeat + - search + - plot +components: + - seqkit/seq + - seqkit/sort + - tidk/explore + - tidk/plot + - tidk/search +input: + - ch_fasta: + type: file + description: | + Input assembly + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fsa/fa/fasta}" + - ch_apriori_sequence: + type: string + description: | + A priori sequence + Structure: [ val(meta), val(sequence) ] +output: + - apriori_tsv: + type: file + description: | + Frequency table for the identification of the a priori sequence + Structure: [ val(meta), path(tsv) ] + pattern: "*.tsv" + - apriori_svg: + type: file + description: | + Frequency graph for the identification of the a priori sequence + Structure: [ val(meta), path(svg) ] + pattern: "*.svg" + - aposteriori_sequence: + type: file + description: | + The most frequent a posteriori sequence + Structure: [ val(meta), path(txt) ] + pattern: "*.txt" + - aposteriori_tsv: + type: file + description: | + Frequency table for the identification of the a aposteriori sequence + Structure: [ val(meta), path(tsv) ] + pattern: "*.tsv" + - aposteriori_svg: + type: file + description: | + Frequency graph for the identification of the a aposteriori sequence + Structure: [ val(meta), path(svg) ] + pattern: "*.svg" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test new file mode 100644 index 00000000..6bc13bac --- /dev/null +++ b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test @@ -0,0 +1,119 @@ +nextflow_workflow { + + name "Test Workflow FASTA_EXPLORE_SEARCH_PLOT_TIDK" + script "../main.nf" + workflow "FASTA_EXPLORE_SEARCH_PLOT_TIDK" + config './nextflow.config' + + tag "subworkflows" + tag 
"subworkflows_nfcore" + tag "subworkflows/fasta_explore_search_plot_tidk" + tag "fasta_explore_search_plot_tidk" + tag "seqkit/seq" + tag "seqkit/sort" + tag "tidk/explore" + tag "tidk/search" + tag "tidk/plot" + + test("homo_sapiens-genome_fasta-genome_21_fasta") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ], + [ + [ id:'test2' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + 'TTAGGG' + ], + [ + [ id:'test2' ], + 'TTAGGG' + ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("homo_sapiens-genome_fasta-genome_21_fasta-partial_apriori-stub") { + + options '-stub' + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ], + [ + [ id:'test2' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + 'TTAGGG' + ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("homo_sapiens-genome_fasta-genome_21_fasta-no_apriori-stub") { + + options '-stub' + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ], + [ + [ id:'test2' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + ) + input[1] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test.snap 
b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test.snap new file mode 100644 index 00000000..6e4e99c6 --- /dev/null +++ b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test.snap @@ -0,0 +1,389 @@ +{ + "homo_sapiens-genome_fasta-genome_21_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test2" + }, + "test2.tsv:md5,4a42ed016c022a8238739e4acddf649c" + ], + [ + { + "id": "test" + }, + "test.tsv:md5,5b44a89396f412eb571ea240ef9deedd" + ] + ], + "1": [ + [ + { + "id": "test2" + }, + "test2.svg:md5,e76985fdc220867a05967984c2c4675d" + ], + [ + { + "id": "test" + }, + "test.svg:md5,d1bca1077cd52ad8c5126a4c9614d8f7" + ] + ], + "2": [ + [ + { + "id": "test2" + }, + "test2.top.sequence.txt:md5,bd63900958df06516e45b887072d788f" + ] + ], + "3": [ + [ + { + "id": "test2" + }, + "test2.tsv:md5,43f358de532d30e02e65339db07e4e00" + ] + ], + "4": [ + [ + { + "id": "test2" + }, + "test2.svg:md5,f0a6d4bb1464125bcbb6b54a471c1e3c" + ] + ], + "5": [ + "versions.yml:md5,02d48eb43c3882d9832c88b9e080b420", + "versions.yml:md5,506585e66b23f17620bf582ef60af56d", + "versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656", + "versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f", + "versions.yml:md5,83a6abbf9b68ec1e6152c5b6eb12f0d7", + "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1", + "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04" + ], + "aposteriori_sequence": [ + [ + { + "id": "test2" + }, + "test2.top.sequence.txt:md5,bd63900958df06516e45b887072d788f" + ] + ], + "aposteriori_svg": [ + [ + { + "id": "test2" + }, + "test2.svg:md5,f0a6d4bb1464125bcbb6b54a471c1e3c" + ] + ], + "aposteriori_tsv": [ + [ + { + "id": "test2" + }, + "test2.tsv:md5,43f358de532d30e02e65339db07e4e00" + ] + ], + "apriori_svg": [ + [ + { + "id": "test2" + }, + "test2.svg:md5,e76985fdc220867a05967984c2c4675d" + ], + [ + { + "id": "test" + }, + "test.svg:md5,d1bca1077cd52ad8c5126a4c9614d8f7" + ] + ], + "apriori_tsv": [ + [ + { + "id": "test2" + }, + 
"test2.tsv:md5,4a42ed016c022a8238739e4acddf649c" + ], + [ + { + "id": "test" + }, + "test.tsv:md5,5b44a89396f412eb571ea240ef9deedd" + ] + ], + "versions": [ + "versions.yml:md5,02d48eb43c3882d9832c88b9e080b420", + "versions.yml:md5,506585e66b23f17620bf582ef60af56d", + "versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656", + "versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f", + "versions.yml:md5,83a6abbf9b68ec1e6152c5b6eb12f0d7", + "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1", + "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04" + ] + } + ], + "timestamp": "2023-12-12T15:33:35.106116" + }, + "homo_sapiens-genome_fasta-genome_21_fasta-no_apriori-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test2" + }, + "test2.top.sequence.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.top.sequence.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test2" + }, + "test2.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test2" + }, + "test2.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,02d48eb43c3882d9832c88b9e080b420", + "versions.yml:md5,506585e66b23f17620bf582ef60af56d", + "versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656", + "versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f", + "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04" + ], + "aposteriori_sequence": [ + [ + { + "id": "test2" + }, + "test2.top.sequence.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.top.sequence.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "aposteriori_svg": [ + [ + { + "id": "test2" + }, + "test2.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"aposteriori_tsv": [ + [ + { + "id": "test2" + }, + "test2.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "apriori_svg": [ + + ], + "apriori_tsv": [ + + ], + "versions": [ + "versions.yml:md5,02d48eb43c3882d9832c88b9e080b420", + "versions.yml:md5,506585e66b23f17620bf582ef60af56d", + "versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656", + "versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f", + "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04" + ] + } + ], + "timestamp": "2023-12-21T11:15:42.277945" + }, + "homo_sapiens-genome_fasta-genome_21_fasta-partial_apriori-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test2" + }, + "test2.top.sequence.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.top.sequence.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test2" + }, + "test2.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test2" + }, + "test2.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,02d48eb43c3882d9832c88b9e080b420", + "versions.yml:md5,506585e66b23f17620bf582ef60af56d", + "versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656", + "versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f", + "versions.yml:md5,83a6abbf9b68ec1e6152c5b6eb12f0d7", + "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1", + "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04" + ], + "aposteriori_sequence": [ + [ + { + "id": "test2" + }, + "test2.top.sequence.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, 
+ "test.top.sequence.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "aposteriori_svg": [ + [ + { + "id": "test2" + }, + "test2.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "aposteriori_tsv": [ + [ + { + "id": "test2" + }, + "test2.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "apriori_svg": [ + [ + { + "id": "test" + }, + "test.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "apriori_tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,02d48eb43c3882d9832c88b9e080b420", + "versions.yml:md5,506585e66b23f17620bf582ef60af56d", + "versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656", + "versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f", + "versions.yml:md5,83a6abbf9b68ec1e6152c5b6eb12f0d7", + "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1", + "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04" + ] + } + ], + "timestamp": "2023-12-21T11:15:25.633714" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/nextflow.config b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/nextflow.config new file mode 100644 index 00000000..22d4fde0 --- /dev/null +++ b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/nextflow.config @@ -0,0 +1,19 @@ +process { + + withName: FILTER_BY_LENGTH { + ext.prefix = { "${meta.id}.filtered" } + } + + withName: SORT_BY_LENGTH { + ext.args = '--quiet --reverse --by-length' + ext.prefix = { "${meta.id}.sorted" } + } + + withName: TIDK_EXPLORE { + ext.args = '--minimum 5 --maximum 30' + } + + withName: 'TIDK_SEARCH_*' { + ext.args = '--extension tsv' + } +} diff --git a/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/tags.yml b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/tags.yml new file mode 
100644 index 00000000..d2d150d7 --- /dev/null +++ b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fasta_explore_search_plot_tidk: + - subworkflows/nf-core/fasta_explore_search_plot_tidk/** diff --git a/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf b/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf new file mode 100644 index 00000000..39a086ad --- /dev/null +++ b/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf @@ -0,0 +1,106 @@ +// +// Read QC and trimming +// + +include { FASTQC as FASTQC_RAW } from '../../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIM } from '../../../modules/nf-core/fastqc/main' +include { FASTP } from '../../../modules/nf-core/fastp/main' + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +import groovy.json.JsonSlurper + +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toLong() +} + +workflow FASTQ_TRIM_FASTP_FASTQC { + take: + ch_reads // channel: [ val(meta), path(reads) ] + ch_adapter_fasta // channel: [ path(fasta) ] + val_save_trimmed_fail // value: boolean + val_save_merged // value: boolean + val_skip_fastp // value: boolean + val_skip_fastqc // value: boolean + + main: + + ch_versions = Channel.empty() + + ch_fastqc_raw_html = Channel.empty() + ch_fastqc_raw_zip = Channel.empty() + if (!val_skip_fastqc) { + FASTQC_RAW ( + ch_reads + ) + ch_fastqc_raw_html = FASTQC_RAW.out.html + ch_fastqc_raw_zip = FASTQC_RAW.out.zip + ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) + } + + ch_trim_reads = ch_reads + ch_trim_json = Channel.empty() + ch_trim_html = Channel.empty() + ch_trim_log = Channel.empty() + ch_trim_reads_fail = Channel.empty() + ch_trim_reads_merged = Channel.empty() + ch_fastqc_trim_html = Channel.empty() + ch_fastqc_trim_zip = Channel.empty() + if 
(!val_skip_fastp) { + FASTP ( + ch_reads, + ch_adapter_fasta, + val_save_trimmed_fail, + val_save_merged + ) + ch_trim_reads = FASTP.out.reads + ch_trim_json = FASTP.out.json + ch_trim_html = FASTP.out.html + ch_trim_log = FASTP.out.log + ch_trim_reads_fail = FASTP.out.reads_fail + ch_trim_reads_merged = FASTP.out.reads_merged + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + // + // Filter empty FastQ files after adapter trimming so FastQC doesn't fail + // + ch_trim_reads + .join(ch_trim_json) + .map { meta, reads, json -> + if (json.text.readLines().size < 1) { + return [ meta, reads ] + } + + if (getFastpReadsAfterFiltering(json) > 0) { + [ meta, reads ] + } + } + .set { ch_trim_reads } + + if (!val_skip_fastqc) { + FASTQC_TRIM ( + ch_trim_reads + ) + ch_fastqc_trim_html = FASTQC_TRIM.out.html + ch_fastqc_trim_zip = FASTQC_TRIM.out.zip + ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) + } + } + + emit: + reads = ch_trim_reads // channel: [ val(meta), path(reads) ] + trim_json = ch_trim_json // channel: [ val(meta), path(json) ] + trim_html = ch_trim_html // channel: [ val(meta), path(html) ] + trim_log = ch_trim_log // channel: [ val(meta), path(log) ] + trim_reads_fail = ch_trim_reads_fail // channel: [ val(meta), path(fastq.gz) ] + trim_reads_merged = ch_trim_reads_merged // channel: [ val(meta), path(fastq.gz) ] + + fastqc_raw_html = ch_fastqc_raw_html // channel: [ val(meta), path(html) ] + fastqc_raw_zip = ch_fastqc_raw_zip // channel: [ val(meta), path(zip) ] + fastqc_trim_html = ch_fastqc_trim_html // channel: [ val(meta), path(html) ] + fastqc_trim_zip = ch_fastqc_trim_zip // channel: [ val(meta), path(zip) ] + + versions = ch_versions.ifEmpty(null) // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/fastq_trim_fastp_fastqc/meta.yml b/subworkflows/nf-core/fastq_trim_fastp_fastqc/meta.yml new file mode 100644 index 00000000..9f4e12e0 --- /dev/null +++ 
b/subworkflows/nf-core/fastq_trim_fastp_fastqc/meta.yml @@ -0,0 +1,108 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_trim_fastp_fastqc" +description: Read QC, fastp trimming and read qc +keywords: + - qc + - quality_control + - adapters + - trimming + - fastq +components: + - fastqc + - fastp +input: + - ch_reads: + type: file + description: | + Structure: [ val(meta), path (reads) ] + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ], List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - ch_adapter_fasta: + type: file + description: | + Structure: path(adapter_fasta) + File in FASTA format containing possible adapters to remove. + - val_save_trimmed_fail: + type: boolean + description: | + Structure: val(save_trimmed_fail) + Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - val_save_merged: + type: boolean + description: | + Structure: val(save_merged) + Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` + - val_skip_fastqc: + type: boolean + description: | + Structure: val(skip_fastqc) + skip the fastqc process if true + - val_skip_fastp: + type: boolean + description: | + Structure: val(skip_fastp) + skip the fastp process if true +output: + - meta: + type: value + description: Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + Structure: [ val(meta), path(reads) ] + The trimmed/modified/unmerged fastq reads + - trim_json: + type: file + description: | + Structure: [ val(meta), path(trim_json) ] + Results in JSON format + - trim_html: + type: file + description: | + Structure: [ val(meta), path(trim_html) ] + Results in HTML format + - trim_log: + type: file + description: | + Structure: [ val(meta), path(trim_log) ] + fastp log file + - trim_reads_fail: + type: file + description: | + Structure: [ val(meta), path(trim_reads_fail) ] + Reads that failed the preprocessing + - trim_reads_merged: + type: file + description: | + Structure: [ val(meta), path(trim_reads_merged) ] + Reads that were successfully merged + - fastqc_raw_html: + type: file + description: | + Structure: [ val(meta), path(fastqc_raw_html) ] + Raw fastQC report + - fastqc_raw_zip: + type: file + description: | + Structure: [ val(meta), path(fastqc_raw_zip) ] + Raw fastQC report archive + - fastqc_trim_html: + type: file + description: | + Structure: [ val(meta), path(fastqc_trim_html) ] + Trimmed fastQC report + - fastqc_trim_zip: + type: file + description: | + Structure: [ val(meta), path(fastqc_trim_zip) ] + Trimmed fastQC report archive + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/main.nf b/subworkflows/pfr/fasta_ltrretriever_lai/main.nf index 733b53ca..7bdc2c28 100644 --- a/subworkflows/pfr/fasta_ltrretriever_lai/main.nf +++ b/subworkflows/pfr/fasta_ltrretriever_lai/main.nf @@ -1,55 +1,74 @@ -include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/pfr/custom/shortenfastaids/main.nf' -include { EDTA_LTRHARVEST } from '../../../modules/pfr/edta/ltrharvest/main.nf' -include { LTRFINDER } from '../../../modules/pfr/ltrfinder/main.nf' -include { LTRRETRIEVER } from 
'../../../modules/pfr/ltrretriever/main.nf' -include { CAT_CAT } from '../../../modules/pfr/cat/cat/main.nf' -include { LAI } from '../../../modules/pfr/lai/main.nf' -include { CUSTOM_RESTOREGFFIDS } from '../../../modules/pfr/custom/restoregffids/main.nf' +include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/pfr/custom/shortenfastaids/main' +include { LTRHARVEST } from '../../../modules/pfr/ltrharvest/main' +include { LTRFINDER } from '../../../modules/pfr/ltrfinder/main' +include { LTRRETRIEVER_LTRRETRIEVER } from '../../../modules/pfr/ltrretriever/ltrretriever/main' +include { CAT_CAT } from '../../../modules/pfr/cat/cat/main' +include { LTRRETRIEVER_LAI } from '../../../modules/pfr/ltrretriever/lai/main' +include { CUSTOM_RESTOREGFFIDS } from '../../../modules/pfr/custom/restoregffids/main' workflow FASTA_LTRRETRIEVER_LAI { take: - ch_fasta // channel: [ val(meta), fasta ] - ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed - skip_lai // val; true|false + ch_fasta // channel: [ val(meta), fasta ] + ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed + // val(meta) from ch_fasta and ch_monoploid_seqs are only required + // to have the same `id` + skip_lai // val(true|false) main: - - ch_versions = Channel.empty() + ch_versions = Channel.empty() // MOUDLE: CUSTOM_SHORTENFASTAIDS CUSTOM_SHORTENFASTAIDS ( ch_fasta ) - ch_short_ids_fasta = ch_fasta - | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true) - | map { meta, fasta, short_ids_fasta -> - [ meta, short_ids_fasta ?: fasta ] - } - - ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv - ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first()) - - // MODULE: EDTA_LTRHARVEST - EDTA_LTRHARVEST ( ch_short_ids_fasta ) - - ch_ltrharvest_scn = EDTA_LTRHARVEST.out.scn - ch_versions = ch_versions.mix(EDTA_LTRHARVEST.out.versions.first()) + ch_short_ids_fasta = ch_fasta + | 
join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true) + | map { meta, fasta, short_ids_fasta -> + if ( fasta ) { [ meta, short_ids_fasta ?: fasta ] } + } + + ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv + ch_short_monoploid_seqs = ch_short_ids_tsv + | join( + ch_monoploid_seqs ?: Channel.empty() + ) + | map { meta, short_ids_tsv, monoploid_seqs -> + map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs) + } + | collectFile(newLine:true) + | map { seqs -> + def id = seqs.name.split('.mapped.monoploid.seqs.txt')[0] + + [ [ id: id ], seqs ] + } + ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first()) + + // MODULE: LTRHARVEST + LTRHARVEST ( ch_short_ids_fasta ) + + ch_ltrharvest_scn = LTRHARVEST.out.scn + ch_versions = ch_versions.mix(LTRHARVEST.out.versions.first()) // MODULE: LTRFINDER LTRFINDER ( ch_short_ids_fasta ) - ch_ltrfinder_scn = LTRFINDER.out.scn - ch_versions = ch_versions.mix(LTRFINDER.out.versions.first()) + ch_ltrfinder_scn = LTRFINDER.out.scn + ch_versions = ch_versions.mix(LTRFINDER.out.versions.first()) // MODULE: CAT_CAT - CAT_CAT ( ch_ltrharvest_scn.mix(ch_ltrfinder_scn).groupTuple() ) + ch_cat_cat_inputs = ch_ltrharvest_scn + | join(ch_ltrfinder_scn) + | map { meta, harvested, found -> [ meta, [ harvested, found ] ] } - ch_ltr_candidates = CAT_CAT.out.file_out - ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) + CAT_CAT ( ch_cat_cat_inputs ) - // MODULE: LTRRETRIEVER - ch_ltrretriever_inputs = ch_short_ids_fasta.join(ch_ltr_candidates) - LTRRETRIEVER ( + ch_ltr_candidates = CAT_CAT.out.file_out + ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) + + // MODULE: LTRRETRIEVER_LTRRETRIEVER + ch_ltrretriever_inputs = ch_short_ids_fasta.join(ch_ltr_candidates) + + LTRRETRIEVER_LTRRETRIEVER ( ch_ltrretriever_inputs.map { meta, fasta, ltr -> [ meta, fasta ] }, ch_ltrretriever_inputs.map { meta, fasta, ltr -> ltr }, [], @@ -57,54 +76,87 @@ workflow FASTA_LTRRETRIEVER_LAI 
{ [] ) - ch_pass_list = LTRRETRIEVER.out.pass_list - ch_ltrlib = LTRRETRIEVER.out.ltrlib - ch_annotation_out = LTRRETRIEVER.out.annotation_out - ch_annotation_gff = LTRRETRIEVER.out.annotation_gff - ch_versions = ch_versions.mix(LTRRETRIEVER.out.versions.first()) + ch_pass_list = LTRRETRIEVER_LTRRETRIEVER.out.pass_list + ch_ltrlib = LTRRETRIEVER_LTRRETRIEVER.out.ltrlib + ch_annotation_out = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out + ch_annotation_gff = LTRRETRIEVER_LTRRETRIEVER.out.annotation_gff + ch_versions = ch_versions.mix(LTRRETRIEVER_LTRRETRIEVER.out.versions.first()) // MODULE: LAI - ch_lai_inputs = skip_lai - ? Channel.empty() - : ch_short_ids_fasta - | join(ch_pass_list) - | join(ch_annotation_out) - | join( - ch_monoploid_seqs ?: Channel.empty(), - by:0, - remainder: true - ) - | map { meta, fasta, pass, out, mono -> - [ meta, fasta, pass, out, mono ?: [] ] - } - LAI ( + ch_lai_inputs = skip_lai + ? Channel.empty() + : ch_short_ids_fasta + | join(ch_pass_list) + | join(ch_annotation_out) + | map { meta, fasta, pass, out -> + [ meta.id, meta, fasta, pass, out ] + } + | join( + ch_short_monoploid_seqs + | map { meta, mono -> [ meta.id, mono ] }, + by:0, + remainder: true + ) + | map { id, meta, fasta, pass, out, mono -> + [ meta, fasta, pass, out, mono ?: [] ] + } + LTRRETRIEVER_LAI( ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] }, ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass }, ch_lai_inputs.map { meta, fasta, pass, out, mono -> out }, ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono } ) - ch_lai_log = LAI.out.log - ch_lai_out = LAI.out.lai_out - ch_versions = ch_versions.mix(LAI.out.versions.first()) + ch_lai_log = LTRRETRIEVER_LAI.out.log + ch_lai_out = LTRRETRIEVER_LAI.out.lai_out + ch_versions = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first()) // MODULE: CUSTOM_RESTOREGFFIDS - ch_restorable_gff_tsv = ch_annotation_gff.join(ch_short_ids_tsv) + ch_restorable_gff_tsv = 
ch_annotation_gff.join(ch_short_ids_tsv) CUSTOM_RESTOREGFFIDS ( ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] }, ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv } ) - ch_restored_gff = ch_annotation_gff - | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true) - | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] } - ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first()) + ch_restored_gff = ch_annotation_gff + | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true) + | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] } + + ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first()) emit: - ltrlib = ch_ltrlib // channel: [ val(meta), fasta ] - annotation_gff = ch_restored_gff // channel: [ val(meta), gff ] - lai_log = ch_lai_log // channel: [ val(meta), log ] - lai_out = ch_lai_out // channel: [ val(meta), out ] - versions = ch_versions // channel: [ versions.yml ] + ltrlib = ch_ltrlib // channel: [ val(meta), fasta ] + annotation_gff = ch_restored_gff // channel: [ val(meta), gff ] + lai_log = ch_lai_log // channel: [ val(meta), log ] + lai_out = ch_lai_out // channel: [ val(meta), out ] + versions = ch_versions // channel: [ versions.yml ] +} + + +def map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs) { + + def short_ids_head = short_ids_tsv.text.split('\n')[0] + + if (short_ids_head == "IDs have acceptable length and character. No change required.") { + return [ "${meta.id}.mapped.monoploid.seqs.txt" ] + monoploid_seqs.text.split('\n') + } + + def orig_to_new_ids = [:] + short_ids_tsv.text.eachLine { line -> + def (original_id, renamed_id) = line.split('\t') + orig_to_new_ids[original_id] = renamed_id + } + + def mapped_ids = [] + monoploid_seqs.text.eachLine { original_id -> + if (!orig_to_new_ids[original_id]) { + error "Faild to find $original_id in ${monoploid_seqs}" + + "The monoploid_seqs file is malformed!" 
+ } + + mapped_ids.add(orig_to_new_ids[original_id]) + } + + return [ "${meta.id}.mapped.monoploid.seqs.txt" ] + mapped_ids } diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/meta.yml b/subworkflows/pfr/fasta_ltrretriever_lai/meta.yml index 20be97a5..5ba17303 100644 --- a/subworkflows/pfr/fasta_ltrretriever_lai/meta.yml +++ b/subworkflows/pfr/fasta_ltrretriever_lai/meta.yml @@ -14,11 +14,11 @@ keywords: - qc components: - custom/shortenfastaids - - edta/ltrharvest + - ltrharvest - ltrfinder - - ltrretriever + - ltrretriever/ltrretriever - cat/cat - - lai + - ltrretriever/lai - custom/restoregffids input: - ch_fasta: diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test b/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test index 03e0af52..7ba88ab6 100644 --- a/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test +++ b/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test @@ -11,14 +11,14 @@ nextflow_workflow { tag "fasta_ltrretriever_lai" tag "modules/nf-core/gunzip" tag "custom/shortenfastaids" - tag "edta/ltrharvest" + tag "ltrharvest" tag "ltrfinder" - tag "ltrretriever" + tag "ltrretriever/ltrretriever" tag "cat/cat" - tag "lai" + tag "ltrretriever/lai" tag "custom/restoregffids" - test("homo_sapiens_genome_21_fasta") { + test("actinidia_chinensis-genome_21_fasta_gz") { setup { run("GUNZIP") { @@ -28,7 +28,7 @@ nextflow_workflow { """ input[0] = [ [ id:'test' ], - file('/Users/hrauxr/Projects/nf-modules/tests/data/zenodo/actinidia_chinensis/chr1_7M.fasta.gz', checkIfExists: true) + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) ] """ } @@ -38,9 +38,9 @@ nextflow_workflow { when { workflow { """ - input[0] = GUNZIP.out.gunzip - input[1] = [] - input[2] = false + input[0] = GUNZIP.out.gunzip + input[1] = [] + input[2] = false """ } } @@ -48,12 +48,137 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert 
file(workflow.out.annotation_gff[0][1]).text.contains('Gypsy_LTR_retrotransposon') }, + { assert file(workflow.out.annotation_gff[0][1]).text.contains('Copia_LTR_retrotransposon') }, { assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') }, { assert file(workflow.out.lai_log[0][1]).text.contains('Done!') }, { assert Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 }, - { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Gypsy') } + { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') } ) } } -} \ No newline at end of file + + test("actinidia_chinensis-genome_21_fasta_gz-with_mono") { + + setup { + run("GUNZIP") { + script "../../../../modules/nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + """ + } + } + } + + when { + workflow { + """ + GUNZIP.out.gunzip.map { meta, fasta -> + def fa = new File('test.fa') + fa.write(fasta.text.replaceAll('>chr1', '>chr_xxxxxxxxxxxxxxx_1')) + + [ meta, fa.toPath() ] + } + | set { ch_fa } + + def monoploid_seqs = new File('test.mono.seq.txt') + monoploid_seqs.write("chr_xxxxxxxxxxxxxxx_1") + + input[0] = ch_fa + input[1] = Channel.of( [ [ id:'test' ], monoploid_seqs.toPath() ] ) + input[2] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert file(workflow.out.annotation_gff[0][1]).text.contains('Copia_LTR_retrotransposon') }, + { assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') }, + { assert file(workflow.out.lai_log[0][1]).text.contains('Done!') }, + { assert Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 }, + { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') } + ) + } + } + + test("actinidia_chinensis-genome_21_fasta_gz-without_lai") { + + options '-stub' + + setup 
{ + run("GUNZIP") { + script "../../../../modules/nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + """ + } + } + } + + when { + workflow { + """ + GUNZIP.out.gunzip.map { meta, fasta -> + def fa = new File('test.fa') + fa.write(fasta.text.replaceAll('>chr1', '>chr_xxxxxxxxxxxxxxx_1')) + + [ meta, fa.toPath() ] + } + | set { ch_fa } + + def monoploid_seqs = new File('test.mono.seq.txt') + monoploid_seqs.write("chr_xxxxxxxxxxxxxxx_1") + + input[0] = ch_fa + input[1] = Channel.of( [ [ id:'test' ], monoploid_seqs.toPath() ] ) + input[2] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.annotation_gff != null }, + { assert workflow.out.ltrlib != null }, + { assert workflow.out.lai_log == [] }, + { assert workflow.out.lai_out == [] } + ) + } + } + + test("empty_fasta_stub") { + + options '-stub' + + when { + workflow { + """ + def monoploid_seqs = new File('test.mono.seq.txt') + monoploid_seqs.write("chr_xxxxxxxxxxxxxxx_1") + + input[0] = Channel.empty() + input[1] = Channel.of( [ [ id:'test' ], monoploid_seqs.toPath() ] ) + input[2] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/tests/nextflow.config b/subworkflows/pfr/fasta_ltrretriever_lai/tests/nextflow.config index 1c1e4001..617b1160 100644 --- a/subworkflows/pfr/fasta_ltrretriever_lai/tests/nextflow.config +++ b/subworkflows/pfr/fasta_ltrretriever_lai/tests/nextflow.config @@ -1,7 +1,7 @@ process { - withName: EDTA_LTRHARVEST { - ext.prefix = { "${meta.id}_edta_ltrharvest" } + withName: LTRHARVEST { + ext.prefix = { "${meta.id}_ltrharvest" } } withName: LTRFINDER { diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test index 9c3e2b01..1e279e48 100644 --- 
a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test +++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test @@ -41,4 +41,4 @@ nextflow_workflow { ) } } -} \ No newline at end of file +} diff --git a/subworkflows/pfr/gff3_validate/main.nf b/subworkflows/pfr/gff3_validate/main.nf new file mode 100644 index 00000000..5437c5a6 --- /dev/null +++ b/subworkflows/pfr/gff3_validate/main.nf @@ -0,0 +1,61 @@ +include { GT_GFF3 } from '../../../modules/pfr/gt/gff3/main' +include { GT_GFF3VALIDATOR } from '../../../modules/pfr/gt/gff3validator/main' +include { CUSTOM_CHECKGFF3FASTACORRESPONDENCE } from '../../../modules/pfr/custom/checkgff3fastacorrespondence/main' + +workflow GFF3_VALIDATE { + + take: + ch_gff3 // channel: [ val(meta), gff3 ] + ch_fasta // channel: [ val(meta), fasta ] + + main: + + ch_versions = Channel.empty() + + // MODULE: GT_GFF3 + GT_GFF3 ( ch_gff3 ) + ch_versions = ch_versions.mix(GT_GFF3.out.versions.first()) + + // MODULE: GT_GFF3VALIDATOR + GT_GFF3VALIDATOR ( GT_GFF3.out.gt_gff3 ) + ch_versions = ch_versions.mix(GT_GFF3VALIDATOR.out.versions.first()) + + // MODULE: CUSTOM_CHECKGFF3FASTACORRESPONDENCE + GT_GFF3VALIDATOR.out.success_log + | join ( + GT_GFF3.out.gt_gff3 + ) + | map { meta, log, gff3 -> [ meta, gff3 ] } + | join ( + ch_fasta + ) + | set { ch_gff3_fasta } + + CUSTOM_CHECKGFF3FASTACORRESPONDENCE ( + ch_gff3_fasta.map { meta, gff3, fasta -> [ meta, gff3 ] }, + ch_gff3_fasta.map { meta, gff3, fasta -> fasta } + ) + + ch_versions = ch_versions.mix(CUSTOM_CHECKGFF3FASTACORRESPONDENCE.out.versions.first()) + + CUSTOM_CHECKGFF3FASTACORRESPONDENCE.out.success_log + | join ( + ch_gff3_fasta.map { meta, gff3, fasta -> [ meta, gff3 ] } + ) + | map { meta, log, gff3 -> [ meta, gff3 ] } + | set { ch_valid_gff3 } + + GT_GFF3.out.error_log + | mix ( + GT_GFF3VALIDATOR.out.error_log + ) + | mix ( + CUSTOM_CHECKGFF3FASTACORRESPONDENCE.out.error_log + ) + | set { ch_log_for_invalid_gff3 } + + emit: + valid_gff3 = ch_valid_gff3 // 
channel: [ val(meta), gff3 ] + log_for_invalid_gff3 = ch_log_for_invalid_gff3 // channel: [ val(meta), log ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/pfr/gff3_validate/meta.yml b/subworkflows/pfr/gff3_validate/meta.yml new file mode 100644 index 00000000..5dea12a5 --- /dev/null +++ b/subworkflows/pfr/gff3_validate/meta.yml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: gff3_validate +description: | + Validates a gff3 file using GenomeTools gt-gff3, gt-gff3validator and + checks its correspondence with a fasta file +keywords: + - genome + - gff3 + - annotation + - validation +components: + - gt/gff3 + - gt/gff3validator + - custom/checkgff3fastacorrespondence +input: + - ch_gff3: + type: file + description: | + Input channel containing a gff3 file + Structure: [ val(meta), path(gff3) ] + pattern: "*.{gff,gff3}" + - ch_fasta: + type: file + description: | + Input channel containing a fasta file + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fsa,fa,fasta}" +output: + - valid_gff3: + type: file + description: | + Valid gff3 file + Structure: [ val(meta), path(gff3) ] + pattern: "*.gff3" + - log_for_invalid_gff3: + type: file + description: | + Error log if the gff3 file is invalid + Structure: [ val(meta), path(log) ] + pattern: "*.log" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/subworkflows/pfr/gff3_validate/tests/main.nf.test b/subworkflows/pfr/gff3_validate/tests/main.nf.test new file mode 100644 index 00000000..e71712b8 --- /dev/null +++ b/subworkflows/pfr/gff3_validate/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_workflow { + + name "Test Workflow GFF3_VALIDATE" + script "../main.nf" + workflow "GFF3_VALIDATE" + config "./nextflow.config" + + tag 
"subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/gff3_validate" + tag "gff3_validate" + tag "gt" + tag "gt/gff3" + tag "gt/gff3validator" + tag "custom" + tag "custom/checkgff3fastacorrespondence" + + test("sarscov2-genome_gff3-genome_fasta-all_pass") { + + when { + workflow { + """ + input[0] = Channel.of([ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ]) + input[1] = Channel.of([ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("homo_sapiens-genome_bed-genome_fasta-gt_gff3_fail") { + + when { + workflow { + """ + input[0] = Channel.of([ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ]) + input[1] = Channel.of([ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2-genome_gff3-homo_sapiens-genome_fasta-correspondence_fail") { + + when { + workflow { + """ + input[0] = Channel.of([ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ]) + input[1] = Channel.of([ [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/pfr/gff3_validate/tests/main.nf.test.snap b/subworkflows/pfr/gff3_validate/tests/main.nf.test.snap new file mode 100644 index 00000000..4d2a59b2 --- /dev/null +++ b/subworkflows/pfr/gff3_validate/tests/main.nf.test.snap @@ -0,0 +1,115 @@ +{ + 
"sarscov2-genome_gff3-genome_fasta-all_pass": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,10fe5c201e5fcddb52c3607ab3fdfb34", + "versions.yml:md5,856745cef2fff087e50ea4c0ffa3addd", + "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae" + ], + "log_for_invalid_gff3": [ + + ], + "valid_gff3": [ + [ + { + "id": "test" + }, + "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85" + ] + ], + "versions": [ + "versions.yml:md5,10fe5c201e5fcddb52c3607ab3fdfb34", + "versions.yml:md5,856745cef2fff087e50ea4c0ffa3addd", + "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae" + ] + } + ], + "timestamp": "2023-12-07T10:33:21.09887" + }, + "homo_sapiens-genome_bed-genome_fasta-gt_gff3_fail": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.error.log:md5,c096494c3cd02864eb54434c294ba382" + ] + ], + "2": [ + "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae" + ], + "log_for_invalid_gff3": [ + [ + { + "id": "test" + }, + "test.error.log:md5,c096494c3cd02864eb54434c294ba382" + ] + ], + "valid_gff3": [ + + ], + "versions": [ + "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae" + ] + } + ], + "timestamp": "2023-12-07T10:35:26.549003" + }, + "sarscov2-genome_gff3-homo_sapiens-genome_fasta-correspondence_fail": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.error.log:md5,67686ea1ef271821f1218a8fe0207e1f" + ] + ], + "2": [ + "versions.yml:md5,10fe5c201e5fcddb52c3607ab3fdfb34", + "versions.yml:md5,856745cef2fff087e50ea4c0ffa3addd", + "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae" + ], + "log_for_invalid_gff3": [ + [ + { + "id": "test" + }, + "test.error.log:md5,67686ea1ef271821f1218a8fe0207e1f" + ] + ], + "valid_gff3": [ + + ], + "versions": [ + "versions.yml:md5,10fe5c201e5fcddb52c3607ab3fdfb34", + "versions.yml:md5,856745cef2fff087e50ea4c0ffa3addd", + 
"versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae" + ] + } + ], + "timestamp": "2023-12-07T10:35:32.53584" + } +} \ No newline at end of file diff --git a/subworkflows/pfr/gff3_validate/tests/nextflow.config b/subworkflows/pfr/gff3_validate/tests/nextflow.config new file mode 100644 index 00000000..d07a8881 --- /dev/null +++ b/subworkflows/pfr/gff3_validate/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: GT_GFF3 { + ext.args = '-tidy -retainids -addintrons' + } +} diff --git a/subworkflows/pfr/gff3_validate/tests/tags.yml b/subworkflows/pfr/gff3_validate/tests/tags.yml new file mode 100644 index 00000000..60ffbf0c --- /dev/null +++ b/subworkflows/pfr/gff3_validate/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/gff3_validate: + - subworkflows/pfr/gff3_validate/** diff --git a/tests/invalid/assemblysheet.csv b/tests/invalid/assemblysheet.csv new file mode 100644 index 00000000..de7e586f --- /dev/null +++ b/tests/invalid/assemblysheet.csv @@ -0,0 +1,4 @@ +tag,fasta,gff3,monoploid_ids,synteny_labels +FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,, +TT_2021a,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/950/295/GCA_021950295.1_ASM2195029v1/GCA_021950295.1_ASM2195029v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,, +MISC,tests/invalid/invalid.fsa.gz,,, diff --git a/tests/invalid/invalid.fsa.gz b/tests/invalid/invalid.fsa.gz new file mode 100644 index 00000000..b60e9686 Binary files /dev/null and b/tests/invalid/invalid.fsa.gz differ diff --git a/tests/invalid/params.json b/tests/invalid/params.json new file mode 100644 index 00000000..dd017c16 --- /dev/null +++ b/tests/invalid/params.json @@ -0,0 +1,8 @@ +{ + "config_profile_name": "Invalid 
profile", + "config_profile_description": "Profile to test invalid files", + "input": "tests/invalid/assemblysheet.csv", + "max_cpus": 2, + "max_memory": "6.GB", + "max_time": "6.h" +} diff --git a/docs/test_files/FI1.monoploid.seqs.txt b/tests/stub/FI1.monoploid.seqs.txt similarity index 100% rename from docs/test_files/FI1.monoploid.seqs.txt rename to tests/stub/FI1.monoploid.seqs.txt diff --git a/docs/test_files/FI1.seq.list b/tests/stub/FI1.seq.labels.tsv similarity index 100% rename from docs/test_files/FI1.seq.list rename to tests/stub/FI1.seq.labels.tsv diff --git a/docs/test_files/TT_2021a.seq.list b/tests/stub/TT_2021a.seq.labels.tsv similarity index 100% rename from docs/test_files/TT_2021a.seq.list rename to tests/stub/TT_2021a.seq.labels.tsv diff --git a/tests/stub/gxdb/test/test-only.gxi b/tests/stub/gxdb/test/test-only.gxi new file mode 100644 index 00000000..e69de29b diff --git a/tests/stub/hic/Dummy_hic.R1.fq.gz b/tests/stub/hic/Dummy_hic.R1.fq.gz new file mode 100644 index 00000000..e69de29b diff --git a/tests/stub/hic/Dummy_hic.R2.fq.gz b/tests/stub/hic/Dummy_hic.R2.fq.gz new file mode 100644 index 00000000..e69de29b diff --git a/tests/stub/kraken2/k2_minusb_20231009.tar.gz b/tests/stub/kraken2/k2_minusb_20231009.tar.gz new file mode 100644 index 00000000..e69de29b diff --git a/tests/stub/stub.config b/tests/stub/stub.config new file mode 100644 index 00000000..4f753187 --- /dev/null +++ b/tests/stub/stub.config @@ -0,0 +1,35 @@ +params { + config_profile_name = 'Full stub test' + config_profile_description = 'Full test of the pipeline in stub mode' + + input = 'assets/assemblysheet.csv' + + ncbi_fcs_adaptor_skip = false + ncbi_fcs_adaptor_empire = 'euk' + + ncbi_fcs_gx_skip = false + ncbi_fcs_gx_tax_id = 12 + ncbi_fcs_gx_db_path = 'tests/stub/gxdb/test' + + busco_skip = false + busco_mode = 'geno' + busco_lineage_datasets = 'fungi_odb10 hypocreales_odb10' + + tidk_skip = false + tidk_repeat_seq = 'TTTGGG' + + lai_skip = false + + kraken2_skip = 
false + kraken2_db_path = 'tests/stub/kraken2/k2_minusb_20231009.tar.gz' + + hic = 'tests/stub/hic/Dummy_hic.R{1,2}.fq.gz' + + synteny_skip = true // GitHub action runner runs out of memory + synteny_xref_assemblies = 'assets/xrefsheet.csv' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' +} diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..787aedfe --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/version_check.sh b/version_check.sh new file mode 100755 index 00000000..e23860d3 --- /dev/null +++ b/version_check.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +config_version=$(sed -n "/^\s*version\s*=\s*'/s/version//p" nextflow.config | tr -d "=[:space:]'") +cff_version=$(sed -n '/^version: /s/version: //p' CITATION.cff | tr -d '[:space:]') + +if [[ $config_version != $cff_version ]]; then + echo 'config_version != cff_version' + exit 1 +fi + +# Check CHANGELOG version + +grep "## $config_version - " CHANGELOG.md >/dev/null \ + || (echo 'Failed to match CHANGELOG version'; exit 1) diff --git a/workflows/assembly_qc.nf b/workflows/assembly_qc.nf deleted file mode 100644 index fb4ea013..00000000 --- a/workflows/assembly_qc.nf +++ /dev/null @@ -1,196 +0,0 @@ -nextflow.enable.dsl=2 - -include {validateParams } from '../modules/local/utils.nf' -include {jsonifyParams } from '../modules/local/utils.nf' - -include { VALIDATE_FASTA } from '../subworkflows/local/validate_fasta.nf' -include { VALIDATE_GFF3 } from '../subworkflows/local/validate_gff3.nf' -include { BUSCO } from '../subworkflows/local/busco.nf' -include { TIDK } from '../subworkflows/local/tidk.nf' -include { FASTA_LTRRETRIEVER_LAI} from '../subworkflows/pfr/fasta_ltrretriever_lai/main.nf' -include { KRAKEN2 } from '../subworkflows/local/kraken2.nf' -include 
{ NCBI_FCS_ADAPTOR } from '../subworkflows/local/ncbi_fcs_adaptor.nf' -include { NCBI_FCS_GX } from '../subworkflows/local/ncbi_fcs_gx.nf' -include { HIC_PREPROCESS } from '../subworkflows/local/hic_preprocess.nf' -include { HIC_CONTACT_MAP } from '../subworkflows/local/hic_contact_map.nf' -include { SYNTENY } from '../subworkflows/local/synteny.nf' - -include { CREATE_REPORT } from '../modules/local/create_report.nf' -include { ASSEMBLATHON_STATS } from '../modules/local/assemblathon_stats.nf' -include { GENOMETOOLS_GT_STAT } from '../modules/local/genometools_gt_stat.nf' -include { BIOCODE_GFF3_STATS } from '../modules/local/biocode_gff3_stats.nf' - -validateParams(params) -def paramsAsJSON = jsonifyParams(params) - -workflow ASSEMBLY_QC { - - // VALIDATE_FASTA - Channel.fromList(params.target_assemblies) - | map { - [it[0], file(it[1], checkIfExists: true)] // [tag, assembly fasta path] - } - | VALIDATE_FASTA - | set { ch_tag_valid_fasta } - - // VALIDATE_GFF3 - Channel.fromList(params.assembly_gff3) - | map { - [it[0], file(it[1], checkIfExists: true)] // [tag, assembly gff3 path] - } - | set { ch_tag_gff3_file } - - VALIDATE_GFF3(ch_tag_gff3_file, ch_tag_valid_fasta) - | set { ch_tag_valid_gff3 } - - - // GENOMETOOLS_GT_STAT - ch_tag_valid_gff3 - | GENOMETOOLS_GT_STAT - | collect - | set { ch_genometools_gt_stats } - - - // BIOCODE_GFF3_STATS - ch_tag_valid_gff3 - | BIOCODE_GFF3_STATS - | collect - | set { ch_biocode_gff3_stats } - - - // NCBI-FCS-ADAPTOR & NCBI-FCS-GX - ch_tag_valid_fasta - | NCBI_FCS_ADAPTOR - - NCBI_FCS_GX( - ch_tag_valid_fasta, - params.ncbi_fcs_gx.db_path - ) - - NCBI_FCS_ADAPTOR - .out - .is_clean_status - | join( - NCBI_FCS_GX - .out - .is_clean_status - ) - | filter { - it[1] && it[2] // NCBI_FCS_ADAPTOR and NCBI_FCS_GX both report no contamination - } - | join( - ch_tag_valid_fasta - ) - | map { - [it[0], it[3]] // [tag, valid fasta path] - } - | set { ch_clean_target_assemblies } - - - // ASSEMBLATHON_STATS - 
ASSEMBLATHON_STATS(ch_clean_target_assemblies) - | collect - | set { ch_general_stats } - - - // BUSCO - ch_clean_target_assemblies - | combine(Channel.fromList(params.busco.lineage_datasets)) - | map { - [it[0], file(it[1], checkIfExists: true), it[2]] // [tag, assembly fasta path, busco lineage] - } - | BUSCO - - // TIDK - TIDK(ch_clean_target_assemblies) - - // FASTA_LTRRETRIEVER_LAI - ch_lai_inputs = params.lai.skip - ? Channel.empty() - : ch_clean_target_assemblies - | join( - Channel.fromList(params.lai.monoploid_seqs) - | map { - [it[0], file(it[1], checkIfExists: true)] // [tag, monoploid_seqs] - }, remainder: true - ) - | map { id, fasta, mono -> [ id, fasta, mono ?: [] ] } - - FASTA_LTRRETRIEVER_LAI( - ch_lai_inputs.map { id, fasta, mono -> [ [ id:id ], fasta ] }, - ch_lai_inputs.map { id, fasta, mono -> [ [ id:id ], mono ] }, - false // Not using this flag - ) - - ch_lai_outputs = FASTA_LTRRETRIEVER_LAI.out.lai_log - | join(FASTA_LTRRETRIEVER_LAI.out.lai_out, remainder: true) - | map { meta, log, out -> out ? 
[ log, out ] : [log] } - | collect - - // KRAKEN2 - KRAKEN2( - ch_clean_target_assemblies, - params.kraken2.db_path - ) - - // HIC_CONTACT_MAP - if(!params.hic.skip) { - if ("${params.hic.paired_reads}".find(/.*[\/].*\.(fastq|fq)\.gz/)) { - ch_paired_reads = Channel.fromFilePairs(params.hic.paired_reads, checkIfExists: true) - } else { - ch_paired_reads = Channel.fromSRA(params.hic.paired_reads) - } - } else { - ch_paired_reads = Channel.empty() - } - - HIC_PREPROCESS(ch_paired_reads) - | set { ch_cleaned_paired_reads } - - HIC_CONTACT_MAP( - ch_cleaned_paired_reads, - ch_clean_target_assemblies - ) - - // SYNTENY - if(!params.synteny.skip) { - ch_clean_target_assemblies - .join( - Channel.fromList(params.synteny.assembly_seq_list) - .map { - [it[0], file(it[1], checkIfExists: true)] // [tag, assembly seq list path] - } - ) - .set { ch_clean_target_assemblies_seq_list } - - Channel.fromList(params.synteny.xref_assemblies) - .map { - [it[0], file(it[1], checkIfExists: true), file(it[2], checkIfExists: true)] // [tag, xref assembly fasta file path, seq list path] - } - .set { ch_with_assemblies } - } else { - Channel.empty() - .set { ch_clean_target_assemblies_seq_list } - - Channel.empty() - .set { ch_with_assemblies } - } - - SYNTENY(ch_clean_target_assemblies_seq_list, ch_with_assemblies) - - // CREATE REPORT - CREATE_REPORT( - NCBI_FCS_ADAPTOR.out.reports.ifEmpty([]), - NCBI_FCS_GX.out.fcs_gx_reports.ifEmpty([]), - ch_general_stats.ifEmpty([]), - ch_genometools_gt_stats.ifEmpty([]), - ch_biocode_gff3_stats.ifEmpty([]), - BUSCO.out.list_of_outputs.ifEmpty([]), - TIDK.out.list_of_plots.ifEmpty([]), - ch_lai_outputs.ifEmpty([]), - KRAKEN2.out.list_of_outputs.ifEmpty([]), - HIC_CONTACT_MAP.out.list_of_html_files.ifEmpty([]), - SYNTENY.out.list_of_circos_plots.ifEmpty([]), - Channel.of("$paramsAsJSON") - ) -} diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf new file mode 100644 index 00000000..38217ec5 --- /dev/null +++ b/workflows/assemblyqc.nf @@ -0,0 
+1,473 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + PRINT PARAMS SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation + +WorkflowAssemblyqc.initialise(params, log) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { GT_STAT } from '../modules/pfr/gt/stat/main' +include { GFF3_VALIDATE } from '../subworkflows/pfr/gff3_validate/main' +include { NCBI_FCS_ADAPTOR } from '../modules/local/ncbi_fcs_adaptor' +include { NCBI_FCS_GX } from '../subworkflows/local/ncbi_fcs_gx' +include { ASSEMBLATHON_STATS } from '../modules/local/assemblathon_stats' +include { FASTA_BUSCO_PLOT } from '../subworkflows/local/fasta_busco_plot' +include { FASTA_LTRRETRIEVER_LAI } from '../subworkflows/pfr/fasta_ltrretriever_lai/main' +include { FASTA_KRAKEN2 } from '../subworkflows/local/fasta_kraken2' +include { FQ2HIC } from '../subworkflows/local/fq2hic' +include { FASTA_SYNTENY } from '../subworkflows/local/fasta_synteny' +include { CREATEREPORT } from '../modules/local/createreport' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// + +include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFF3 } from '../modules/nf-core/gunzip/main' +include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' +include { FASTA_EXPLORE_SEARCH_PLOT_TIDK } from '../subworkflows/nf-core/fasta_explore_search_plot_tidk/main' + +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def input_assembly_sheet_fields = 5 +def synteny_xref_assemblies_fields = 3 + +workflow ASSEMBLYQC { + + // Input channels + ch_versions = Channel.empty() + ch_input = Channel.fromSamplesheet('input') + | collect + | flatMap { WorkflowAssemblyqc.validateInput(it) } + | buffer(size: input_assembly_sheet_fields) + + ch_target_assemby_branch = ch_input + | map { tag, fasta, gff, mono_ids, labels -> + [ [ id: tag ], file(fasta, checkIfExists: true) ] + } + | branch { meta, fasta -> + gz: "$fasta".endsWith(".gz") + rest: ! "$fasta".endsWith(".gz") + } + + ch_assemby_gff3_branch = ch_input + | map { tag, fasta, gff, mono_ids, labels -> + gff + ? [ [ id: tag ], file(gff, checkIfExists: true) ] + : null + } + | branch { meta, gff -> + gz: "$gff".endsWith(".gz") + rest: ! "$gff".endsWith(".gz") + } + + ch_mono_ids = ch_input + | map { tag, fasta, gff, mono_ids, labels -> + mono_ids + ? [ [ id: tag ], file(mono_ids, checkIfExists: true) ] + : null + } + + ch_synteny_labels = ch_input + | map { tag, fasta, gff, mono_ids, labels -> + labels + ? [ [ id: tag ], file(labels, checkIfExists: true) ] + : ( + params.synteny_skip + ? 
null + : log.warn("A synteny_labels file must be provided" + + " in the input assembly sheet when running synteny analysis." + + " Synteny analysis is skipped!") + ) + } + + ch_hic_reads = ! params.hic + ? Channel.empty() + : ( + "$params.hic".find(/.*[\/].*\.(fastq|fq)\.gz/) + ? Channel.fromFilePairs(params.hic, checkIfExists: true) + : Channel.fromSRA(params.hic) + ) + | map{ sample, fq -> + [ [ id: sample, single_end: false ], fq ] + } + + ch_xref_assembly = params.synteny_skip || ! params.synteny_xref_assemblies + ? Channel.empty() + : Channel.fromSamplesheet('synteny_xref_assemblies') + | collect + | flatMap { WorkflowAssemblyqc.validateXrefAssemblies(it) } + | buffer(size: synteny_xref_assemblies_fields) + | map { tag, fa, labels -> + [ tag, file(fa, checkIfExists: true), file(labels, checkIfExists: true) ] + } + + // MODULE: GUNZIP as GUNZIP_FASTA + GUNZIP_FASTA ( ch_target_assemby_branch.gz ) + + ch_target_assembly = GUNZIP_FASTA.out.gunzip.mix(ch_target_assemby_branch.rest) + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions.first()) + + + // MODULE: GUNZIP as GUNZIP_GFF3 + GUNZIP_GFF3 ( ch_assemby_gff3_branch.gz ) + + ch_assembly_gff3 = GUNZIP_GFF3.out.gunzip.mix(ch_assemby_gff3_branch.rest) + ch_versions = ch_versions.mix(GUNZIP_GFF3.out.versions.first()) + + // MODULE: FASTAVALIDATOR + FASTAVALIDATOR ( ch_target_assembly ) + + ch_valid_target_assembly = ch_target_assembly.join(FASTAVALIDATOR.out.success_log) + | map { meta, fasta, log -> [ meta, fasta ] } + + ch_invalid_assembly_log = FASTAVALIDATOR.out.error_log + | map { meta, error_log -> + log.warn("FASTA validation failed for ${meta.id}\n${error_log.text}") + + [ meta, error_log ] + } + + ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions.first()) + + // SUBWORKFLOW: GFF3_VALIDATE + GFF3_VALIDATE ( + ch_assembly_gff3, + ch_valid_target_assembly + ) + + ch_valid_gff3 = GFF3_VALIDATE.out.valid_gff3 + + ch_invalid_gff3_log = GFF3_VALIDATE.out.log_for_invalid_gff3 + | map { meta, error_log 
-> + log.warn("GFF3 validation failed for ${meta.id}\n${error_log.text}") + + [ meta, error_log ] + } + + ch_versions = ch_versions.mix(GFF3_VALIDATE.out.versions) + + // MODULE: GT_STAT + GT_STAT ( ch_valid_gff3 ) + + ch_gt_stats = GT_STAT.out.stats + | map { meta, yml -> yml } + + ch_versions = ch_versions.mix(GT_STAT.out.versions.first()) + + // MODULE: NCBI_FCS_ADAPTOR + ch_fcs_adaptor_inputs = params.ncbi_fcs_adaptor_skip + ? Channel.empty() + : ch_valid_target_assembly + | map { meta, fa -> [ meta.id, fa ] } + + NCBI_FCS_ADAPTOR( + ch_fcs_adaptor_inputs, + params.ncbi_fcs_adaptor_empire ?: [] + ) + + ch_fcs_adaptor_report = NCBI_FCS_ADAPTOR.out.report + | map { tag, report -> + def is_clean = file(report).readLines().size < 2 + + if (! is_clean) { + log.warn(""" + Adaptor contamination detected in ${tag}. + See the report for further details. + """.stripIndent()) + } + + [ tag, report ] + } + + ch_fcs_adaptor_passed_assembly = params.ncbi_fcs_adaptor_skip + ? ( + ch_valid_target_assembly + | map { meta, fa -> [ meta.id, fa ] } + ) + : ( + ch_fcs_adaptor_report + | map { tag, report -> + [ tag, file(report).readLines().size < 2 ] + } + | filter { tag, is_clean -> is_clean } + | join( + ch_valid_target_assembly + | map { meta, fa -> [ meta.id, fa ] } + ) + | map { tag, clean, fa -> + [ tag, fa ] + } + ) + + ch_versions = ch_versions.mix(NCBI_FCS_ADAPTOR.out.versions.first()) + + // SUBWORKFLOW: NCBI_FCS_GX + ch_fcs_gx_input_assembly = params.ncbi_fcs_gx_skip + ? Channel.empty() + : ch_valid_target_assembly + | map { meta, fa -> [ meta.id, fa ] } + + NCBI_FCS_GX( + ch_fcs_gx_input_assembly, + params.ncbi_fcs_gx_db_path ?: [], + params.ncbi_fcs_gx_tax_id ?: [] + ) + + ch_fcs_gx_report = NCBI_FCS_GX.out.gx_report + | map { tag, report -> + def is_clean = file(report).readLines().size < 3 + + if (! is_clean) { + log.warn(""" + Foreign organism contamination detected in ${tag}. + See the report for further details. 
+ """.stripIndent()) + } + + [ tag, report ] + } + + ch_fcs_gx_taxonomy_plot = NCBI_FCS_GX.out.gx_taxonomy_plot + | map { tag, cut, html -> [ tag, html ] } + + ch_fcs_gx_passed_assembly = params.ncbi_fcs_gx_skip + ? ( + ch_valid_target_assembly + | map { meta, fa -> [ meta.id, fa ] } + ) + : ( + ch_fcs_gx_report + | map { tag, report -> + [ tag, file(report).readLines().size < 3 ] + } + | filter { tag, is_clean -> is_clean } + | join( + ch_valid_target_assembly + | map { meta, fa -> [ meta.id, fa ] } + ) + | map { tag, clean, fa -> + [ tag, fa ] + } + ) + + ch_versions = ch_versions.mix(NCBI_FCS_GX.out.versions) + + ch_clean_assembly = ch_fcs_adaptor_passed_assembly + | join( + ch_fcs_gx_passed_assembly + ) + | map { tag, fa, fa2 -> + [ tag, fa ] + } + + // MODULE: ASSEMBLATHON_STATS + ASSEMBLATHON_STATS( + ch_clean_assembly, + params.assemblathon_stats_n_limit + ) + + ch_assemblathon_stats = ASSEMBLATHON_STATS.out.stats + ch_versions = ch_versions.mix(ASSEMBLATHON_STATS.out.versions.first()) + + // SUBWORKFLOW: FASTA_BUSCO_PLOT + ch_busco_inputs = params.busco_skip + ? Channel.empty() + : ch_clean_assembly + | combine( + Channel.of(params.busco_lineage_datasets) + | map { it.split(' ') } + | flatten + ) + | map { tag, fa, lineage -> + [ tag, file(fa, checkIfExists: true), lineage ] + } + FASTA_BUSCO_PLOT( + ch_busco_inputs.map { tag, fa, lineage -> [ tag, fa ] }, + ch_busco_inputs.map { tag, fa, lineage -> lineage }, + params.busco_mode ?: [], + params.busco_download_path ?: [] + ) + + ch_busco_summary = FASTA_BUSCO_PLOT.out.summary + ch_busco_plot = FASTA_BUSCO_PLOT.out.plot + ch_versions = ch_versions.mix(FASTA_BUSCO_PLOT.out.versions) + + // SUBWORKFLOW: FASTA_EXPLORE_SEARCH_PLOT_TIDK + ch_tidk_inputs = params.tidk_skip + ? 
Channel.empty() + : ch_clean_assembly + | map { tag, fa -> [ [ id: tag ], fa ] } + | combine( + Channel.of(params.tidk_repeat_seq) + ) + + FASTA_EXPLORE_SEARCH_PLOT_TIDK( + ch_tidk_inputs.map { meta, fa, seq -> [ meta, fa ] }, + ch_tidk_inputs.map { meta, fa, seq -> [ meta, seq ] } + ) + + ch_tidk_outputs = FASTA_EXPLORE_SEARCH_PLOT_TIDK.out.apriori_svg + | mix(FASTA_EXPLORE_SEARCH_PLOT_TIDK.out.aposteriori_svg) + | mix(FASTA_EXPLORE_SEARCH_PLOT_TIDK.out.aposteriori_sequence) + | map { meta, file -> file } + | mix( + Channel.of("$params.tidk_repeat_seq") + | collectFile(name: 'a_priori.sequence', newLine: true) + ) + + ch_versions = ch_versions.mix(FASTA_EXPLORE_SEARCH_PLOT_TIDK.out.versions) + + // SUBWORKFLOW: FASTA_LTRRETRIEVER_LAI + ch_lai_inputs = params.lai_skip + ? Channel.empty() + : ch_clean_assembly + | join( + ch_mono_ids + | map { meta, mono -> [ meta.id, mono ] }, + remainder: true + ) + | filter { id, fasta, mono -> fasta != null } + | map { id, fasta, mono -> [ id, fasta, mono ?: [] ] } + + FASTA_LTRRETRIEVER_LAI( + ch_lai_inputs.map { id, fasta, mono -> [ [ id:id ], fasta ] }, + ch_lai_inputs.map { id, fasta, mono -> [ [ id:id ], mono ] }, + false // Not skipping LAI using this flag + ) + + ch_lai_outputs = FASTA_LTRRETRIEVER_LAI.out.lai_log + | join(FASTA_LTRRETRIEVER_LAI.out.lai_out, remainder: true) + | map { meta, log, out -> out ? [ log, out ] : [log] } + + ch_versions = ch_versions.mix(FASTA_LTRRETRIEVER_LAI.out.versions) + + // SUBWORKFLOW: FASTA_KRAKEN2 + ch_kraken2_input_assembly = params.kraken2_skip + ? Channel.empty() + : ch_clean_assembly + + ch_kraken2_db_path = params.kraken2_skip + ? Channel.empty() + : Channel.of(file(params.kraken2_db_path, checkIfExists:true)) + FASTA_KRAKEN2( + ch_kraken2_input_assembly, + ch_kraken2_db_path + ) + + ch_kraken2_plot = FASTA_KRAKEN2.out.plot + ch_versions = ch_versions.mix(FASTA_KRAKEN2.out.versions) + + // SUBWORKFLOW: FQ2HIC + ch_hic_input_assembly = ! params.hic + ? 
Channel.empty() + : ch_clean_assembly + | map { tag, fa -> [ [ id: tag ], fa ] } + + FQ2HIC( + ch_hic_reads, + ch_hic_input_assembly, + params.hic_skip_fastp, + params.hic_skip_fastqc + ) + + ch_hic_html = FQ2HIC.out.html + ch_versions = ch_versions.mix(FQ2HIC.out.versions) + + // SUBWORKFLOW: FASTA_SYNTENY + FASTA_SYNTENY( + ch_clean_assembly, + ch_synteny_labels.map { meta, txt -> [ meta.id, txt ] }, + ch_xref_assembly, + params.synteny_between_input_assemblies, + params.synteny_many_to_many_align, + params.synteny_max_gap, + params.synteny_min_bundle_size, + params.synteny_plot_1_vs_all, + params.synteny_color_by_contig + ) + + ch_synteny_plot = FASTA_SYNTENY.out.plot + ch_versions = ch_versions.mix(FASTA_SYNTENY.out.versions) + + // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + // MODULE: CREATEREPORT + CREATEREPORT( + ch_invalid_assembly_log .map { meta, file -> file }.collect().ifEmpty([]), + ch_invalid_gff3_log .map { meta, file -> file }.collect().ifEmpty([]), + ch_fcs_adaptor_report .map { meta, file -> file }.collect().ifEmpty([]), + ch_fcs_gx_report .mix(ch_fcs_gx_taxonomy_plot).map { meta, file -> file }.collect().ifEmpty([]), + ch_assemblathon_stats .collect().ifEmpty([]), + ch_gt_stats .collect().ifEmpty([]), + ch_busco_summary .mix(ch_busco_plot).collect().ifEmpty([]), + ch_tidk_outputs .collect().ifEmpty([]), + ch_lai_outputs .collect().ifEmpty([]), + ch_kraken2_plot .collect().ifEmpty([]), + ch_hic_html .collect().ifEmpty([]), + ch_synteny_plot .collect().ifEmpty([]), + CUSTOM_DUMPSOFTWAREVERSIONS .out.yml, + Channel.of ( WorkflowAssemblyqc.jsonifyParams ( params ) ), + Channel.of ( WorkflowAssemblyqc.jsonifySummaryParams ( summary_params ) ) + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log) + } + NfcoreTemplate.dump_parameters(workflow, params) + NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow12.922000 K (92.984097%)", + "single end (151 cycles)" ] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-_match") + }, + { assert snapshot(process.out.versions).match("versions_single_end") } + ) + } + } + + test("test_fastp_single_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + + assertAll( + { 
assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_single_end_stub") } + ) + } + } + + test("test_fastp_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert 
path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end") } + ) + } + } + + test("test_fastp_paired_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { 
file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end-stub") } + ) + } + } + + test("fastp test_fastp_interleaved") { + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "paired end (151 cycles + 151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 198"] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + 
process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-_match") + }, + { assert snapshot(process.out.versions).match("versions_interleaved") } + ) + } + } + + test("fastp test_fastp_interleaved-stub") { + + options '-stub' + + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_interleaved-stub") } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = 
save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { failed_read_lines.each { failed_read_line -> + { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def 
log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { failed_read2_lines.each { failed_read2_line -> + { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
"] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] + def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged") } + ) + } + } + + test("test_fastp_paired_end_merged-stub") { + + options '-stub' + + when { + params { + outdir = 
"$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
"] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] + def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..b4c0e1dd --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,330 @@ +{ + "fastp test_fastp_interleaved_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:06.123035" + }, + "test_fastp_paired_end_merged-for_stub_match": { 
+ "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:10:13.467574" + }, + "versions_interleaved": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:24.615634793" + }, + "test_fastp_single_end_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:06:00.223817" + }, + "versions_paired_end": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:42.333545689" + }, + "test_fastp_paired_end_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:03:06.431833729" + }, + "test_fastp_interleaved-_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:03:37.827323085" + }, + "test_fastp_paired_end_merged_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + 
}, + "timestamp": "2024-02-01T12:08:44.496251446" + }, + "versions_single_end_stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:27.354051299" + }, + "versions_interleaved-stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:46.535528418" + }, + "versions_single_end_trim_fail": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:03.724591407" + }, + "test_fastp_paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:07:15.398827" + }, + "versions_paired_end-stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:06.50017282" + }, + "versions_single_end": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:07.67921647" + }, + "versions_paired_end_merged_stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:47.350653154" + }, + "test_fastp_interleaved-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", 
+ "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:06.127974" + }, + "versions_paired_end_trim_fail": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:18.140484878" + }, + "test_fastp_single_end-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:06:00.244202" + }, + "test_fastp_single_end-_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:57:30.791982648" + }, + "versions_paired_end_merged_adapterlist": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:05:37.845370554" + }, + "versions_paired_end_merged": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:32.860543858" + }, + "test_fastp_single_end_trim_fail_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:41.942317" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config new file mode 100644 index 00000000..0f7849ad --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: FASTP { + ext.args = 
"--interleaved_in" + } +} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 00000000..c1afcce7 --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..9e19a74c --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,55 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml new file mode 100644 index 00000000..ee5507e0 --- /dev/null +++ b/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,57 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..70edae4d --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls