diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..4ecfbfe --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,28 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig index 3babc67..b6b3190 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,12 +8,9 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 -[*.md] -indent_size = unset - # These files are edited and tested upstream in nf-core/modules [/modules/nf-core/**] charset = unset diff --git a/.gitattributes b/.gitattributes index 050bb12..7a2dabc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 0000000..191fabd --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..8016625 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,120 @@ +# mskcc/forte: Contributing Guidelines + +Hi there! +Many thanks for taking an interest in improving mskcc/forte. + +We try to manage the required tasks for mskcc/forte using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. + +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) + +:::info +If you need help using or modifying mskcc/forte then the best place to ask is on the [FORTE issues page](https://github.com/mskcc/forte/issues). +::: + +## Contribution workflow + +If you'd like to write some code for mskcc/forte, the standard workflow is as follows: + +1. Check that there isn't already an issue about your idea in the [mskcc/forte issues](https://github.com/mskcc/forte/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. 
[Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [mskcc/forte repository](https://github.com/mskcc/forte) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged + +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). + +## Tests + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. +Typically, pull requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. +If there are any failures then the automated tests fail. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regretful event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from `patch` to directly fix this particular bug. + +## Getting help + +For further information/help, please consult the [mskcc/forte documentation](https://github.com/mskcc/forte/blob/main/docs/). + +## Pipeline contribution conventions + +To make the mskcc/forte code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel. +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflow/ci.yml`. +9. 
Update MultiQC config `assets/multiqc_config.yml` so that relevant suffixes, file name clean-up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. +10. Add a description of the output files and, if relevant, any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. + +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of Nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces environment for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/mskcc/forte/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..cfbe138 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [mskcc/forte pipeline documentation](https://github.com/mskcc/forte/tree/main/docs) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. 
+ render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 23.04.0)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of mskcc/forte _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..23e13ac --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..0e20ea1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the mskcc/forte pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..6e45266 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,25 @@ + + +## PR checklist + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool, have you followed the pipeline conventions in the [contribution docs](https://github.com/mskcc/forte/tree/master/.github/CONTRIBUTING.md)? +- [ ] If necessary, also make a PR on the mskcc/forte _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 0000000..46909bb --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,39 @@ +name: nf-core AWS full size tests +# This workflow is triggered on published releases. +# It can be additionally triggered manually with the GitHub Actions workflow dispatch button. 
+# It runs the -profile 'test_full' on AWS batch + +on: + release: + types: [published] + workflow_dispatch: +jobs: + run-tower: + name: Run AWS full tests + if: github.repository == 'mskcc/forte' + runs-on: ubuntu-latest + steps: + - name: Launch workflow via tower + uses: seqeralabs/action-tower-launch@v2 + # TODO nf-core: You can customise AWS full pipeline tests as required + # Add full size test data (but still relatively small datasets for few samples) + # on the `test_full.config` test runs with only one set of parameters + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/forte/work-${{ github.sha }} + parameters: | + { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/forte/results-${{ github.sha }}" + } + profiles: test_full + + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml new file mode 100644 index 0000000..81c898b --- /dev/null +++ b/.github/workflows/awstest.yml @@ -0,0 +1,33 @@ +name: nf-core AWS test +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. +# It runs the -profile 'test' on AWS batch + +on: + workflow_dispatch: +jobs: + run-tower: + name: Run AWS tests + if: github.repository == 'mskcc/forte' + runs-on: ubuntu-latest + steps: + # Launch workflow using Tower CLI tool action + - name: Launch workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/forte/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/forte/results-test-${{ github.sha }}" + } + profiles: test + + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 0000000..571a8f4 --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,44 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `develop` +on: + pull_request_target: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `develop` or any `patch` branches + - name: Check PRs + if: github.repository == 'mskcc/forte' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name }} == mskcc/forte ]] && [[ $GITHUB_HEAD_REF == "develop" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `develop` + * This CI test will remain failed until 
you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `develop` branch. + + You do not need to close this PR; you can change the target branch to `develop` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ac89a55 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,43 @@ +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - develop + pull_request: + release: + types: [published] + +env: + NXF_ANSI_LOG: false + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: Run pipeline with test data + # Only run on push if this is the nf-core develop branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'mskcc/forte') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.04.0" + - "latest-everything" + steps: + - name: Check out pipeline code + uses: actions/checkout@v3 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Run pipeline with test data + # TODO nf-core: You can customise CI pipeline run tests as required + # For example: adding multiple test runs with different parameters + # Remember that you can parallelise this by using strategy.matrix + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 0000000..694e90e --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+ days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/code-linting.yml b/.github/workflows/code-linting.yml deleted file mode 100644 index fccb7e0..0000000 --- a/.github/workflows/code-linting.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Code Linting -on: - push: - branches: [master] - pull_request: - branches: [main, develop] - -# Cancel if a newer run is started -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - Prettier: - runs-on: ubuntu-latest - steps: - - name: Check out repository - uses: actions/checkout@v3 - - - name: Install NodeJS - uses: actions/setup-node@v2 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check . - - EditorConfig: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v2 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test) diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 0000000..037849b --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,55 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'mskcc/forte' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v3 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - uses: actions/setup-node@v3 + + - name: Install Prettier + run: npm install -g prettier @prettier/plugin-php + + # Check that we actually need to fix something + - name: Run 'prettier --check' + id: prettier_status + run: | + if prettier --check ${GITHUB_WORKSPACE}; then + echo "result=pass" >> $GITHUB_OUTPUT + else + echo "result=fail" >> $GITHUB_OUTPUT + fi + + - name: Run 'prettier --write' + if: steps.prettier_status.outputs.result == 'fail' + run: prettier --write ${GITHUB_WORKSPACE} + + - name: Commit & push changes + if: steps.prettier_status.outputs.result == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix linting with Prettier" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..a16fa5d --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,109 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
+on: + push: + branches: + - develop + pull_request: + release: + types: [published] + +jobs: + EditorConfig: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + + - name: Install editorconfig-checker + run: npm install -g editorconfig-checker + + - name: Run ECLint check + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') + + Prettier: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + + - name: Install Prettier + run: npm install -g prettier + + - name: Run Prettier --check + run: prettier --check ${GITHUB_WORKSPACE} + + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + + nf-core: + if: ${{ false }} # disable for now + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@v3 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + architecture: "x64" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core + + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 0000000..0bbcd30 --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@v2 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: 
marocchino/sticky-pull-request-comment@v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml new file mode 100644 index 0000000..6ad3392 --- /dev/null +++ b/.github/workflows/release-announcments.yml @@ -0,0 +1,68 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.0.2 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ec..25488dc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 93ddfc7..3805dc8 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,29 +1 @@ repository_type: pipeline -lint: - files_exist: - - CODE_OF_CONDUCT.md - - assets/nf-core-forte_logo_light.png - - docs/images/nf-core-forte_logo_light.png - - docs/images/nf-core-forte_logo_dark.png - - .github/ISSUE_TEMPLATE/config.yml - - .github/workflows/awstest.yml - - .github/workflows/awsfulltest.yml - - .github/ISSUE_TEMPLATE/bug_report.yml - - .github/workflows/branch.yml - - .github/workflows/ci.yml - - .github/workflows/linting_comment.yml - - .github/workflows/linting.yml - - conf/igenomes.config - nextflow_config: - - manifest.name - - manifest.homePage - - process.cpus - - process.memory - - process.time - - custom_config - multiqc_config: - - report_comment - files_unchanged: - - .github/ISSUE_TEMPLATE/bug_report.yml - readme: - - nextflow_badge diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..0c31cdb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v2.7.1" + hooks: + - id: prettier diff --git a/.prettierignore b/.prettierignore index eb74a57..437d763 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,5 +1,6 @@ email_template.html adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -8,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 4480533..bb7fedf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v0.0.1 - [date] +## v1.0.1 - [date] Initial release of mskcc/forte, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100644 index 017666c..0000000 --- a/CITATION.cff +++ /dev/null @@ -1,56 +0,0 @@ -cff-version: 1.2.0 -message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" -authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Garcia - given-names: Maxime Ulysse - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven -title: "The nf-core framework for community-curated bioinformatics pipelines." 
-version: 2.4.1 -doi: 10.1038/s41587-020-0439-x -date-released: 2022-05-16 -url: https://github.com/nf-core/tools -prefered-citation: - type: article - authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Garcia - given-names: Maxime Ulysse - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven - doi: 10.1038/s41587-020-0439-x - journal: nature biotechnology - start: 276 - end: 278 - title: "The nf-core framework for community-curated bioinformatics pipelines." - issue: 3 - volume: 38 - year: 2020 - url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/CITATIONS.md b/CITATIONS.md index 4a31680..6bead84 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -26,8 +26,6 @@ > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. -- [Fusion-report](https://github.com/pintoa1-mskcc/fusion-report) - ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) @@ -44,5 +42,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..c089ec7 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,182 @@ +# Code of Conduct at nf-core (v1.4) + +## Our Pledge + +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: + +- Age +- Ability +- Body size +- Caste +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Neurodiversity +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status + +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. + +## Preamble + +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: + +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). 
+ +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. + +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. + +## Our Responsibilities + +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. + +## When and where does this Code of Conduct apply? + +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. +- Representing nf-core on social media. This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. 
+- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Accept feedback, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) +- Take breaks when you feel like you need them. +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing”, i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. + +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. + +## Procedures for reporting CoC violations + +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. + +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. 
+ +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. + +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. + +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. 
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. + +## Attribution and Acknowledgements + +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) + +## Changelog + +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 + +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/README.md b/README.md index 0075129..6e2e5dd 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,7 @@ - **T**ranscriptome - **E**lements/**E**xpression -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - -## Pipeline summary +### Features 1. Read pre-processing 1. Trimming @@ -29,41 +27,36 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. -3. 
Download the pipeline and test it on a minimal dataset with a single command: - - ```bash - nextflow run mskcc/forte -profile test,singularity --outdir <OUTDIR> - ``` - - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +## Usage - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +:::note +If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +with `-profile test` before running the workflow on actual data. +::: -4. Start running your own analysis! +Now, you can run the pipeline using: - ```bash - nextflow run mskcc/forte --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile singularity - ``` +```bash +git clone git@github.com:mskcc/forte.git +nextflow run forte/main.nf \ + -profile <docker/singularity/.../institute> \ + --input samplesheet.csv \ + --outdir <OUTDIR> +``` - The input file should contain IDs, paths and meta-data pertaining to each sample. The following is a description of each field that can be used. Fields that do not have a default value are required; those that do are not. +:::warning +Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +::: - | Header | Type | Values | Defaults | - | :--------- | :---------- | :---------------------- | :------- | - | sample | `str` | | (none) | - | single_end | `bool` | `true`/`false` | `false` | - | umi | `str`/`int` | `NNNXX`/`3` | `''` | - | umi2 | `str`/`int` | `NNNXX`/`3` | `''` | - | strand | `str` | `yes`/`no`/`reverse` | `no` | - | bait | `str` | `idt_v2/idt_v1/agilent` | `''` | - | fastq_1 | `str` | `/path/to/*fastq.gz` | (none) | - | fastq_2 | `str` | `/path/to/*fastq.gz` | (none) | +For more details and further functionality, please refer to the [usage documentation](docs/usage.md).
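For reference, a minimal `--input` samplesheet consistent with the field table above might look like the following sketch (sample names and FASTQ paths are illustrative placeholders; the `umi`, `strand` and `bait` values are drawn from the allowed values listed in the table):

```csv
sample,single_end,umi,umi2,strand,bait,fastq_1,fastq_2
sample1,false,3,3,reverse,idt_v2,/path/to/sample1_R1.fastq.gz,/path/to/sample1_R2.fastq.gz
sample2,false,,,no,,/path/to/sample2_R1.fastq.gz,/path/to/sample2_R2.fastq.gz
```

Columns with documented defaults (`single_end`, `umi`, `umi2`, `strand`, `bait`) can be left empty, in which case the defaults from the table apply.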
- If you are running on juno, chain the `juno` profile (i.e. `-profile singularity,juno`) to take advantage of local resources on juno. +## Pipeline output -5. For more information on running the pipeline, please see the [detailed usage documentation](docs/usage.md). +For more details about the output files and reports, please refer to the +[output documentation](docs/output.md). ## Credits @@ -72,7 +65,7 @@ mskcc/forte was originally written by Anne Marie Noronha . We thank the following people for their extensive assistance in the development of this pipeline: - Allison Richards -- Alexandra Pinto +- Alexandria Pinto - Yixiao Gong We also thank the following contributors: @@ -89,9 +82,14 @@ If you would like to contribute to this pipeline, please see the [contributing g ## Citations + + + + + An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. -This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). +You can cite the `nf-core` publication as follows: > **The nf-core framework for community-curated bioinformatics pipelines.** > diff --git a/assets/analysis_multiqc_config.yml b/assets/analysis_multiqc_config.yml new file mode 100644 index 0000000..ce46189 --- /dev/null +++ b/assets/analysis_multiqc_config.yml @@ -0,0 +1,29 @@ +show_analysis_paths: False +extra_fn_clean_trim: + - "htseq.count" +export_plots: true + +use_filename_as_sample_name: + - kallisto + +custom_data: + htseq_expression_genstats: + plot_type: "generalstats" + pconfig: + - GeneCount: + title: "Gene Count" + format: "{:,.0f}" + description: "HTSeq: Number of genes with detected expression" + kallisto_expression_genstats: + plot_type: "generalstats" + pconfig: + - GeneCount: + title: "Gene Count" + format: "{:,.0f}" + description: "Kallisto: Number of genes with detected expression" + +sp: + htseq_expression_genstats: + fn: "*.htseq.summary.txt" + kallisto_expression_genstats: + fn: "*.kallisto.customsummary.txt" diff --git a/assets/email_template.txt b/assets/email_template.txt index 43b3352..426c045 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -1,3 +1,11 @@ +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/forte v${version} +---------------------------------------------------- Run Name: $runName <% if (success){ @@ -29,3 +37,4 @@ Pipeline Configuration: -- mskcc/forte https://github.com/mskcc/forte + diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 284afab..4bd50cb 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -5,12 +5,17 @@ section_href: "https://github.com/mskcc/forte" plot_type: "html" data: |

  <h4>Methods</h4>
- <p>Data was processed using mskcc/forte v${workflow.manifest.version} ${doi_text}.</p>
+ <p>Data was processed using mskcc/forte v${workflow.manifest.version} ${doi_text}, utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
  <pre><code>${workflow.commandLine}</code></pre>
+ <p>${tool_citations}</p>
  <h4>References</h4>
  <ul>
-   <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
+   <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+   <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+   <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+   <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+   ${tool_bibliography}
  </ul>
  <div class="alert alert-info">
    <h5>Notes:</h5>
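As a side note on how this template is consumed: the `${...}` placeholders above are filled in by the pipeline's Groovy helper code (see the `WorkflowForte.methodsDescriptionText` changes later in this diff) via `groovy.text.SimpleTemplateEngine`. A minimal, hypothetical sketch of that rendering step, with an entirely hand-built binding map, might look like this:

```groovy
import groovy.text.SimpleTemplateEngine

// Hypothetical, minimal rendering of the template above. The real pipeline
// builds the binding from live workflow metadata in
// WorkflowForte.methodsDescriptionText(); every value below is a stand-in.
def meta = [
    workflow: [
        manifest   : [version: '1.0.0'],
        nextflow   : [version: '23.04.0'],
        commandLine: 'nextflow run mskcc/forte ...'
    ],
    doi_text         : '',
    nodoi_text       : '',
    tool_citations   : '',
    tool_bibliography: ''
]

// Read the template text and substitute the ${...} placeholders.
def template_text = new File('assets/methods_description_template.yml').text
def rendered = new SimpleTemplateEngine().createTemplate(template_text).make(meta).toString()
println rendered
```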
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ce46189..49e35a6 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,29 +1,15 @@ -show_analysis_paths: False -extra_fn_clean_trim: - - "htseq.count" -export_plots: true +report_comment: > -use_filename_as_sample_name: - - kallisto + This report has been generated by the nf-core/forte + analysis pipeline. For information about how to interpret these results, please see the + documentation. -custom_data: - htseq_expression_genstats: - plot_type: "generalstats" - pconfig: - - GeneCount: - title: "Gene Count" - format: "{:,.0f}" - description: "HTSeq: Number of genes with detected expression" - kallisto_expression_genstats: - plot_type: "generalstats" - pconfig: - - GeneCount: - title: "Gene Count" - format: "{:,.0f}" - description: "Kallisto: Number of genes with detected expression" +report_section_order: + "nf-core-forte-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-forte-summary": + order: -1002 -sp: - htseq_expression_genstats: - fn: "*.htseq.summary.txt" - kallisto_expression_genstats: - fn: "*.kallisto.customsummary.txt" +export_plots: true diff --git a/assets/nf-core-forte_logo_light.png b/assets/nf-core-forte_logo_light.png new file mode 100644 index 0000000..c6bdf27 Binary files /dev/null and b/assets/nf-core-forte_logo_light.png differ diff --git a/assets/pipeline_multiqc_config.yml b/assets/pipeline_multiqc_config.yml deleted file mode 100644 index dee69fd..0000000 --- a/assets/pipeline_multiqc_config.yml +++ /dev/null @@ -1,14 +0,0 @@ -report_comment: > - This report has been generated by the mskcc/forte - analysis pipeline. -report_section_order: - "mskcc-forte-methods-description": - order: -1000 - software_versions: - order: -1001 - "mskcc-forte-summary": - order: -1002 - -export_plots: true - -show_analysis_paths: False diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index 22d8cae..3bb12be 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -9,12 +9,12 @@ Content-Type: text/html; charset=utf-8 $email_html --nfcoremimeboundary -Content-Type: image/png;name="anoronh4-forte_logo.png" +Content-Type: image/png;name="mskcc-forte_logo.png" Content-Transfer-Encoding: base64 Content-ID: -Content-Disposition: inline; filename="anoronh4-forte_logo_light.png" +Content-Disposition: inline; filename="mskcc-forte_logo_light.png" -<% out << new File("$projectDir/assets/anoronh4-forte_logo_light.png"). +<% out << new File("$projectDir/assets/mskcc-forte_logo_light.png"). bytes. encodeBase64(). toString(). diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 0000000..27df516 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/forte v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? 
"_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/check_maf_samplesheet.py b/bin/check_maf_samplesheet.py deleted file mode 100755 index 31c1fff..0000000 --- a/bin/check_maf_samplesheet.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".maf", - ) - - def __init__( - self, - sample_col="sample", - maf_col="maf", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - maf_col (str): The name of the column that contains the MAF file path - (default "maf"). - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._maf_col = maf_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_maf(row) - self._seen.add((row[self._sample_col], row[self._maf_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_maf(self, row): - """Assert that the MAF entry is non-empty and has the right format.""" - if len(row[self._maf_col]) <= 0: - raise AssertionError("The MAF file is required.") - self._validate_maf_format(row[self._maf_col]) - - def _validate_maf_format(self, filename): - """Assert that a given filename has one of the expected MAF extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The MAF file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and maf filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. 
- - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and MAF must be unique.") - seen = set() - for row in self.modified: - sample = row[self._sample_col] - if sample in seen: - raise AssertionError("The sample name must be unique.") - else: - seen.add(sample) - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical("The given sample sheet does not appear to contain a header.") - ## Commenting out this line because it sometimes fails when it shouldn't, and there is a downstream check for header names that is essentially redundant. - #sys.exit(1) - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by forte. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,maf - SAMPLE_PE,DNA_SAMPLE_PE.maf - - """ - required_columns = {"sample", "maf"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
- with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 0ebb2b1..43652ec 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -165,10 +165,6 @@ def sniff_format(handle): peek = read_head(handle) handle.seek(0) sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical("The given sample sheet does not appear to contain a header.") - ## Commenting out this line because it sometimes fails when it shouldn't, and there is a downstream check for header names that is essentially redundant. - #sys.exit(1) dialect = sniffer.sniff(peek) return dialect diff --git a/conf/base.config b/conf/base.config index 3c18dcb..c799813 100644 --- a/conf/base.config +++ b/conf/base.config @@ -8,6 +8,7 @@ ---------------------------------------------------------------------------------------- */ + params { reference_base = "${params.outdir}/forte_references" } @@ -18,11 +19,17 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' } + + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'ignore' } maxRetries = 3 maxErrors = '-1' // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } diff --git a/conf/modules.config b/conf/modules.config index 7a748d4..07afea1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -180,13 +180,6 @@ process { 'mm39' } - withName: FUSIONREPORT_DOWNLOAD { - ext.when = ! 
params.skip_fusion_report - secret = ["COSMIC_PASSWD"] - storeDir = { "${params.reference_base}/fusionreport" } - ext.args = "--cosmic_usr ${params.cosmic_usr} --cosmic_passwd \$COSMIC_PASSWD" - } - withName: MERGE_CFF { publishDir = [ enabled: false diff --git a/docs/images/nf-core-forte_logo_dark.png b/docs/images/nf-core-forte_logo_dark.png new file mode 100644 index 0000000..7631d71 Binary files /dev/null and b/docs/images/nf-core-forte_logo_dark.png differ diff --git a/docs/images/nf-core-forte_logo_light.png b/docs/images/nf-core-forte_logo_light.png new file mode 100644 index 0000000..c6bdf27 Binary files /dev/null and b/docs/images/nf-core-forte_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 5f801ab..2dfbd58 100644 --- a/docs/output.md +++ b/docs/output.md @@ -4,7 +4,7 @@ This document describes the output produced by the FORTE pipeline. -The directories listed below will be created in the results directory (after the pipeline has finished. All paths are relative to the top-level results directory. +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. ## Pipeline overview @@ -229,6 +229,7 @@ FORTE uses a custom script to output a MAF file that combines all original colum - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. diff --git a/docs/usage.md b/docs/usage.md index df32357..fd58167 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -8,12 +8,18 @@ This document details specific command-line options and how to arrange input dat ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with at least 2 columns. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with at least 2 columns, and a header row as shown in the examples below. -| sample | fastq_1 | -| :------ | :--------------------------- | -| sampleA | /path/to/sampleA_R1.fastq.gz | -| sampleB | /path/to/sampleB_R1.fastq.gz | +```bash +--input '[path to samplesheet file]' +``` + +Minimal example: + +```console +sample,fastq_1 +sampleA,/path/to/sampleA_R1.fastq.gz +``` ### Multiple runs of the same sample @@ -72,7 +78,7 @@ For each sample in this input, FORTE will use the corresponding RNA bam to calcu The typical command for running the pipeline is as follows: ```bash -nextflow run mskcc/forte --input samplesheet.csv --outdir --genome GRCh37 -profile singularity +nextflow run mskcc/forte --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile singularity ``` This will launch the pipeline with the `singularity` configuration profile. See below for more information about profiles. 
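Profiles can also be chained. As a sketch (combining the `singularity` and `juno` profiles described elsewhere in these docs, with the same illustrative paths as above):

```bash
# Order matters: later profiles override earlier ones, so juno's
# LSF/cluster settings are layered on top of the singularity profile.
nextflow run mskcc/forte \
    --input ./samplesheet.csv \
    --outdir ./results \
    --genome GRCh37 \
    -profile singularity,juno
```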
@@ -86,6 +92,31 @@ work            # Directory containing the nextflow working files
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```
+If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.
+
+Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
+
+:::warning
+Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+:::
+
+The above pipeline run specified with a params file in yaml format:
+
+```bash
+nextflow run nf-core/forte -profile docker -params-file params.yaml
+```
+
+with `params.yaml` containing:
+
+```yaml
+input: './samplesheet.csv'
+outdir: './results/'
+genome: 'GRCh37'
+<...>
+```
+
+You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
+
 ### Updating the pipeline

 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

@@ -120,27 +151,42 @@ Forte performs QC analysis on targeted assays using Picard's `CollectHsMetrics`

 It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.

-First, go to the [mskcc/forte releases page](https://github.com/mskcc/forte/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`.
+First, go to the [mskcc/forte releases page](https://github.com/mskcc/forte/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
+
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.

-This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future.
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
+
+:::tip
+If you wish to share such a profile (such as uploading it as supplementary material for academic publications), make sure to NOT include cluster-specific paths to files, nor institution-specific profiles.
+:::

 ## Core Nextflow arguments

-> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
+:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. + +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters - `docker` - A generic configuration profile to be used with [Docker](https://docker.com/) - `singularity` @@ -151,11 +197,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. 
-- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. - `juno` - Configurations for running the pipeline on the juno server. - Includes configurations for using the LSF job scheduler and appropriate resource limits for the system. @@ -176,96 +221,35 @@ Specify the path to a specific config file (this is a core Nextflow command). Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' +### Custom Containers -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). 
-The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. -### Updating containers +### Custom Tool Arguments -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. -1. 
Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. - - For Docker: +### nf-core/configs - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. - - For Singularity: +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). - - For Conda: +## Azure Resource Requests - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). ## Running in the background diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index b3d092f..0000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,529 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
-// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - 'version', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 45992fa..01b8653 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,7 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput class NfcoreTemplate { @@ -32,6 +33,25 @@ class NfcoreTemplate { } } + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + // // Construct and send completion email // @@ -61,7 +81,7 @@ class NfcoreTemplate { misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = workflow.manifest.version + email_fields['version'] = NfcoreTemplate.version(workflow) email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -109,7 +129,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -146,10 +166,10 @@ class NfcoreTemplate { } // - // Construct and send adaptive card - // https://adaptivecards.io + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack // - public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + public static void IM_notification(workflow, params, summary_params, projectDir, log) { def hook_url = params.hook_url def summary = [:] @@ -170,7 +190,7 @@ class NfcoreTemplate { misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp def msg_fields = [:] - msg_fields['version'] = workflow.manifest.version + msg_fields['version'] = NfcoreTemplate.version(workflow) msg_fields['runName'] = workflow.runName msg_fields['success'] = workflow.success msg_fields['dateComplete'] = workflow.complete @@ -178,13 +198,16 @@ class NfcoreTemplate { msg_fields['exitStatus'] = workflow.exitStatus msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") msg_fields['projectDir'] = workflow.projectDir msg_fields['summary'] = summary << misc_fields // Render the JSON template def engine = new groovy.text.GStringTemplateEngine() - def hf = new File("$projectDir/assets/adaptivecard.json") + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") def json_template = engine.createTemplate(hf).make(msg_fields) def json_message = json_template.toString() @@ -200,6 +223,21 @@ class NfcoreTemplate { } } + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def output_pf = new File(output_d, "params_${timestamp}.json") + def jsonStr = JsonOutput.toJson(params) + output_pf.text = JsonOutput.prettyPrint(jsonStr) + } + // // Print pipeline summary on completion // @@ -209,7 +247,7 @@ class NfcoreTemplate { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" } } else { log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" @@ -297,10 +335,16 @@ class NfcoreTemplate { // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) String.format( """\n ${dashedLine(monochrome_logs)} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} ${dashedLine(monochrome_logs)} """.stripIndent() ) diff --git a/lib/WorkflowForte.groovy b/lib/WorkflowForte.groovy index 2246d65..b8acd10 100755 --- a/lib/WorkflowForte.groovy +++ b/lib/WorkflowForte.groovy @@ -2,6 +2,7 @@ // This file holds several functions specific to the workflow/forte.nf in the mskcc/forte pipeline // +import nextflow.Nextflow import groovy.text.SimpleTemplateEngine class WorkflowForte { @@ -11,10 +12,11 @@ class WorkflowForte { // public static void initialise(params, log) { + genomeExistsError(params, log) + if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) + Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." } } @@ -45,19 +47,76 @@ class WorkflowForte { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 
2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>
Andrews S. (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>
If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.</li>" + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html - }} + } + + // + // Exit pipeline if incorrect --genome key provided + // + private static void genomeExistsError(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.error(error_string) + } + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 7d40ade..4ba1cee 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the main.nf workflow in the mskcc/forte pipeline // +import nextflow.Nextflow + class WorkflowMain { // @@ -18,55 +20,24 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --fasta reference.fa -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) } - // Print parameter summary log to screen - - log.info paramsSummaryLog(workflow, params, log) - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) // Check that conda channels are set-up correctly - if (params.enable_conda) { + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size()
>= 1) { Utils.checkCondaChannels(log) } @@ -75,11 +46,9 @@ class WorkflowMain { // Check input has been provided if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) + Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") } } - // // Get attribute from genome config file e.g. fasta // @@ -91,5 +60,4 @@ class WorkflowMain { } return null } - } diff --git a/main.nf b/main.nf index 5dfe5f6..c0a5e68 100644 --- a/main.nf +++ b/main.nf @@ -11,23 +11,46 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') -params.starfusion_url = WorkflowMain.getGenomeAttribute(params, 'starfusion_url') -params.refflat = WorkflowMain.getGenomeAttribute(params, 'refflat') -params.baits = WorkflowMain.getGenomeAttribute(params, 'baits') -params.cdna = WorkflowMain.getGenomeAttribute(params, 'cdna') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +params.starfusion_url = WorkflowMain.getGenomeAttribute(params, 'starfusion_url') +params.refflat = WorkflowMain.getGenomeAttribute(params, 'refflat') +params.baits = WorkflowMain.getGenomeAttribute(params, 'baits') +params.cdna = WorkflowMain.getGenomeAttribute(params, 'cdna') params.arriba_blacklist = WorkflowMain.getGenomeAttribute(params, 'arriba_blacklist') params.arriba_known_fusions = WorkflowMain.getGenomeAttribute(params, 'arriba_known_fusions') params.arriba_protein_domains = WorkflowMain.getGenomeAttribute(params, 'arriba_protein_domains') -params.metafusion_blocklist = WorkflowMain.getGenomeAttribute(params, 'metafusion_blocklist') -params.metafusion_gene_bed = WorkflowMain.getGenomeAttribute(params, 'metafusion_gene_bed') -params.metafusion_gene_info = WorkflowMain.getGenomeAttribute(params, 'metafusion_gene_info') -params.ensembl_version = WorkflowMain.getGenomeAttribute(params, 'ensembl_version') +params.metafusion_blocklist = WorkflowMain.getGenomeAttribute(params, 'metafusion_blocklist') +params.metafusion_gene_bed = WorkflowMain.getGenomeAttribute(params, 'metafusion_gene_bed') +params.metafusion_gene_info = WorkflowMain.getGenomeAttribute(params, 'metafusion_gene_info') +params.ensembl_version = WorkflowMain.getGenomeAttribute(params, 'ensembl_version') + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE & PRINT PARAMETER SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} WorkflowMain.initialise(workflow, params, log) diff 
--git a/modules.json b/modules.json index fa4c97f..c43247b 100644 --- a/modules.json +++ b/modules.json @@ -22,7 +22,7 @@ }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "fastp": { @@ -154,12 +154,12 @@ }, "umitools/dedup": { "branch": "master", - "git_sha": "7297204bf49273300a3dbfa4b7a4027c8683f1bd", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "umitools/extract": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"], "patch": "modules/nf-core/umitools/extract/umitools-extract.diff" } diff --git a/modules/local/fusionreport/download/main.nf b/modules/local/fusionreport/download/main.nf deleted file mode 100644 index 83f7aa5..0000000 --- a/modules/local/fusionreport/download/main.nf +++ /dev/null @@ -1,32 +0,0 @@ -process FUSIONREPORT_DOWNLOAD { - tag 'fusionreport' - label 'process_medium' - - // Note: 2.7X indices incompatible with AWS iGenomes. - conda 'bioconda::star=2.7.9a' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'cmopipeline/fusion-report:0.0.1' : - 'cmopipeline/fusion-report:0.0.1' }" - //'docker.io/rannickscilifelab/fusion-report:2.1.5updated' : - //'docker.io/rannickscilifelab/fusion-report:2.1.5updated' }" - - output: - path "db" , emit: reference - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - fusion_report download \\ - $args \\ - db - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fusion_report: \$(fusion_report --version | sed 's/fusion-report //') - END_VERSIONS - """ -} diff --git a/modules/local/fusionreport/download/meta.yml b/modules/local/fusionreport/download/meta.yml deleted file mode 100644 index 21a15a8..0000000 --- a/modules/local/fusionreport/download/meta.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: fusionreport_download -description: Build DB for fusionreport -keywords: - - sort -tools: - - fusioncatcher: - description: Build DB for fusionreport - homepage: https://github.com/ndaniel/fusioncatcher/ - documentation: https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md - tool_dev_url: https://github.com/ndaniel/fusioncatcher/ - doi: "10.1101/011650" - licence: ["GPL v3"] - -input: - - username: - type: value - description: Organism for which the data is downloaded from Ensembl database and built - pattern: "*" - - passwd: - type: value - description: Organism for which the data is downloaded from Ensembl database and built - pattern: "*" - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reference: - type: directory - description: directory containing the genome resource files required for fusioncatcher - pattern: "fusioncatcher-genome" - -authors: - - "@praveenraj2018" diff --git a/modules/local/fusionreport/run/main.nf b/modules/local/fusionreport/run/main.nf deleted file mode 100644 index ce9c10f..0000000 --- a/modules/local/fusionreport/run/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process FUSIONREPORT { - tag "$meta.id" - label 'process_low' - - // Note: 2.7X indices incompatible with AWS iGenomes. 
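For context on the removal: the FUSIONREPORT process deleted below assembles its per-caller command-line flags with a Groovy collect/join idiom (visible in its script block further down). A minimal stand-alone sketch of that idiom, using hypothetical caller names, weights and file names:

    // Hypothetical inputs; in the process these come from the input tuple.
    def callers = ['arriba', 'starfusion']
    def weights = [50, 50]
    def fusions = ['a.fusions.tsv', 'sf.predictions.tsv']
    // For each caller, emit "--<caller> <file> --<caller>_weight <weight>".
    def caller_inputs = (1..callers.size()).collect {
        "--${callers[it-1]} ${fusions[it-1]} --${callers[it-1]}_weight ${weights[it-1]}"
    }.join(' ')
    assert caller_inputs == '--arriba a.fusions.tsv --arriba_weight 50 --starfusion sf.predictions.tsv --starfusion_weight 50'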
- conda 'bioconda::star=2.7.9a' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'cmopipeline/fusion-report:0.0.1' : - 'cmopipeline/fusion-report:0.0.1' }" - - input: - tuple val(meta), val(callers), val(weights), path(fusions) - path(fusionreport_ref) - - output: - path "versions.yml" , emit: versions - tuple val(meta), path("*fusionreport.tsv") , emit: fusion_list - tuple val(meta), path("*fusionreport_filtered.tsv"), emit: fusion_list_filtered - tuple val(meta), path("*.html") , emit: report - tuple val(meta), path("*.csv") , optional:true , emit: fusionreport_csv - tuple val(meta), path("*.json"), optional:true , emit: fusionreport_json - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def caller_inputs = (1..callers.size()).collect{"--${callers[it-1]} ${fusions[it-1]} --${callers[it-1]}_weight ${weights[it-1]} "}.join(" ") - def prefix = task.ext.prefix ?: "${meta.id}" - """ - fusion_report run \\ - $prefix \\ - . \\ - $fusionreport_ref \\ - $caller_inputs \\ - $args \\ - - mv fusion_list.tsv ${prefix}.fusionreport.tsv - mv fusion_list_filtered.tsv ${prefix}.fusionreport_filtered.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fusion_report: \$(fusion_report --version | sed 's/fusion-report //') - END_VERSIONS - """ -} diff --git a/modules/local/fusionreport/run/meta.yml b/modules/local/fusionreport/run/meta.yml deleted file mode 100644 index f3d5cd8..0000000 --- a/modules/local/fusionreport/run/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: fusionreport -description: fusionreport -keywords: - - sort -tools: - - fusionreport: - description: fusionreport - homepage: https://github.com/matq007/fusion-report - documentation: https://matq007.github.io/fusion-report/#/ - doi: "10.1101/011650" - licence: ["GPL v3"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reference: - type: path - description: Path to fusionreport references - pattern: "*" - - arriba_fusions: - type: path - description: File - pattern: "*.fusions.tsv" - - pizzly_fusions: - type: path - description: File containing fusions from pizzly - pattern: "*.pizzly.txt" - - squid_fusions: - type: path - description: File containing fusions from squid - pattern: "*.annotated.txt" - - starfusion_fusions: - type: path - description: File containing fusions from STARfusion - pattern: "*.starfusion.fusion_predictions.tsv" - - fusioncatcher_fusions: - type: path - description: File containing fusions from fusioncatcher - pattern: "*.fusions.tsv" - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fusion_list: - type: file - description: File containing the summary of all fusions fed-in - pattern: "*.tsv" - - report: - type: file - description: HTML files - pattern: "*.html" - -authors: - - "@praveenraj2018, @rannick" diff --git a/modules/local/getbasecountsmultisample/main.nf b/modules/local/getbasecountsmultisample/main.nf index 355af64..64bdd36 100644 --- a/modules/local/getbasecountsmultisample/main.nf +++ b/modules/local/getbasecountsmultisample/main.nf @@ -4,8 +4,8 @@ process GETBASECOUNTSMULTISAMPLE { conda "bioconda::bamtools=2.5.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'cmopipeline/getbasecountsmultisample:1.2.4' : - 'cmopipeline/getbasecountsmultisample:1.2.4' }" + 'docker://cmopipeline/getbasecountsmultisample:1.2.4' : + 'docker://cmopipeline/getbasecountsmultisample:1.2.4' }" input: tuple val(meta), path(bam), path(bai), path(variants) diff --git a/modules/local/maf_samplesheet_check/resources/usr/bin/check_maf_samplesheet.py b/modules/local/maf_samplesheet_check/resources/usr/bin/check_maf_samplesheet.py index 31c1fff..3a9bd43 100755 --- a/modules/local/maf_samplesheet_check/resources/usr/bin/check_maf_samplesheet.py +++ b/modules/local/maf_samplesheet_check/resources/usr/bin/check_maf_samplesheet.py @@ -24,9 +24,7 @@ class RowChecker: """ - VALID_FORMATS = ( - ".maf", - ) + VALID_FORMATS = (".maf",) def __init__( self, @@ -134,7 +132,7 @@ def sniff_format(handle): if not sniffer.has_header(peek): logger.critical("The given sample sheet does not appear to contain a header.") ## Commenting out this line because it sometimes fails when it shouldn't, and there is a downstream check for header names that is essentially redundant. - #sys.exit(1) + # sys.exit(1) dialect = sniffer.sniff(peek) return dialect diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index beaac8d..9ff7e27 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,10 +1,11 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" + label 'process_single' conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path samplesheet @@ -13,6 +14,9 @@ process SAMPLESHEET_CHECK { path '*.csv' , emit: csv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in mskcc/forte/bin/ """ check_samplesheet.py \\ diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..f0c63f6 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a609..7685b33 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 60b546a..5f15a5f 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,7 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - custom + - dump - version tools: - custom: @@ -14,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -28,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000..eec1db1 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..4274ed5 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ], + "1": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "2": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "versions": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "yml": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ] + } + ], + "timestamp": "2023-11-03T14:43:22.157011" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/umitools/dedup/environment.yml b/modules/nf-core/umitools/dedup/environment.yml new file mode 100644 index 0000000..f443735 --- /dev/null +++ 
b/modules/nf-core/umitools/dedup/environment.yml @@ -0,0 +1,7 @@ +name: umitools_dedup +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umi_tools=1.1.4 diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf index 56ea046..c338535 100644 --- a/modules/nf-core/umitools/dedup/main.nf +++ b/modules/nf-core/umitools/dedup/main.nf @@ -2,7 +2,7 @@ process UMITOOLS_DEDUP { tag "$meta.id" label "process_medium" - conda "bioconda::umi_tools=1.1.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" @@ -42,7 +42,7 @@ process UMITOOLS_DEDUP { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$(umi_tools --version | sed 's/^.*UMI-tools version://; s/ *\$//') END_VERSIONS """ @@ -56,7 +56,7 @@ process UMITOOLS_DEDUP { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$(umi_tools --version | sed 's/^.*UMI-tools version://; s/ *\$//') END_VERSIONS """ } diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml index 534d4c6..38d3fd4 100644 --- a/modules/nf-core/umitools/dedup/meta.yml +++ b/modules/nf-core/umitools/dedup/meta.yml @@ -7,8 +7,8 @@ keywords: tools: - umi_tools: description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ license: ["MIT"] input: @@ -61,8 +61,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" - "@klkeys" +maintainers: + - "@drpatelh" + - "@grst" + - "@klkeys" diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml new file mode 100644 index 0000000..7d08ac0 --- /dev/null +++ b/modules/nf-core/umitools/extract/environment.yml @@ -0,0 +1,7 @@ +name: umitools_extract +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umi_tools=1.1.4 diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf index 77d7435..0fd1f23 100644 --- a/modules/nf-core/umitools/extract/main.nf +++ b/modules/nf-core/umitools/extract/main.nf @@ -3,7 +3,7 @@ process UMITOOLS_EXTRACT { label "process_single" label "process_long" - conda "bioconda::umi_tools=1.1.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" @@ -33,7 +33,7 @@ process UMITOOLS_EXTRACT { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$(umi_tools --version | sed 's/^.*UMI-tools version://; s/ *\$//') END_VERSIONS """ } else { @@ -49,7 +49,7 @@ process UMITOOLS_EXTRACT { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$(umi_tools --version | sed 's/^.*UMI-tools version://; s/ *\$//') END_VERSIONS """ } diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml index db64a0f..7695b27 100644 --- a/modules/nf-core/umitools/extract/meta.yml +++ b/modules/nf-core/umitools/extract/meta.yml @@ -1,15 +1,16 @@ name: umitools_extract description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place keywords: - - umitools + - UMI + - barcode - extract + - umitools tools: - umi_tools: description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] + license: "MIT" input: - meta: type: map @@ -29,9 +30,7 @@ output: - reads: type: file description: > - Extracted FASTQ files. | - For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | - For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. 
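The output patterns described above follow the module's prefix convention, where the prefix defaults to meta.id. A small illustrative sketch, assuming a hypothetical meta map:

    def meta   = [ id: 'sample1', single_end: false ]   // hypothetical sample
    def prefix = meta.id
    // Single-end runs produce one file; paired-end runs produce a _1/_2 pair.
    def reads  = meta.single_end ?
        [ prefix + '.umi_extract.fastq.gz' ] :
        [ prefix + '.umi_extract_1.fastq.gz', prefix + '.umi_extract_2.fastq.gz' ]
    assert reads == [ 'sample1.umi_extract_1.fastq.gz', 'sample1.umi_extract_2.fastq.gz' ]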
pattern: "*.{fastq.gz}" - log: type: file @@ -41,7 +40,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test new file mode 100644 index 0000000..22242d1 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process UMITOOLS_EXTRACT" + script "../main.nf" + process "UMITOOLS_EXTRACT" + config "./nextflow.config" + tag "modules_nfcore" + tag "modules" + tag "umitools" + tag "umitools/extract" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap new file mode 100644 index 0000000..54e77fb --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,866a2da05ce1af35cc07261ffe6bc31a" + ] + ], + "timestamp": "2023-10-17T08:25:55.427194" + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config new file mode 100644 index 0000000..c866f5a --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + +} diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml new file mode 100644 index 0000000..c3fb23d --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/extract: + - modules/nf-core/umitools/extract/** diff --git a/nextflow.config b/nextflow.config index 6fdfa8f..15448bf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { // References genome = 'GRCh37' igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = false targets_base = null // Trimming @@ -33,7 +34,6 @@ params { run_oncokb_fusionannotator = false cosmic_usr = null fusion_report_cutoff = 1 - skip_fusion_report = false fusion_tool_cutoff = 1 // QC @@ -63,9 +63,16 @@ params { monochrome_logs = false hook_url = null help = false - validate_params = true - show_hidden_params = false schema_ignore_params = 'genomes' + version = false + + // Config options + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten @@ -73,13 +80,38 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 
'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + +} // Load base.config by default for all pipelines includeConfig 'conf/base.config' +// Load nf-core custom profiles from different institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} + +// Load nf-core/forte custom profiles from different institutions. +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! +// try { +// includeConfig "${params.custom_config_base}/pipeline/forte.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/forte profiles: ${params.custom_config_base}/pipeline/forte.config") +// } profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + } conda { conda.enabled = true docker.enabled = false @@ -87,6 +119,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } mamba { conda.enabled = true @@ -96,58 +129,97 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true docker.userEmulation = true + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true singularity.autoMounts = true + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false - singularity.runOptions = "-B $TMPDIR" + apptainer.enabled = false } podman { podman.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } shifter { shifter.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + charliecloud.enabled = false } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } juno { includeConfig 'conf/juno.config' } } -includeConfig 'conf/igenomes.config' +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel
from a sample sheet +} +// Load igenomes.config if required +if (!params.igenomes_ignore) { + includeConfig 'conf/igenomes.config' +} else { + params.genomes = [:] +} // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -183,12 +255,12 @@ dag { manifest { name = 'mskcc/forte' - author = 'Anne Marie Noronha' + author = """Anne Marie Noronha""" homePage = 'https://github.com/mskcc/forte' - description = 'RNAseq pipeline' + description = """RNAseq pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '0.0.1' + nextflowVersion = '!>=23.04.0' + version = '1.0.1' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 8a69d78..a3845e8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,11 +15,11 @@ "input": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/forte/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -42,6 +42,85 @@ } } }, + "reference_genome_options": { + "title": "Reference genome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", + "properties": { + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + }, + "fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "fa_icon": "far fa-file-code" + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "hidden": true, + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
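As the conditional include in nextflow.config above shows, --igenomes_ignore leaves params.genomes as an empty map, so genome-keyed lookups fall through to null and the reference files must then be supplied explicitly. A minimal Groovy sketch of that fall-through, with hypothetical params values:

    def params = [ genomes: [:], genome: 'GRCh37' ]     // hypothetical: --igenomes_ignore in effect
    def fasta  = params.genomes[ params.genome ]?.fasta // no entry for the key -> null
    assert fasta == null                                // hence '--fasta' must be given on the command line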
+ } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, "max_job_request_options": { "title": "Max job request options", "type": "object", @@ -71,7 +150,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -90,6 +169,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -131,11 +216,12 @@ "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "help_text": "Incoming hook URL for messaging service. 
Currently, MS Teams and Slack are supported.", "hidden": true }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true @@ -151,13 +237,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -165,12 +244,26 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
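Lenient mode relaxes the type checks so that, for example, a quoted number can satisfy an integer-typed parameter. An illustrative Groovy sketch of the property it relies on (the actual coercion is performed by the nf-validation plugin, not by this code):

    def value = '16'                  // a String, as it would arrive from the command line
    assert value.isInteger()          // parseable as a number, so lenient validation accepts it
    assert (value as Integer) == 16   // strict validation would reject the String outright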
} } } @@ -179,6 +272,12 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/reference_genome_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, { "$ref": "#/definitions/max_job_request_options" }, @@ -187,14 +286,14 @@ } ], "properties": { + "maf_input": { + "type": "string", + "default": null + }, "extract_fq_read_group": { "type": "boolean", "default": true }, - "genome": { - "type": "string", - "default": "GRCh37" - }, "igenomes_base": { "type": "string", "default": "s3://ngi-igenomes/igenomes" @@ -219,10 +318,6 @@ "type": "boolean", "default": false }, - "dedup_umi_for_fusions": { - "type": "boolean", - "default": false - }, "run_oncokb_fusionannotator": { "type": "boolean", "default": false @@ -239,10 +334,6 @@ "type": "string", "default": "null/forte_references" }, - "fasta": { - "type": "string", - "default": "None" - }, "gtf": { "type": "string", "default": "None" @@ -258,6 +349,78 @@ "skip_fusion_report": { "type": "boolean", "default": false + }, + "fusion_tool_cutoff": { + "type": "integer", + "default": 1 + }, + "rseqc_modules": { + "type": "array", + "default": [ + "bam_stat", + "inner_distance", + "infer_experiment", + "junction_annotation", + "junction_saturation", + "read_distribution", + "read_duplication" + ] + }, + "dedup_umi_for_kallisto": { + "type": "boolean", + "default": true + }, + "kallisto_fragment_len": { + "type": "integer", + "default": 500 + }, + "kallisto_fragment_sd": { + "type": "integer", + "default": 150 + }, + "expression_quantifier": { + "type": "string", + "default": "featurecounts" + }, + "schema_ignore_params": { + "type": "string", + "default": "genomes" + }, + "baits": { + "type": "string", + "default": null + }, + "cdna": { + "type": "string", + "default": null + }, + "arriba_blacklist": { + "type": "string", + "default": null + }, + "arriba_known_fusions": { + "type": "string", + "default": null + }, + "arriba_protein_domains": { + "type": "string", + "default": null + }, + "metafusion_blocklist": { + "type": "string", + "default": null + }, + "metafusion_gene_bed": { + "type": "string", + "default": null + }, + "metafusion_gene_info": { + "type": "string", + "default": null + }, + "ensembl_version": { + "type": "integer", + "default": 75 } } } diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf index 4865128..b9e3bc5 100644 --- a/subworkflows/local/qc.nf +++ b/subworkflows/local/qc.nf @@ -1,4 +1,4 @@ -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_config = Channel.fromPath("$projectDir/assets/analysis_multiqc_config.yml", checkIfExists: true) include { PICARD_COLLECTRNASEQMETRICS } from '../../modules/nf-core/picard/collectrnaseqmetrics/main' include { PICARD_COLLECTHSMETRICS } from '../../modules/nf-core/picard/collecthsmetrics/main' diff --git a/tests/nextflow.config b/tests/nextflow.config index 02fad23..48bcb76 100755 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -4,7 +4,6 @@ params { tracedir = "${params.outdir}/pipeline_info/${params.workflow_timestamp}" reference_base = "output/references" publish_dir_mode = "copy" - singularity_pull_docker_container = false } process { diff --git a/tower.yml b/tower.yml new file mode 100644 index 0000000..787aedf --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/forte.nf 
b/workflows/forte.nf index 44339eb..432a86e 100644 --- a/workflows/forte.nf +++ b/workflows/forte.nf @@ -1,20 +1,19 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' -// Validate input parameters -WorkflowForte.initialise(params, log) +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.gtf, params.refflat, params.starfusion_url, params.metafusion_gene_bed, params.metafusion_blocklist, params.metafusion_gene_info ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +WorkflowForte.initialise(params, log) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -22,7 +21,7 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = Channel.fromPath("$projectDir/assets/pipeline_multiqc_config.yml", checkIfExists: true) +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) @@ -85,7 +84,7 @@ workflow FORTE { // SUBWORKFLOW: Read in samplesheet, validate and stage input files // INPUT_CHECK ( - ch_input + file(params.input) ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) @@ -204,7 +203,7 @@ workflow FORTE { workflow_summary = WorkflowForte.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowForte.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + methods_description = WorkflowForte.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() @@ -234,9 +233,10 @@ workflow.onComplete { if (params.email || params.email_on_fail) { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } + NfcoreTemplate.dump_parameters(workflow, params) NfcoreTemplate.summary(workflow, params, log) if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) } }
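For reference, the template selection inside NfcoreTemplate.IM_notification (see the lib/NfcoreTemplate.groovy hunk above) reduces to a substring test on the webhook URL. A stand-alone Groovy sketch with hypothetical hook URLs:

    // Slack webhooks get slackreport.json; anything else (e.g. MS Teams)
    // falls back to the Adaptive Cards template.
    def pick = { String hook_url ->
        hook_url.contains('hooks.slack.com') ? 'slackreport.json' : 'adaptivecard.json'
    }
    assert pick('https://hooks.slack.com/services/T000/B000/XXXX') == 'slackreport.json'
    assert pick('https://example.webhook.office.com/webhookb2/abc') == 'adaptivecard.json'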