diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 2db9edfa..8601db99 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -13,11 +13,8 @@ jobs: if: github.repository == 'nf-core/rnafusion' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower + - name: Launch build arriba workflow via tower uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -27,13 +24,252 @@ jobs: parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "arriba": true, + "build_references": true } - profiles: test_full + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch arriba workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "arriba": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch build squid workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "squid": true, + "build_references": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch squid workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "squid": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch build starfusion workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "starfusion": true, + "build_references": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch starfusion workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "starfusion": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch build fusioncatcher workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "fusioncatcher": true, + "build_references": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch fusioncatcher workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "fusioncatcher": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch build pizzly workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "pizzly": true, + "build_references": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json +
+ - name: Launch pizzly workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "pizzly": true + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + - name: Launch stringtie workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "stringtie": true + } + profiles: test_full,aws_tower - uses: actions/upload-artifact@v3 with: name: Tower debug log file path: | tower_action_*.log tower_action_*.json +
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 5ef7e96c..003f6786 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower + - name: Launch build arriba workflow via tower uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -21,10 +21,259 @@ jobs: workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} parameters: | { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-test-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{
github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "arriba": true, + "stub": true, + "build_references": true } - profiles: test + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch arriba workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "arriba": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch build squid workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "squid": true, + "stub": true, + "build_references": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch squid workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "squid": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch build starfusion workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "starfusion": 
true, + "build_references": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + - name: Launch starfusion workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "starfusion": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch build fusioncatcher workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "fusioncatcher": true, + "build_references": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch fusioncatcher workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "fusioncatcher": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch build pizzly workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "pizzly": true, + "build_references": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: 
Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch pizzly workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "pizzly": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json + + - name: Launch stringtie workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "stringtie": true, + "stub": true + } + profiles: test,aws_tower - uses: actions/upload-artifact@v3 with: name: Tower debug log file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a6726dd9..e62dcf41 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,9 +35,72 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix + - name: Dry test arriba build run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \ + --outdir /home/runner/work/rnafusion/rnafusion/results --arriba \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} + + - name: Dry test arriba + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \ + --outdir /home/runner/work/rnafusion/rnafusion/results --arriba \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} + + - name: Dry test squid build + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \ + --outdir /home/runner/work/rnafusion/rnafusion/results --squid \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} + + - name: Dry test squid + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \ + --outdir /home/runner/work/rnafusion/rnafusion/results --squid \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username 
${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} +
+ - name: Dry test pizzly build + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \ + --outdir /home/runner/work/rnafusion/rnafusion/results --pizzly \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} +
+ - name: Dry test pizzly + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \ + --outdir /home/runner/work/rnafusion/rnafusion/results --pizzly \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} +
+ - name: Dry test fusioncatcher build + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \ + --outdir /home/runner/work/rnafusion/rnafusion/results --fusioncatcher \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} +
+ - name: Dry test fusioncatcher + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \ + --outdir /home/runner/work/rnafusion/rnafusion/results --fusioncatcher \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} +
+ - name: Dry test starfusion build + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub --build_references \ + --outdir /home/runner/work/rnafusion/rnafusion/results --starfusion \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} +
+ - name: Dry test starfusion + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub \ + --outdir /home/runner/work/rnafusion/rnafusion/results --starfusion \ + --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ + --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }}
diff --git a/CHANGELOG.md b/CHANGELOG.md index 5470be26..e3e00523 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,367 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v2.3.4 - [date] +## v2.3.0 - [2023/04/24] + +### Added + +- Shell specification to bash +- COSMIC password put into quotes +- Trimmed reads QC in MultiQC +- Add `ARRIBA_VISUALISATION` to processes affected by `--skip_vis` +- Option `fusionreport_filter` to activate/deactivate fusion-report display of fusions detected by 2 or more tools +
+### Changed + +- `Arriba` visualisation now runs for FusionInspector (combined tools) results, not only `Arriba` results +- Updated metro map with trimming options and placed `Arriba` visualisation after `FusionInspector` +- Exit with error when using squid in combination with any ensembl version different from 102 +
+### Fixed + +- Channel issue with indexing of files when using `--cram squid` +- `Arriba` references published in the correct folder +
+### Removed +
+## v2.2.0 - [2023/03/13] + +### Added + +- exitStatus 140 now part of the retry strategy +- stubs to all local modules +- `--stringtie` option added with StringTie v2.2.1 to detect splicing events. Not included in `fusion-report` or `fusionInspector` summaries.
Included in the `--all` workflow +- Generation of a ribosomal RNA interval list with `build_references`, used in picard CollectRnaSeqMetrics +- Add csv output to fusionreport +- Trimming workflow using `fastp`: use trimmed reads for all tools +- `whitelist` parameter to add custom fusions to the detected ones and consider the whole set for the `fusionInspector` analysis +- Compression to CRAM files for arriba, squid and starfusion workflows (fusioncatcher and pizzly do not produce SAM/BAM files, fusioninspector BAM files are too small to benefit from compression) +- `--qiagen` option to download from QIAGEN instead of COSMIC (use QIAGEN user and password for `cosmic_username` and `cosmic_passwd`) +- Bumped `STAR genomegenerate` time request for building as it was frequently crashing for most users +- Fixed issue with arriba visualisation parameters [#326](https://github.com/nf-core/rnafusion/issues/326) +
+### Changed + +- Test profiles unified under `test`; if the references do not all need to be downloaded, run with `-stub` +- Update CUSTOM_DUMPSOFTWAREVERSIONS to use multiqc version 1.13 +- Updated to nf-core template 2.7.2, with all module updates +- `MultiQC` updated to 1.13a in process dumpsoftwareversion +- Patch fusion-report version with fixed Mitelman DB and DB extraction date written into software_version.yaml +- `Arriba` references back to downloading with `build_references` instead of taking from container +- `Arriba` visualisation now running with `Arriba` v2.3.0 +- Updated `STAR-Fusion` to 1.12.0 +
+### Fixed + +- AWS megatest to display on nf-core website +- `arriba` visualisation references updated to 2.3.0 +- Removed issue with multiple outputs in samtools view for squid +
+### Removed + +- FUSIONINSPECTOR_DEV process as the option fusioninspector_limitSjdbInsertNsj is part of the main starfusion release +
+## [2.1.0] nfcore/rnafusion - 2022/07/12 + +### Added + +- `FusionCatcher` single_end support for single reads ABOVE 130 bp +- `--fusioninspector_only` parameter to run FusionInspector standalone feeding gene list manually with parameter `--fusioninspector_fusions PATH` +- `--fusioncatcher_limitSjdbInsertNsj` parameter to feed --limitSjdbInsertNsj to FusionCatcher +- `--fusioninspector_limitSjdbInsertNsj` parameter to feed --limitSjdbInsertNsj to FusionInspector !!Any other value than default will use the dev version of FusionInspector!! +- OPTIONAL trimming option `--trim` for hard trimming to 75 bp in case of high read-through.
Only fusioncatcher uses trimmed reads, as STAR-based fusion detection tools are less sensitive to read-through +- `picard` metrics, STAR final log, and QualiMap output included in `MultiQC` report +
+### Changed + +- `seq_platform` and `seq_center` changed from boolean to string +- `seq_platform` set to an empty string and `seq_center` set to an empty string if not existing +- Arriba uses the ensembl references-built STAR index independently of the `starfusion_build` parameter +- Switched from ftp to http protocol for the STARFUSION_BUILD process `Pfam-A.hmm.gz` download, as ftp causes issues on some servers +- Updated README and usage documentation with more detailed information and metro map +- Update of the single-end reads support table in README, added recommendation to use single-end reads only as a last resort +- STAR updated to 2.7.10a +- Arriba updated to 2.3.0, references for blacklist and protein domains changed to 2.3.0 from singularity/docker container -> arriba download of references not necessary any more +- MultiQC updated to 1.13a +- picard updated to 2.27.4 +- dumpsoftwareversions module updated to use multiqc=1.12 containers +
+### Fixed + +- FusionInspector does not mix sample reads with fusion lists and meta information from other samples anymore +- Arriba visualisation does not mix sample reads with fusion lists and meta information from other samples anymore +- logging of STAR-fusion and fusionreport version +
+### Removed + +## [2.0.0] nfcore/rnafusion - 2022/05/19 + +Update to DSL2 and newer software/reference versions +
+### Added + +- Added `qualimap/rnaseq v2.2.2d` from nf-core modules +- Added UCSC `gtfToGenePred v377` +- Added `picard CollectRnaSeqMetrics v2.26.10` +- Added `picard MarkDuplicates v2.26.10` from nf-core modules +- Added `cat/fastq` from nf-core modules +- Added possibility for manually feeding the results of fusions from different tools to speed up reruns +- STAR-Fusion references can be downloaded or built but downloaded references are NOT RECOMMENDED as not thoroughly tested (--starfusion_build parameter is true by default, use --starfusion_build false to use downloaded STAR-Fusion references).
+ +### Changed + +- Upgrade default ensembl version to `102` +- Upgrade to `nf-core/tools v2.3.2` +- Upgrade `Arriba v1.2.0` to `Arriba v2.2.1` +- Upgrade `FusionCatcher v1.20` to `FusionCatcher v1.33` +- Upgrade `STAR-fusion v1.8.1` to `STAR-fusion v1.10.1` +- Upgrade `STAR v2.7.1` to `STAR v2.7.9` +- Upgrade `fusion-report v2.1.3` to `fusion-report v2.1.5` +- Upgrade `kallisto v0.44.0` to `kallisto v0.46.2` +- Upgrade `fastqc v0.11.8` to `fastqc v0.11.9` +- Upgrade `samtools v1.9` to `samtools v1.15.1` +- Upgrade `arriba` references from `v1.2.0` to `v2.1.0` +- Upgrade `fusioncatcher` references from `v98` to `v102` +- Use `arriba` (detect only), `kallisto` and `STAR` from nf-core modules +- Instead of separate script to build the references, added `--build_references` argument to the main workflow +- `--fasta` argument is not required with `--build_references` and set by default to the ensembl references built in the detection workflow +- CI test done on stubs of reference building for subprocesses ensembl and arriba +
+Parameters for `STAR` for `arriba` changed from: + +```bash +--readFilesCommand zcat \\ + --outSAMtype BAM Unsorted \\ +--outStd BAM_Unsorted \\ +--outSAMunmapped Within \\ +--outBAMcompression 0 \\ +--outFilterMultimapNmax 1 \\ +--outFilterMismatchNmax 3 \\ +--chimSegmentMin 10 \\ +--chimOutType WithinBAM SoftClip \\ +--chimJunctionOverhangMin 10 \\ +--chimScoreMin 1 \\ +--chimScoreDropMax 30 \\ +--chimScoreJunctionNonGTAG 0 \\ +--chimScoreSeparation 1 \\ +--alignSJstitchMismatchNmax 5 -1 5 5 \\ +--chimSegmentReadGapMax 3 \\ +--sjdbOverhang ${params.read_length - 1} +``` + +to + +```bash +--readFilesCommand zcat \ +--outSAMtype BAM Unsorted \ +--outSAMunmapped Within \ +--outBAMcompression 0 \ +--outFilterMultimapNmax 50 \ +--peOverlapNbasesMin 10 \ +--alignSplicedMateMapLminOverLmate 0.5 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimSegmentMin 10 \ +--chimOutType WithinBAM HardClip \ +--chimJunctionOverhangMin 10 \ +--chimScoreDropMax 30 \ +--chimScoreJunctionNonGTAG 0 \ +--chimScoreSeparation 1 \ +--chimSegmentReadGapMax 3 \ +--chimMultimapNmax 50 +``` + +As recommended [here](https://arriba.readthedocs.io/en/latest/workflow/).
+ +Parameters for `STAR` for `STAR-fusion` changed from: + +```bash +--twopassMode Basic \\ +--outReadsUnmapped None \\ +--chimSegmentMin 12 \\ +--chimJunctionOverhangMin 12 \\ +--alignSJDBoverhangMin 10 \\ +--alignMatesGapMax 100000 \\ +--alignIntronMax 100000 \\ +--chimSegmentReadGapMax 3 \\ +--alignSJstitchMismatchNmax 5 -1 5 5 \\ +--runThreadN ${task.cpus} \\ +--outSAMstrandField intronMotif ${avail_mem} \\ +--outSAMunmapped Within \\ +--outSAMtype BAM Unsorted \\ +--outSAMattrRGline ID:GRPundef \\ +--chimMultimapScoreRange 10 \\ +--chimMultimapNmax 10 \\ +--chimNonchimScoreDropMin 10 \\ +--peOverlapNbasesMin 12 \\ +--peOverlapMMp 0.1 \\ +--readFilesCommand zcat \\ +--sjdbOverhang ${params.read_length - 1} \\ +--chimOutJunctionFormat 1 +``` + +to + +```bash +--outReadsUnmapped None \ +--readFilesCommand zcat \ +--outSAMtype BAM SortedByCoordinate \ +--outSAMstrandField intronMotif \ +--outSAMunmapped Within \ +--chimSegmentMin 12 \ +--chimJunctionOverhangMin 8 \ +--chimOutJunctionFormat 1 \ +--alignSJDBoverhangMin 10 \ +--alignMatesGapMax 100000 \ +--alignIntronMax 100000 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimMultimapScoreRange 3 \ +--chimScoreJunctionNonGTAG -4 \ +--chimMultimapNmax 20 \ +--chimNonchimScoreDropMin 10 \ +--peOverlapNbasesMin 12 \ +--peOverlapMMp 0.1 \ +--alignInsertionFlush Right \ +--alignSplicedMateMapLminOverLmate 0 \ +--alignSplicedMateMapLmin 30 \ +--chimOutType Junctions +``` + +`Homo_sapiens.${params.genome}.${ensembl_version}.gtf.gz` used for squid and arriba, `Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf.gz` used for STAR-fusion and the quality control, as the quality control is based on the STAR-fusion alignment. +
+### Fixed + +### Removed + +- Ericscript tool +- GRCh37 support. Subdirectories with `params.genome` are removed +- Running with conda +
+## v1.3.0dev nfcore/rnafusion - 2020/07/15 + +- Using official STAR-Fusion container [#160](https://github.com/nf-core/rnafusion/issues/160) +
+### Added + +- Added social preview image [#107](https://github.com/nf-core/rnafusion/issues/107) +- Added support for GRCh37 genome assembly [#77](https://github.com/nf-core/rnafusion/issues/77) +
+### Changed + +- Upgrade `fusion-report v2.1.2` to `fusion-report v2.1.3` +- Upgrade `fusion-report v2.1.1` to `fusion-report v2.1.2` +- Upgrade `fusion-report v2.1.0` to `fusion-report v2.1.1` +- Upgrade `Arriba v1.1.0` to `Arriba v1.2.0` +- Upgrade `fusion-report v2.0.2` to `fusion-report v2.1.0` +
+### Fixed + +- Missing `strip-components` in `download-references.nf/star-fusion` [#148](https://github.com/nf-core/rnafusion/issues/148) +- Missing version prefix for cdna [#143](https://github.com/nf-core/rnafusion/issues/143) +- `samtools` missing header in empty file for FusionInspector [ref](https://github.com/STAR-Fusion/STAR-Fusion/issues/191) +- Removed `profile` from helper scripts [#139](https://github.com/nf-core/rnafusion/issues/139) +- Wrong url path for `Pfam-A.hmm.gz` [#140](https://github.com/nf-core/rnafusion/issues/140) +
+### Removed + +- Removed `scripts/download-singularity-img.sh` and `download-singularity-img.nf` as they are not necessary any more +
+--- + +## [1.1.0] nfcore/rnafusion - 2020/02/10 + +- Fusion gene detection tools: + - `Arriba v1.1.0` + - `Ericscript v0.5.5` + - `Fusioncatcher v1.20` + - `Pizzly v0.37.3` + - `Squid v1.5` + - `STAR-Fusion v1.6.0` +- Visualization tools: + - `Arriba v1.1.0` + - `FusionInspector v1.3.1` +- Other tools: + - `fusion-report v2.0.1` + - `FastQC v0.11.8` + - `MultiQC v1.7` + - `STAR aligner v2.7.0f` +
+### Added + +- Added `Arriba 1.1.0` [#63](https://github.com/nf-core/rnafusion/issues/63) +- Added Batch mode [#54](https://github.com/nf-core/rnafusion/issues/54) +
+### Changed + +- Updated examples and configurations +- Upgraded `fusion-report v1.0.0` to `fusion-report v2.0.1` +- Divided `running_tools` into fusion and visualization tools +- Updated `STAR` in `Squid`, `Fusion-Inspector` version to `2.7.0f` +- Upgraded `STAR-Fusion v1.5.0` to `STAR-Fusion v1.6.0` [#83](https://github.com/nf-core/rnafusion/issues/83) +- Parameter `igenomesIgnore` renamed to `igenome` [#81](https://github.com/nf-core/rnafusion/issues/81) +- Finished STAR-Fusion file renaming [#18](https://github.com/nf-core/rnafusion/issues/18) +- Updated logos +- Updated to nf-core `1.8` TEMPLATE +
+### Fixed + +- iGenomes optional, but not really [#91](https://github.com/nf-core/rnafusion/issues/91) +- Updated `fusioncatcher` to latest `1.20` version also solving [#95](https://github.com/nf-core/rnafusion/issues/95) +
+### Removed + +- Variables `pizzly_fasta` and `pizzly_gtf` have been removed and replaced with `transcript` and `gtf` +- `Jenkinsfile`, test configuration, pylintrc configuration +- Removed `igenomes.config` because the pipeline only supports `Ensembl` version +
+--- + +## [1.0.2] nfcore/rnafusion - 2019/05/13 + +### Changed + +- Bumped nf-core template to 1.6 [#69](https://github.com/nf-core/rnafusion/pull/69) +
+### Fixed + +- Fixed COSMIC parameters not wrapped in quotes [#75](https://github.com/nf-core/rnafusion/issues/75) +- Implemented output for fusion tools [#72](https://github.com/nf-core/rnafusion/issues/72) +- Fixed reference download link for STAR-Fusion [#71](https://github.com/nf-core/rnafusion/issues/71) +
+--- + +## [1.0.1] nfcore/rnafusion - 2019/04/06 + +### Added + +- Added support for extra parameters for tools STAR-Fusion, FusionCatcher and fusion-report +- Added example configuration for `singularity` and `docker` +- Added [fusion-report](https://github.com/matq007/fusion-report) into the stack [#62](https://github.com/nf-core/rnafusion/issues/62), [#55](https://github.com/nf-core/rnafusion/issues/55), [#53](https://github.com/nf-core/rnafusion/issues/53), [#51](https://github.com/nf-core/rnafusion/issues/51) +- Added nextflow helper script `download-singularity-img.nf` +- Added nextflow helper script `download-references.nf` +- Added `Jenkinsfile` for in-house testing +
+### Changed + +- Updated installation of `FusionCatcher` (available now on bioconda) +
+### Fixed + +- Fixed empty symlinks (`input.X`) in fusion-report [#68](https://github.com/nf-core/rnafusion/issues/68) +- Fixed FASTA issues [#60](https://github.com/nf-core/rnafusion/issues/60) +- Fixed centralized nf-core/config [#64](https://github.com/nf-core/rnafusion/issues/64) +- Fixed `scrape_software_versions.py` to parse tools versions correctly [#65](https://github.com/nf-core/rnafusion/issues/65) +
+### Removed + +- Removed `Singularity` +
+--- + +## [1.0] nfcore/rnafusion - 2018/02/14 + +Version 1.0 marks the first production release of this pipeline under the nf-core flag. +The pipeline includes additional help scripts to download references for fusion tools and Singularity images. + Initial release of nf-core/rnafusion, created with the [nf-core](https://nf-co.re/) template. ### `Added`
diff --git a/CITATIONS.md b/CITATIONS.md index 1d65b743..dca1640e 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -18,6 +18,45 @@ > Ewels P, Magnusson M, Lundin S, Käller M.
MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [Arriba](https://github.com/suhrig/arriba) + + > Uhrig S, Ellermann J, Walther T, Burkhardt P, Fröhlich M, Hutter B, Toprak UH, Neumann O, Stenzinger A, Scholl C, Fröhling S, Brors B. Accurate and efficient detection of gene fusions from RNA sequencing data. + > Genome Research. 2021 Mar 31;448-460. doi: 10.1101/gr.257246.119. Epub 2021 Jan 13. PubMed PMID: 33441414; PubMed Central PMCID: PMC7919457. +
+- [FusionCatcher](https://github.com/ndaniel/fusioncatcher) + + > Nicorici D, Satalan M, Edgren H, Kangaspeska S, Murumagi A, Kallioniemi O, Virtanen S, Kilkku O. FusionCatcher – a tool for finding somatic fusion genes in paired-end RNA-sequencing data. BioRxiv, 2014 Nov. doi: 10.1101/011650. +
+- [Fusion-report](https://github.com/matq007/fusion-report) + + > Proks M, Genomic Profiling of a Comprehensive Nation-wide Collection of Childhood Solid Tumors, Master Thesis, Supervisors: Grøntved L, Díaz de Ståhl T, Nistér M, Ewels P, Garcia MU, Juhos S, University of Southern Denmark, 2019, unpublished. +
+- [Kallisto](https://pachterlab.github.io/kallisto/) + + > Bray NL, Pimentel H, Melsted P, Pachter L. Near-optimal probabilistic RNA-seq quantification. Nature Biotechnology 2016 Apr. 34, 525–527. doi:10.1038/nbt.3519. PMID: 27043002. +
+- [Pizzly](https://github.com/pmelsted/pizzly) + > Melsted P, Hateley S, Joseph IC, Pimentel H, Bray N, Pachter L. Fusion detection and quantification by pseudoalignment. BioRxiv, 2017 Jul. doi: 10.1101/166322. +
+- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. +
+- [Squid](https://github.com/Kingsford-Group/squid) + + > Ma C, Shao M, Kingsford C. SQUID: transcriptomic structural variation detection from RNA-seq. Genome Biol 2018 Apr. 19, 52. doi: 10.1186/s13059-018-1421-5. PubMed PMID: 29650026. PubMed Central PMCID: PMC5896115. +
+- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner. Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. +
+- [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) + + > Haas BJ, Dobin A, Li B, Stransky N, Pochet N, Regev A. Accuracy assessment of fusion transcript detection via read-mapping and de novo fusion transcript assembly-based methods. Genome Biology 2019 Oct;20,213. doi: 10.1186/s13059-019-1842-9. +
+- [StringTie](https://ccb.jhu.edu/software/stringtie/index.shtml) + > Shumate A, Wong B, Pertea G, Pertea M. Improved transcriptome assembly using a hybrid of long and short reads with StringTie. PLOS Computational Biology 18, 6 (2022), doi.org/10.1371/journal.pcbi.1009730 +
## Software packaging/containerisation tools - [Anaconda](https://anaconda.com)
diff --git a/README.md b/README.md index 504929dd..9d64ff1e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ![nf-core/rnafusion](docs/images/nf-core-rnafusion_logo_light.png#gh-light-mode-only) ![nf-core/rnafusion](docs/images/nf-core-rnafusion_logo_dark.png#gh-dark-mode-only) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnafusion/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnafusion/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2565517-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2565517) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -12,20 +12,81 @@ ## Introduction -**nf-core/rnafusion** is a bioinformatics pipeline that ... - - - - - -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +**nf-core/rnafusion** is a bioinformatics best-practice analysis pipeline for RNA sequencing data, with a curated list of tools for detecting and visualizing fusion genes. +
+The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! +
+> **IMPORTANT: conda is not supported currently.** Run with singularity or docker. + +> GRCh38 is the only supported reference +
+| Tool | Version | +| --------------------------------------------------------- | :------: | +| [Arriba](https://github.com/suhrig/arriba) | `2.3.0` | +| [FusionCatcher](https://github.com/ndaniel/fusioncatcher) | `1.33` | +| [Pizzly](https://github.com/pmelsted/pizzly) | `0.37.3` | +| [Squid](https://github.com/Kingsford-Group/squid) | `1.5` | +| [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) | `1.10.1` | +| [StringTie](https://github.com/gpertea/stringtie) | `2.2.1` | +
+> Single-end reads are to be used as a last resort. Paired-end reads are recommended. FusionCatcher cannot be used with single-end reads shorter than 130 bp. +
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/rnafusion/results). +
+In rnafusion, the full-sized test includes reference building and fusion detection. The test dataset is taken from [here](https://github.com/nf-core/test-datasets/tree/rnafusion/testdata/human). +
+## Pipeline summary + +![nf-core/rnafusion metro map](docs/images/nf-core-rnafusion_metro_map.png) +
+#### Build references + +`--build_references` triggers a parallel workflow to build all references. +
+1. Download ensembl fasta and gtf files +2. Create STAR index +3. Download arriba references +4. Download fusioncatcher references +5. Download pizzly references (kallisto index) +6. Download and build STAR-fusion references +7. Download fusion-report DBs +
+#### Main workflow + +1. Input samplesheet check +2. Concatenate fastq files per sample +3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +4. Arriba subworkflow + - [STAR](https://github.com/alexdobin/STAR) alignment + - [Samtools](https://github.com/samtools/samtools) sort + - [Samtools](https://github.com/samtools/samtools) index + - [Arriba](https://github.com/suhrig/arriba) fusion detection +5. Pizzly subworkflow + - [Kallisto](https://pachterlab.github.io/kallisto/) quantification + - [Pizzly](https://github.com/pmelsted/pizzly) fusion detection +6. Squid subworkflow + - [STAR](https://github.com/alexdobin/STAR) alignment + - [Samtools view](http://www.htslib.org/): convert sam output from STAR to bam + - [Samtools sort](http://www.htslib.org/): bam output from STAR + - [SQUID](https://github.com/Kingsford-Group/squid) fusion detection + - [SQUID](https://github.com/Kingsford-Group/squid) annotate +7. STAR-fusion subworkflow + - [STAR](https://github.com/alexdobin/STAR) alignment + - [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) fusion detection +8. Fusioncatcher subworkflow + - [FusionCatcher](https://github.com/ndaniel/fusioncatcher) fusion detection +9. Fusion-report subworkflow + - Merge all fusions detected by the different tools + - [Fusion-report](https://github.com/matq007/fusion-report) +10. FusionInspector subworkflow + - [FusionInspector](https://github.com/FusionInspector/FusionInspector) + - [Arriba](https://github.com/suhrig/arriba) visualisation +11. Stringtie subworkflow + - [StringTie](https://ccb.jhu.edu/software/stringtie/index.shtml) +12. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +13. QC for mapped reads ([`QualiMap: BAM QC`](https://kokonech.github.io/qualimap/HG00096.chr20_bamqc/qualimapReport.html)) +14. Index mapped reads ([samtools index](http://www.htslib.org/)) +15. Collect metrics ([`picard CollectRnaSeqMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037057492-CollectRnaSeqMetrics-Picard-) and [`picard MarkDuplicates`](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-))
## Usage @@ -39,7 +100,12 @@ First, prepare a samplesheet with your input data that looks as follows: -`samplesheet.csv`: + ```bash + nextflow run nf-core/rnafusion -profile test,YOURPROFILE --outdir <OUTDIR> -stub --all --build_references + + nextflow run nf-core/rnafusion -profile test,YOURPROFILE --outdir <OUTDIR> -stub --all + + ``` ```csv sample,fastq_1,fastq_2 @@ -50,9 +116,15 @@ Each row represents a fastq file (single-end) or a pair of fastq files (paired e --> -Now, you can run the pipeline using: +```console +nextflow run nf-core/rnafusion --input samplesheet.csv --outdir <OUTDIR> --genome GRCh38 --all -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> +``` - +```bash +nextflow run nf-core/rnafusion --input samplesheet.csv --outdir <OUTDIR> --genome GRCh38 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> +``` + +> Note that paths need to be absolute and that runs with conda are not supported. ```bash nextflow run nf-core/rnafusion \ @@ -76,11 +148,14 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/rnafusion was originally written by Martin Proks, Annick Renevey. +nf-core/rnafusion was written by Martin Proks ([@matq007](https://github.com/matq007)), Maxime Garcia ([@maxulysse](https://github.com/maxulysse)) and Annick Renevey ([@rannick](https://github.com/rannick)). -We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following people for their help in the development of this pipeline: - +- [Phil Ewels](https://github.com/ewels) +- [Rickard Hammarén](https://github.com/Hammarn) +- [Alexander Peltzer](https://github.com/apeltzer) +- [Praveen Raj](https://github.com/praveenraj2018)
## Contributions and Support @@ -90,10 +165,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - - - +If you use nf-core/rnafusion for your analysis, please cite it using the following doi: [10.5281/zenodo.3946477](https://doi.org/10.5281/zenodo.3946477) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
diff --git a/adapter_fasta_test b/adapter_fasta_test new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_arriba.txt b/assets/dummy_file_arriba.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_fusioncatcher.txt b/assets/dummy_file_fusioncatcher.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_pizzly.txt b/assets/dummy_file_pizzly.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_squid.txt b/assets/dummy_file_squid.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_starfusion.txt b/assets/dummy_file_starfusion.txt new file mode 100644 index 00000000..e69de29b
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 62a8ff4e..0789eeb1 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/rnafusion Methods Description" section_href: "https://github.com/nf-core/rnafusion" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |
<h4>Methods</h4>
<p>Data was processed using nf-core/rnafusion v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml new file mode 100644 index 00000000..ce53881a --- /dev/null +++ b/assets/multiqc_config.yaml @@ -0,0 +1,11 @@ +report_comment: > + This report has been generated by the nf-core/rnafusion + analysis pipeline. For information about how to interpret these results, please see the + documentation. +report_section_order: + software_versions: + order: -1000 + nf-core-rnafusion-summary: + order: -1001 + +export_plots: true diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7..4f9acc40 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,2 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,fastq_1,fastq_2,strandedness +test_rnafusion,https://github.com/nf-core/test-datasets/raw/d6cd12c9a69c148ef986d156d110f741df482b04/testdata/human/reads_1.fq.gz,https://github.com/nf-core/test-datasets/raw/d6cd12c9a69c148ef986d156d110f741df482b04/testdata/human/reads_2.fq.gz,forward diff --git a/assets/samplesheet_valid.csv b/assets/samplesheet_valid.csv new file mode 100644 index 00000000..4ac8f7b1 --- /dev/null +++ b/assets/samplesheet_valid.csv @@ -0,0 +1,2 @@ +sample,fastq_1,fastq_2,strandedness +test,https://github.com/nf-core/test-datasets/raw/rnafusion/testdata/human/reads_1.fq.gz,https://github.com/nf-core/test-datasets/raw/rnafusion/testdata/human/reads_2.fq.gz,forward diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 4a758fe0..57cf8e6b 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -1,12 +1,10 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" +#!/usr/bin/env python3 import argparse import csv import logging +import os import sys from collections import Counter from pathlib import Path @@ -29,12 +27,19 @@ class RowChecker: ".fastq.gz", ) + VALID_STRANDEDNESSES = ( + "reverse", + "forward", + "unstranded", + ) + def __init__( self, sample_col="sample", first_col="fastq_1", second_col="fastq_2", single_col="single_end", + strandedness="strandedness", **kwargs, ): """ @@ -57,6 +62,7 @@ def __init__( self._first_col = first_col self._second_col = second_col self._single_col = single_col + self._strandedness = strandedness self._seen = set() self.modified = [] @@ -73,6 +79,7 @@ def validate_and_transform(self, row): self._validate_first(row) self._validate_second(row) self._validate_pair(row) + self._validate_strandedness(row) self._seen.add((row[self._sample_col], row[self._first_col])) self.modified.append(row) @@ -113,6 +120,11 @@ def _validate_fastq_format(self, filename): f"It should be one of: {', '.join(self.VALID_FORMATS)}" ) + def _validate_strandedness(self, row): + """Assert that the strandedness given is one of unstranded/forward/reverse""" + if row[self._strandedness] not in self.VALID_STRANDEDNESSES: + raise AssertionError(f"Strandedness must be one of {', '.join(self.VALID_STRANDEDNESSES)}") + def validate_unique_samples(self): """ Assert that the combination of sample name and FASTQ filename is unique. @@ -143,17 +155,13 @@ def read_head(handle, num_lines=10): def sniff_format(handle): """ Detect the tabular format. - Args: handle (text file): A handle to a `text file`_ object. The read position is expected to be at the beginning (index 0). - Returns: csv.Dialect: The detected tabular format. 
-
-    .. _text file: https://docs.python.org/3/glossary.html#term-text-file
-
    """
    peek = read_head(handle)
    handle.seek(0)
@@ -174,21 +182,16 @@ def check_samplesheet(file_in, file_out):
            CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``.
        file_out (pathlib.Path): Where the validated and transformed samplesheet should
            be created; always in CSV format.
-    Example:
-        This function checks that the samplesheet follows the following structure,
-        see also the `viral recon samplesheet`_::
-
-            sample,fastq_1,fastq_2
-            SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz
-            SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz
-            SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,
-
-        .. _viral recon samplesheet:
+        This function checks that the samplesheet follows this structure:
+            sample,fastq_1,fastq_2,strandedness
+            SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz,forward
+            SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz,forward
+            SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,,forward
            https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
-    """
-    required_columns = {"sample", "fastq_1", "fastq_2"}
+    """
+
+    required_columns = {"sample", "fastq_1", "fastq_2", "strandedness"}
     # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
     with file_in.open(newline="") as in_handle:
         reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
diff --git a/bin/get_rrna_transcripts.py b/bin/get_rrna_transcripts.py
new file mode 100755
index 00000000..46812caf
--- /dev/null
+++ b/bin/get_rrna_transcripts.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+logger = logging.getLogger()
+
+
+def get_rrna_intervals(file_in, file_out):
+    """
+    Get the commented-out header
+    Get lines containing ``#`` or ``transcript_biotype "rRNA"`` or ``transcript_biotype "rRNA_pseudogene"`` or ``transcript_biotype "Mt_rRNA"``
+    Create output file
+
+    Args:
+        file_in (pathlib.Path): The given GTF file.
+        file_out (pathlib.Path): Where the ribosomal RNA GTF file should
+            be created; always in GTF format.
+    """
+
+    patterns = {
+        "#",
+        'transcript_biotype "Mt_rRNA"',
+        'transcript_biotype "rRNA"',
+        'transcript_biotype "rRNA_pseudogene"',
+    }
+    line_starts = {"MT", "1", "2", "3", "4", "5", "6", "7", "8", "9"}
+    out_lines = []
+    with file_in.open() as f:
+        data = f.readlines()
+        for line in data:
+            for pattern in patterns:
+                if pattern in line:
+                    for line_start in line_starts:
+                        if line.startswith(line_start):
+                            out_lines.append(line)
+
+    with file_out.open(mode="w") as out_file:
+        out_file.writelines(out_lines)
+
+
+def parse_args(argv=None):
+    """Define and immediately parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Extract ribosomal RNA intervals from a gtf file.",
+        epilog="Example: python get_rrna_transcripts.py <input.gtf> <output.gtf>",
+    )
+    parser.add_argument(
+        "file_in",
+        metavar="FILE_IN",
+        type=Path,
+        help="Input in GTF format.",
+    )
+    parser.add_argument(
+        "file_out",
+        metavar="FILE_OUT",
+        type=Path,
+        help="Transformed output intervals in GTF format.",
+    )
+    parser.add_argument(
+        "-l",
+        "--log-level",
+        help="The desired log level (default WARNING).",
+        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"),
+        default="WARNING",
+    )
+    return parser.parse_args(argv)
+
+
+def main(argv=None):
+    """Coordinate argument parsing and program execution."""
+    args = parse_args(argv)
+    logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")
+    if not args.file_in.is_file():
+        logger.error(f"The given input file {args.file_in} was not found!")
+        sys.exit(2)
+    args.file_out.parent.mkdir(parents=True, exist_ok=True)
+    get_rrna_intervals(args.file_in, args.file_out)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/bin/megafusion.py b/bin/megafusion.py
new file mode 100755
index 00000000..6f27b296
--- /dev/null
+++ b/bin/megafusion.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+import pandas as pd
+import ast
+
+logger = logging.getLogger()
+
+FUSIONINSPECTOR_MAP = {
+    "fusion": {"column": 0, "delimiter": "\t", "element": 0},
+    "chromosomeA": {"column": 7, "delimiter": ":", "element": 0},
+    "chromosomeB": {"column": 10, "delimiter": ":", "element": 0},
+    "posA": {"column": 7, "delimiter": ":", "element": 1},
+    "posB": {"column": 10, "delimiter": ":", "element": 1},
+    "strand1": {"column": 7, "delimiter": ":", "element": 2},
+    "strand2": {"column": 10, "delimiter": ":", "element": 2},
+    "geneA": {"column": 0, "delimiter": "--", "element": 0},
+    "geneB": {"column": 0, "delimiter": "--", "element": 1},
+    "split_reads": {"column": 1, "delimiter": "\t", "element": 0},
+    "discordant_pairs": {"column": 2, "delimiter": "\t", "element": 0},
+    "ffpm": {"column": 25, "delimiter": "\t", "element": 0},
+}
+
+
+def parse_args(argv=None):
+    """Define and immediately parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Convert FusionInspector and fusion-report output into a VCF file.",
+        epilog="Example: python megafusion.py --fusioninspector fusions.tsv --fusionreport report.tsv --sample sample1 --out sample1.vcf",
+    )
+    parser.add_argument(
+        "--fusioninspector",
+        metavar="FUSIONINSPECTOR",
+        type=Path,
+        help="FusionInspector output in TSV format.",
+    )
+    parser.add_argument(
+        "--fusionreport",
+        metavar="FUSIONREPORT",
+        type=Path,
+        help="Fusionreport output in TSV format.",
+    )
+    parser.add_argument("--sample", metavar="SAMPLE", type=Path, help="Sample name.", default="Sample")
+    parser.add_argument(
+        "--out",
+        metavar="OUT",
+        type=Path,
+        help="Output path.",
+    )
+    return parser.parse_args(argv)
+
+
+def header_def(sample):
+    return '##fileformat=VCFv4.1\n\
+##ALT=<ID=BND,Description="Break end">\n\
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n\
+##INFO=<ID=CHRA,Number=1,Type=String,Description="Chromosome A">\n\
+##INFO=<ID=CHRB,Number=1,Type=String,Description="Chromosome B">\n\
+##INFO=<ID=GENEA,Number=1,Type=String,Description="Gene A">\n\
+##INFO=<ID=GENEB,Number=1,Type=String,Description="Gene B">\n\
+##INFO=<ID=ORIENTATION,Number=2,Type=String,Description="Strand orientation of gene A and gene B">\n\
+##INFO=<ID=FOUND_DB,Number=.,Type=String,Description="Databases in which the fusion is found">\n\
+##INFO=<ID=ARRIBA,Number=1,Type=String,Description="Detected by arriba">\n\
+##INFO=<ID=FUSIONCATCHER,Number=1,Type=String,Description="Detected by fusioncatcher">\n\
+##INFO=<ID=PIZZLY,Number=1,Type=String,Description="Detected by pizzly">\n\
+##INFO=<ID=SQUID,Number=1,Type=String,Description="Detected by squid">\n\
+##INFO=<ID=STARFUSION,Number=1,Type=String,Description="Detected by starfusion">\n\
+##INFO=<ID=TOOL_HITS,Number=1,Type=Integer,Description="Number of tools that detected the fusion">\n\
+##INFO=<ID=SCORE,Number=1,Type=Float,Description="fusion-report score">\n\
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n\
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of split reads">\n\
+##FORMAT=<ID=RV,Number=1,Type=Integer,Description="Number of discordant pairs">\n\
+##FORMAT=<ID=FFPM,Number=1,Type=Float,Description="Fusion fragments per million RNA fragments">\n\
+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}'.format(
+        sample
+    )
+
+
+def read_fusioninspector(fusioninspector_file, col_num, delimiter, element):
+    with open(fusioninspector_file) as fusioninspector:
+        return [line.split()[col_num].split(delimiter)[element] for line in fusioninspector if not line.startswith("#")]
+
+
+def build_fusioninspector_dataframe(file, map):
+    new_dict = {}
+    for key in FUSIONINSPECTOR_MAP:
+        new_dict[key] = read_fusioninspector(
+            file,
+            map[key]["column"],
+            map[key]["delimiter"],
+            map[key]["element"],
+        )
+    return pd.DataFrame.from_dict(new_dict).set_index("fusion")
+
+
+def read_build_fusionreport(fusionreport_file):
+    with open(fusionreport_file) as f:
+        from_html = [line.split('rows": [')[1] for line in f if 'name="fusion_list' in line]
+        expression = from_html[0].split('], "tool')[0]
+    fusion_report = pd.DataFrame.from_dict(ast.literal_eval(expression)).set_index("fusion")
+    if "arriba" not in fusion_report.columns:
+        fusion_report["arriba"] = ""
+    if "fusioncatcher" not in fusion_report.columns:
+        fusion_report["fusioncatcher"] = ""
+    if "pizzly" not in fusion_report.columns:
+        fusion_report["pizzly"] = ""
+    if "squid" not in fusion_report.columns:
+        fusion_report["squid"] = ""
+    if "starfusion" not in fusion_report.columns:
+        fusion_report["starfusion"] = ""
+    return fusion_report
+
+
+def column_manipulation(df):
+    df["ALT"] = ""
+    df = df.reset_index()
+    df["FORMAT"] = "GT:DV:RV:FFPM"
+    df["ID"] = "."
+    df["QUAL"] = "."
+ df["FILTER"] = "PASS" + df["REF"] = "N" + + for index, row in df.iterrows(): + # ALT + if not row["strand1"] in ["+", "-"] or not row["strand2"] in ["+", "-"]: + df.loc[index, "ALT"] = "N[{}:{}[".format(df["chromosomeB"], row["posB"]) + elif row["strand1"] == "-" and row["strand2"] == "-": + df.loc[index, "ALT"] = "[{}:{}[N".format(row["chromosomeB"], row["posB"]) + elif row["strand1"] == "+" and row["strand2"] == "-": + df.loc[index, "ALT"] = "N]{}:{}]".format(row["chromosomeB"], row["posB"]) + elif row["strand1"] == "-" and row["strand2"] == "+": + df.loc[index, "ALT"] = "N]{}:{}]".format(row["chromosomeB"], row["posB"]) + else: + df.loc[index, "ALT"] = "N[{}:{}[".format(row["chromosomeB"], row["posB"]) + # INFO + df.loc[index, "INFO"] = ( + "SVTYPE=BND;CHRA={};CHRB={};GENEA={};GENEB={};ORIENTATION={},{};FOUND_DB={};" + "ARRIBA={};FUSIONCATCHER={};PIZZLY={};SQUID={};STARFUSION={};TOOL_HITS={};SCORE={}".format( + row["chromosomeA"], + row["chromosomeB"], + row["geneA"], + row["geneB"], + row["strand1"], + row["strand2"], + row["found_db"], + row["arriba"], + row["fusioncatcher"], + row["pizzly"], + row["squid"], + row["starfusion"], + row["tools_hits"], + row["score"], + ) + ) + # FORMAT + df.loc[index, "Sample"] = "./1:{}:{}:{}".format(row["split_reads"], row["discordant_pairs"], row["ffpm"]) + return df + + +def write_vcf(df_to_print, header, out_file): + df_to_print[ + [ + "chromosomeA", + "posA", + "ID", + "REF", + "ALT", + "QUAL", + "FILTER", + "INFO", + "FORMAT", + "Sample", + ] + ].to_csv( + path_or_buf=out_file, + sep="\t", + header=None, + index=False, + ) + + with open(out_file, "r+") as f: + content = f.read() + f.seek(0, 0) + f.write(header.rstrip("\r\n") + "\n" + content) + + +def megafusion(fusioninspector_in_file, fusionreport_in_file, sample, out): + """Convert fusion information from FusionInspector and fusion-report into a vcf file. Adapted from https://github.com/J35P312/MegaFusion""" + merged_df = build_fusioninspector_dataframe(fusioninspector_in_file, FUSIONINSPECTOR_MAP).join( + read_build_fusionreport(fusionreport_in_file), how="left" + ) + write_vcf(column_manipulation(merged_df), header_def(sample), out) + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + if not args.fusioninspector.is_file() or not args.fusionreport.is_file(): + logger.error(f"The given input file {args.fusioninspector} or {args.fusionreport} was not found!") + sys.exit(2) + megafusion(args.fusioninspector, args.fusionreport, args.sample, args.out) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index 51042a02..165a5e88 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,22 +10,15 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } + shell = ['/bin/bash', '-euo', 'pipefail'] errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
-    //          See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
         cpus   = { check_max( 1                  , 'cpus'    ) }
         memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
diff --git a/conf/genomes.config b/conf/genomes.config
new file mode 100644
index 00000000..e7a01b6d
--- /dev/null
+++ b/conf/genomes.config
@@ -0,0 +1,24 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for reference genome
+ * -------------------------------------------------
+ * These references have to be built manually due
+ * to ERCC spike-ins.
+ * Can be used by any config that customizes the base
+ * path using $params.genomes_base / --genomes_base
+ */
+
+params {
+    genomes {
+        'GRCh38' {
+            fasta          = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.all.fa"
+            fai            = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.all.fa.fai"
+            gtf            = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf"
+            chrgtf         = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf"
+            transcript     = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.cdna.all.fa.gz"
+            refflat        = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf.refflat"
+            rrna_intervals = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.interval_list"
+        }
+    }
+}
diff --git a/conf/modules.config b/conf/modules.config
index da58a5d8..7b8f2787 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -18,24 +18,382 @@ process {
         saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
     ]
-    withName: SAMPLESHEET_CHECK {
+    withName: ARRIBA {
+        publishDir = [
+            path: { "${params.outdir}/arriba" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+        ext.prefix = { "${meta.id}.arriba" }
+    }
+
+    withName: ARRIBA_DOWNLOAD {
+        publishDir = [
+            path: { "${params.genomes_base}/arriba" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: ARRIBA_VISUALISATION {
+        ext.when = { !params.fusioninspector_only && (params.starfusion || params.all) }
+        publishDir = [
+            path: { "${params.outdir}/arriba_visualisation" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
             mode: params.publish_dir_mode,
+            pattern: '*_versions.yml'
+        ]
+    }
+
+    withName: ENSEMBL_DOWNLOAD {
+        publishDir = [
+            path: { "${params.genomes_base}/ensembl" },
+            mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+    withName: FASTP {
+        ext.args = params.trim_tail ? "--trim_tail1 ${params.trim_tail}" : ''
+    }
+
     withName: FASTQC {
         ext.args = '--quiet'
+        ext.when = { !params.skip_qc }
     }
-    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
+    withName: FASTQC_FOR_TRIM {
+        ext.args = '--quiet'
+        publishDir = [
+            path: { "${params.outdir}/fastqc_for_trim" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ?
null : filename } + ] + } + + withName: FUSIONCATCHER { + ext.args = "--limitSjdbInsertNsj ${params.fusioncatcher_limitSjdbInsertNsj}" + } + + withName: FUSIONCATCHER_DOWNLOAD { + publishDir = [ + path: { "${params.genomes_base}/fusioncatcher" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: FUSIONINSPECTOR { + ext.when = { !params.skip_vis } + ext.args = { params.fusioninspector_limitSjdbInsertNsj != 1000000 ? "--STAR_xtra_params \"--limitSjdbInsertNsj ${params.fusioninspector_limitSjdbInsertNsj}\"" : '' } + + } + + withName: FUSIONREPORT { + ext.when = { !params.skip_vis } + ext.args = "--export csv" + ext.args2 = { params.fusionreport_filter ? "--tool-cutoff 2" : "--tool-cutoff 1"} + publishDir = [ + path: { "${params.outdir}/fusionreport/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: FUSIONREPORT_DOWNLOAD { + ext.args = { params.qiagen ? "--qiagen" : "" } + publishDir = [ + path: { "${params.genomes_base}/fusion_report_db" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: GATK4_BEDTOINTERVALLIST { + publishDir = [ + path: { "${params.genomes_base}/ensembl" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: GTF_TO_REFFLAT { + publishDir = [ + path: { "${params.genomes_base}/ensembl" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: KALLISTO_INDEX { + ext.args = '-k 31' + publishDir = [ + path: { "${params.genomes_base}/pizzly" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + withName: MEGAFUSION { + ext.when = {!params.fusioninspector_only} + } + + + + withName: MULTIQC { + ext.when = { !params.skip_qc } + } + + withName: PICARD_COLLECTRNASEQMETRICS { + ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } + + } + + withName: PICARD_MARKDUPLICATES { + ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } + } + + withName: PIZZLY { + ext.args = "-k 31 --align-score 2 --insert-size 400 --cache index.cache.txt" + publishDir = [ + path: { "${params.outdir}/pizzly" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: QUALIMAP_RNASEQ { + ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all)} + } + + withName: REFORMAT { + ext.args = "forcetrimright=75" + ext.args2 = "forcetrimleft=75" + } + + withName: SAMPLESHEET_CHECK { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: SAMTOOLS_INDEX_FOR_ARRIBA { + publishDir = [ + path: { "${params.outdir}/samtools_index_for_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: SAMTOOLS_INDEX_FOR_QC { + ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all)} + publishDir = [ + path: { "${params.outdir}/samtools_index_for_qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: SAMTOOLS_INDEX_FOR_STARFUSION { + publishDir = [ + path: { "${params.outdir}/samtools_index_for_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: SAMTOOLS_FAIDX { + publishDir = [ + path: { "${params.genomes_base}/ensembl" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: SAMTOOLS_SORT_FOR_ARRIBA { + ext.prefix = { "${meta.id}_sorted" } + publishDir = [ + path: { "${params.outdir}/samtools_sort_for_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: SAMTOOLS_SORT_FOR_SQUID_CHIMERIC { + ext.prefix = { "${meta.id}_chimeric_sorted" } + publishDir = [ + path: { "${params.outdir}/samtools_sort_for_squid_chimeric" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: SAMTOOLS_VIEW_FOR_ARRIBA { + ext.args = { "--output-fmt cram" } + publishDir = [ + path: { "${params.outdir}/cram_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC { + ext.prefix = { "${meta.id}_chimeric" } + ext.args = { "--output-fmt bam" } + publishDir = [ + path: { "${params.outdir}/samtools_view_for_squid_chimeric" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: SAMTOOLS_VIEW_FOR_SQUID_CRAM { + ext.args = { "--output-fmt cram" } + publishDir = [ + path: { "${params.outdir}/cram_squid" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC { + ext.args = { "--output-fmt cram" } + ext.prefix = { "${meta.id}_chimeric" } + publishDir = [ + path: { "${params.outdir}/cram_squid" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: SAMTOOLS_VIEW_FOR_STARFUSION { + ext.args = { "--output-fmt cram" } + publishDir = [ + path: { "${params.outdir}/cram_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: STAR_FOR_ARRIBA { + publishDir = [ + path: { "${params.outdir}/star_for_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + ext.args = '--readFilesCommand zcat \ + --outSAMtype BAM Unsorted \ + --outSAMunmapped Within \ + --outBAMcompression 0 \ + --outFilterMultimapNmax 50 \ + --peOverlapNbasesMin 10 \ + --alignSplicedMateMapLminOverLmate 0.5 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --chimSegmentMin 10 \ + --chimOutType WithinBAM HardClip \ + --chimJunctionOverhangMin 10 \ + --chimScoreDropMax 30 \ + --chimScoreJunctionNonGTAG 0 \ + --chimScoreSeparation 1 \ + --chimSegmentReadGapMax 3 \ + --chimMultimapNmax 50' + } + + withName: STAR_FOR_SQUID { + publishDir = [ + path: { "${params.outdir}/star_for_squid" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = '--twopassMode Basic \ + --chimOutType SeparateSAMold \ + --chimSegmentMin 20 \ + --chimJunctionOverhangMin 12 \ + --alignSJDBoverhangMin 10 \ + --outReadsUnmapped Fastx \ + --outSAMstrandField intronMotif \ + --outSAMtype BAM SortedByCoordinate \ + --readFilesCommand zcat' + } + + withName: STAR_FOR_STARFUSION { + publishDir = [ + path: { "${params.outdir}/star_for_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = '--twopassMode Basic \ + --outReadsUnmapped None \ + --readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --outSAMstrandField intronMotif \ + --outSAMunmapped Within \ + --chimSegmentMin 12 \ + --chimJunctionOverhangMin 8 \ + --chimOutJunctionFormat 1 \ + --alignSJDBoverhangMin 10 \ + --alignMatesGapMax 100000 \ + --alignIntronMax 100000 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --chimMultimapScoreRange 3 \ + --chimScoreJunctionNonGTAG -4 \ + --chimMultimapNmax 20 \ + --chimNonchimScoreDropMin 10 \ + --peOverlapNbasesMin 12 \ + --peOverlapMMp 0.1 \ + --alignInsertionFlush Right \ + --alignSplicedMateMapLminOverLmate 0 \ + --alignSplicedMateMapLmin 30 \ + --chimOutType Junctions' + } + + withName: STAR_GENOMEGENERATE { + ext.args = "--sjdbOverhang ${params.read_length - 1}" + cpus = { check_max( 24 * task.attempt, 'cpus' ) } + memory = { check_max( 100.GB * task.attempt, 'memory' ) } + time = { check_max( 2.d * task.attempt, 'time' ) } + publishDir = [ + path: { "${params.genomes_base}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: STARFUSION_BUILD { + cpus = { check_max( 24 * task.attempt, 'cpus' ) } + memory = { check_max( 100.GB * task.attempt, 'memory' ) } + time = { check_max( 2.d * task.attempt, 'time' ) } + publishDir = [ + path: { "${params.genomes_base}/starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: STARFUSION_DOWNLOAD { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 24.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + publishDir = [ + path: { "${params.genomes_base}/starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: STRINGTIE_MERGE { + publishDir = [ + path: { "${params.outdir}/stringtie/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }
+        ]
+    }
+}
diff --git a/conf/test.config b/conf/test.config
index 796db260..236d1a39 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -5,7 +5,7 @@
     Defines input files and everything required to run a fast and simple pipeline test.
     Use as follows:
-        nextflow run nf-core/rnafusion -profile test,<docker/singularity> --outdir <OUTDIR>
+        nextflow run nf-core/rnafusion -profile test,<docker/singularity> --outdir <OUTDIR> -stub
----------------------------------------------------------------------------------------
*/
@@ -16,14 +16,9 @@ params {
     // Limit resources so that this can run on GitHub Actions
     max_cpus   = 2
-    max_memory = '6.GB'
-    max_time   = '6.h'
+    max_memory = 6.GB
+    max_time   = 6.h
     // Input data
-    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
-
-    // Genome references
-    genome = 'R64-1-1'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv'
 }
diff --git a/conf/test_full.config b/conf/test_full.config
index e2522d72..eb8f5cd6 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -3,10 +3,8 @@
     Nextflow config file for running full-size tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     Defines input files and everything required to run a full size pipeline test.
-    Use as follows:
-        nextflow run nf-core/rnafusion -profile test_full,<docker/singularity> --outdir <OUTDIR>
-
+    Use as follows:
+        nextflow run nf-core/rnafusion -profile test_full,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
@@ -15,10 +13,8 @@ params {
     config_profile_description = 'Full test dataset to check pipeline function'
     // Input data for full size test
-    // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g.
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + + // Other params - // Genome references - genome = 'R64-1-1' -} + } diff --git a/containers/arriba/Dockerfile b/containers/arriba/Dockerfile new file mode 100644 index 00000000..ea36d90f --- /dev/null +++ b/containers/arriba/Dockerfile @@ -0,0 +1,14 @@ +FROM nfcore/base:1.9 + +LABEL authors="Martin Proks" \ + description="Docker image containing all requirements for nfcore/rnafusion pipeline" + +# Install the conda environment +COPY environment.yml / +RUN conda env create -f /environment.yml && conda clean -a + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-rnafusion-arriba_1.2.0/bin:$PATH + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-rnafusion-arriba_1.2.0 > nf-core-rnafusion-arriba_1.2.0.yml diff --git a/containers/arriba/environment.yml b/containers/arriba/environment.yml new file mode 100644 index 00000000..20a1024e --- /dev/null +++ b/containers/arriba/environment.yml @@ -0,0 +1,14 @@ +name: nf-core-rnafusion-arriba_1.2.0 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::arriba=1.2.0 + - bioconda::bioconductor-genomicalignments + - bioconda::bioconductor-genomicranges + - bioconda::samtools=1.9 + - bioconda::star=2.7.1a + - conda-forge::openssl=1.0 + - conda-forge::r-circlize + - conda-forge::readline=6.2 diff --git a/containers/ericscript/Dockerfile b/containers/ericscript/Dockerfile new file mode 100644 index 00000000..70b87c48 --- /dev/null +++ b/containers/ericscript/Dockerfile @@ -0,0 +1,17 @@ +FROM nfcore/base:1.9 + +LABEL authors="Martin Proks" \ + description="Docker image containing all requirements for nfcore/rnafusion pipeline" + +# Install the conda environment +COPY environment.yml / +RUN conda env create -f /environment.yml && conda clean -a + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-rnafusion-ericscript_0.5.5/bin:$PATH + +# Ignore database check (https://github.com/nf-core/rnafusion/issues/119) +RUN echo 1 > /opt/conda/envs/nf-core-rnafusion-ericscript_0.5.5/share/ericscript-0.5.5-4/lib/data/_resources/.flag.dbexists + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-rnafusion-ericscript_0.5.5 > nf-core-rnafusion-ericscript_0.5.5.yml diff --git a/containers/ericscript/environment.yml b/containers/ericscript/environment.yml new file mode 100644 index 00000000..1e76273c --- /dev/null +++ b/containers/ericscript/environment.yml @@ -0,0 +1,8 @@ +name: nf-core-rnafusion-ericscript_0.5.5 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ericscript=0.5.5 + - conda-forge::ncurses=6.1 diff --git a/containers/fusioncatcher/Dockerfile b/containers/fusioncatcher/Dockerfile new file mode 100644 index 00000000..77efaf57 --- /dev/null +++ b/containers/fusioncatcher/Dockerfile @@ -0,0 +1,49 @@ +FROM ubuntu:18.04 + +LABEL Description="This image is used to run FusionCatcher" Version="1.33" + +RUN apt-get -y clean \ + && apt-get -y update \ + && apt-get -y install \ + automake \ + build-essential \ + bzip2 \ + cmake \ + curl \ + g++ \ 
+ gawk \ + gcc \ + gzip \ + libc6-dev \ + libncurses5-dev \ + libtbb2 \ + libtbb-dev \ + make \ + parallel \ + pigz \ + python \ + python-dev \ + python-biopython \ + python-numpy \ + python-openpyxl \ + python-xlrd \ + tar \ + unzip \ + wget \ + zip \ + zlib1g \ + zlib1g-dev \ + zlibc \ + default-jdk \ + && apt-get -y clean + +WORKDIR /opt + +###################### +## INSTALLATION +###################### + +RUN wget --no-check-certificate http://sf.net/projects/fusioncatcher/files/bootstrap.py -O bootstrap.py \ + && python bootstrap.py -t -y -i /opt/fusioncatcher/v1.33/ + +ENV PATH /opt/fusioncatcher/v1.33/bin:$PATH diff --git a/containers/pizzly/Dockerfile b/containers/pizzly/Dockerfile new file mode 100644 index 00000000..0056bf9b --- /dev/null +++ b/containers/pizzly/Dockerfile @@ -0,0 +1,14 @@ +FROM nfcore/base:1.9 + +LABEL authors="Martin Proks" \ + description="Docker image containing all requirements for nfcore/rnafusion pipeline" + +# Install the conda environment +COPY environment.yml / +RUN conda env create -f /environment.yml && conda clean -a + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-rnafusion-pizzly_0.37.3/bin:$PATH + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-rnafusion-pizzly_0.37.3 > nf-core-rnafusion-pizzly_0.37.3.yml diff --git a/containers/pizzly/environment.yml b/containers/pizzly/environment.yml new file mode 100644 index 00000000..79974871 --- /dev/null +++ b/containers/pizzly/environment.yml @@ -0,0 +1,9 @@ +name: nf-core-rnafusion-pizzly_0.37.3 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kallisto=0.44.0 + - bioconda::pizzly=0.37.3 + - conda-forge::pigz=2.3.4 diff --git a/containers/squid/Dockerfile b/containers/squid/Dockerfile new file mode 100644 index 00000000..20d390ee --- /dev/null +++ b/containers/squid/Dockerfile @@ -0,0 +1,19 @@ +FROM nfcore/base:1.9 + +LABEL authors="Martin Proks" \ + description="Docker image containing all requirements for nfcore/rnafusion pipeline" + +# Install the conda environment +COPY environment.yml / +RUN conda env create -f /environment.yml && conda clean -a + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-rnafusion-squid_1.5-star2.7.1a/bin:$PATH + +RUN cd /opt/conda/envs/nf-core-rnafusion-squid_1.5-star2.7.1a/bin \ + && wget https://raw.githubusercontent.com/Kingsford-Group/squid/f45c9025d41cffd982ecbbdd52844e5a4f074de9/utils/AnnotateSQUIDOutput.py \ + && chmod +x /opt/conda/envs/nf-core-rnafusion-squid_1.5-star2.7.1a/bin/AnnotateSQUIDOutput.py \ + && ln -s /opt/conda/envs/nf-core-rnafusion-squid_1.5-star2.7.1a/bin/python3 /bin/python + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-rnafusion-squid_1.5-star2.7.1a > nf-core-rnafusion-squid_1.5-star2.7.1a.yml diff --git a/containers/squid/environment.yml b/containers/squid/environment.yml new file mode 100644 index 00000000..7385b163 --- /dev/null +++ b/containers/squid/environment.yml @@ -0,0 +1,11 @@ +name: nf-core-rnafusion-squid_1.5-star2.7.1a +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.9 + - bioconda::squid=1.5 + - bioconda::star=2.7.1a + - conda-forge::numpy=1.18.1 + - conda-forge::python=3.7.6 diff --git a/docs/images/BTB_logo.png b/docs/images/BTB_logo.png new file mode 100644 index 00000000..6a197b80 Binary files /dev/null and 
b/docs/images/BTB_logo.png differ
diff --git a/docs/images/BTB_logo.svg b/docs/images/BTB_logo.svg
new file mode 100644
index 00000000..099f1101
--- /dev/null
+++ b/docs/images/BTB_logo.svg
@@ -0,0 +1,184 @@
+[SVG markup omitted: BTB logo]
\ No newline at end of file
diff --git a/docs/images/NGI_logo.png b/docs/images/NGI_logo.png
new file mode 100644
index 00000000..3f4b769e
Binary files /dev/null and b/docs/images/NGI_logo.png differ
diff --git a/docs/images/NGI_logo.svg b/docs/images/NGI_logo.svg
new file mode 100644
index 00000000..aef40fd8
--- /dev/null
+++ b/docs/images/NGI_logo.svg
@@ -0,0 +1,333 @@
+[SVG markup omitted: NGI logo]
\ No newline at end of file
diff --git a/docs/images/SDU_logo.png b/docs/images/SDU_logo.png
new file mode 100644
index 00000000..38e60b4b
Binary files /dev/null and b/docs/images/SDU_logo.png differ
diff --git a/docs/images/SciLifeLab_logo.png b/docs/images/SciLifeLab_logo.png
new file mode 100644
index 00000000..bc4dbda6
Binary files /dev/null and b/docs/images/SciLifeLab_logo.png differ
diff --git a/docs/images/SciLifeLab_logo.svg b/docs/images/SciLifeLab_logo.svg
new file mode 100644
index 00000000..b8a44b79
--- /dev/null
+++ b/docs/images/SciLifeLab_logo.svg
@@ -0,0 +1,99 @@
+[SVG markup omitted: SciLifeLab logo]
\ No newline at end of file
diff --git a/docs/images/nf-core-rnafusion_metro_map.png b/docs/images/nf-core-rnafusion_metro_map.png
new file mode 100644
index 00000000..314b61a2
Binary files /dev/null and b/docs/images/nf-core-rnafusion_metro_map.png differ
diff --git a/docs/images/nf-core-rnafusion_metro_map.svg b/docs/images/nf-core-rnafusion_metro_map.svg
new file mode 100644
index 00000000..38eb709b
--- /dev/null
+++ b/docs/images/nf-core-rnafusion_metro_map.svg
@@ -0,0 +1,932 @@
+[SVG markup omitted: nf-core/rnafusion metro map. Recoverable labels: fastq/txt inputs; fastp trimming and hard trimming; align; kallisto; workflows: arriba, squid, pizzly, fusioncatcher, starfusion, stringtie, qc; tools: Arriba (+ Arriba visualisation), SQUID (+ SQUID annotate), FusionCatcher, STAR-Fusion, StringTie, FastQC, Qualimap, MultiQC, FusionInspector, fusion-report, Picard CollectRnaSeqMetrics + Picard CollectWgsMetrics]
diff --git a/docs/images/rnafusion_logo.png b/docs/images/rnafusion_logo.png
new file mode 100644
index 00000000..548f1578
Binary files /dev/null and b/docs/images/rnafusion_logo.png differ
diff --git a/docs/images/rnafusion_logo.svg b/docs/images/rnafusion_logo.svg
new file mode 100644
index 00000000..fcd196c8
--- /dev/null
+++ b/docs/images/rnafusion_logo.svg
@@ -0,0 +1,208 @@
+[SVG markup omitted: nf-core/rnafusion logo]
\ No newline at end of file
diff --git a/docs/images/summary_graph_1.png b/docs/images/summary_graph_1.png
new file mode 100644
index 00000000..61d32e59
Binary files /dev/null and b/docs/images/summary_graph_1.png differ
diff --git a/docs/images/summary_graph_2.png b/docs/images/summary_graph_2.png
new file mode 100644
index 00000000..75dffc13
Binary files /dev/null and b/docs/images/summary_graph_2.png differ
diff --git a/docs/images/summary_graph_3.png b/docs/images/summary_graph_3.png
new file mode 100644
index 00000000..6b87cff6
Binary files /dev/null and b/docs/images/summary_graph_3.png differ
diff --git a/docs/output.md b/docs/output.md
index dc6e6d48..b7afedf1 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -6,16 +6,365 @@ This document describes the output produced by the pipeline.
Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.
 The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
-
-
-## Pipeline overview
+## Pipeline overview
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
-- [FastQC](#fastqc) - Raw read QC
+- [Download and build references](#download-and-build-references) - Build references needed to run the rest of the pipeline
+- [STAR](#star) - Alignment for arriba, squid and STAR-fusion
+- [Cat](#cat) - Concatenated fastq files per sample ID
+- [Arriba](#arriba) - Arriba fusion detection
+- [Pizzly](#pizzly) - Pizzly fusion detection
+- [Squid](#squid) - Squid fusion detection
+- [STAR-fusion](#star-fusion) - STAR-fusion fusion detection
+- [StringTie](#stringtie) - StringTie assembly
+- [FusionCatcher](#fusioncatcher) - FusionCatcher fusion detection
+- [Samtools](#samtools) - SAM/BAM file manipulation
+- [Fusion-report](#fusion-report) - Summary of the findings of each tool and comparison to the COSMIC, Mitelman and FusionGDB databases
+- [FusionInspector](#fusioninspector) - IGV-based visualisation tool for fusions filtered by fusion-report
+- [Arriba visualisation](#arriba-visualisation) - Arriba visualisation report for FusionInspector fusions
+- [Qualimap](#qualimap) - Quality control of alignment
+- [Picard](#picard) - Collect metrics
+- [FastQC](#fastqc) - Raw read quality control
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+### Download and build references
+
+Output files
+
+- `genomes_base/`
+  - `arriba`
+    - `blacklist_hg38_GRCh38_v2.1.0.tsv.gz`
+    - `protein_domains_hg38_GRCh38_v2.1.0.gff3`
+    - `cytobands_hg38_GRCh38_v2.1.0.tsv`
+  - `ensembl`
+    - `Homo_sapiens.GRCh38.{ensembl_version}.all.fa`
+    - `Homo_sapiens.GRCh38.{ensembl_version}.cdna.all.fa.gz`
+    - `Homo_sapiens.GRCh38.{ensembl_version}.gtf`
+    - `Homo_sapiens.GRCh38.{ensembl_version}.chr.gtf`
+    - `Homo_sapiens.GRCh38.{ensembl_version}.chr.gtf.refflat`
+    - `Homo_sapiens.GRCh38.{ensembl_version}.interval_list`
+  - `fusioncatcher`
+    - `human_v<version>` - dir with all references for fusioncatcher
+  - `fusion_report_db`
+    - `cosmic.db`
+    - `fusiongdb.db`
+    - `fusiongdb2.db`
+    - `mitelman.db`
+  - `pizzly`
+    - `kallisto` - file containing the kallisto index
+  - `star` - dir with STAR index
+  - `starfusion`
+    - files and dirs used to build the index
+    - `ctat_genome_lib_build_dir` - dir containing the index
+
+(Only files or folders used by the pipeline are mentioned explicitly.)
+
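The Ensembl file names above follow the path pattern set in `conf/genomes.config`. A minimal Python sketch for sanity-checking a downloaded reference directory before launching the pipeline; the `genomes_base` path and the version default used below are illustrative, not pipeline defaults:

```python
from pathlib import Path

def ensembl_reference_paths(genomes_base: str, genome: str = "GRCh38", ensembl_version: int = 102) -> dict:
    """Mirror the Ensembl path pattern from conf/genomes.config (illustration only)."""
    base = Path(genomes_base) / "ensembl"
    stem = f"Homo_sapiens.{genome}.{ensembl_version}"
    return {
        "fasta": base / f"{stem}.all.fa",
        "fai": base / f"{stem}.all.fa.fai",
        "gtf": base / f"{stem}.gtf",
        "chrgtf": base / f"{stem}.chr.gtf",
        "transcript": base / f"{stem}.cdna.all.fa.gz",
        "refflat": base / f"{stem}.chr.gtf.refflat",
        "rrna_intervals": base / f"{stem}.interval_list",
    }

# Report which expected files are present in a hypothetical /references directory.
for name, path in ensembl_reference_paths("/references").items():
    print(f"{name}: {path} ({'ok' if path.exists() else 'MISSING'})")
```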
+
+### STAR
+
+STAR is used to align reads to the genome reference.
+
+STAR is run three times:
+
+For arriba with the parameters:
+
+```bash
+--readFilesCommand zcat \
+--outSAMtype BAM Unsorted \
+--outSAMunmapped Within \
+--outBAMcompression 0 \
+--outFilterMultimapNmax 50 \
+--peOverlapNbasesMin 10 \
+--alignSplicedMateMapLminOverLmate 0.5 \
+--alignSJstitchMismatchNmax 5 -1 5 5 \
+--chimSegmentMin 10 \
+--chimOutType WithinBAM HardClip \
+--chimJunctionOverhangMin 10 \
+--chimScoreDropMax 30 \
+--chimScoreJunctionNonGTAG 0 \
+--chimScoreSeparation 1 \
+--chimSegmentReadGapMax 3 \
+--chimMultimapNmax 50
+```
+
+For squid with the parameters:
+
+```bash
+--twopassMode Basic \
+--chimOutType SeparateSAMold \
+--chimSegmentMin 20 \
+--chimJunctionOverhangMin 12 \
+--alignSJDBoverhangMin 10 \
+--outReadsUnmapped Fastx \
+--outSAMstrandField intronMotif \
+--outSAMtype BAM SortedByCoordinate \
+--readFilesCommand zcat
+```
+
+For STAR-fusion with the parameters:
+
+```bash
+--twopassMode Basic \
+--outReadsUnmapped None \
+--readFilesCommand zcat \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif \
+--outSAMunmapped Within \
+--chimSegmentMin 12 \
+--chimJunctionOverhangMin 8 \
+--chimOutJunctionFormat 1 \
+--alignSJDBoverhangMin 10 \
+--alignMatesGapMax 100000 \
+--alignIntronMax 100000 \
+--alignSJstitchMismatchNmax 5 -1 5 5 \
+--chimMultimapScoreRange 3 \
+--chimScoreJunctionNonGTAG -4 \
+--chimMultimapNmax 20 \
+--chimNonchimScoreDropMin 10 \
+--peOverlapNbasesMin 12 \
+--peOverlapMMp 0.1 \
+--alignInsertionFlush Right \
+--alignSplicedMateMapLminOverLmate 0 \
+--alignSplicedMateMapLmin 30 \
+--chimOutType Junctions
+```
+
+> STAR_FOR_STARFUSION uses `${params.ensembl_ref}/Homo_sapiens.GRCh38.${params.ensembl_version}.chr.gtf` whereas STAR_FOR_ARRIBA and STAR_FOR_SQUID use `${params.ensembl_ref}/Homo_sapiens.GRCh38.${params.ensembl_version}.gtf`
+
+Output files
+
+- `star_for_<tool>`
+  - **Common**
+    - `<sample>.Log.final.out`
+    - `<sample>.Log.progress.out`
+    - `<sample>.SJ.out.tab`
+  - **For arriba:**
+    - `<sample>.Aligned.out.bam`
+  - **For squid:**
+    - `<sample>.Aligned.sortedByCoord.out.bam`
+    - `<sample>.Chimeric.out.sam`
+    - `<sample>.unmapped_1.fastq.gz`
+    - `<sample>.unmapped_2.fastq.gz`
+  - **For starfusion:**
+    - `<sample>.Aligned.sortedByCoord.out.bam`
+    - `<sample>.Chimeric.out.junction`
+
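The three STAR runs share one aligner and differ only in their chimeric-read settings. A hedged sketch of replaying the arriba argument set from `conf/modules.config` outside of Nextflow; the thread count, genome directory and read paths are hypothetical examples:

```python
import shlex
import subprocess

# Chimeric settings copied verbatim from the STAR_FOR_ARRIBA block in conf/modules.config.
ARRIBA_STAR_ARGS = (
    "--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within "
    "--outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 "
    "--alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 "
    "--chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 "
    "--chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 "
    "--chimSegmentReadGapMax 3 --chimMultimapNmax 50"
)

# Requires STAR on PATH; genome dir and FASTQ names are placeholders.
cmd = ["STAR", "--runThreadN", "8", "--genomeDir", "/references/star",
       "--readFilesIn", "sample_R1.fastq.gz", "sample_R2.fastq.gz",
       *shlex.split(ARRIBA_STAR_ARGS)]
subprocess.run(cmd, check=True)
```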
+ +### Cat + +Cat is used to concatenate fastq files belonging to the same sample. + +
+Output files
+
+- `cat`
+  - `<sample>_1.merged.fastq.gz`
+  - `<sample>_2.merged.fastq.gz`
+
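This merging works because gzip members can be joined byte-for-byte into one valid gzip stream, so no decompression or re-compression is needed. A small illustration with hypothetical file names:

```python
import shutil

def merge_fastqs(parts: list[str], merged: str) -> None:
    """Concatenate gzipped FASTQ files byte-for-byte; the result is a
    single valid gzip stream, equivalent to `cat a.fq.gz b.fq.gz > out.fq.gz`."""
    with open(merged, "wb") as out:
        for part in parts:
            with open(part, "rb") as handle:
                shutil.copyfileobj(handle, out)

# Hypothetical lanes for one re-sequenced sample.
merge_fastqs(
    ["CONTROL_REP1_L002_R1.fastq.gz", "CONTROL_REP1_L003_R1.fastq.gz"],
    "CONTROL_REP1_1.merged.fastq.gz",
)
```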
+
+### Arriba
+
+Arriba is used to i) detect fusions and ii) output a PDF visualisation report for the fusions found:
+
+#### Detection
+
+Output files
+
+- `arriba`
+  - `<sample>.arriba.fusions.tsv` - contains the identified fusions
+  - `<sample>.arriba.fusions.discarded.tsv`
+
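For downstream scripting, the detection output is plain TSV. A sketch of pulling out high-confidence calls; the `#gene1`, `gene2` and `confidence` column names follow arriba's documented output layout and should be verified against the arriba version in use:

```python
import csv

def high_confidence_fusions(path: str) -> list[tuple[str, str]]:
    """Return (gene1, gene2) pairs that arriba flagged as high confidence."""
    with open(path, newline="") as handle:
        reader = csv.DictReader(handle, delimiter="\t")
        # Arriba keeps the leading '#' on the first header field.
        return [(row["#gene1"], row["gene2"])
                for row in reader if row.get("confidence") == "high"]

# Hypothetical per-sample output file.
print(high_confidence_fusions("sample.arriba.fusions.tsv"))
```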
+ +#### Visualisation + +
+Output files
+
+- `arriba_visualisation`
+  - `<sample>.pdf`
+
+ +### Pizzly + +The first step of the pizzly workflow is to run `kallisto quant`: + +#### Kallisto + +
+Output files
+
+- `kallisto`
+  - `<sample>.kallisto_quant.fusions.txt`
+
+ +Pizzly refines kallisto output. + +#### Pizzly + +Pizzly uses the following arguments: + +```bash +-k 31 \ +--align-score 2 \ +--insert-size 400 \ +--cache index.cache.txt +``` + +
+Output files
+
+- `pizzly`
+  - `<sample>.pizzly.txt` - contains the identified fusions
+  - `<sample>.pizzly.unfiltered.json`
+
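A sketch of extracting gene pairs from the unfiltered JSON. The assumed layout, a top-level `genes` list whose entries carry `geneA`/`geneB` objects with a `name` field, is based on pizzly's documentation and should be verified against real output:

```python
import json

def pizzly_gene_pairs(path: str) -> list[str]:
    """Return fusion partners as GENE1--GENE2 strings (layout assumed, see above)."""
    with open(path) as handle:
        data = json.load(handle)
    return [f'{hit["geneA"]["name"]}--{hit["geneB"]["name"]}'
            for hit in data.get("genes", [])]

# Hypothetical per-sample output file.
print(pizzly_gene_pairs("sample.pizzly.unfiltered.json"))
```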
+
+### Squid
+
+Squid is run in two steps, i) fusion detection and ii) fusion annotation, but the outputs are collected in a common `squid` directory.
+
+Output files
+
+- `squid`
+  - `<sample>.squid.fusions_sv.txt` - contains the identified fusions
+  - `<sample>.squid.fusions.annotated.txt` - contains the identified fusions annotated
+
+ +### STAR-fusion + +
+Output files
+
+- `starfusion`
+  - `<sample>.starfusion.fusion_predictions.tsv` - contains the identified fusions
+  - `<sample>.starfusion.abridged.tsv` - contains the identified fusions
+  - `<sample>.starfusion.abridged.coding_effect.tsv`
+
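A sketch of filtering the prediction table by read support with pandas. The `JunctionReadCount` and `SpanningFragCount` column names come from STAR-Fusion's documentation, the threshold is arbitrary, and the file name is illustrative:

```python
import pandas as pd

preds = pd.read_csv("sample.starfusion.abridged.tsv", sep="\t")

# Keep fusions supported by at least 5 junction + spanning reads (arbitrary cutoff).
supported = preds[(preds["JunctionReadCount"] + preds["SpanningFragCount"]) >= 5]
print(supported[["#FusionName", "JunctionReadCount", "SpanningFragCount"]])
```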
+ +### StringTie + +
+Output files
+
+- `stringtie/<sample>/stringtie.merged.gtf` - merged gtf from annotation and stringtie output gtfs
+
+ +### FusionCatcher + +
+Output files
+
+- `fusioncatcher`
+  - `<sample>.fusioncatcher.fusion-genes.txt`
+  - `<sample>.fusioncatcher.summary.txt`
+  - `<sample>.fusioncatcher.log`
+
+
+### Samtools
+
+#### Samtools view
+
+Samtools view is used to convert the chimeric SAM output from STAR_FOR_SQUID to BAM.
+
+Output files
+
+- `samtools_view_for_squid`
+  - `<sample>_chimeric.bam` - BAM file converted from the chimeric SAM output
+
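The same conversion can be scripted with pysam, which wraps samtools; the file names below are illustrative:

```python
import pysam

# Rewrite a chimeric SAM (as emitted by STAR_FOR_SQUID) as BAM, record by record.
with pysam.AlignmentFile("sample.Chimeric.out.sam", "r") as sam, \
        pysam.AlignmentFile("sample_chimeric.bam", "wb", template=sam) as bam:
    for read in sam:
        bam.write(read)
```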
+
+#### Samtools sort
+
+Samtools sort is used to sort BAM files from STAR_FOR_ARRIBA (for arriba visualisation) and the chimeric BAM from STAR_FOR_SQUID.
+
+Output files
+
+- `samtools_sort_for_<tool>`
+  - `<sample>(_chimeric)_sorted.bam` - sorted BAM file
+
+
+#### Samtools index
+
+Samtools index is used to index BAM files from STAR_FOR_ARRIBA (for arriba visualisation) and STAR_FOR_STARFUSION (for QC).
+
+Output files
+
+- `samtools_index_for_<tool>`
+  - `<sample>.(Aligned.sortedByCoord).out.bam.bai` - BAM index
+
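Sorting and indexing can likewise be scripted through pysam's samtools wrappers; the paths are illustrative:

```python
import pysam

# Coordinate-sort, then index; mirrors `samtools sort` + `samtools index`.
pysam.sort("-o", "sample_sorted.bam", "sample.Aligned.out.bam")
pysam.index("sample_sorted.bam")  # writes sample_sorted.bam.bai next to the BAM
```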
+ +### Fusion-report + +
+Output files
+
+- `fusionreport`
+  - `<sample>`
+    - `<sample>.fusionreport.tsv`
+    - `<sample>.fusionreport_filtered.tsv`
+    - `index.html` - general report for all filtered fusions
+    - `<fusion>.html` - specific report for each filtered fusion
+
+ +The score is explained [on the original fusion-report github page](https://matq007.github.io/fusion-report/#/score). + +### FusionInspector + +
+Output files
+
+- `fusioninspector`
+  - `<sample>.fusion_inspector_web.html` - visualisation report described in detail [here](https://github.com/FusionInspector/FusionInspector/wiki/FusionInspector-Visualizations)
+  - `FusionInspector.log`
+  - `<sample>.FusionInspector.fusions.abridged.tsv`
+
+ +### Qualimap + +
+Output files
+
+- `qualimap`
+  - `qualimapReport.html` - HTML report
+  - `rnaseq_qc_results.txt` - TXT results
+  - `css` - dir for html style
+  - `images_qualimapReport` - dir for html images
+  - `raw_data_qualimapReport` - dir for html raw data
+
+
+### Picard
+
+Picard CollectRnaSeqMetrics and Picard MarkDuplicates share the same output directory.
+
+Output files
+
+- `picard`
+  - `<sample>.MarkDuplicates.metrics.txt` - metrics from MarkDuplicates
+  - `<sample>_rna_metrics.txt` - metrics from CollectRnaSeqMetrics
+  - `<sample>.bam` - BAM file with marked duplicates
+
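Both files use Picard's standard metrics layout: comment lines, then a `## METRICS CLASS` marker followed by a header row and a value row. A minimal parser sketch; the input file name and the `PERCENT_DUPLICATION` lookup assume MarkDuplicates output:

```python
def read_picard_metrics(path: str) -> dict:
    """Parse the first data row of a Picard metrics table into a dict."""
    with open(path) as handle:
        lines = [line.rstrip("\n") for line in handle]
    # The table starts right after the '## METRICS CLASS' marker line.
    start = next(i for i, line in enumerate(lines) if line.startswith("## METRICS CLASS")) + 1
    header = lines[start].split("\t")
    values = lines[start + 1].split("\t")
    return dict(zip(header, values))

metrics = read_picard_metrics("sample.MarkDuplicates.metrics.txt")
print(metrics.get("PERCENT_DUPLICATION"))  # column defined by Picard's DuplicationMetrics
```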
+ ### FastQC
diff --git a/docs/usage.md b/docs/usage.md
index bcf9dd8d..48bb9e10 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1,4 +1,4 @@
-# nf-core/rnafusion: Usage
+# nf-core/rnafusion: Usage
 ## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/rnafusion/usage](https://nf-co.re/rnafusion/usage)
@@ -6,58 +6,286 @@
 ## Introduction
-
+The pipeline is divided into two parts:
-## Samplesheet input
+1. Download and build references
+   - specified with the `--build_references` parameter
+   - required only once before running the pipeline
+   - **Important**: rerun with each new release
+2. Detecting fusions
+   - Supported tools: `Arriba`, `FusionCatcher`, `pizzly`, `SQUID`, `STAR-Fusion` and `StringTie`
+   - QC: `FastQC` and `MultiQC`
+   - Fusion visualisation: `Arriba` (only for fusions detected with Arriba), `fusion-report` and `FusionInspector`
+
+### 1. Download and build references
+
+The rnafusion pipeline needs references for the fusion detection tools, so downloading these is a **requirement**.
+Whilst it is possible to download and build each reference manually, it is advised to download references with the rnafusion pipeline.
+
+First register for a free account at COSMIC at [https://cancer.sanger.ac.uk/cosmic/register](https://cancer.sanger.ac.uk/cosmic/register) using your university email. The account is **only activated upon** clicking the link in the registration email.
-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+Download the references as shown below, including your COSMIC credentials.
+
+> Note that this step takes about 24 hours to complete on HPC.
+
+> Do not provide a samplesheet via the `input` parameter, otherwise the pipeline will run the analysis directly after downloading the references (except if that is what you want).
 ```bash
---input '[path to samplesheet file]'
+nextflow run nf-core/rnafusion \
+  --build_references --all \
+  --cosmic_username <EMAIL> --cosmic_passwd <PASSWORD> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR>
 ```
-### Multiple runs of the same sample
+References for each tool can also be downloaded separately with:
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
+```bash
+nextflow run nf-core/rnafusion \
+  --build_references --<tool1> --<tool2> ... \
+  --cosmic_username <EMAIL> --cosmic_passwd <PASSWORD> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR>
+```
-```console
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
+#### Using the QIAGEN download instead of SANGER (non-academic usage) for the COSMIC database
+
+```bash
+nextflow run nf-core/rnafusion \
+  --build_references --<tool1> --<tool2> ... \
+  --cosmic_username <EMAIL> --cosmic_passwd <PASSWORD> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR> --qiagen
+```
+
+#### References directory tree
+
+```text
+references/
+|-- arriba
+|-- ensembl
+|-- fusion_report_db
+|-- fusioncatcher
+|   `-- human_v102
+|-- pizzly
+|-- star
+`-- starfusion
+```
+
+#### Issues with building references
+
+If process `FUSIONREPORT_DOWNLOAD` times out, it could be due to network restriction (e.g. if trying to run on HPC). As this process is lightweight in compute, storage and time, running on local machines with the following options might solve the issue:
+
+```bash
+nextflow run nf-core/rnafusion \
+  --build_references \
+  --cosmic_username <EMAIL> --cosmic_passwd <PASSWORD> \
+  --fusionreport \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR>
+```
+
+Adjustments for compute requirements can be done by feeding a custom configuration with `-c /PATH/TO/CUSTOM/CONFIG`.
+Where the custom configuration could look like (adaptation to local machine necessary):
+
+```text
+process {
+    withName: 'NFCORE_RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD' {
+        memory = '8.GB'
+        cpus   = 4
+    }
+}
+```
+
+The four `fusion-report` files: `cosmic.db`, `fusiongdb.db`, `fusiongdb2.db`, `mitelman.db`
+should then be copied into the HPC `<path>/references/fusion_report_db`.
+
+#### Non-human references
+
+Non-human references, not supported by default, can be built manually and fed to rnafusion using the parameter `--<tool>_ref`.
+
+#### STAR-Fusion references downloaded vs built
+
+By default STAR-Fusion references are **built**. You can also download them from [CTAT](https://github.com/NCIP/Trinity_CTAT/wiki) by using the flag `--starfusion_build FALSE` for both reference building and fusion detection. This allows more flexibility for different organisms but **be aware that the STAR-Fusion reference download is not recommended, as it is not fully tested!**
-The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
+### 2. Detecting fusions
+
+This step can either be run using all fusion detection tools or specifying individual tools. Visualisation tools will be run on all fusions detected. To run all tools (`arriba`, `fusioncatcher`, `pizzly`, `squid`, `starfusion`, `stringtie`) use the `--all` parameter:
+
+```bash
+nextflow run nf-core/rnafusion \
+  --all \
+  --input <SAMPLE_SHEET> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR>
+```
+
+> **IMPORTANT: Either `--all` or `--<tool>`** is necessary to run detection tools
+
+`--genomes_base` should be the path to the directory containing the folder `references/` that was built in step 1 `build_references`.
+
+Alternatively, to run only specific detection tools use `--<tool>`:
+
+```bash
+nextflow run nf-core/rnafusion \
+  --<tool1> --<tool2> ... \
+  --input <SAMPLE_SHEET> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR>
+```
+
+#### Trimming
+
+There are two trimming options:
+
+1. fastp
+   In this case all tools use the trimmed reads. Quality and adapter trimming happen by default. In addition, tail trimming and adapter_fastq specification are possible. Example usage:
+
+```bash
+nextflow run nf-core/rnafusion \
+  --<tool1> --<tool2> ... \
+  --input <SAMPLE_SHEET> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR> \
+  --fastp_trim \
+  --trim_tail <INTEGER> (optional) \
+  --adapter_fastq <PATH/TO/ADAPTER/FASTA> (optional)
+```
+
+2. hard trimming
+   In this case, only the reads fed to fusioncatcher are trimmed. This is a harsh workaround in case of high read-through; the recommended trimming is thus the fastp one.
   The trimming is done at 75 bp from the tails. Example usage:
+
+```bash
+nextflow run nf-core/rnafusion \
+  --<tool1> --<tool2> ... \
+  --input <SAMPLE_SHEET> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR> \
+  --trim
+```
+
+#### Filter fusions detected by 2 or more tools
+
+```bash
+nextflow run nf-core/rnafusion \
+  --<tool1> --<tool2> ... \
+  --input <SAMPLE_SHEET> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR> \
+  --fusioninspector_filter \
+  --fusionreport_filter
+```
+
+`--fusioninspector_filter` feeds only fusions detected by 2 or more tools to fusioninspector for closer analysis (false by default).
+`--fusionreport_filter` displays only fusions detected by 2 or more tools in the fusionreport html index (true by default).
+
+#### Adding custom fusions to consider as well as the detected set: whitelist
+
+```bash
+nextflow run nf-core/rnafusion \
+  --<tool1> --<tool2> ... \
+  --input <SAMPLE_SHEET> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR> \
+  --whitelist <WHITELIST>
+```
+
+The custom fusion file should have the following format:
+
+```
+GENE1--GENE2
+GENE3--GENE4
+```
+
+#### Running FusionInspector only
+
+FusionInspector can be run standalone with:
+
+```bash
+nextflow run nf-core/rnafusion \
+  --fusioninspector_only \
+  --fusioninspector_fusions <PATH/TO/FUSION/FILE> \
+  --input <SAMPLE_SHEET> \
+  --outdir <OUTDIR>
+```
+
+The custom fusion file should have the following format:
+
+```
+GENE1--GENE2
+GENE3--GENE4
+```
+
+#### Skipping QC
+
+```bash
+nextflow run nf-core/rnafusion \
+  --skip_qc \
+  --all OR <--tool> \
+  --input <SAMPLE_SHEET> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR>
+```
+
+This will skip all QC-related processes.
+
+#### Skipping visualisation
+
+```bash
+nextflow run nf-core/rnafusion \
+  --skip_vis \
+  --all OR <--tool> \
+  --input <SAMPLE_SHEET> \
+  --genomes_base <PATH/TO/REFERENCES> \
+  --outdir <OUTDIR>
+```
+
+This will skip all visualisation processes, including `fusion-report`, `FusionInspector` and `Arriba` visualisation.
+
+#### Optional manual feed-in of fusion files
+
+It is possible to give the output of each tool manually using the argument `--<tool>_fusions PATH/TO/FUSION/FILE`; this feature needs more testing, don't hesitate to open an issue if you encounter problems.
+
+## Samplesheet input
+
+You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the `--input` parameter to specify its location. The pipeline will detect whether a sample is single- or paired-end from the samplesheet - the `fastq_2` column is empty for single-end. The samplesheet has to be a comma-separated file (.csv) but can have as many columns as you desire. There is a strict requirement for the first 4 columns to match those defined in the table below, with the header row included.
+A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
 ```console
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
-```
-
-| Column    | Description                                                                                                                                                                             |
-| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample`  | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+sample,fastq_1,fastq_2,strandedness
+CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward
+CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,forward
+CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,forward
+TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,,forward
+TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,,forward
+TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,,forward
+TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,,forward
+```
+
+| Column         | Description                                                                                                                                                                             |
+| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample`       | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
+| `fastq_1`      | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+| `fastq_2`      | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+| `strandedness` | Strandedness: `forward`, `reverse` or `unstranded`.                                                                                                                                     |
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
+As you can see above for multiple runs of the same sample, the `sample` name has to be the same when you have re-sequenced the same sample more than once, e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
+
+```bash
+sample,fastq_1,fastq_2,strandedness
+CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward
+CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,forward
+CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,forward
+```
+
 ## Running the pipeline
-The typical command for running the pipeline is as follows:
+The typical command for running the pipeline is as follows:
 ```bash
-nextflow run nf-core/rnafusion --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker
+nextflow run nf-core/rnafusion --input ./samplesheet.csv --outdir ./results -profile docker
 ```
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
@@ -88,12 +316,20 @@
 with `params.yaml` containing:
 ```yaml
 input: './samplesheet.csv'
 outdir: './results/'
-genome: 'GRCh37'
 <...>
 ```
 You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
+### Options
+
+#### Set a different `--limitSjdbInsertNsj` parameter
+
+There are two parameters to increase the `--limitSjdbInsertNsj` parameter if necessary:
+
+- `--fusioncatcher_limitSjdbInsertNsj`, default: 2000000
+- `--fusioninspector_limitSjdbInsertNsj`, default: 1000000
+
 ### Updating the pipeline
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version.
When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -102,6 +338,13 @@ When you run the above command, Nextflow automatically pulls the pipeline code f nextflow pull nf-core/rnafusion ``` +#### Compress to CRAM file + +Use the parameter `--cram` to compress the BAM files to CRAM for specific tools. Options: arriba, squid, starfusion. Leave no space between options: + +- `--cram arriba,squid,starfusion`, default: [] +- `--cram arriba` + ### Reproducibility It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. @@ -150,6 +393,11 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters + - Needs to run in two steps: with `--build_references` first and then without `--build_references` to run the analysis + - !!!! Run with `-stub` as all references need to be downloaded otherwise !!!! ### `-resume` @@ -187,7 +435,7 @@ In most cases, you will only need to create a custom config as a one-off but if See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. -If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). 
--> ## Azure Resource Requests diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index a8a3d0dc..6478740f 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -11,9 +11,8 @@ class WorkflowMain { // public static String citation(workflow) { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + + "* The pipeline\n" + + " https://doi.org/10.5281/zenodo.151721952\n\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + diff --git a/main.nf b/main.nf index af935a03..07b9c174 100644 --- a/main.nf +++ b/main.nf @@ -17,7 +17,20 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') +params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +params.chrgtf = WorkflowMain.getGenomeAttribute(params, 'chrgtf') +params.transcript = WorkflowMain.getGenomeAttribute(params, 'transcript') +params.refflat = WorkflowMain.getGenomeAttribute(params, 'refflat') +params.rrna_intervals = WorkflowMain.getGenomeAttribute(params, 'rrna_intervals') + +/* +======================================================================================== + PARAMETER VALUES +======================================================================================== +*/ + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -49,13 +62,20 @@ WorkflowMain.initialise(workflow, params, log) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { RNAFUSION } from './workflows/rnafusion' +include { BUILD_REFERENCES } from './workflows/build_references' +include { RNAFUSION } from './workflows/rnafusion' + // // WORKFLOW: Run main nf-core/rnafusion analysis pipeline // workflow NFCORE_RNAFUSION { - RNAFUSION () + + if (params.build_references) { + BUILD_REFERENCES () + } else { + RNAFUSION() + } } /* diff --git a/modules.json b/modules.json index c8d72c8f..78b30658 100644 --- a/modules.json +++ b/modules.json @@ -5,19 +5,109 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "custom/dumpsoftwareversions": { + "arriba": { "branch": "master", "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", "installed_by": ["modules"] }, + "cat/cat": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "20a508676f40d0fd3f911ac595af91ec845704c4", + "installed_by": ["modules"] + }, "fastqc": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, + "gatk4/bedtointervallist": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "gatk4/createsequencedictionary": { + "branch": "master", + "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "kallisto/index": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", - "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", + "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", + "installed_by": ["modules"] + }, + "picard/collectwgsmetrics": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "2f88b26e9804b99e98f7cd08e74c3f88288a3358", + "installed_by": ["modules"] + }, + "qualimap/rnaseq": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "star/align": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "stringtie/merge": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "stringtie/stringtie": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] } } diff --git a/modules/local/arriba/download/main.nf b/modules/local/arriba/download/main.nf new file mode 100644 index 00000000..166ed69f --- /dev/null +++ b/modules/local/arriba/download/main.nf @@ -0,0 +1,41 @@ +process ARRIBA_DOWNLOAD { + tag "arriba" + label 'process_low' + + conda "bioconda::gnu-wget=1.18" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' : + 'quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5' }" + + output: + path "versions.yml" , emit: versions + path "*" , emit: reference + + script: + """ + wget https://github.com/suhrig/arriba/releases/download/v2.3.0/arriba_v2.3.0.tar.gz -O arriba_v2.3.0.tar.gz + tar -xzvf arriba_v2.3.0.tar.gz + rm arriba_v2.3.0.tar.gz + mv arriba_v2.3.0/database/* . 
+ rm -r arriba_v2.3.0 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3 > versions.yml) + END_VERSIONS + """ + + stub: + """ + touch blacklist_hg38_GRCh38_v2.3.0.tsv.gz + touch protein_domains_hg38_GRCh38_v2.3.0.gff3 + touch cytobands_hg38_GRCh38_v2.3.0.tsv + touch known_fusions_hg38_GRCh38_v2.3.0.tsv.gz + touch protein_domains_hg38_GRCh38_v2.3.0.gff3 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3 > versions.yml) + END_VERSIONS + """ +} diff --git a/modules/local/arriba/download/meta.yml b/modules/local/arriba/download/meta.yml new file mode 100644 index 00000000..55e50b11 --- /dev/null +++ b/modules/local/arriba/download/meta.yml @@ -0,0 +1,26 @@ +name: arriba_download +description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data. +keywords: + - fusion + - arriba +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Folder with arriba references + pattern: "*" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/arriba/visualisation/main.nf b/modules/local/arriba/visualisation/main.nf new file mode 100644 index 00000000..b55666ca --- /dev/null +++ b/modules/local/arriba/visualisation/main.nf @@ -0,0 +1,53 @@ +process ARRIBA_VISUALISATION { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::arriba=2.3.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/arriba:2.3.0--haa8aa89_0' : + 'quay.io/biocontainers/arriba:2.3.0--haa8aa89_0' }" + + input: + tuple val(meta), path(bam), path(bai), path(fusions) + path gtf + path protein_domains + path cytobands + + output: + tuple val(meta), path("*.pdf") , emit: pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def cytobands = cytobands ? " --cytobands=$cytobands" : "" + def prefix = task.ext.prefix ?: "${meta.id}" + def protein_domains = protein_domains ? "--proteinDomains=$protein_domains" : "" + """ + draw_fusions.R \\ + --fusions=$fusions \\ + --alignments=$bam \\ + --output=${prefix}.pdf \\ + --annotation=${gtf} \\ + $cytobands \\ + $protein_domains \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.pdf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ +} diff --git a/modules/local/arriba/visualisation/meta.yml b/modules/local/arriba/visualisation/meta.yml new file mode 100644 index 00000000..a7418ca2 --- /dev/null +++ b/modules/local/arriba/visualisation/meta.yml @@ -0,0 +1,54 @@ +name: arriba_visualisation +description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data. 
+keywords: + - visualisation + - arriba +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAMindex file + pattern: "*.{bai}" + - fusions: + type: file + description: Arriba fusions file + pattern: "*.{tsv}" + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - pdf: + type: file + description: File contains fusions visualisation + pattern: "*.{pdf}" + +authors: + - "@rannick" diff --git a/modules/local/convert2bed/main.nf b/modules/local/convert2bed/main.nf new file mode 100644 index 00000000..c4313f0a --- /dev/null +++ b/modules/local/convert2bed/main.nf @@ -0,0 +1,36 @@ +process CONVERT2BED { + tag "$meta.id" + label 'process_single' + + conda "bioconda::bedops=2.4.41" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedops:2.4.41--h9f5acd7_0' : + 'quay.io/biocontainers/bedops:2.4.41--h9f5acd7_0' }" + + input: + tuple val(meta), path(gtf) + + output: + tuple val(meta), path("*.bed") , emit: bed + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + convert2bed -i gtf < $gtf > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + convert2bed: \$(convert2bed --version | grep vers | sed 's/^.*.version: //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + cat <<-END_VERSIONS > versions.yml + "${task.process}": + convert2bed: \$(convert2bed --version | grep vers | sed 's/^.*.version: //') + END_VERSIONS + """ +} diff --git a/modules/local/convert2bed/meta.yml b/modules/local/convert2bed/meta.yml new file mode 100644 index 00000000..e578330b --- /dev/null +++ b/modules/local/convert2bed/meta.yml @@ -0,0 +1,30 @@ +name: +description: convert from GTF to BED format + - convert2bed +tools: + - convert2bed: + description: convert from GTF to BED format + homepage: https://pachterlab.github.io/kallisto/ + documentation: https://bedops.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/bedops/bedops + doi: "" + licence: ["GNU GENERAL PUBLIC LICENSE"] + +input: + - gtf: + type: file + description: Path to GTF file + pattern: "*.{gtf*}" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bed: + type: file + description: bed file + pattern: "*.bed" + +authors: + - "@rannick" diff --git a/modules/local/ensembl/main.nf b/modules/local/ensembl/main.nf new file mode 100644 index 00000000..d6d71e6d --- /dev/null +++ b/modules/local/ensembl/main.nf @@ -0,0 +1,54 @@ +process ENSEMBL_DOWNLOAD { + tag "ensembl" + label 'process_low' + + conda "bioconda::gnu-wget=1.18" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' : + 'quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5' }" + + input: + val ensembl_version + + output: + path "versions.yml" , emit: versions + path "Homo_sapiens.${params.genome}.${ensembl_version}.all.fa" , emit: fasta + path "Homo_sapiens.${params.genome}.${ensembl_version}.gtf" , emit: gtf + path "Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf" , emit: chrgtf + path "Homo_sapiens.${params.genome}.${ensembl_version}.cdna.all.fa.gz", emit: transcript + + script: + """ + wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/fasta/homo_sapiens/dna/Homo_sapiens.${params.genome}.dna.chromosome.{1..22}.fa.gz + wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/fasta/homo_sapiens/dna/Homo_sapiens.${params.genome}.dna.chromosome.{MT,X,Y}.fa.gz + + wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/gtf/homo_sapiens/Homo_sapiens.${params.genome}.${ensembl_version}.gtf.gz + wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/gtf/homo_sapiens/Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf.gz + wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/fasta/homo_sapiens/cdna/Homo_sapiens.${params.genome}.cdna.all.fa.gz -O Homo_sapiens.${params.genome}.${ensembl_version}.cdna.all.fa.gz + + gunzip -c Homo_sapiens.${params.genome}.dna.chromosome.* > Homo_sapiens.${params.genome}.${ensembl_version}.all.fa + gunzip Homo_sapiens.${params.genome}.${ensembl_version}.gtf.gz + gunzip Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf.gz + + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3 > versions.yml) + END_VERSIONS + """ + + stub: + """ + touch "Homo_sapiens.${params.genome}.${ensembl_version}.all.fa" + touch "Homo_sapiens.${params.genome}.${ensembl_version}.gtf" + touch "Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf" + touch "Homo_sapiens.${params.genome}.${ensembl_version}.cdna.all.fa.gz" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3 > versions.yml) + END_VERSIONS + """ + +} diff --git a/modules/local/fusioncatcher/detect/main.nf b/modules/local/fusioncatcher/detect/main.nf new file mode 100644 index 00000000..fa072bf4 --- /dev/null +++ b/modules/local/fusioncatcher/detect/main.nf @@ -0,0 +1,58 @@ +process FUSIONCATCHER { + tag "$meta.id" + label 'process_high' + + conda "bioconda::fusioncatcher=1.33" + container "docker.io/clinicalgenomics/fusioncatcher:1.33" + + input: + tuple val(meta), path(fasta) + path reference + + output: + tuple val(meta), path("*.fusioncatcher.fusion-genes.txt") , optional:true , emit: fusions + tuple val(meta), path("*.fusioncatcher.summary.txt") , optional:true , emit: summary + tuple val(meta), path("*.fusioncatcher.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads = fasta.toString().replace(" ", ",") + def single_end = meta.single_end ? "--single-end" : "" + """ + fusioncatcher.py \\ + -d $reference \\ + -i $reads \\ + -p $task.cpus \\ + -o . 
\\ + --skip-blat \\ + $single_end \\ + $args + + mv final-list_candidate-fusion-genes.txt ${prefix}.fusioncatcher.fusion-genes.txt + mv summary_candidate_fusions.txt ${prefix}.fusioncatcher.summary.txt + mv fusioncatcher.log ${prefix}.fusioncatcher.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)| sed 's/fusioncatcher.py //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.fusioncatcher.fusion-genes.txt + touch ${prefix}.fusioncatcher.summary.txt + touch ${prefix}.fusioncatcher.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)| sed 's/fusioncatcher.py //') + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/detect/meta.yml b/modules/local/fusioncatcher/detect/meta.yml new file mode 100644 index 00000000..7c8ee425 --- /dev/null +++ b/modules/local/fusioncatcher/detect/meta.yml @@ -0,0 +1,53 @@ +name: fusioncatcher +description: FusionCatcher searches for novel/known somatic fusion genes, translocations, and chimeras in RNA-seq data +keywords: + - fusioncatcher +tools: + - fusioncatcher: + description: FusionCatcher searches for novel/known somatic fusion genes, translocations, and chimeras in RNA-seq data + homepage: https://github.com/ndaniel/fusioncatcher + documentation: https://github.com/ndaniel/fusioncatcher/wiki + tool_dev_url: https://github.com/ndaniel/fusioncatcher + doi: "10.1101/011650v1" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq}" + - reference: + type: directory + description: Path to fusioncatcher references + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fusions: + type: file + description: Final list of candidate fusion genes + pattern: "*.fusioncatcher.fusion-genes.txt" + - summary: + type: file + description: Summary of fusion results + pattern: "*.fusioncatcher_summary.txt" + - log: + type: file + description: Log of fusion results + pattern: "*.fusioncatcher.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@praveenraj2018. 
@rannick" diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf new file mode 100644 index 00000000..156e70b6 --- /dev/null +++ b/modules/local/fusioncatcher/download/main.nf @@ -0,0 +1,51 @@ +process FUSIONCATCHER_DOWNLOAD { + tag "fusioncatcher_download" + label 'process_medium' + + conda "bioconda::fusioncatcher=1.33" + container "docker.io/clinicalgenomics/fusioncatcher:1.33" + + output: + path "*" , emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def human_version = "v102" + def url = "http://sourceforge.net/projects/fusioncatcher/files/data/human_${human_version}.tar.gz.aa" + """ + if wget --spider "$url" 2>/dev/null; then + wget $args $url + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${human_version}.tar.gz.ab + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${human_version}.tar.gz.ac + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${human_version}.tar.gz.ad + cat human_${human_version}.tar.gz.* | tar xz + rm human_${human_version}.tar* + else + fusioncatcher-build \\ + -g homo_sapiens \\ + -o human_${human_version} \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) + END_VERSIONS + """ + + stub: + def human_version = "v102" + """ + mkdir human_${human_version} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/download/meta.yml b/modules/local/fusioncatcher/download/meta.yml new file mode 100644 index 00000000..40421a4e --- /dev/null +++ b/modules/local/fusioncatcher/download/meta.yml @@ -0,0 +1,25 @@ +name: fusioncatcher_download +description: Build genome for fusioncatcher +keywords: + - sort +tools: + - fusioncatcher: + description: Build genome for fusioncatcher + homepage: https://github.com/ndaniel/fusioncatcher/ + documentation: https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md + tool_dev_url: https://github.com/ndaniel/fusioncatcher/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Path to fusioncatcher references + pattern: "*" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/fusioninspector/main.nf b/modules/local/fusioninspector/main.nf new file mode 100644 index 00000000..6f59a590 --- /dev/null +++ b/modules/local/fusioninspector/main.nf @@ -0,0 +1,50 @@ +process FUSIONINSPECTOR { + tag "$meta.id" + label 'process_high' + + conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.12.0 bioconda::samtools=1.9 bioconda::star=2.7.8a" + container 'docker.io/trinityctat/starfusion:1.12.0' + + input: + tuple val(meta), path(reads), path(fusion_list) + path reference + + output: + tuple val(meta), path("*FusionInspector.fusions.tsv") , emit: tsv + path "*" , emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def fasta = meta.single_end ? 
"--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}" + def args = task.ext.args ?: '' + """ + FusionInspector \\ + --fusions $fusion_list \\ + --genome_lib ${reference} \\ + $fasta \\ + --CPU ${task.cpus} \\ + -O . \\ + --out_prefix $prefix \\ + --vis $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + + stub: + """ + touch FusionInspector.log + touch FusionInspector.fusions.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ +} diff --git a/modules/local/fusioninspector/meta.yml b/modules/local/fusioninspector/meta.yml new file mode 100644 index 00000000..cc03239b --- /dev/null +++ b/modules/local/fusioninspector/meta.yml @@ -0,0 +1,40 @@ +name: fusioninspector +description: Validation of Fusion Transcript Predictions +keywords: + - fusioninspector +tools: + - fusioninspector: + description: Validation of Fusion Transcript Predictions + homepage: https://github.com/FusionInspector/FusionInspector + documentation: https://github.com/FusionInspector/FusionInspector/wiki + tool_dev_url: https://github.com/FusionInspector/FusionInspector + doi: 10.1101/2021.08.02.454639" + licence: https://github.com/FusionInspector/FusionInspector/blob/master/LICENSE.txt + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq*}" + - reference: + type: directory + description: Path to ctat references + pattern: "*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Genome resource path + pattern: "*" + +authors: + - "@rannick" diff --git a/modules/local/fusionreport/detect/main.nf b/modules/local/fusionreport/detect/main.nf new file mode 100644 index 00000000..8c35be4a --- /dev/null +++ b/modules/local/fusionreport/detect/main.nf @@ -0,0 +1,65 @@ +process FUSIONREPORT { + tag "$meta.id" + label 'process_medium' + + // Note: 2.7X indices incompatible with AWS iGenomes. + conda "bioconda::star=2.7.9a" + container "docker.io/clinicalgenomics/fusion-report:2.1.5p2.1" + + + input: + tuple val(meta), path(reads), path(arriba_fusions), path(pizzly_fusions), path(squid_fusions), path(starfusion_fusions), path(fusioncatcher_fusions) + path(fusionreport_ref) + + output: + path "versions.yml" , emit: versions + tuple val(meta), path("*fusionreport.tsv") , emit: fusion_list + tuple val(meta), path("*fusionreport_filtered.tsv") , emit: fusion_list_filtered + tuple val(meta), path("index.html") , emit: report + tuple val(meta), path("*_*.html") , emit: html + tuple val(meta), path("*.csv") , optional:true, emit: csv + tuple val(meta), path("*.json") , optional:true, emit: json + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def tools = params.arriba || params.all ? "--arriba ${arriba_fusions} " : '' + tools += params.pizzly || params.all ? "--pizzly ${pizzly_fusions} " : '' + tools += params.squid || params.all ? "--squid ${squid_fusions} " : '' + tools += params.starfusion || params.all ? "--starfusion ${starfusion_fusions} " : '' + tools += params.fusioncatcher || params.all ? 
"--fusioncatcher ${fusioncatcher_fusions} " : '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + fusion_report run $meta.id . $fusionreport_ref $tools --allow-multiple-gene-symbols $args $args2 + + mv fusion_list.tsv ${prefix}.fusionreport.tsv + mv fusion_list_filtered.tsv ${prefix}.fusionreport_filtered.tsv + [ ! -f fusions.csv ] || mv fusions.csv ${prefix}.fusions.csv + [ ! -f fusions.json ] || mv fusions.json ${prefix}.fusions.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + fusion_report DB retrieval: \$(cat $fusionreport_ref/DB-timestamp.txt) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fusionreport_filtered.tsv + touch ${prefix}.fusionreport.tsv + touch index.html + touch AAA_BBB.html + touch ${prefix}.fusions.csv + touch ${prefix}.fusions.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + END_VERSIONS + """ +} diff --git a/modules/local/fusionreport/detect/meta.yml b/modules/local/fusionreport/detect/meta.yml new file mode 100644 index 00000000..f3d5cd88 --- /dev/null +++ b/modules/local/fusionreport/detect/meta.yml @@ -0,0 +1,59 @@ +name: fusionreport +description: fusionreport +keywords: + - sort +tools: + - fusionreport: + description: fusionreport + homepage: https://github.com/matq007/fusion-report + documentation: https://matq007.github.io/fusion-report/#/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reference: + type: path + description: Path to fusionreport references + pattern: "*" + - arriba_fusions: + type: path + description: File + pattern: "*.fusions.tsv" + - pizzly_fusions: + type: path + description: File containing fusions from pizzly + pattern: "*.pizzly.txt" + - squid_fusions: + type: path + description: File containing fusions from squid + pattern: "*.annotated.txt" + - starfusion_fusions: + type: path + description: File containing fusions from STARfusion + pattern: "*.starfusion.fusion_predictions.tsv" + - fusioncatcher_fusions: + type: path + description: File containing fusions from fusioncatcher + pattern: "*.fusions.tsv" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fusion_list: + type: file + description: File containing the summary of all fusions fed-in + pattern: "*.tsv" + - report: + type: file + description: HTML files + pattern: "*.html" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/fusionreport/download/main.nf b/modules/local/fusionreport/download/main.nf new file mode 100644 index 00000000..5dca9b2a --- /dev/null +++ b/modules/local/fusionreport/download/main.nf @@ -0,0 +1,40 @@ +process FUSIONREPORT_DOWNLOAD { + tag 'fusionreport' + label 'process_medium' + + // Note: 2.7X indices incompatible with AWS iGenomes. 
+ conda "bioconda::star=2.7.9a" + container "docker.io/clinicalgenomics/fusion-report:2.1.5p2.1" + + input: + val(username) + val(passwd) + + output: + path "*" , emit: reference + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + fusion_report download --cosmic_usr "$username" --cosmic_passwd "$passwd" $args ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + END_VERSIONS + """ + + stub: + """ + touch cosmic.db + touch fusiongdb2.db + touch fusiongdb.db + touch mitelman.db + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + END_VERSIONS + """ + +} diff --git a/modules/local/fusionreport/download/meta.yml b/modules/local/fusionreport/download/meta.yml new file mode 100644 index 00000000..21a15a89 --- /dev/null +++ b/modules/local/fusionreport/download/meta.yml @@ -0,0 +1,35 @@ +name: fusionreport_download +description: Build DB for fusionreport +keywords: + - sort +tools: + - fusioncatcher: + description: Build DB for fusionreport + homepage: https://github.com/ndaniel/fusioncatcher/ + documentation: https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md + tool_dev_url: https://github.com/ndaniel/fusioncatcher/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +input: + - username: + type: value + description: Organism for which the data is downloaded from Ensembl database and built + pattern: "*" + - passwd: + type: value + description: Organism for which the data is downloaded from Ensembl database and built + pattern: "*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: directory containing the genome resource files required for fusioncatcher + pattern: "fusioncatcher-genome" + +authors: + - "@praveenraj2018" diff --git a/modules/local/kallisto/quant/main.nf b/modules/local/kallisto/quant/main.nf new file mode 100644 index 00000000..7d3e5dfb --- /dev/null +++ b/modules/local/kallisto/quant/main.nf @@ -0,0 +1,47 @@ +process KALLISTO_QUANT { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::kallisto=0.46.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/kallisto:0.46.2--h4f7b962_1' : + 'quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1' }" + + + input: + tuple val(meta), path(reads) + path index + + output: + path "versions.yml" , emit: versions + tuple val(meta), path("*fusions.txt") , emit: txt + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + kallisto quant \ + -t $task.cpus \ + -i $index \ + --fusion \ + -o . 
\ + $reads + mv fusion.txt ${prefix}.kallisto_quant.fusions.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.kallisto_quant.fusions.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') + END_VERSIONS + """ +} + diff --git a/modules/local/kallisto/quant/meta.yml b/modules/local/kallisto/quant/meta.yml new file mode 100644 index 00000000..31821aa6 --- /dev/null +++ b/modules/local/kallisto/quant/meta.yml @@ -0,0 +1,44 @@ +name: kallisto_quant +description: runs the kallisto quantification algorithm + - quant +tools: + - kallisto: + description: Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. + homepage: https://pachterlab.github.io/kallisto/ + documentation: https://pachterlab.github.io/kallisto/manual + tool_dev_url: https://github.com/pachterlab/kallisto + doi: "" + licence: ["BSD-2-Clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq}" + - reference: + type: directory + description: Path to kallisto index + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fusions: + type: file + description: fusions + pattern: "*.txt" + +authors: + - "@rannick" diff --git a/modules/local/megafusion/main.nf b/modules/local/megafusion/main.nf new file mode 100644 index 00000000..d8cb5db0 --- /dev/null +++ b/modules/local/megafusion/main.nf @@ -0,0 +1,41 @@ +process MEGAFUSION { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pandas:1.5.2' : + 'quay.io/biocontainers/pandas:1.5.2' }" + + input: + tuple val(meta), path(tsv), path(report) + + output: + path "versions.yml" , emit: versions + tuple val(meta), path("*vcf") , emit: vcf + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + megafusion.py --fusioninspector $tsv --fusionreport $report --sample ${prefix} --out ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/megafusion/meta.yml b/modules/local/megafusion/meta.yml new file mode 100644 index 00000000..a25cd74c --- /dev/null +++ b/modules/local/megafusion/meta.yml @@ -0,0 +1,39 @@ +name: megafusion +description: megafusion +keywords: + - sort +tools: + - fusionreport: + description: megafusion + homepage: Adapted from https://github.com/J35P312/MegaFusion + documentation: https://github.com/J35P312/MegaFusion + doi: "" + licence: [""] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tsv: + type: path + description: Path to FusionInspector tsv output + pattern: "*" + - report: + type: path + description: Path to fusionreport report + pattern: "*.fusions.tsv" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: File containing the summary of all fusions as vcf file + pattern: "*.tsv" + +authors: + - "@rannick" diff --git a/modules/local/picard/collectrnaseqmetrics/main.nf b/modules/local/picard/collectrnaseqmetrics/main.nf new file mode 100644 index 00000000..f1b2fff5 --- /dev/null +++ b/modules/local/picard/collectrnaseqmetrics/main.nf @@ -0,0 +1,68 @@ +process PICARD_COLLECTRNASEQMETRICS { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::picard=2.27.4" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai) + path(refflat) + path(rrna_intervals) + + output: + tuple val(meta), path("*rna_metrics.txt") , emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def strandedness = '' + // def strandedness = '--STRAND_SPECIFICITY FIRST_READ_TRANSCRIPTION_STRAND' + if ("${meta.strandedness}" == 'forward') { + strandedness = '--STRAND_SPECIFICITY FIRST_READ_TRANSCRIPTION_STRAND' + } else if ("${meta.strandedness}" == 'reverse') { + strandedness = '--STRAND_SPECIFICITY SECOND_READ_TRANSCRIPTION_STRAND' + } else { + strandedness = '--STRAND_SPECIFICITY NONE' + } + + def rrna = rrna_intervals == [] ? '' : "--RIBOSOMAL_INTERVALS ${rrna_intervals}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard CollectRnaMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + CollectRnaSeqMetrics \\ + --TMP_DIR ./tmp \\ + ${strandedness} \\ + ${rrna} \\ + --REF_FLAT ${refflat} \\ + --INPUT ${bam} \\ + --OUTPUT ${prefix}_rna_metrics.txt \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectRnaMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_rna_metrics.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectRnaMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/local/picard/collectrnaseqmetrics/meta.yml b/modules/local/picard/collectrnaseqmetrics/meta.yml new file mode 100644 index 00000000..0cae7d07 --- /dev/null +++ b/modules/local/picard/collectrnaseqmetrics/meta.yml @@ -0,0 +1,53 @@ +name: picard_collectrnaseqmetrics +description: Produces RNA alignment metrics for a SAM or BAM file. +keywords: + - alignment + - metrics + - statistics + - quality + - bam + - RNA +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag + pattern: "*.{bai}" + - refflat: + type: file + description: Gene annotations in refFlat form + - rrna_intervals: + type: file + description: Location of rRNA sequences in genome, in interval_list format +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - metrics: + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rannick" diff --git a/modules/local/pizzly/detect/main.nf b/modules/local/pizzly/detect/main.nf new file mode 100644 index 00000000..9f10caf8 --- /dev/null +++ b/modules/local/pizzly/detect/main.nf @@ -0,0 +1,49 @@ +process PIZZLY { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::kallisto=0.46.2 bioconda::pizzly==0.37.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pizzly:0.37.3--py36_2' : + 'quay.io/biocontainers/pizzly:0.37.3--h470a237_3' }" + + input: + tuple val(meta), path(txt) + path transcript + path gtf + + output: + path "versions.yml" , emit: versions + tuple val(meta), path("*pizzly.txt") , emit: fusions + tuple val(meta), path("*unfiltered.json") , emit: fusions_unfiltered + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pizzly \\ + $args \\ + --gtf $gtf \\ + --fasta $transcript \\ + --output ${prefix}.pizzly $txt + + pizzly_flatten_json.py ${prefix}.pizzly.json ${prefix}.pizzly.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pizzly: \$(pizzly --version | grep pizzly | sed -e "s/pizzly version: //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.pizzly.txt + touch ${prefix}.pizzly.unfiltered.json + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pizzly: \$(pizzly --version | grep pizzly | sed -e "s/pizzly version: //g") + END_VERSIONS + """ +} + diff --git a/modules/local/pizzly/detect/meta.yml b/modules/local/pizzly/detect/meta.yml new file mode 100644 index 00000000..930b3a62 --- /dev/null +++ b/modules/local/pizzly/detect/meta.yml @@ -0,0 +1,44 @@ +name: pizzly +description: Pizzly detection of fusions. +keywords: + - fusion + - pizzly +tools: + - pizzly: + description: Fast fusion detection using kallisto + homepage: https://github.com/pmelsted/pizzly + documentation: https://github.com/pmelsted/pizzly + tool_dev_url: https://github.com/pmelsted/pizzly + doi: "" + licence: ["BSD-2-Clause"] + +input: + - fasta: + type: file + description: genome fasta file + pattern: "*.{fasta*}" + - reference: + type: directory + description: Path to kallisto index + pattern: "*" + - gtf: + type: file + description: gtf reference + pattern: "*.gtf" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fusions: + type: file + description: fusions + pattern: "*pizzly.txt" + - unfiltered: + type: file + description: unfiltered fusions + pattern: "*unfiltered.json" + +authors: + - "@rannick" diff --git a/modules/local/pizzly/download/main.nf b/modules/local/pizzly/download/main.nf new file mode 100644 index 00000000..571cd4dc --- /dev/null +++ b/modules/local/pizzly/download/main.nf @@ -0,0 +1,40 @@ +process PIZZLY_DOWNLOAD { + tag "pizzly" + label 'process_medium' + + conda "bioconda::kallisto=0.46.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/kallisto:0.46.2--h4f7b962_1' : + 'quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1' }" + + input: + path transcript + + output: + path "versions.yml" , emit: versions + path "index.idx" , emit: reference + + script: + def args = task.ext.args ?: '' + """ + kallisto index \\ + -i index.idx \\ + $args \\ + $transcript + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') + END_VERSIONS """ + + stub: + """ + touch index.idx + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') + END_VERSIONS + """ + +} diff --git a/modules/local/reformat/main.nf b/modules/local/reformat/main.nf new file mode 100644 index 00000000..969b0e34 --- /dev/null +++ b/modules/local/reformat/main.nf @@ -0,0 +1,53 @@ +process REFORMAT { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bbmap=38.90" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bbmap:38.90--he522d1c_1' : + 'quay.io/biocontainers/bbmap:38.90--he522d1c_1' }" + + + input: + tuple val(meta), path(reads) + + output: + path "versions.yml" , emit: versions + tuple val(meta), path("*trimmed.fq.gz") , emit: reads_out + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def in1 = "in=${reads[0]}" + def in2 = meta.single_end ? "" : "in=${reads[1]}" + def out1 ="out=${prefix}_R1_trimmed.fq.gz" + def out2 =meta.single_end ? "" : "out=${prefix}_R2_trimmed.fq.gz" + + """ + reformat.sh $in1 $out1 $args + reformat.sh $in2 $out2 $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + reformat.sh: \$(echo \$(reformat.sh --version 2>&1)| sed -e "s/BBMap version //g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def out1 ="out=${prefix}_R1_trimmed.fq.gz" + def out2 =meta.single_end ? "" : "out=${prefix}_R2_trimmed.fq.gz" + """ + touch $out1 + touch $out2 + cat <<-END_VERSIONS > versions.yml + "${task.process}": + reformat.sh: \$(echo \$(reformat.sh --version 2>&1)| sed -e "s/BBMap version //g" ) + END_VERSIONS + """ +} diff --git a/modules/local/reformat/meta.yml b/modules/local/reformat/meta.yml new file mode 100644 index 00000000..86e0970a --- /dev/null +++ b/modules/local/reformat/meta.yml @@ -0,0 +1,40 @@ +name: fusionreport +description: fusionreport +keywords: + - sort +tools: + - fusionreport: + description: fusionreport + homepage: https://github.com/matq007/fusion-report + documentation: https://matq007.github.io/fusion-report/#/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq}*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: FASTQ file + pattern: "*.{fq.gz}" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/rrnatranscripts/main.nf b/modules/local/rrnatranscripts/main.nf new file mode 100644 index 00000000..e4afc284 --- /dev/null +++ b/modules/local/rrnatranscripts/main.nf @@ -0,0 +1,42 @@ +process RRNA_TRANSCRIPTS { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::python=3.8.3" + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.9--1' : + 'quay.io/biocontainers/python:3.9--1' }" + + + input: + tuple val(meta), path(gtf) + + output: + tuple val(meta), path("*rrna_intervals.gtf") , emit: rrna_gtf + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/rnafusion/bin/ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + get_rrna_transcripts.py $gtf ${prefix}_rrna_intervals.gtf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_rrna_intervals.gtf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 9226705d..9473ea1b 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -3,10 +3,12 @@ process SAMPLESHEET_CHECK { label 'process_single' conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : 'biocontainers/python:3.8.3' }" + input: path samplesheet diff --git a/modules/local/squid/annotate/main.nf b/modules/local/squid/annotate/main.nf new file mode 100644 index 00000000..19975b00 --- /dev/null +++ b/modules/local/squid/annotate/main.nf @@ -0,0 +1,41 @@ + +process SQUID_ANNOTATE { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::squid=1.5" + container "docker.io/nfcore/rnafusion:squid_1.5-star2.7.1a" + + + + input: + tuple val(meta), path(txt) + path gtf + + output: + tuple val(meta), path("*annotated.txt") , emit: fusions_annotated + path "versions.yml" , emit: versions + + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + AnnotateSQUIDOutput.py $gtf $txt ${prefix}.squid.fusions.annotated.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + squid: \$(echo \$(squid --version 2>&1) | sed 's/v//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.squid.fusions.annotated.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + squid: \$(echo \$(squid --version 2>&1) | sed 's/v//') + END_VERSIONS + """ +} diff --git a/modules/local/squid/annotate/meta.yml b/modules/local/squid/annotate/meta.yml new file mode 100644 index 00000000..e1a1f0d2 --- /dev/null +++ b/modules/local/squid/annotate/meta.yml @@ -0,0 +1,36 @@ +name: squid +description: Squid detection of fusions. 
+keywords: + - fusion + - pizzly +tools: + - pizzly: + description: Fusion detection using squid + homepage: https://github.com/Kingsford-Group/squid + documentation: https://github.com/Kingsford-Group/squid + tool_dev_url: https://github.com/Kingsford-Group/squid + doi: "" + licence: ["BSD-3-Clause"] + +input: + - fusions: + type: directory + description: Path to squid fusions + pattern: "*.txt" + - gtf: + type: file + description: gtf reference + pattern: "*.gtf" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fusions_annotated: + type: file + description: squid fusions annotated + pattern: "*squid.fusions.annotated.txt" + +authors: + - "@rannick" diff --git a/modules/local/squid/detect/main.nf b/modules/local/squid/detect/main.nf new file mode 100644 index 00000000..3ccb6e3e --- /dev/null +++ b/modules/local/squid/detect/main.nf @@ -0,0 +1,40 @@ + +process SQUID { + tag "squid" + label 'process_medium' + + conda "bioconda::squid=1.5" + container "docker.io/nfcore/rnafusion:squid_1.5-star2.7.1a" + + + + input: + tuple val(meta), path(bam), path(chimeric_bam) + + output: + tuple val(meta), path("*sv.txt") , emit: fusions + path "versions.yml" , emit: versions + + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + squid -b $bam -c $chimeric_bam -o ${prefix}.squid.fusions + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + squid: \$(echo \$(squid --version 2>&1) | sed 's/v//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.squid.fusions_sv.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + squid: \$(echo \$(squid --version 2>&1) | sed 's/v//') + END_VERSIONS + """ +} diff --git a/modules/local/squid/detect/meta.yml b/modules/local/squid/detect/meta.yml new file mode 100644 index 00000000..a7f1e61a --- /dev/null +++ b/modules/local/squid/detect/meta.yml @@ -0,0 +1,41 @@ +name: squid +description: Squid detection of fusions. +keywords: + - fusion + - pizzly +tools: + - pizzly: + description: Fusion detection using squid + homepage: https://github.com/Kingsford-Group/squid + documentation: https://github.com/Kingsford-Group/squid + tool_dev_url: https://github.com/Kingsford-Group/squid + doi: "" + licence: ["BSD-3-Clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - chimeric_bam: + type: file + description: BAM/CRAM/SAM file containing only chimeric sorted reads + pattern: "*.{bam,cram,sam}" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fusions: + type: directory + description: Path to squid fusions + pattern: "*.txt" + +authors: + - "@rannick" diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf new file mode 100644 index 00000000..da5b0c3b --- /dev/null +++ b/modules/local/starfusion/build/main.nf @@ -0,0 +1,54 @@ +process STARFUSION_BUILD { + tag 'star-fusion' + + conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.12.0 bioconda::trinity=2.13.2 bioconda::samtools=1.9 bioconda::star=2.7.8a" + container "docker.io/trinityctat/starfusion:1.12.0" + + input: + path fasta + path gtf + + output: + path "*" , emit: reference + + script: + def binPath = (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) ? "prep_genome_lib.pl" : "/usr/local/src/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl" + """ + export TMPDIR=/tmp + wget http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam34.0/Pfam-A.hmm.gz --no-check-certificate + wget https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz -O CTAT_HumanFusionLib_Mar2021.dat.gz --no-check-certificate + wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/AnnotFilterRule.pm -O AnnotFilterRule.pm --no-check-certificate + wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm --no-check-certificate + wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3f --no-check-certificate + wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3i --no-check-certificate + wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3m --no-check-certificate + wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3p --no-check-certificate + gunzip Pfam-A.hmm.gz && hmmpress Pfam-A.hmm + $binPath \\ + --genome_fa $fasta \\ + --gtf $gtf \\ + --annot_filter_rule AnnotFilterRule.pm \\ + --fusion_annot_lib CTAT_HumanFusionLib_Mar2021.dat.gz \\ + --pfam_db Pfam-A.hmm \\ + --dfam_db homo_sapiens_dfam.hmm \\ + --max_readlength $params.read_length \\ + --CPU $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + + stub: + """ + mkdir ctat_genome_lib_build_dir + touch ref_annot.cdna.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + +} diff --git a/modules/local/starfusion/build/meta.yml b/modules/local/starfusion/build/meta.yml new file mode 100644 index 00000000..c87b251b --- /dev/null +++ b/modules/local/starfusion/build/meta.yml @@ -0,0 +1,31 @@ +name: starfusion_downloadgenome +description: Download STAR-fusion genome resource required to run STAR-Fusion caller +keywords: + - downoad +tools: + - star-fusion: + description: Fusion calling algorithm for RNAseq data + homepage: https://github.com/STAR-Fusion/ + documentation: 
+      documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki/installing-star-fusion
+      tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion
+      doi: "10.1186/s13059-019-1842-9"
+      licence: ["GPL v3"]
+
+input:
+  - fasta:
+      type: file
+      description: genome fasta file
+      pattern: "*.{fasta}"
+  - gtf:
+      type: file
+      description: genome gtf file
+      pattern: "*.{gtf}"
+
+output:
+  - reference:
+      type: directory
+      description: Reference dir
+      pattern: "ctat_genome_lib_build_dir"
+
+authors:
+  - "@praveenraj2018"
diff --git a/modules/local/starfusion/detect/main.nf b/modules/local/starfusion/detect/main.nf
new file mode 100644
index 00000000..fba671b6
--- /dev/null
+++ b/modules/local/starfusion/detect/main.nf
@@ -0,0 +1,55 @@
+process STARFUSION {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.12.0 bioconda::trinity=2.13.2 bioconda::samtools=1.9 bioconda::star=2.7.8a"
+    container 'docker.io/trinityctat/starfusion:1.12.0'
+
+    input:
+    tuple val(meta), path(reads), path(junction)
+    path reference
+
+    output:
+    tuple val(meta), path("*.fusion_predictions.tsv"), emit: fusions
+    tuple val(meta), path("*.abridged.tsv")          , emit: abridged
+    tuple val(meta), path("*.coding_effect.tsv")     , optional: true, emit: coding_effect
+    path "versions.yml"                              , emit: versions
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // read arguments: single-end data only passes --left_fq
+    def reads_in = meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}"
+    def args = task.ext.args ?: ''
+    """
+    STAR-Fusion \\
+        --genome_lib_dir $reference \\
+        $reads_in \\
+        -J $junction \\
+        --CPU $task.cpus \\
+        --examine_coding_effect \\
+        --output_dir . \\
+        $args
+
+    mv star-fusion.fusion_predictions.tsv ${prefix}.starfusion.fusion_predictions.tsv
+    mv star-fusion.fusion_predictions.abridged.tsv ${prefix}.starfusion.abridged.tsv
+    mv star-fusion.fusion_predictions.abridged.coding_effect.tsv ${prefix}.starfusion.abridged.coding_effect.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.starfusion.fusion_predictions.tsv
+    touch ${prefix}.starfusion.abridged.tsv
+    touch ${prefix}.starfusion.abridged.coding_effect.tsv
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/starfusion/detect/meta.yml b/modules/local/starfusion/detect/meta.yml
new file mode 100644
index 00000000..7337dad5
--- /dev/null
+++ b/modules/local/starfusion/detect/meta.yml
@@ -0,0 +1,56 @@
+name: starfusion
+description: Fast and Accurate Fusion Transcript Detection from RNA-Seq
+keywords:
+  - fusion
+tools:
+  - star-fusion:
+      description: Fast and Accurate Fusion Transcript Detection from RNA-Seq
+      homepage: https://github.com/STAR-Fusion/STAR-Fusion
+      documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki
+      tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion/releases
+      doi: "10.1186/s13059-019-1842-9"
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: FASTQ reads used for the STAR alignment
+      pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
+  - junction:
+      type: file
+      description: Chimeric junction output from STAR aligner
+      pattern: "*.{out.junction}"
+  - reference:
+      type: directory
+      description: Reference dir
+      pattern: "ctat_genome_lib_build_dir"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - fusions:
+      type: file
+      description: Fusion events from STAR-fusion
+      pattern: "*.{fusion_predictions.tsv}"
+  - abridged:
+      type: file
+      description: Fusion events from STAR-fusion
+      pattern: "*.{abridged.tsv}"
+  - coding_effect:
+      type: file
+      description: Fusion events from STAR-fusion
+      pattern: "*.{coding_effect.tsv}"
+
+authors:
+  - "@praveenraj2018"
diff --git a/modules/local/starfusion/download/main.nf b/modules/local/starfusion/download/main.nf
new file mode 100644
index 00000000..9c9b1482
--- /dev/null
+++ b/modules/local/starfusion/download/main.nf
@@ -0,0 +1,32 @@
+process STARFUSION_DOWNLOAD {
+    tag 'star-fusion'
+
+    conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.12.0 bioconda::trinity=2.13.2 bioconda::samtools=1.9 bioconda::star=2.7.8a"
+    container 'docker.io/trinityctat/starfusion:1.12.0'
+
+    output:
+    path "ctat_genome_lib_build_dir/*"            , emit: reference
+    path "ctat_genome_lib_build_dir/ref_annot.gtf", emit: chrgtf
+
+    script:
+    """
+    wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz --no-check-certificate
+
+    tar xvf GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz
+
+    rm GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz
+
+    mv */ctat_genome_lib_build_dir .
+    """
+
+    stub:
+    """
+    mkdir ctat_genome_lib_build_dir
+    touch ctat_genome_lib_build_dir/ref_annot.cdna.fa
+    touch ctat_genome_lib_build_dir/ref_annot.gtf
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/starfusion/download/meta.yml b/modules/local/starfusion/download/meta.yml
new file mode 100644
index 00000000..24e84252
--- /dev/null
+++ b/modules/local/starfusion/download/meta.yml
@@ -0,0 +1,25 @@
+name: starfusion_downloadgenome
+description: Download STAR-fusion genome resource required to run STAR-Fusion caller
+keywords:
+  - download
+tools:
+  - star-fusion:
+      description: Fusion calling algorithm for RNAseq data
+      homepage: https://github.com/STAR-Fusion/
+      documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki/installing-star-fusion
+      tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion
+      doi: "10.1186/s13059-019-1842-9"
+      licence: ["GPL v3"]
+
+output:
+  - reference:
+      type: directory
+      description: Genome resource path
+      pattern: "ctat_genome_lib_build_dir"
+  - chrgtf:
+      type: file
+      description: genome gtf file
+      pattern: "*.{gtf}"
+
+authors:
+  - "@praveenraj2018,@rannick"
diff --git a/modules/local/uscs/custom_gtftogenepred/main.nf b/modules/local/uscs/custom_gtftogenepred/main.nf
new file mode 100644
index 00000000..46052b5a
--- /dev/null
+++ b/modules/local/uscs/custom_gtftogenepred/main.nf
@@ -0,0 +1,37 @@
+process GTF_TO_REFFLAT {
+    label 'process_low'
+
+    conda "bioconda::ucsc-gtftogenepred=377"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ucsc-gtftogenepred:377--ha8a8165_5' :
+        'quay.io/biocontainers/ucsc-gtftogenepred:377--ha8a8165_5' }"
+
+    input:
+    path gtf
+
+    output:
+    path('*.refflat')  , emit: refflat
+    path "versions.yml", emit: versions
+
+    script:
+    def genepred = gtf + '.genepred'
+    def refflat = gtf + '.refflat'
+    // refFlat = name2 (gene symbol) followed by the ten standard genePred columns;
+    // pasting the table to itself lets cut pull name2 (col 12) and cols 1-10 (cols 16-25 of the doubled table)
+    """
+    gtfToGenePred -genePredExt -geneNameAsName2 ${gtf} ${genepred}
+    paste ${genepred} ${genepred} | cut -f12,16-25 > ${refflat}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gtfToGenePred: 377
+    END_VERSIONS
+    """
+
+    stub:
+    def refflat = gtf + '.refflat'
+    """
+    touch ${refflat}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gtfToGenePred: 377
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/uscs/custom_gtftogenepred/meta.yml b/modules/local/uscs/custom_gtftogenepred/meta.yml
new file mode 100644
index 00000000..09711f43
--- /dev/null
+++ b/modules/local/uscs/custom_gtftogenepred/meta.yml
@@ -0,0 +1,34 @@
+name: gtf_to_refflat
+description: generate gene annotations in refFlat form
+keywords:
+  - gtftorefflat
+tools:
+  - gtf_to_refflat:
+      description: generate gene annotations in refFlat form using the UCSC gtfToGenePred utility
+      homepage: https://genome.ucsc.edu/
+      documentation: https://hgdownload.soe.ucsc.edu/admin/exe/
+      tool_dev_url: https://github.com/ucscGenomeBrowser/kent
+      doi: ""
+      licence: ["varies; see http://genome.ucsc.edu/license"]
+
+input:
+  - gtf:
+      type: file
+      description: genome gtf file
+      pattern: "*.{gtf}"
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - refflat:
+      type: file
+      description: gene annotations in refFlat form
+      pattern: "*.refflat"
+
+authors:
+  - "@rannick"
diff --git a/modules/nf-core/arriba/main.nf b/modules/nf-core/arriba/main.nf
new file mode 100644
index 00000000..e4b48be2
--- /dev/null
+++ b/modules/nf-core/arriba/main.nf
@@ -0,0 +1,66 @@
+process ARRIBA {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::arriba=2.3.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/arriba:2.3.0--haa8aa89_0' :
+        'quay.io/biocontainers/arriba:2.3.0--haa8aa89_0' }"
+
+    input:
+    tuple val(meta), path(bam)
+    path fasta
+    path gtf
+    path blacklist
+    path known_fusions
+    path structural_variants
+    path tags
+    path protein_domains
+
+    output:
+    tuple val(meta), path("*.fusions.tsv")          , emit: fusions
+    tuple val(meta), path("*.fusions.discarded.tsv"), emit: fusions_fail
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // without a blacklist file, disable arriba's blacklist filter (-f blacklist) instead
+    def blacklist = blacklist ? "-b $blacklist" : "-f blacklist"
+    def known_fusions = known_fusions ? "-k $known_fusions" : ""
+    def structural_variants = structural_variants ? "-d $structural_variants" : ""
+    def tags = tags ? "-t $tags" : ""
+    def protein_domains = protein_domains ? "-p $protein_domains" : ""
+
+    """
+    arriba \\
+        -x $bam \\
+        -a $fasta \\
+        -g $gtf \\
+        -o ${prefix}.fusions.tsv \\
+        -O ${prefix}.fusions.discarded.tsv \\
+        $blacklist \\
+        $known_fusions \\
+        $structural_variants \\
+        $tags \\
+        $protein_domains \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo stub > ${prefix}.fusions.tsv
+    echo stub > ${prefix}.fusions.discarded.tsv
+
+    echo "${task.process}:" > versions.yml
+    echo '    arriba: 2.3.0' >> versions.yml
+    """
+}
diff --git a/modules/nf-core/arriba/meta.yml b/modules/nf-core/arriba/meta.yml
new file mode 100644
index 00000000..119dd912
--- /dev/null
+++ b/modules/nf-core/arriba/meta.yml
@@ -0,0 +1,74 @@
+name: arriba
+description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data.
+keywords:
+  - fusion
+  - arriba
+tools:
+  - arriba:
+      description: Fast and accurate gene fusion detection from RNA-Seq data
+      homepage: https://github.com/suhrig/arriba
+      documentation: https://arriba.readthedocs.io/en/latest/
+      tool_dev_url: https://github.com/suhrig/arriba
+      doi: "10.1101/gr.257246.119"
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - fasta:
+      type: file
+      description: Assembly FASTA file
+      pattern: "*.{fasta}"
+  - gtf:
+      type: file
+      description: Annotation GTF file
+      pattern: "*.{gtf}"
+  - blacklist:
+      type: file
+      description: Blacklist file
+      pattern: "*.{tsv}"
+  - known_fusions:
+      type: file
+      description: Known fusions file
+      pattern: "*.{tsv}"
+  - structural_variants:
+      type: file
+      description: Structural variants file
+      pattern: "*.{tsv}"
+  - tags:
+      type: file
+      description: Tags file
+      pattern: "*.{tsv}"
+  - protein_domains:
+      type: file
+      description: Protein domains file
+      pattern: "*.{gff3}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - fusions:
+      type: file
+      description: File contains fusions which pass all of Arriba's filters.
+      pattern: "*.{fusions.tsv}"
+  - fusions_fail:
+      type: file
+      description: File contains fusions that Arriba classified as an artifact or that are also observed in healthy tissue.
+      pattern: "*.{fusions.discarded.tsv}"
+
+authors:
+  - "@praveenraj2018,@rannick"
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
new file mode 100644
index 00000000..840af4b9
--- /dev/null
+++ b/modules/nf-core/cat/cat/main.nf
@@ -0,0 +1,62 @@
+process CAT_CAT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "conda-forge::pigz=2.3.4"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'quay.io/biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 00000000..8acc0bfa --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,37 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 00000000..8a0b5600 --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,80 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()]
+    if (meta.single_end) {
+        if (readList.size >= 1) {
+            """
+            cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size >= 2) {
+            def read1 = []
+            def read2 = []
+            readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
+            """
+            cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
+            cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
+    // conditions mirror the script branches (>= 1 / >= 2) so the stub also emits
+    // outputs for the minimal one-file (single-end) and two-file (paired-end) cases
+    if (meta.single_end) {
+        if (readList.size >= 1) {
+            """
+            touch ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size >= 2) {
+            """
+            touch ${prefix}_1.merged.fastq.gz
+            touch ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
+
+}
diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml
new file mode 100644
index 00000000..c836598e
--- /dev/null
+++ b/modules/nf-core/cat/fastq/meta.yml
@@ -0,0 +1,39 @@
+name: cat_fastq
+description: Concatenates fastq files
+keywords:
+  - fastq
+  - concatenate
+tools:
+  - cat:
+      description: |
+        The cat utility reads files sequentially, writing them to the standard output.
+      documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: list
+      description: |
+        List of input FastQ files to be concatenated.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ] + - reads: + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index e55b8d43..da033408 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..5eeb9b09 --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,102 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::fastp=0.23.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : + 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+        fastp \\
+            --in1 ${prefix}.fastq.gz \\
+            --out1 ${prefix}.fastp.fastq.gz \\
+            --thread $task.cpus \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $adapter_list \\
+            $fail_fastq \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    } else {
+        def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
+        """
+        [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
+        [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
+        fastp \\
+            --in1 ${prefix}_1.fastq.gz \\
+            --in2 ${prefix}_2.fastq.gz \\
+            --out1 ${prefix}_1.fastp.fastq.gz \\
+            --out2 ${prefix}_2.fastp.fastq.gz \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $adapter_list \\
+            $fail_fastq \\
+            $merge_fastq \\
+            --thread $task.cpus \\
+            --detect_adapter_for_pe \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    }
+}
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
new file mode 100644
index 00000000..197ea7ca
--- /dev/null
+++ b/modules/nf-core/fastp/meta.yml
@@ -0,0 +1,73 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+  - trimming
+  - quality control
+  - fastq
+tools:
+  - fastp:
+      description: |
+        A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
+      documentation: https://github.com/OpenGene/fastp
+      doi: 10.1093/bioinformatics/bty560
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively. If you wish to run interleaved paired-end data, supply as single-end data
+        but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+  - adapter_fasta:
+      type: file
+      description: File in FASTA format containing possible adapters to remove.
+      pattern: "*.{fasta,fna,fas,fa}"
+  - save_trimmed_fail:
+      type: boolean
+      description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
+  - save_merged:
+      type: boolean
+      description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz`
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: The trimmed/modified/unmerged fastq reads
+      pattern: "*fastp.fastq.gz"
+  - json:
+      type: file
+      description: Results in JSON format
+      pattern: "*.json"
+  - html:
+      type: file
+      description: Results in HTML format
+      pattern: "*.html"
+  - log:
+      type: file
+      description: fastq log file
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads_fail:
+      type: file
+      description: Reads that failed the preprocessing
+      pattern: "*fail.fastq.gz"
+  - reads_merged:
+      type: file
+      description: Reads that were successfully merged
+      pattern: "*.{merged.fastq.gz}"
+authors:
+  - "@drpatelh"
+  - "@kevinmenden"
diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf
new file mode 100644
index 00000000..41fab003
--- /dev/null
+++ b/modules/nf-core/gatk4/bedtointervallist/main.nf
@@ -0,0 +1,55 @@
+process GATK4_BEDTOINTERVALLIST {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::gatk4=4.3.0.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.3.0.0--py36hdfd78af_0':
+        'quay.io/biocontainers/gatk4:4.3.0.0--py36hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(bed)
+    path dict
+
+    output:
+    tuple val(meta), path('*.interval_list'), emit: interval_list
+    path "versions.yml"                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    def avail_mem = 3
+    if (!task.memory) {
+        log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
+    """
+    gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\
+        --INPUT $bed \\
+        --OUTPUT ${prefix}.interval_list \\
+        --SEQUENCE_DICTIONARY $dict \\
+        --TMP_DIR . \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.interval_list
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml
new file mode 100644
index 00000000..986f1592
--- /dev/null
+++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml
@@ -0,0 +1,40 @@
+name: gatk4_bedtointervallist
+description: Creates an interval list from a bed file and a reference dict
+keywords:
+  - bed
+  - interval list
+tools:
+  - gatk4:
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ["Apache-2.0"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test'] + - bed: + type: file + description: Input bed file + pattern: "*.bed" + - dict: + type: file + description: Sequence dictionary + pattern: "*.dict" +output: + - interval_list: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf new file mode 100644 index 00000000..bc324ada --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,51 @@ +process GATK4_CREATESEQUENCEDICTIONARY { + tag "$fasta" + label 'process_medium' + + conda "bioconda::gatk4=4.3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.3.0.0--py36hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.3.0.0--py36hdfd78af_0' }" + + input: + path fasta + + output: + path "*.dict" , emit: dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6 + if (!task.memory) { + log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" CreateSequenceDictionary \\ + --REFERENCE $fasta \\ + --URI $fasta \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 00000000..69c23581 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,32 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - dictionary + - fasta +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ["Apache-2.0"]
+
+input:
+  - fasta:
+      type: file
+      description: Input fasta file
+      pattern: "*.{fasta,fa}"
+output:
+  - dict:
+      type: file
+      description: gatk dictionary file
+      pattern: "*.{dict}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@maxulysse"
diff --git a/modules/nf-core/kallisto/index/main.nf b/modules/nf-core/kallisto/index/main.nf
new file mode 100644
index 00000000..a24024b4
--- /dev/null
+++ b/modules/nf-core/kallisto/index/main.nf
@@ -0,0 +1,44 @@
+process KALLISTO_INDEX {
+    tag "$fasta"
+    label 'process_medium'
+
+    conda "bioconda::kallisto=0.46.2"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/kallisto:0.46.2--h4f7b962_1' :
+        'quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1' }"
+
+    input:
+    path fasta
+
+    output:
+    path "kallisto"    , emit: idx
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    kallisto \\
+        index \\
+        $args \\
+        -i kallisto \\
+        $fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch kallisto
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/kallisto/index/meta.yml b/modules/nf-core/kallisto/index/meta.yml
new file mode 100644
index 00000000..47f40c9a
--- /dev/null
+++ b/modules/nf-core/kallisto/index/meta.yml
@@ -0,0 +1,31 @@
+name: kallisto_index
+description: Create kallisto index
+keywords:
+  - index
+tools:
+  - kallisto:
+      description: Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.
+      homepage: https://pachterlab.github.io/kallisto/
+      documentation: https://pachterlab.github.io/kallisto/manual
+      tool_dev_url: https://github.com/pachterlab/kallisto
+      licence: ["BSD-2-Clause"]
+
+input:
+  - fasta:
+      type: file
+      description: genome fasta file
+      pattern: "*.{fasta}"
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - idx:
+      type: file
+      description: Kallisto genome index (written to a single file named "kallisto" by this module)
+      pattern: "kallisto"
+
+authors:
+  - "@ggabernet"
diff --git a/modules/nf-core/picard/collectwgsmetrics/main.nf b/modules/nf-core/picard/collectwgsmetrics/main.nf
new file mode 100644
index 00000000..827dfb23
--- /dev/null
+++ b/modules/nf-core/picard/collectwgsmetrics/main.nf
@@ -0,0 +1,60 @@
+process PICARD_COLLECTWGSMETRICS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::picard=3.0.0 r::r-base"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' :
+        'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    path intervallist
+
+    output:
+    tuple val(meta), path("*_metrics"), emit: metrics
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def avail_mem = 3
+    def interval = intervallist ? "--INTERVALS ${intervallist}" : ''
+    if (!task.memory) {
+        log.info '[Picard CollectWgsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
+    """
+    picard \\
+        -Xmx${avail_mem}g \\
+        CollectWgsMetrics \\
+        $args \\
+        --INPUT $bam \\
+        --OUTPUT ${prefix}.CollectWgsMetrics.coverage_metrics \\
+        --REFERENCE_SEQUENCE ${fasta} \\
+        $interval
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(picard CollectWgsMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.CollectWgsMetrics.coverage_metrics
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(picard CollectWgsMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/picard/collectwgsmetrics/meta.yml b/modules/nf-core/picard/collectwgsmetrics/meta.yml
new file mode 100644
index 00000000..2f8dbd3c
--- /dev/null
+++ b/modules/nf-core/picard/collectwgsmetrics/meta.yml
@@ -0,0 +1,69 @@
+name: picard_collectwgsmetrics
+description: Collect metrics about coverage and performance of whole genome sequencing (WGS) experiments.
+keywords:
+  - alignment
+  - metrics
+  - statistics
+  - quality
+  - bam
+tools:
+  - picard:
+      description: |
+        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
+        data and formats such as SAM/BAM/CRAM and VCF.
+      homepage: https://broadinstitute.github.io/picard/
+      documentation: https://broadinstitute.github.io/picard/
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Aligned reads file
+      pattern: "*.{bam, cram}"
+  - bai:
+      type: file
+      description: (Optional) Aligned reads file index
+      pattern: "*.{bai,crai}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fasta:
+      type: file
+      description: Genome fasta file
+      pattern: "*.{fa,fasta,fna}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fai:
+      type: file
+      description: Genome fasta file index
+      pattern: "*.{fai}"
+  - intervallist:
+      type: file
+      description: Picard Interval List. Defines which contigs to include. Can be generated from a BED file with GATK BedToIntervalList.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ] + - metrics: + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf new file mode 100644 index 00000000..be243a95 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -0,0 +1,61 @@ +process PICARD_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::picard=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(bam) + path fasta + path fai + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.metrics.txt"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + MarkDuplicates \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --REFERENCE_SEQUENCE $fasta \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + touch ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml new file mode 100644 index 00000000..3f2357bb --- /dev/null +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -0,0 +1,60 @@ +name: picard_markduplicates +description: Locate and tag duplicate reads in a BAM file +keywords: + - markduplicates + - pcr + - duplicates + - bam + - sam + - cram +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" + - fai: + type: file + description: Reference genome fasta index + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with duplicate reads marked/removed + pattern: "*.{bam}" + - bai: + type: file + description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag + pattern: "*.{bai}" + - metrics: + type: file + description: Duplicate metrics file generated by picard + pattern: "*.{metrics.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@projectoriented" diff --git a/modules/nf-core/qualimap/rnaseq/main.nf b/modules/nf-core/qualimap/rnaseq/main.nf new file mode 100644 index 00000000..ad15ebc2 --- /dev/null +++ b/modules/nf-core/qualimap/rnaseq/main.nf @@ -0,0 +1,63 @@ +process QUALIMAP_RNASEQ { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::qualimap=2.2.2d" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1' : + 'quay.io/biocontainers/qualimap:2.2.2d--1' }" + + input: + tuple val(meta), path(bam) + path gtf + + output: + tuple val(meta), path("${prefix}"), emit: results + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def paired_end = meta.single_end ? '' : '-pe' + def memory = task.memory.toGiga() + "G" + + def strandedness = 'non-strand-specific' + if (meta.strandedness == 'forward') { + strandedness = 'strand-specific-forward' + } else if (meta.strandedness == 'reverse') { + strandedness = 'strand-specific-reverse' + } + """ + unset DISPLAY + mkdir tmp + export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp + qualimap \\ + --java-mem-size=$memory \\ + rnaseq \\ + $args \\ + -bam $bam \\ + -gtf $gtf \\ + -p $strandedness \\ + $paired_end \\ + -outdir $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..ce6580d2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' :
+        'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path ("*.fai"), emit: fai
+    tuple val(meta), path ("*.gzi"), emit: gzi, optional: true
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    samtools \\
+        faidx \\
+        $args \\
+        $fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${fasta}.fai
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml
new file mode 100644
index 00000000..fe2fe9a1
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/meta.yml
@@ -0,0 +1,47 @@
+name: samtools_faidx
+description: Index FASTA file
+keywords:
+  - index
+  - fasta
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: FASTA file
+      pattern: "*.{fa,fasta}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fai:
+      type: file
+      description: FASTA index file
+      pattern: "*.{fai}"
+  - gzi:
+      type: file
+      description: Optional gzip index file for compressed inputs
+      pattern: "*.gzi"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@ewels"
+  - "@phue"
diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf
new file mode 100644
index 00000000..8b95687a
--- /dev/null
+++ b/modules/nf-core/samtools/index/main.nf
@@ -0,0 +1,48 @@
+process SAMTOOLS_INDEX {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::samtools=1.16.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..8bd2fa6f --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 00000000..84c167cd --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,43 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
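+    // Nextflow stages inputs by name, so writing the sorted output to the exact
+    // input filename would clobber the staged BAM; the guard above forces a distinct
+    // name. A typical override (an assumption, adjust per pipeline) in modules.config:
+    //   ext.prefix = { "${meta.id}.sorted" }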
+ """ + samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 00000000..07328431 --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,48 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" +authors: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 00000000..729c85e5 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,66 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(input), path(index) + path fasta + path qname + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.sam"), emit: sam, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def readnames = qname ? "--qname-file ${qname}": "" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
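+    // file_type resolution: an explicit --output-fmt in ext.args wins, otherwise the
+    // input's own extension is reused, e.g. ext.args = '--output-fmt cram' yields
+    // ${prefix}.cram. The optional bai/csi/crai emits are only populated when
+    // ext.args also includes --write-index (see the note in meta.yml below).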
+ """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${prefix}.${file_type} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.cram + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 00000000..2e597d34 --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,79 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: optional file + description: BAM.BAI/CRAM.CRAI file + pattern: "*.{.bai,.crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" + - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + # bai, csi, and crai are created with `--write-index` + - bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf new file mode 100644 index 00000000..0e3bd713 --- /dev/null +++ b/modules/nf-core/star/align/main.nf @@ -0,0 +1,97 @@ +process STAR_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : + 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + + input: + tuple val(meta), path(reads) + path index + path gtf + val star_ignore_sjdbgtf + val seq_platform + val seq_center + + output: + tuple val(meta), path('*d.out.bam') , emit: bam + tuple val(meta), path('*Log.final.out') , emit: log_final + tuple val(meta), path('*Log.out') , emit: log_out + tuple val(meta), path('*Log.progress.out'), emit: log_progress + path "versions.yml" , emit: versions + + tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted + tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript + tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted + tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq + tuple val(meta), path('*.tab') , optional:true, emit: tab + tuple val(meta), path('*.out.junction') , optional:true, emit: junction + tuple val(meta), path('*.out.sam') , optional:true, emit: sam + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" + def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" + def seq_center = seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$seq_center' 'SM:$prefix' $seq_platform " : "--outSAMattrRGline ID:$prefix 'SM:$prefix' $seq_platform " + def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' + def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' + """ + STAR \\ + --genomeDir $index \\ + --readFilesIn $reads \\ + --runThreadN $task.cpus \\ + --outFileNamePrefix $prefix. 
\\ + $out_sam_type \\ + $ignore_gtf \\ + $seq_center \\ + $args + + $mv_unsorted_bam + + if [ -f ${prefix}.Unmapped.out.mate1 ]; then + mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq + gzip ${prefix}.unmapped_1.fastq + fi + if [ -f ${prefix}.Unmapped.out.mate2 ]; then + mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq + gzip ${prefix}.unmapped_2.fastq + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}Xd.out.bam + touch ${prefix}.Log.final.out + touch ${prefix}.Log.out + touch ${prefix}.Log.progress.out + touch ${prefix}.sortedByCoord.out.bam + touch ${prefix}.toTranscriptome.out.bam + touch ${prefix}.Aligned.unsort.out.bam + touch ${prefix}.unmapped_1.fastq.gz + touch ${prefix}.unmapped_2.fastq.gz + touch ${prefix}.tab + touch ${prefix}.Chimeric.out.junction + touch ${prefix}.out.sam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml new file mode 100644 index 00000000..7ee10f1c --- /dev/null +++ b/modules/nf-core/star/align/meta.yml @@ -0,0 +1,81 @@ +name: star_align +description: Align reads to a reference genome using STAR +keywords: + - align + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+  - index:
+      type: directory
+      description: STAR genome index
+      pattern: "star"
+output:
+  - bam:
+      type: file
+      description: Output BAM file containing read alignments
+      pattern: "*.{bam}"
+  - log_final:
+      type: file
+      description: STAR final log file
+      pattern: "*Log.final.out"
+  - log_out:
+      type: file
+      description: STAR log out file
+      pattern: "*Log.out"
+  - log_progress:
+      type: file
+      description: STAR log progress file
+      pattern: "*Log.progress.out"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam_sorted:
+      type: file
+      description: Sorted BAM file of read alignments (optional)
+      pattern: "*sortedByCoord.out.bam"
+  - bam_transcript:
+      type: file
+      description: Output BAM file of transcriptome alignment (optional)
+      pattern: "*toTranscriptome.out.bam"
+  - bam_unsorted:
+      type: file
+      description: Unsorted BAM file of read alignments (optional)
+      pattern: "*Aligned.unsort.out.bam"
+  - fastq:
+      type: file
+      description: Unmapped FastQ files (optional)
+      pattern: "*fastq.gz"
+  - tab:
+      type: file
+      description: STAR output tab file(s) (optional)
+      pattern: "*.tab"
+  - junction:
+      type: file
+      description: STAR chimeric junction output file (optional)
+      pattern: "*.out.junction"
+
+authors:
+  - "@kevinmenden"
+  - "@drpatelh"
+  - "@praveenraj2018"
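For orientation, a worked expansion of the read-group logic in STAR_ALIGN above. With a hypothetical prefix of sample1, seq_center MYLAB and seq_platform illumina, the module would pass roughly:

    --outSAMattrRGline ID:sample1 'CN:MYLAB' 'SM:sample1' 'PL:illumina'

and, with seq_center unset, simply ID:sample1 'SM:sample1' 'PL:illumina'. The values here are illustrative, not taken from the pipeline's test data.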
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } + + stub: + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml new file mode 100644 index 00000000..8181157a --- /dev/null +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -0,0 +1,37 @@ +name: star_genomegenerate +description: Create index for STAR +keywords: + - index + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] +input: + - fasta: + type: file + description: Fasta file of the reference genome + - gtf: + type: file + description: GTF file of the reference genome + +output: + - index: + type: directory + description: Folder containing the star index files + pattern: "star" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/stringtie/merge/main.nf b/modules/nf-core/stringtie/merge/main.nf new file mode 100644 index 00000000..f1635afc --- /dev/null +++ b/modules/nf-core/stringtie/merge/main.nf @@ -0,0 +1,46 @@ +process STRINGTIE_MERGE { + label 'process_medium' + + // Note: 2.7X indices incompatible with AWS iGenomes. 
+ conda "bioconda::stringtie=2.2.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : + 'quay.io/biocontainers/stringtie:2.2.1--hecb563c_2' }" + + input: + path stringtie_gtf + path annotation_gtf + + output: + path "stringtie.merged.gtf", emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def reference = annotation_gtf ? "-G $annotation_gtf" : "" + """ + stringtie \\ + --merge $stringtie_gtf \\ + $reference \\ + -o stringtie.merged.gtf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ + + stub: + """ + touch stringtie.merged.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/stringtie/merge/meta.yml b/modules/nf-core/stringtie/merge/meta.yml new file mode 100644 index 00000000..2e9784fe --- /dev/null +++ b/modules/nf-core/stringtie/merge/meta.yml @@ -0,0 +1,37 @@ +name: stringtie_merge +description: Merges the annotation gtf file and the stringtie output gtf files +keywords: + - merge + - gtf + - reference +tools: + - stringtie2: + description: | + Transcript assembly and quantification for RNA-Seq + homepage: https://ccb.jhu.edu/software/stringtie/index.shtml + documentation: https://ccb.jhu.edu/software/stringtie/index.shtml?t=manual + licence: ["MIT"] +input: + - stringtie_gtf: + type: file + description: | + Stringtie transcript gtf output(s). + pattern: "*.gtf" + - annotation_gtf: + type: file + description: | + Annotation gtf file (optional). + pattern: "*.gtf" +output: + - merged_gtf: + type: map + description: | + Merged gtf from annotation and stringtie output gtfs. + pattern: "*.gtf" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@yuukiiwa" diff --git a/modules/nf-core/stringtie/stringtie/main.nf b/modules/nf-core/stringtie/stringtie/main.nf new file mode 100644 index 00000000..2d5b035f --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/main.nf @@ -0,0 +1,68 @@ +process STRINGTIE_STRINGTIE { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::stringtie=2.2.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : + 'quay.io/biocontainers/stringtie:2.2.1--hecb563c_2' }" + + input: + tuple val(meta), path(bam) + path annotation_gtf + + output: + tuple val(meta), path("*.transcripts.gtf"), emit: transcript_gtf + tuple val(meta), path("*.abundance.txt") , emit: abundance + tuple val(meta), path("*.coverage.gtf") , optional: true, emit: coverage_gtf + tuple val(meta), path("*.ballgown") , optional: true, emit: ballgown + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = annotation_gtf ? "-G $annotation_gtf" : "" + def ballgown = annotation_gtf ? "-b ${prefix}.ballgown" : "" + def coverage = annotation_gtf ? 
"-C ${prefix}.coverage.gtf" : "" + + def strandedness = '' + if (meta.strandedness == 'forward') { + strandedness = '--fr' + } else if (meta.strandedness == 'reverse') { + strandedness = '--rf' + } + """ + stringtie \\ + $bam \\ + $strandedness \\ + $reference \\ + -o ${prefix}.transcripts.gtf \\ + -A ${prefix}.gene.abundance.txt \\ + $coverage \\ + $ballgown \\ + -p $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.transcripts.gtf + touch ${prefix}.gene.abundance.txt + touch ${prefix}.coverage.gtf + touch ${prefix}.ballgown + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/stringtie/stringtie/meta.yml b/modules/nf-core/stringtie/stringtie/meta.yml new file mode 100644 index 00000000..75518470 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/meta.yml @@ -0,0 +1,57 @@ +name: stringtie_stringtie +description: Transcript assembly and quantification for RNA-Se +keywords: + - transcript + - assembly + - quantification + - gtf + +tools: + - stringtie2: + description: | + Transcript assembly and quantification for RNA-Seq + homepage: https://ccb.jhu.edu/software/stringtie/index.shtml + documentation: https://ccb.jhu.edu/software/stringtie/index.shtml?t=manual + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + Stringtie transcript gtf output(s). + - annotation_gtf: + type: file + description: | + Annotation gtf file (optional). +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
+  - transcript_gtf:
+      type: file
+      description: transcript gtf
+      pattern: "*.{transcripts.gtf}"
+  - coverage_gtf:
+      type: file
+      description: coverage gtf
+      pattern: "*.{coverage.gtf}"
+  - abundance:
+      type: file
+      description: gene abundance table
+      pattern: "*.{abundance.txt}"
+  - ballgown:
+      type: file
+      description: for running ballgown
+      pattern: "*.{ballgown}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
diff --git a/nextflow.config b/nextflow.config
index 2c39b442..3f1432be 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -9,14 +9,12 @@
 // Global default params, used in configs
 params {
-    // TODO nf-core: Specify your pipeline's command line flags
     // Input options
-    input                      = null
-    // References
-    genome                     = null
-    igenomes_base              = 's3://ngi-igenomes/igenomes'
-    igenomes_ignore            = false
-
+    input                      = "fake_input_to_build_refs.csv"
+    build_references           = false
+    cosmic_username            = null
+    cosmic_passwd              = null
+    qiagen                     = false
     // MultiQC options
     multiqc_config             = null
@@ -25,6 +23,74 @@ params {
     max_multiqc_email_size     = '25.MB'
     multiqc_methods_description = null
 
+    // Genome
+    genome                     = 'GRCh38'
+    genomes_base               = "${params.outdir}/references"
+    ensembl_version            = 102
+    read_length                = 100
+    genomes                    = [:]
+    starfusion_build           = true
+
+    // Filtering
+    fusioninspector_filter     = false
+    fusionreport_filter        = true
+
+    // Trimming
+    trim                       = false
+    fastp_trim                 = false
+    trim_tail                  = null
+    adapter_fasta              = []
+
+    // Compression
+    cram                       = []
+
+    // Alignment options
+    star_ignore_sjdbgtf        = false
+    seq_center                 = null
+    seq_platform               = null
+    fusioncatcher_limitSjdbInsertNsj   = 2000000
+    fusioninspector_limitSjdbInsertNsj = 1000000
+
+    // Enable or disable tools
+    all                        = false
+    arriba                     = false
+    fusioncatcher              = false
+    pizzly                     = false
+    squid                      = false
+    starindex                  = false
+    starfusion                 = false
+    stringtie                  = false
+    fusionreport               = false
+    fusioninspector_only       = false
+
+    // Skip steps
+    skip_qc                    = false
+    skip_vis                   = false
+
+    // Path to references
+    ensembl_ref                = "${params.genomes_base}/ensembl"
+    arriba_ref                 = "${params.genomes_base}/arriba"
+    arriba_ref_blacklist       = "${params.genomes_base}/arriba/blacklist_hg38_GRCh38_v2.3.0.tsv.gz"
+    arriba_ref_cytobands       = "${params.genomes_base}/arriba/cytobands_hg38_GRCh38_v2.3.0.tsv"
+    arriba_ref_known_fusions   = "${params.genomes_base}/arriba/known_fusions_hg38_GRCh38_v2.3.0.tsv.gz"
+    arriba_ref_protein_domain  = "${params.genomes_base}/arriba/protein_domains_hg38_GRCh38_v2.3.0.gff3"
+    fusioncatcher_ref          = "${params.genomes_base}/fusioncatcher/human_v102"
+    pizzly_ref                 = "${params.genomes_base}/pizzly/kallisto"
+    squid_ref                  = "${params.genomes_base}/squid"
+    starfusion_ref             = "${params.genomes_base}/starfusion/ctat_genome_lib_build_dir"
+    starindex_ref              = "${params.genomes_base}/star"
+    fusionreport_ref           = "${params.genomes_base}/fusion_report_db"
+
+    // Path to fusion outputs
+    arriba_fusions             = null
+    pizzly_fusions             = null
+    squid_fusions              = null
+    starfusion_fusions         = null
+    fusioncatcher_fusions      = null
+    fusioninspector_fusions    = null
+    whitelist                  = null
+
     // Boilerplate options
     outdir                     = null
     publish_dir_mode           = 'copy'
@@ -43,7 +109,7 @@ params {
     custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
     config_profile_contact     = null
     config_profile_url         = null
-
+
 
     // Max resource options
     // Defaults only, expecting to be overwritten
@@ -51,18 +117,20 @@ params {
     max_cpus                   = 16
     max_time                   = '240.h'
 
+    // Schema validation default options
     validationFailUnrecognisedParams = false
     validationLenientMode            = false
     validationSchemaIgnoreParams     = 'genomes'
     validationShowHiddenParams       = false
     validate_params                  = true
-
 }
 
 // Load base.config by default for all pipelines
 includeConfig 'conf/base.config'
 
+includeConfig 'conf/genomes.config'
+
 // Load nf-core custom profiles from different Institutions
 try {
     includeConfig "${params.custom_config_base}/nfcore_custom.config"
@@ -161,13 +229,18 @@ profiles {
         shifter.enabled      = false
         charliecloud.enabled = false
     }
+    test {
+        includeConfig 'conf/test.config'
+    }
+    test_full {
+        includeConfig 'conf/test_full.config'
+    }
+
     gitpod {
         executor.name   = 'local'
         executor.cpus   = 16
         executor.memory = 60.GB
     }
-    test      { includeConfig 'conf/test.config'      }
-    test_full { includeConfig 'conf/test_full.config' }
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
@@ -189,6 +262,7 @@
 if (!params.igenomes_ignore) {
 } else {
     params.genomes = [:]
 }
+
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
@@ -227,8 +301,8 @@ manifest {
     homePage        = 'https://github.com/nf-core/rnafusion'
     description     = """Nextflow rnafusion analysis pipeline, part of the nf-core community."""
     mainScript      = 'main.nf'
-    nextflowVersion = '!>=22.10.1'
+    nextflowVersion = '!>=23.04.0'
    version         = '2.3.4'
    doi             = ''
 }
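The parameters added above split the pipeline into a reference-building phase and an analysis phase. A minimal sketch of the intended two-step invocation, assuming a docker profile; paths, credentials and the samplesheet name are placeholders:

    nextflow run nf-core/rnafusion -profile docker \
        --build_references --all \
        --cosmic_username <USER> --cosmic_passwd <PASSWORD> \
        --genomes_base /path/to/references --outdir /path/to/results

    nextflow run nf-core/rnafusion -profile docker \
        --all --input samplesheet.csv \
        --genomes_base /path/to/references --outdir /path/to/results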
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 6a47c09a..195ba8d3 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -5,12 +5,29 @@
     "description": "Nextflow rnafusion analysis pipeline, part of the nf-core community.",
     "type": "object",
     "definitions": {
+        "skip_steps": {
+            "title": "Skip steps",
+            "type": "object",
+            "description": "Skip analysis steps",
+            "default": "",
+            "properties": {
+                "skip_qc": {
+                    "type": "boolean",
+                    "description": "Skip QC steps"
+                },
+                "skip_vis": {
+                    "type": "boolean",
+                    "description": "Skip visualisation steps"
+                }
+            },
+            "fa_icon": "fas fa-fast-forward"
+        },
         "input_output_options": {
             "title": "Input/output options",
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "outdir"],
+            "required": ["genomes_base", "outdir"],
             "properties": {
                 "input": {
                     "type": "string",
@@ -39,6 +56,255 @@
                 "type": "string",
                 "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
                 "fa_icon": "fas fa-file-signature"
+            },
+            "build_references": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Whether the pipeline run builds references (true) or analyses data (false)"
+            },
+            "genomes_base": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to reference folder"
+            },
+            "ensembl_version": {
+                "type": "integer",
+                "fa_icon": "far fa-file-code",
+                "description": "Ensembl version",
+                "default": 102
+            },
+            "starfusion_build": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "If set, starfusion references are built from scratch instead of downloaded (default)"
+            },
+            "read_length": {
+                "type": "integer",
+                "fa_icon": "far fa-file-code",
+                "description": "Read length",
+                "default": 100
+            },
+            "all": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Build or run all references/analyses"
+            },
+            "arriba": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Build or run arriba references/analyses"
+            },
+            "arriba_ref": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to arriba references"
+            },
+            "arriba_ref_blacklist": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to arriba reference blacklist"
+            },
+            "arriba_ref_cytobands": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to arriba reference cytobands"
+            },
+            "arriba_ref_known_fusions": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to arriba reference known fusions"
+            },
+            "arriba_ref_protein_domain": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to arriba reference protein domain"
+            },
+            "arriba_fusions": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to arriba output"
+            },
+            "ensembl_ref": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to ensembl references"
+            },
+            "fusioncatcher": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Build or run fusioncatcher references/analyses"
+            },
+            "fusioncatcher_fusions": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to fusioncatcher output"
+            },
+            "fusioncatcher_limitSjdbInsertNsj": {
+                "type": "integer",
+                "fa_icon": "far fa-file-code",
+                "description": "Use limitSjdbInsertNsj with int for fusioncatcher"
+            },
+            "fusioncatcher_ref": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to fusioncatcher references"
+            },
+            "fusioninspector_filter": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Feed filtered fusionreport fusions to fusioninspector"
+            },
+            "fusioninspector_limitSjdbInsertNsj": {
+                "type": "integer",
+                "fa_icon": "far fa-file-code",
+                "description": "Use limitSjdbInsertNsj with int for fusioninspector STAR process"
+            },
+            "fusioninspector_only": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Skip fusion-report. --fusioninspector_fusions PATH needed to provide a fusion list as input"
+            },
+            "fusioninspector_fusions": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to a fusion list file built with format GENE1--GENE2"
+            },
+            "fusionreport": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Build fusionreport references"
+            },
+            "fusionreport_ref": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to fusionreport references"
+            },
+            "fusionreport_filter": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "default": true,
+                "description": "Display only fusions identified by two tools or more"
+            },
+            "pizzly": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Build or run pizzly references/analyses"
+            },
+            "pizzly_fusions": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to pizzly output"
+            },
+            "pizzly_ref": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to pizzly references"
+            },
+            "squid": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Build or run squid references/analyses"
+            },
+            "squid_fusions": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to squid output"
+            },
+            "squid_ref": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to squid references"
+            },
+            "starfusion": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Build or run starfusion references/analyses"
+            },
+            "starfusion_fusions": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to starfusion output"
+            },
+            "starfusion_ref": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to starfusion references"
+            },
+            "starindex": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Build or run starindex references/analyses"
+            },
+            "starindex_ref": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to starindex references"
+            },
+            "stringtie": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Run stringtie analysis"
+            },
+            "whitelist": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "Path to fusions to add to the input of fusioninspector"
+            },
+            "cosmic_username": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "COSMIC username"
+            },
+            "cosmic_passwd": {
+                "type": "string",
+                "fa_icon": "far fa-file-code",
+                "description": "COSMIC password"
+            },
+            "qiagen": {
+                "type": "boolean",
+                "fa_icon": "far fa-file-code",
+                "description": "Use QIAGEN instead of SANGER to download COSMIC database"
+            }
+        }
+    },
+    "read_trimming_options": {
+        "title": "Read trimming options",
+        "type": "object",
+        "fa_icon": "fas fa-cut",
+        "description": "Options to adjust read trimming criteria.",
+        "properties": {
+            "trim": {
+                "type": "boolean",
+                "description": "Perform trimming of reads, default: false",
+                "fa_icon": "fas fa-cut"
+            },
+            "fastp_trim": {
+                "type": "boolean",
+                "description": "Perform fastp trimming of reads, default: false",
+                "fa_icon": "fas fa-cut"
+            },
+            "trim_tail": {
+                "type": "integer",
+                "description": "Perform tail trimming of reads, default: null",
+                "fa_icon": "fas fa-cut"
+            },
+            "adapter_fasta": {
+                "type": "string",
+                "description": "Path to adapter fasta file, default: []",
+                "fa_icon": "fas fa-cut"
+            }
+        }
+    },
"object", + "fa_icon": "fas fa-cut", + "description": "Option to compress BAM files to CRAM.", + "properties": { + "cram": { + "type": "string", + "description": "List of tools for which to compress BAM file to CRAM,default: [], options: arriba, squid, starfusion. Leave no space between options", + "fa_icon": "fas fa-cut" } } }, @@ -51,8 +317,7 @@ "genome": { "type": "string", "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "fa_icon": "fas fa-book" }, "fasta": { "type": "string", @@ -61,23 +326,55 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, - "igenomes_base": { + "fai": { "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?ai(\\.gz)?$", + "description": "Path to FASTA genome index file.", + "fa_icon": "far fa-file-code" }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + "gtf": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf?(\\.gz)?$", + "description": "Path to GTF genome file.", + "fa_icon": "far fa-file-code" + }, + "chrgtf": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf?(\\.gz)?$", + "description": "Path to GTF genome file.", + "fa_icon": "far fa-file-code" + }, + "transcript": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to GTF genome file.", + "fa_icon": "far fa-file-code" + }, + "refflat": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.refflat?$", + "description": "Path to GTF genome file.", + "fa_icon": "far fa-file-code" + }, + "rrna_intervals": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.interval_list?$", + "description": "Path to ribosomal interval list.", + "fa_icon": "far fa-file-code" } } }, @@ -279,14 +576,44 @@ "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
+            },
+            "seq_center": {
+                "type": "string",
+                "description": "Sequencing center",
+                "hidden": true,
+                "fa_icon": "fas fa-toolbox",
+                "help_text": "This will be reported in the BAM header as CN"
+            },
+            "seq_platform": {
+                "type": "string",
+                "description": "Sequencing platform",
+                "hidden": true,
+                "fa_icon": "fas fa-toolbox",
+                "help_text": "This will be reported in the BAM header as PL."
+            },
+            "star_ignore_sjdbgtf": {
+                "type": "boolean",
+                "description": "Whether to ignore the GTF in STAR alignment",
+                "hidden": true,
+                "fa_icon": "fas fa-toolbox",
+                "help_text": "If false, the GTF file is used for STAR alignment"
+            }
        }
    }
 },
 "allOf": [
+    {
+        "$ref": "#/definitions/skip_steps"
+    },
     {
         "$ref": "#/definitions/input_output_options"
     },
+    {
+        "$ref": "#/definitions/read_trimming_options"
+    },
+    {
+        "$ref": "#/definitions/compression_options"
+    },
     {
         "$ref": "#/definitions/reference_genome_options"
     },
diff --git a/subworkflows/local/arriba_workflow.nf b/subworkflows/local/arriba_workflow.nf
new file mode 100644
index 00000000..0712f787
--- /dev/null
+++ b/subworkflows/local/arriba_workflow.nf
@@ -0,0 +1,65 @@
+include { ARRIBA                                      } from '../../modules/nf-core/arriba/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_ARRIBA } from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA   } from '../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA   } from '../../modules/nf-core/samtools/view/main'
+include { STAR_ALIGN as STAR_FOR_ARRIBA               } from '../../modules/nf-core/star/align/main'
+
+workflow ARRIBA_WORKFLOW {
+    take:
+        reads
+        ch_gtf
+        ch_fasta
+        ch_starindex_ref
+
+    main:
+        ch_versions = Channel.empty()
+        ch_dummy_file = file("$baseDir/assets/dummy_file_arriba.txt", checkIfExists: true)
+
+        if ((params.arriba || params.all) && !params.fusioninspector_only) {
+
+            STAR_FOR_ARRIBA( reads, ch_starindex_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '')
+            ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions)
+
+            SAMTOOLS_SORT_FOR_ARRIBA(STAR_FOR_ARRIBA.out.bam)
+            ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_ARRIBA.out.versions)
+
+            SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_SORT_FOR_ARRIBA.out.bam)
+            ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions)
+
+            bam_indexed = SAMTOOLS_SORT_FOR_ARRIBA.out.bam.join(SAMTOOLS_INDEX_FOR_ARRIBA.out.bai)
+
+            if (params.arriba_fusions) {
+                // [meta, reads], fusions -> [meta, fusions]
+                ch_arriba_fusions = reads.combine( Channel.value( file( params.arriba_fusions, checkIfExists: true ) ) )
+                    .map { meta, reads, fusions -> [ meta, fusions ] }
+                ch_arriba_fusion_fail = ch_dummy_file
+            } else {
+                ARRIBA ( STAR_FOR_ARRIBA.out.bam, ch_fasta, ch_gtf, params.arriba_ref_blacklist, params.arriba_ref_known_fusions, [], [], params.arriba_ref_protein_domain )
+                ch_versions = ch_versions.mix(ARRIBA.out.versions)
+
+                ch_arriba_fusions = ARRIBA.out.fusions
+                ch_arriba_fusion_fail = ARRIBA.out.fusions_fail.map{ meta, file -> return file }
+            }
+
+            if (params.cram.contains('arriba')) {
+                SAMTOOLS_VIEW_FOR_ARRIBA(bam_indexed, ch_fasta, [])
+                ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_ARRIBA.out.versions)
+            }
+
+        } else {
+            ch_arriba_fusions = reads.combine(Channel.value( file(ch_dummy_file, checkIfExists: true ) ) )
+                .map { meta, reads, fusions -> [ meta, fusions ] }
+
+            ch_arriba_fusion_fail = ch_dummy_file
+        }
+
+    emit:
+        fusions      = ch_arriba_fusions
+        fusions_fail = ch_arriba_fusion_fail
+        versions     = ch_versions.ifEmpty(null)
+    }
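Each detection subworkflow in this PR falls back to a per-sample dummy file when its tool is disabled, so downstream joins still see one fusion entry per sample. A toy sketch of the combine + map idiom used above, with illustrative channel contents:

    workflow {
        reads = Channel.of( [ [id:'sampleA'], 'sampleA_1.fastq.gz' ] )
        dummy = Channel.value( file('assets/dummy_file_arriba.txt') )
        reads.combine(dummy)
            .map { meta, fastq, fusions -> [ meta, fusions ] }
            .view() // -> [ [id:sampleA], .../assets/dummy_file_arriba.txt ]
    }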
diff --git a/subworkflows/local/fusioncatcher_workflow.nf b/subworkflows/local/fusioncatcher_workflow.nf
new file mode 100644
index 00000000..a9058dc6
--- /dev/null
+++ b/subworkflows/local/fusioncatcher_workflow.nf
@@ -0,0 +1,33 @@
+include { FUSIONCATCHER } from '../../modules/local/fusioncatcher/detect/main'
+
+workflow FUSIONCATCHER_WORKFLOW {
+    take:
+        reads
+
+    main:
+        ch_versions = Channel.empty()
+        ch_dummy_file = file("$baseDir/assets/dummy_file_fusioncatcher.txt", checkIfExists: true)
+
+        if ((params.fusioncatcher || params.all) && !params.fusioninspector_only) {
+            if (params.fusioncatcher_fusions) {
+                ch_fusioncatcher_fusions = reads.combine(Channel.value(file(params.fusioncatcher_fusions, checkIfExists: true)))
+                    .map { meta, reads, fusions -> [ meta, fusions ] }
+            } else {
+                FUSIONCATCHER (
+                    reads,
+                    params.fusioncatcher_ref
+                )
+                ch_fusioncatcher_fusions = FUSIONCATCHER.out.fusions
+            }
+        } else {
+            ch_fusioncatcher_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists: true)))
+                .map { meta, reads, fusions -> [ meta, fusions ] }
+        }
+
+    emit:
+        fusions  = ch_fusioncatcher_fusions
+        versions = ch_versions.ifEmpty(null)
+    }
diff --git a/subworkflows/local/fusioninspector_workflow.nf b/subworkflows/local/fusioninspector_workflow.nf
new file mode 100644
index 00000000..388f42a6
--- /dev/null
+++ b/subworkflows/local/fusioninspector_workflow.nf
@@ -0,0 +1,48 @@
+include { ARRIBA_VISUALISATION } from '../../modules/local/arriba/visualisation/main'
+include { CAT_CAT              } from '../../modules/nf-core/cat/cat/main'
+include { MEGAFUSION           } from '../../modules/local/megafusion/main'
+include { FUSIONINSPECTOR      } from '../../modules/local/fusioninspector/main'
+
+workflow FUSIONINSPECTOR_WORKFLOW {
+    take:
+        reads
+        fusion_list
+        fusion_list_filtered
+        report
+        bam_sorted_indexed
+        ch_gtf
+
+    main:
+        ch_versions = Channel.empty()
+        index = "${params.starfusion_ref}"
+        ch_fusion_list = params.fusioninspector_filter ? fusion_list_filtered : fusion_list
+
+        if (params.whitelist) {
+            ch_whitelist = ch_fusion_list.combine(Channel.value(file(params.whitelist, checkIfExists: true)))
+                .map { meta, fusions, whitelist -> [ meta, [fusions, whitelist] ] }
+
+            CAT_CAT(ch_whitelist) // fusioninspector takes care of possible duplicates
+            ch_versions = ch_versions.mix(CAT_CAT.out.versions)
+
+            ch_fusion_list = CAT_CAT.out.file_out
+        }
+
+        reads_fusion = reads.join(ch_fusion_list)
+
+        FUSIONINSPECTOR( reads_fusion, index )
+        ch_versions = ch_versions.mix(FUSIONINSPECTOR.out.versions)
+
+        fusion_data = FUSIONINSPECTOR.out.tsv.join(report)
+        MEGAFUSION(fusion_data)
+        ch_versions = ch_versions.mix(MEGAFUSION.out.versions)
+
+        if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only && !params.skip_vis) {
+            bam_sorted_indexed_fusions = bam_sorted_indexed.join(FUSIONINSPECTOR.out.tsv)
+            ARRIBA_VISUALISATION(bam_sorted_indexed_fusions, ch_gtf, params.arriba_ref_protein_domain, params.arriba_ref_cytobands)
+            ch_versions = ch_versions.mix(ARRIBA_VISUALISATION.out.versions)
+        }
+
+    emit:
+        versions = ch_versions.ifEmpty(null)
+}
diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf
new file mode 100644
index 00000000..478986a4
--- /dev/null
+++ b/subworkflows/local/fusionreport_workflow.nf
@@ -0,0 +1,44 @@
+include { FUSIONREPORT } from '../../modules/local/fusionreport/detect/main'
+
+workflow FUSIONREPORT_WORKFLOW {
+    take:
+        reads
+        fusionreport_ref
+        arriba_fusions
+        pizzly_fusions
+        squid_fusions
+        starfusion_fusions
+        fusioncatcher_fusions
+
+    main:
+        ch_versions = Channel.empty()
+        ch_report = Channel.empty()
+
+        if (!params.fusioninspector_only) {
+            reads_fusions = reads
+                .join(arriba_fusions, remainder: true)
+                .join(pizzly_fusions, remainder: true)
+                .join(squid_fusions, remainder: true)
+                .join(starfusion_fusions, remainder: true)
+                .join(fusioncatcher_fusions, remainder: true)
+
+            FUSIONREPORT(reads_fusions, fusionreport_ref)
+            ch_fusion_list = FUSIONREPORT.out.fusion_list
+            ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered
+            ch_versions = ch_versions.mix(FUSIONREPORT.out.versions)
+            ch_report = FUSIONREPORT.out.report
+        } else {
+            ch_fusion_list = reads.combine(Channel.value(file(params.fusioninspector_fusions, checkIfExists: true)))
+                .map { meta, reads, fusions -> [ meta, fusions ] }
+
+            ch_fusion_list_filtered = ch_fusion_list
+        }
+
+    emit:
+        versions             = ch_versions.ifEmpty(null)
+        fusion_list          = ch_fusion_list
+        fusion_list_filtered = ch_fusion_list_filtered
+        report               = ch_report.ifEmpty(null)
+}
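FUSIONREPORT_WORKFLOW above relies on join(..., remainder: true) so a sample is kept even when one caller produced no entry for it. A toy illustration with hypothetical channel contents:

    workflow {
        arriba = Channel.of( [ [id:'s1'], 'arriba.tsv' ] )
        pizzly = Channel.of( [ [id:'s1'], 'pizzly.txt' ], [ [id:'s2'], 'pizzly.txt' ] )
        arriba.join(pizzly, remainder: true).view()
        // -> [ [id:'s1'], 'arriba.tsv', 'pizzly.txt' ]
        // -> [ [id:'s2'], null, 'pizzly.txt' ]   (missing caller output becomes null)
    }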
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 0aecf87f..b0155b6e 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -24,8 +24,10 @@ workflow INPUT_CHECK {
 def create_fastq_channel(LinkedHashMap row) {
     // create meta map
     def meta = [:]
-    meta.id         = row.sample
-    meta.single_end = row.single_end.toBoolean()
+    meta.id           = row.sample
+    meta.single_end   = row.single_end.toBoolean()
+    meta.strandedness = row.strandedness
+
     // add path(s) of the fastq file(s) to the meta map
     def fastq_meta = []
diff --git a/subworkflows/local/pizzly_workflow.nf b/subworkflows/local/pizzly_workflow.nf
new file mode 100644
index 00000000..7675432b
--- /dev/null
+++ b/subworkflows/local/pizzly_workflow.nf
@@ -0,0 +1,37 @@
+include { KALLISTO_QUANT } from '../../modules/local/kallisto/quant/main'
+include { PIZZLY         } from '../../modules/local/pizzly/detect/main'
+
+workflow PIZZLY_WORKFLOW {
+    take:
+        reads
+        ch_gtf
+        ch_transcript
+
+    main:
+        ch_versions = Channel.empty()
+        ch_dummy_file = file("$baseDir/assets/dummy_file_pizzly.txt", checkIfExists: true)
+
+        if ((params.pizzly || params.all) && !params.fusioninspector_only) {
+            if (params.pizzly_fusions) {
+                ch_pizzly_fusions = reads.combine(Channel.value(file(params.pizzly_fusions, checkIfExists: true)))
+                    .map { meta, reads, fusions -> [ meta, fusions ] }
+            } else {
+                KALLISTO_QUANT(reads, params.pizzly_ref)
+                ch_versions = ch_versions.mix(KALLISTO_QUANT.out.versions)
+
+                PIZZLY( KALLISTO_QUANT.out.txt, ch_transcript, ch_gtf )
+                ch_versions = ch_versions.mix(PIZZLY.out.versions)
+
+                ch_pizzly_fusions = PIZZLY.out.fusions
+            }
+        } else {
+            ch_pizzly_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists: true)))
+                .map { meta, reads, fusions -> [ meta, fusions ] }
+        }
+
+    emit:
+        fusions  = ch_pizzly_fusions
+        versions = ch_versions.ifEmpty(null)
+    }
diff --git a/subworkflows/local/qc_workflow.nf b/subworkflows/local/qc_workflow.nf
new file mode 100644
index 00000000..a2a1dec9
--- /dev/null
+++ b/subworkflows/local/qc_workflow.nf
@@ -0,0 +1,47 @@
+//
+// Run QC tools on the sorted alignments
+//
+
+include { QUALIMAP_RNASEQ                         } from '../../modules/nf-core/qualimap/rnaseq/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_QC } from '../../modules/nf-core/samtools/index/main'
+include { PICARD_COLLECTRNASEQMETRICS             } from '../../modules/local/picard/collectrnaseqmetrics/main'
+include { PICARD_MARKDUPLICATES                   } from '../../modules/nf-core/picard/markduplicates/main'
+
+workflow QC_WORKFLOW {
+    take:
+        bam_sorted
+        ch_chrgtf
+        ch_refflat
+        ch_fasta
+        ch_fai
+        ch_rrna_interval
+
+    main:
+        ch_versions = Channel.empty()
+
+        QUALIMAP_RNASEQ(bam_sorted, ch_chrgtf)
+        ch_versions = ch_versions.mix(QUALIMAP_RNASEQ.out.versions)
+        ch_qualimap_qc = Channel.empty().mix(QUALIMAP_RNASEQ.out.results)
+
+        SAMTOOLS_INDEX_FOR_QC(bam_sorted)
+        ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_QC.out.versions)
+
+        bam_indexed = bam_sorted.join(SAMTOOLS_INDEX_FOR_QC.out.bai)
+
+        PICARD_COLLECTRNASEQMETRICS(bam_indexed, ch_refflat, ch_rrna_interval)
+        ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions)
+        ch_rnaseq_metrics = Channel.empty().mix(PICARD_COLLECTRNASEQMETRICS.out.metrics)
+
+        PICARD_MARKDUPLICATES(bam_sorted, ch_fasta, ch_fai)
+        ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions)
+        ch_duplicate_metrics = Channel.empty().mix(PICARD_MARKDUPLICATES.out.metrics)
+
+    emit:
+        versions          = ch_versions.ifEmpty(null)
+        qualimap_qc       = ch_qualimap_qc
+        rnaseq_metrics    = ch_rnaseq_metrics
+        duplicate_metrics = ch_duplicate_metrics
+}
diff --git a/subworkflows/local/squid_workflow.nf b/subworkflows/local/squid_workflow.nf
new file mode 100644
index 00000000..c4f29425
--- /dev/null
+++ b/subworkflows/local/squid_workflow.nf
@@ -0,0 +1,80 @@
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_SQUID            } from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC   } from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_SQUID_CHIMERIC     } from '../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC     } from '../../modules/nf-core/samtools/view/main'
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CRAM         } from '../../modules/nf-core/samtools/view/main'
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC } from '../../modules/nf-core/samtools/view/main'
+include { SQUID                                                  } from '../../modules/local/squid/detect/main'
+include { SQUID_ANNOTATE                                         } from '../../modules/local/squid/annotate/main'
+include { STAR_ALIGN as STAR_FOR_SQUID                           } from '../../modules/nf-core/star/align/main'
+
+workflow SQUID_WORKFLOW {
+    take:
+        reads
+        ch_gtf
+        ch_starindex_ensembl_ref
+        ch_fasta
+
+    main:
+        ch_versions = Channel.empty()
+        ch_dummy_file = file("$baseDir/assets/dummy_file_squid.txt", checkIfExists: true)
+
+        if ((params.squid || params.all) && !params.fusioninspector_only) {
+            if (params.squid_fusions) {
+                ch_squid_fusions = reads.combine(Channel.value(file(params.squid_fusions, checkIfExists: true)))
+                    .map { meta, reads, fusions -> [ meta, fusions ] }
+            } else {
+
+                STAR_FOR_SQUID(reads, ch_starindex_ensembl_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '')
+                ch_versions = ch_versions.mix(STAR_FOR_SQUID.out.versions)
+
+                STAR_FOR_SQUID.out.sam
+                    .map { meta, sam ->
+                        return [meta, sam, []]
+                    }.set { chimeric_sam }
+
+                SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC (chimeric_sam, ch_fasta, [])
+                ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC.out.versions)
+
+                SAMTOOLS_SORT_FOR_SQUID_CHIMERIC (SAMTOOLS_VIEW_FOR_SQUID_CHIMERIC.out.bam)
+                ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.versions)
+
+                bam_chimeric = STAR_FOR_SQUID.out.bam_sorted.join(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam)
+
+                if (params.cram.contains('squid')) {
+                    SAMTOOLS_INDEX_FOR_SQUID(STAR_FOR_SQUID.out.bam_sorted)
+                    ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_SQUID.out.versions)
+                    SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC(SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam)
+                    ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC.out.versions)
+
+                    bam_sorted_indexed = STAR_FOR_SQUID.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_SQUID.out.bai)
+                    chimeric_sorted_indexed = SAMTOOLS_SORT_FOR_SQUID_CHIMERIC.out.bam.join(SAMTOOLS_INDEX_FOR_SQUID_CHIMERIC.out.bai)
+
+                    SAMTOOLS_VIEW_FOR_SQUID_CRAM (bam_sorted_indexed, ch_fasta, [])
+                    ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CRAM.out.versions)
+                    SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC (chimeric_sorted_indexed, ch_fasta, [])
+                    ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_SQUID_CRAM_CHIMERIC.out.versions)
+                }
+
+                SQUID (bam_chimeric)
+                ch_versions = ch_versions.mix(SQUID.out.versions)
+
+                SQUID_ANNOTATE (SQUID.out.fusions, ch_gtf)
+                ch_versions = ch_versions.mix(SQUID_ANNOTATE.out.versions)
+
+                ch_squid_fusions = SQUID_ANNOTATE.out.fusions_annotated
+            }
+        } else {
+            ch_squid_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists: true)))
+                .map { meta, reads, fusions -> [ meta, fusions ] }
+        }
+
+    emit:
+        fusions  = ch_squid_fusions
+        versions = ch_versions.ifEmpty(null)
+    }
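SQUID_WORKFLOW above uses join to pair each sample's coordinate-sorted BAM with its sorted chimeric BAM before calling SQUID. A toy sketch of that pairing, with illustrative file names:

    workflow {
        bam_sorted   = Channel.of( [ [id:'s1'], 's1.Aligned.sortedByCoord.out.bam' ] )
        bam_chimeric = Channel.of( [ [id:'s1'], 's1.chimeric_sorted.bam' ] )
        bam_sorted.join(bam_chimeric).view()
        // -> [ [id:'s1'], 's1.Aligned.sortedByCoord.out.bam', 's1.chimeric_sorted.bam' ]
    }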
diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf
new file mode 100644
index 00000000..1656ec7a
--- /dev/null
+++ b/subworkflows/local/starfusion_workflow.nf
@@ -0,0 +1,58 @@
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_STARFUSION   } from '../../modules/nf-core/samtools/view/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_STARFUSION } from '../../modules/nf-core/samtools/index/main'
+include { STAR_ALIGN as STAR_FOR_STARFUSION               } from '../../modules/nf-core/star/align/main'
+include { STARFUSION                                      } from '../../modules/local/starfusion/detect/main'
+
+workflow STARFUSION_WORKFLOW {
+    take:
+        reads
+        ch_chrgtf
+        ch_starindex_ref
+        ch_fasta
+
+    main:
+        ch_versions = Channel.empty()
+        ch_align = Channel.empty()
+        ch_star_stats = Channel.empty() // initialised here so the emit below is defined on every path
+        bam_sorted_indexed = Channel.empty()
+
+        ch_dummy_file = file("$baseDir/assets/dummy_file_starfusion.txt", checkIfExists: true)
+
+        if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only) {
+            if (params.starfusion_fusions) {
+                ch_starfusion_fusions = reads.combine(Channel.value(file(params.starfusion_fusions, checkIfExists: true)))
+                    .map { meta, reads, fusions -> [ meta, fusions ] }
+            } else {
+                STAR_FOR_STARFUSION( reads, ch_starindex_ref, ch_chrgtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '')
+                ch_versions = ch_versions.mix(STAR_FOR_STARFUSION.out.versions)
+                ch_align = STAR_FOR_STARFUSION.out.bam_sorted
+
+                SAMTOOLS_INDEX_FOR_STARFUSION(STAR_FOR_STARFUSION.out.bam_sorted)
+                ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION.out.versions)
+                bam_sorted_indexed = STAR_FOR_STARFUSION.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_STARFUSION.out.bai)
+
+                if (params.cram.contains('starfusion')) {
+                    SAMTOOLS_VIEW_FOR_STARFUSION (bam_sorted_indexed, ch_fasta, [] )
+                    ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_STARFUSION.out.versions)
+                }
+
+                reads_junction = reads.join(STAR_FOR_STARFUSION.out.junction)
+
+                STARFUSION( reads_junction, params.starfusion_ref)
+                ch_versions = ch_versions.mix(STARFUSION.out.versions)
+
+                ch_starfusion_fusions = STARFUSION.out.fusions
+                ch_star_stats = STAR_FOR_STARFUSION.out.log_final
+            }
+        } else {
+            ch_starfusion_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists: true)))
+                .map { meta, reads, fusions -> [ meta, fusions ] }
+        }
+
+    emit:
+        fusions               = ch_starfusion_fusions
+        star_stats            = ch_star_stats
+        bam_sorted            = ch_align
+        versions              = ch_versions.ifEmpty(null)
+        ch_bam_sorted_indexed = bam_sorted_indexed.ifEmpty(null)
+    }
diff --git a/subworkflows/local/stringtie_workflow.nf b/subworkflows/local/stringtie_workflow.nf
new file mode 100644
index 00000000..d9e1c6c3
--- /dev/null
+++ b/subworkflows/local/stringtie_workflow.nf
@@ -0,0 +1,34 @@
+include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie/main'
+include { STRINGTIE_MERGE     } from '../../modules/nf-core/stringtie/merge/main'
+
+workflow STRINGTIE_WORKFLOW {
+    take:
+        bam_sorted
+        ch_chrgtf
+
+    main:
+        ch_versions = Channel.empty()
+        ch_stringtie_gtf = Channel.empty()
+
+        if ((params.stringtie || params.all) && !params.fusioninspector_only) {
+            STRINGTIE_STRINGTIE(bam_sorted, ch_chrgtf)
+            ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions)
+
+            STRINGTIE_STRINGTIE
+                .out
+                .transcript_gtf
+                .map { it -> it[1] }
+                .set { stringtie_gtf }
+
+            STRINGTIE_MERGE ( stringtie_gtf, ch_chrgtf )
+            ch_versions = ch_versions.mix(STRINGTIE_MERGE.out.versions)
+            ch_stringtie_gtf = STRINGTIE_MERGE.out.gtf
+        }
+
+    emit:
+        stringtie_gtf = ch_stringtie_gtf.ifEmpty(null)
+        versions      = ch_versions.ifEmpty(null)
+    }
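The TRIM_WORKFLOW that follows selects between the local REFORMAT module (--trim) and fastp (--fastp_trim). A hedged invocation sketch enabling fastp trimming; the tail length and adapter file are placeholder values, not pipeline defaults:

    nextflow run nf-core/rnafusion -profile docker \
        --all --input samplesheet.csv \
        --genomes_base /path/to/references --outdir /path/to/results \
        --fastp_trim --trim_tail 1 --adapter_fasta adapters.fa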
diff --git a/subworkflows/local/trim_workflow.nf b/subworkflows/local/trim_workflow.nf
new file mode 100644
index 00000000..01baeec7
--- /dev/null
+++ b/subworkflows/local/trim_workflow.nf
@@ -0,0 +1,52 @@
+include { REFORMAT                   } from '../../modules/local/reformat/main'
+include { FASTQC as FASTQC_FOR_TRIM  } from '../../modules/nf-core/fastqc/main'
+include { FASTP                      } from '../../modules/nf-core/fastp/main'
+include { FASTQC as FASTQC_FOR_FASTP } from '../../modules/nf-core/fastqc/main'
+
+workflow TRIM_WORKFLOW {
+    take:
+        reads
+
+    main:
+        ch_versions = Channel.empty()
+        ch_reports = Channel.empty()
+
+        if (params.trim) {
+            REFORMAT( reads )
+            ch_versions = ch_versions.mix(REFORMAT.out.versions)
+            FASTQC_FOR_TRIM (REFORMAT.out.reads_out)
+            ch_versions = ch_versions.mix(FASTQC_FOR_TRIM.out.versions)
+
+            ch_reads_all = reads
+            ch_reads_fusioncatcher = REFORMAT.out.reads_out
+            ch_reports = FASTQC_FOR_TRIM.out.zip.collect{ it[1] }.ifEmpty([])
+        }
+        else if (params.fastp_trim) {
+            FASTP(reads, params.adapter_fasta, false, false)
+            ch_versions = ch_versions.mix(FASTP.out.versions)
+
+            FASTQC_FOR_FASTP(FASTP.out.reads)
+            ch_versions = ch_versions.mix(FASTQC_FOR_FASTP.out.versions)
+
+            ch_reads_all = FASTP.out.reads
+            ch_reads_fusioncatcher = ch_reads_all
+            ch_reports = ch_reports.mix(
+                FASTQC_FOR_FASTP.out.zip.collect{ it[1] }.ifEmpty([]),
+                FASTP.out.json.collect{ meta, json -> json },
+                FASTP.out.html.collect{ meta, html -> html }
+            )
+        }
+        else {
+            ch_reads_all = reads
+            ch_reads_fusioncatcher = reads
+        }
+
+    emit:
+        ch_reads_all
+        ch_reads_fusioncatcher
+        ch_reports
+        versions = ch_versions.ifEmpty(null)
+    }
diff --git a/workflows/build_references.nf b/workflows/build_references.nf
new file mode 100644
index 00000000..c43ecc84
--- /dev/null
+++ b/workflows/build_references.nf
@@ -0,0 +1,106 @@
+/*
+========================================================================================
+    IMPORT LOCAL MODULES/SUBWORKFLOWS
+========================================================================================
+*/
+
+include { ARRIBA_DOWNLOAD        } from '../modules/local/arriba/download/main'
+include { ENSEMBL_DOWNLOAD       } from '../modules/local/ensembl/main'
+include { FUSIONCATCHER_DOWNLOAD } from '../modules/local/fusioncatcher/download/main'
+include { FUSIONREPORT_DOWNLOAD  } from '../modules/local/fusionreport/download/main'
+include { STARFUSION_BUILD       } from '../modules/local/starfusion/build/main'
+include { STARFUSION_DOWNLOAD    } from '../modules/local/starfusion/download/main'
+include { GTF_TO_REFFLAT         } from '../modules/local/uscs/custom_gtftogenepred/main'
+include { RRNA_TRANSCRIPTS       } from '../modules/local/rrnatranscripts/main'
+include { CONVERT2BED            } from '../modules/local/convert2bed/main'
+
+/*
+========================================================================================
+    IMPORT NF-CORE MODULES/SUBWORKFLOWS
+========================================================================================
+*/
+
+include { SAMTOOLS_FAIDX                 } from '../modules/nf-core/samtools/faidx/main'
+include { STAR_GENOMEGENERATE            } from '../modules/nf-core/star/genomegenerate/main'
+include { KALLISTO_INDEX as PIZZLY_INDEX } from '../modules/nf-core/kallisto/index/main'
+include { GATK4_CREATESEQUENCEDICTIONARY } from '../modules/nf-core/gatk4/createsequencedictionary/main'
+include { GATK4_BEDTOINTERVALLIST        } from '../modules/nf-core/gatk4/bedtointervallist/main'
+
+/*
+========================================================================================
+    RUN MAIN WORKFLOW
+========================================================================================
+*/
+
+workflow BUILD_REFERENCES {
+
+    ENSEMBL_DOWNLOAD( params.ensembl_version )
+    ENSEMBL_DOWNLOAD.out.fasta
+        .map { it -> tuple(id: it.baseName, it) }
+        .set { ch_fasta_w_meta }
+
+    SAMTOOLS_FAIDX(ch_fasta_w_meta)
+    GATK4_CREATESEQUENCEDICTIONARY(ENSEMBL_DOWNLOAD.out.fasta)
+
+    ENSEMBL_DOWNLOAD.out.gtf
+        .map { it -> tuple(id: it.baseName, it) }
+        .set { ch_gtf_w_meta }
+    RRNA_TRANSCRIPTS(ch_gtf_w_meta)
+    CONVERT2BED(RRNA_TRANSCRIPTS.out.rrna_gtf)
+
+    GATK4_BEDTOINTERVALLIST(CONVERT2BED.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict)
+
+    if (params.starindex || params.all ||
params.starfusion || params.arriba || params.squid ) { + STAR_GENOMEGENERATE( ENSEMBL_DOWNLOAD.out.fasta, ENSEMBL_DOWNLOAD.out.gtf ) + } + + if (params.arriba || params.all) { + ARRIBA_DOWNLOAD() + } + + if (params.fusioncatcher || params.all) { + FUSIONCATCHER_DOWNLOAD() + } + + if (params.pizzly || params.all) { + PIZZLY_INDEX( ENSEMBL_DOWNLOAD.out.transcript ) + } + + if (params.starfusion || params.all) { + if (params.starfusion_build){ + STARFUSION_BUILD( ENSEMBL_DOWNLOAD.out.fasta, ENSEMBL_DOWNLOAD.out.chrgtf ) + } else { + STARFUSION_DOWNLOAD() + } + } + + if (params.starfusion_build){ + GTF_TO_REFFLAT(ENSEMBL_DOWNLOAD.out.chrgtf) + } else { + GTF_TO_REFFLAT(STARFUSION_DOWNLOAD.out.chrgtf) + } + + if (params.fusionreport || params.all) { + FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ) + } + +} + +/* +======================================================================================== + COMPLETION EMAIL AND SUMMARY +======================================================================================== +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) +} + +/* +======================================================================================== + THE END +======================================================================================== +*/ diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 1ea5267f..0658e97a 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -15,6 +15,44 @@ log.info logo + paramsSummaryLog(workflow) + citation WorkflowRnafusion.initialise(params, log) +// Check mandatory parameters + +if (file(params.input).exists() || params.build_references) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet does not exist or was not specified!' } +if (params.fusioninspector_only && !params.fusioninspector_fusions) { exit 1, 'Parameter --fusioninspector_fusions PATH_TO_FUSION_LIST expected with parameter --fusioninspector_only'} + +ch_chrgtf = params.starfusion_build ? file(params.chrgtf) : file("${params.starfusion_ref}/ref_annot.gtf") +ch_starindex_ref = params.starfusion_build ? params.starindex_ref : "${params.starfusion_ref}/ref_genome.fa.star.idx" +ch_starindex_ensembl_ref = params.starindex_ref +ch_refflat = params.starfusion_build ? file(params.refflat) : "${params.ensembl_ref}/ref_annot.gtf.refflat" +ch_rrna_interval = params.starfusion_build ? file(params.rrna_intervals) : "${params.ensembl_ref}/ref_annot.interval_list" + + +def checkPathParamList = [ + params.fasta, + params.fai, + params.gtf, + ch_chrgtf, + params.transcript, + ch_refflat, + ch_rrna_interval +] + + +for (param in checkPathParamList) if ((param) && !params.build_references) file(param, checkIfExists: true) +if (params.fasta[0,1] == "s3") { + log.info "INFO: s3 path detected, check for absolute path and trailing '/' not performed" +} +else { + for (param in checkPathParamList) if ((param.toString())!= file(param).toString() && !params.build_references) { exit 1, "Problem with ${param}: ABSOLUTE PATHS are required! Check for trailing '/' at the end of paths too." 
} +} +if ((params.squid || params.all) && params.ensembl_version != 102) { exit 1, 'Ensembl version is not supported by squid' } + +ch_fasta = file(params.fasta) +ch_gtf = file(params.gtf) +ch_transcript = file(params.transcript) +ch_fai = file(params.fai) + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -35,7 +73,18 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' + +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow' +include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' +include { PIZZLY_WORKFLOW } from '../subworkflows/local/pizzly_workflow' +include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' +include { SQUID_WORKFLOW } from '../subworkflows/local/squid_workflow' +include { STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow' +include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow' +include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' +include { FUSIONINSPECTOR_WORKFLOW } from '../subworkflows/local/fusioninspector_workflow' +include { FUSIONREPORT_WORKFLOW } from '../subworkflows/local/fusionreport_workflow' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,10 +95,13 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -69,23 +121,146 @@ workflow RNAFUSION { INPUT_CHECK ( file(params.input) ) + .reads + .map { + meta, fastq -> + def meta_clone = meta.clone() + meta_clone.id = meta_clone.id.split('_')[0..-2].join('_') + [ meta_clone, fastq ] } + .groupTuple(by: [0]) + .branch { + meta, fastq -> + single : fastq.size() == 1 + return [ meta, fastq.flatten() ] + multiple: fastq.size() > 1 + return [ meta, fastq.flatten() ] + } + .set { ch_fastq } ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ // ! 
+    CAT_FASTQ (
+        ch_fastq.multiple
+    )
+    .reads
+    .mix(ch_fastq.single)
+    .set { ch_cat_fastq }
+    ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))
+
     //
     // MODULE: Run FastQC
     //
     FASTQC (
-        INPUT_CHECK.out.reads
+        ch_cat_fastq
     )
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
 
+    TRIM_WORKFLOW (
+        ch_cat_fastq
+    )
+    ch_reads_fusioncatcher = TRIM_WORKFLOW.out.ch_reads_fusioncatcher
+    ch_reads_all = TRIM_WORKFLOW.out.ch_reads_all
+
+    // Run STAR alignment and Arriba
+    ARRIBA_WORKFLOW (
+        ch_reads_all,
+        ch_gtf,
+        ch_fasta,
+        ch_starindex_ensembl_ref
+    )
+    ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions.first().ifEmpty(null))
+
+    // Run pizzly/kallisto
+    PIZZLY_WORKFLOW (
+        ch_reads_all,
+        ch_gtf,
+        ch_transcript
+    )
+    ch_versions = ch_versions.mix(PIZZLY_WORKFLOW.out.versions.first().ifEmpty(null))
+
+    // Run SQUID
+    SQUID_WORKFLOW (
+        ch_reads_all,
+        ch_gtf,
+        ch_starindex_ensembl_ref,
+        ch_fasta
+    )
+    ch_versions = ch_versions.mix(SQUID_WORKFLOW.out.versions.first().ifEmpty(null))
+
+    // Run STAR-Fusion
+    STARFUSION_WORKFLOW (
+        ch_reads_all,
+        ch_chrgtf,
+        ch_starindex_ref,
+        ch_fasta
+    )
+    ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions.first().ifEmpty(null))
+
+    // Run FusionCatcher
+    FUSIONCATCHER_WORKFLOW (
+        ch_reads_fusioncatcher
+    )
+    ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions.first().ifEmpty(null))
+
+    // Run StringTie
+    STRINGTIE_WORKFLOW (
+        STARFUSION_WORKFLOW.out.bam_sorted,
+        ch_chrgtf
+    )
+    ch_versions = ch_versions.mix(STRINGTIE_WORKFLOW.out.versions.first().ifEmpty(null))
+
+    // Run fusion-report
+    FUSIONREPORT_WORKFLOW (
+        ch_reads_all,
+        params.fusionreport_ref,
+        ARRIBA_WORKFLOW.out.fusions,
+        PIZZLY_WORKFLOW.out.fusions,
+        SQUID_WORKFLOW.out.fusions,
+        STARFUSION_WORKFLOW.out.fusions,
+        FUSIONCATCHER_WORKFLOW.out.fusions
+    )
+    ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions.first().ifEmpty(null))
+
+    // Run FusionInspector
+    FUSIONINSPECTOR_WORKFLOW (
+        ch_reads_all,
+        FUSIONREPORT_WORKFLOW.out.fusion_list,
+        FUSIONREPORT_WORKFLOW.out.fusion_list_filtered,
+        FUSIONREPORT_WORKFLOW.out.report,
+        STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed,
+        ch_chrgtf
+    )
+    ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions.first().ifEmpty(null))
+
+    // Run QC
+    QC_WORKFLOW (
+        STARFUSION_WORKFLOW.out.bam_sorted,
+        ch_chrgtf,
+        ch_refflat,
+        ch_fasta,
+        ch_fai,
+        ch_rrna_interval
+    )
+    ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions.first().ifEmpty(null))
+
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )
+
     //
     // MODULE: MultiQC
     //
@@ -100,6 +275,13 @@ workflow RNAFUSION {
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.qualimap_qc.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_reports.ifEmpty([]))
+
+
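Each `ch_multiqc_files` line above follows one pattern: the QC channels emit `[ meta, file ]` tuples, `collect{it[1]}` keeps only the file element, and `ifEmpty([])` falls back to an empty list so MultiQC still runs when a tool was skipped. A plain-Groovy sketch of that projection, with invented file names:

// Each element pairs a meta map with a report file; MultiQC only needs
// the files, so the closure picks index 1 of every tuple.
def qcOutputs = [
    [[id: 'sample1'], 'sample1_fastqc.zip'],
    [[id: 'sample2'], 'sample2_fastqc.zip']
]
def files = qcOutputs.collect { it[1] }
assert files == ['sample1_fastqc.zip', 'sample2_fastqc.zip']

The `versions.first().ifEmpty(null)` calls earlier follow the same defensive style, since each detection subworkflow is optional and may leave its versions channel empty.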
     MULTIQC (
         ch_multiqc_files.collect(),
@@ -107,9 +289,14 @@ workflow RNAFUSION {
         ch_multiqc_custom_config.toList(),
         ch_multiqc_logo.toList()
     )
-    multiqc_report = MULTIQC.out.report.toList()
+
+    multiqc_report = MULTIQC.out.report.toList()
+    ch_versions = ch_versions.mix(MULTIQC.out.versions)
 }
+
+
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     COMPLETION EMAIL AND SUMMARY