diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 888cb4bc..3f27dab4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -84,7 +84,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==2.8.0 - name: Run nf-core lint env: diff --git a/.nf-core.yml b/.nf-core.yml index 5f4a4733..7442cda9 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -5,6 +5,8 @@ lint: - docs/images/nf-core-treeval_logo_light.png - docs/images/nf-core-treeval_logo_dark.png files_unchanged: + - .github/workflows/linting.yml + - .github/CONTRIBUTING.md - LICENSE - .github/ISSUE_TEMPLATE/bug_report.yml - assets/sendmail_template.txt diff --git a/CHANGELOG.md b/CHANGELOG.md old mode 100644 new mode 100755 index a2de11b5..e7ee3034 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,16 +3,60 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [1.0.0] - Ancient Atlantis - [2023-06-12] +## [1.0.0] - Ancient Atlantis - [2023-06-27] Initial release of sanger-tol/treeval, created with the [nf-core](https://nf-co.re/) template. The essential pathways of the gEVAL pipeline have now been converted to Nextflow DSL2 from vr-runner, snakemake and wr. Of the original pipeline there is only Bionano left to implement. -### `Added` - -### `Fixed` - -### `Dependencies` - -### `Deprecated` +### Enhancements & Fixes + +- Updated to nf-core/tools template v2.8.0. +- Subworkflow to generate channels from input yaml. +- Subworkflow to generate genome summary file using samtools +- Subworkflow to generate busco gene tracks and ancestral busco mapping. +- Subworkflow to generate HiC maps with cooler, juicebox and pretext. +- Subworkflow to generate gene alignments using miniprot and minimap2. +- Subworkflow to generate insilico digest tracks. +- Subworkflow to generate longread coverage tracks from pacbio data. +- Subworkflow to generate punchlists detailing regions of interest in the genome. +- Subworkflow to generate repeat density tracks. +- Subworkflow to generate tracks detailing self complementary regions. +- Subworkflow to generate syntenic alignments to high quality genomes. +- Subworkflow to generate tracks containing telomeric sites. +- Custom Groovy for reporting to provide file metrics and resource usage. + +### Parameters + +| Old Parameter | New Parameter | +| ------------- | ------------- | +| - | --input | + +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
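On the per-process Biocontainer point above: one quick way to see which container, and therefore which tool version, each DSL2 process will use is to grep the module definitions. A minimal sketch, assuming a local clone of the pipeline with the standard nf-core `modules/` layout:

```bash
# List the container declared by each DSL2 module in a local clone of the
# pipeline; each process pulls its own Biocontainer, so versions can differ.
grep -rh "container " modules/ | sort -u
```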
+ +| Module | Old Version | New Versions | +| ------------------------------ | ----------- | ---------------- | +| bedtools | - | 2.31.0 | +| busco | - | 5.4.3 | +| bwa-mem2 | - | 2.2.1 | +| cat | - | 2.3.4 | +| cooler | - | 0.9.2 | +| gnu-sort | - | 8.25 | +| minimap2 + samtools | - | 2.24 + 1.14 | +| miniprot | - | 0.11--he4a0461_2 | +| mummer | - | 3.23 | +| paftools (minimap2 + samtools) | - | 2.24 + 1.14 | +| pretextmap + samtools | - | 0.1.9 + 1.17 | +| samtools | - | 1.17 | +| seqtk | - | 1.4 | +| tabix | - | 1.11 | +| ucsc | - | 377 | +| windowmasker (blast) | - | 2.14.0 | + +### Fixed + +### Dependencies + +### Deprecated diff --git a/CITATIONS.md b/CITATIONS.md old mode 100644 new mode 100755 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 index dbbab5cd..ac4a5f34 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022-2023 Genome Research Ltd. +Copyright (c) 2022 - 2023 Genome Research Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 020c8849..4e0cf096 --- a/README.md +++ b/README.md @@ -7,63 +7,25 @@ ## Introduction -**sanger-tol/treeval** is a bioinformatics best-practice analysis pipeline for the generation of data supplemental to the curation of reference quality genomes. This pipeline has been written to generate flat files compatable with [JBrowse2](https://jbrowse.org/jb2/). +**sanger-tol/treeval** is a bioinformatics best-practice analysis pipeline for the generation of data supplemental to the curation of reference quality genomes. This pipeline has been written to generate flat files compatible with [JBrowse2](https://jbrowse.org/jb2/). The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! -## Pipeline summary - -The version 1 pipeline will be made up of the following steps, (r) = Steps run in Rapid: - -- INPUT_READ (r) - - > The reading of the input yaml and conversion into channels for the sub-workflows. - -- GENERATE_GENOME (r) - - > Generate .genome for the input genome using SAMTOOLS FAIDX. - -- GENERATE_ALIGNMENT - - > Peptides will run pep_alignment.nf with Miniprot. - - > CDNA, RNA and CDS will run through nuc_alignment.nf with Minimap2. - -- INSILICO DIGEST - - > Generates a map of enzymatic digests using 3 Bionano enzymes. - -- SELFCOMP - - > Identifies regions of self-complementary sequencs using Mummer. - -- SYNTENY - - > Generates syntenic alignments between other high quality genomes via Minimap2. - -- BUSCO_ANNOTATION - - > Lepidopteran Element Analysis. Using BUSCO and custom python scripts to parse ancestral Lepidoptera gene. This will eventually have a number of clade specific sub-workflows. 
- > BUSCO genes extraction based on BUSCO full_table.tsv. - -- LONGREAD_COVERAGE (r) - - > Calculating the coverage of reads across the genome. - -- FIND_GAPS (r) - - > Identifying gaps in the input genome using seqtk cutn. - -- FIND_TELOMERE (r) - - > Identify sites of a given telomeric sequence. - -- REPEAT_DENSITY (r) - - > Generate a graph showing the relative amount of repeat in a given chunk. - -- HIC_MAPPING (r) - > Generation of HiC maps for the curation of a genome, these include: pretext_hires, pretext_lowres and cooler maps. +The treeval pipeline has a sister pipeline currently named [curationpretext](https://github.com/sanger-tol/curationpretext) which acts to regenerate the pretext maps and accessory files during genomic curation in order to confirm interventions. This pipeline is sufficiently different to the treeval implementation that it is written as its own pipeline. + +1. Parse input yaml ( YAML_INPUT ) +2. Generate my.genome file ( GENERATE_GENOME ) +3. Generate insilico digests of the input assembly ( INSILICO_DIGEST ) +4. Generate gene alignments with high quality data against the input assembly ( GENE_ALIGNMENT ) +5. Generate a repeat density graph ( REPEAT_DENSITY ) +6. Generate a gap track ( GAP_FINDER ) +7. Generate a map of self complementary sequence ( SELFCOMP ) +8. Generate syntenic alignments with a closely related high quality assembly ( SYNTENY ) +9. Generate a coverage track using PacBio data ( LONGREAD_COVERAGE ) +10. Generate HiC maps, pretext and higlass using HiC cram files ( HIC_MAPPING ) +11. Generate a telomere track based on input motif ( TELO_FINDER ) +12. Run Busco and convert results into bed format ( BUSCO_ANNOTATION ) +13. Ancestral Busco linkage if available for clade ( BUSCO_ANNOTATION:ANCESTRAL_GENE ) ## Usage @@ -72,15 +34,17 @@ The version 1 pipeline will be made up of the following steps, (r) = Steps run i > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) > with `-profile test` before running the workflow on actual data. +Currently, it is advised to run the pipeline with Docker or Singularity, as a small number of major modules do not currently have a conda environment associated with them. + Now, you can run the pipeline using: ```bash nextflow run main.nf -profile singularity --input treeval.yaml -entry {FULL|RAPID} --outdir {OUTDIR} ``` -## Documentation +An example treeval.yaml can be found [here](assets/local_testing/nxOscDF5033.yaml). -The sanger-tol/treeval pipeline comes with documentation about the pipeline [usage](https://nf-co.re/treeval/usage), [parameters](https://nf-co.re/treeval/parameters) and [output](https://nf-co.re/treeval/output). +Further documentation about the pipeline can be found in the following files: [usage](https://nf-co.re/treeval/usage), [parameters](https://nf-co.re/treeval/parameters) and [output](https://nf-co.re/treeval/output). > **Warning:** > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option.
Custom config files including those @@ -94,11 +58,11 @@ sanger-tol/treeval has been written by Damon-Lee Pointon (@DLBPointon), Yumi Sim We thank the following people for their extensive assistance in the development of this pipeline: ## Contributions and Support diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json old mode 100644 new mode 100755 index 1dbe9c70..eac75b32 --- a/assets/adaptivecard.json +++ b/assets/adaptivecard.json @@ -16,7 +16,7 @@ "type": "TextBlock", "size": "Large", "weight": "Bolder", - "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "color": "<% if ( success ) { %>Good<% } else { %>Attention<%} %>", "text": "sanger-tol/treeval v${version} - ${runName}", "wrap": true }, diff --git a/assets/digest/digest.as b/assets/digest/digest.as old mode 100644 new mode 100755 index 5ded115f..5b6272fa --- a/assets/digest/digest.as +++ b/assets/digest/digest.as @@ -1,9 +1,9 @@ table insilico_digest "bionano digest cut sites" ( -string chrom; "Reference sequence chromosome or scaffold" -uint chromStart; "Start position of feature on chromosome" -uint chromEnd; "End position of feature on chromosome" -string name; "Name of enzyme" -string length; "length of fragment" +string chrom; "Reference sequence chromosome or scaffold" +uint chromStart; "Start position of feature on chromosome" +uint chromEnd; "End position of feature on chromosome" +string name; "Name of enzyme" +string length; "length of fragment" ) diff --git a/assets/full_s3_treeval_test.yaml b/assets/full_s3_treeval_test.yaml old mode 100644 new mode 100755 index 7e688b5d..911260b0 --- a/assets/full_s3_treeval_test.yaml +++ b/assets/full_s3_treeval_test.yaml @@ -1,5 +1,4 @@ assembly: - sizeClass: "" # S if {genome => 4Gb} else L level: scaffold sample_id: nxOscDoli1 classT: nematode diff --git a/assets/gene_alignment/assm_cdna.as b/assets/gene_alignment/assm_cdna.as old mode 100644 new mode 100755 index f8e7afbe..30552cc5 --- a/assets/gene_alignment/assm_cdna.as +++ b/assets/gene_alignment/assm_cdna.as @@ -8,5 +8,5 @@ string name; "Name of gene" uint score; "Score" char[1] strand; "+ or - for strand" string geneSymbol; "Gene Symbol" -string ensemblId; "Ensembl Accession number" +string ensemblId; "Ensembl Accession number" ) diff --git a/assets/gene_alignment/assm_cds.as b/assets/gene_alignment/assm_cds.as old mode 100644 new mode 100755 index 63002786..4306c459 --- a/assets/gene_alignment/assm_cds.as +++ b/assets/gene_alignment/assm_cds.as @@ -8,5 +8,5 @@ string name; "Name of gene" uint score; "Score" char[1] strand; "+ or - for strand" string geneSymbol; "Gene Symbol" -string ensemblId; "Ensembl Accession number" +string ensemblId; "Ensembl Accession number" ) diff --git a/assets/gene_alignment/assm_pep.as b/assets/gene_alignment/assm_pep.as old mode 100644 new mode 100755 index a63070c7..40b8701f --- a/assets/gene_alignment/assm_pep.as +++ b/assets/gene_alignment/assm_pep.as @@ -8,5 +8,5 @@ string name; "Name of gene" uint score; "Score" char[1] strand; "+ or - for strand" string geneSymbol; "Gene Symbol" -string ensemblId; "Ensembl Accession number" +string ensemblId; "Ensembl Accession number" ) diff --git a/assets/gene_alignment/assm_rna.as b/assets/gene_alignment/assm_rna.as old mode 100644 new mode 100755 index 9edf6cbd..f28bdf37 --- a/assets/gene_alignment/assm_rna.as +++ b/assets/gene_alignment/assm_rna.as @@ -8,5 +8,5 @@ string name; "Name of gene" uint score; "Score" char[1] strand; "+ or - for strand" string geneSymbol; "Gene Symbol" -string ensemblId; "Ensembl Accession number" 
+string ensemblId; "Ensembl Accession number" ) diff --git a/assets/local_testing/nxOsc-2023-05-02.dp.TEST.md b/assets/local_testing/nxOsc-2023-05-02.dp.TEST.md deleted file mode 100644 index 32fbd6d0..00000000 --- a/assets/local_testing/nxOsc-2023-05-02.dp.TEST.md +++ /dev/null @@ -1,31 +0,0 @@ -e769c449778489095a023896d05b87fa cds/CaenorhabditisElegans.WBcel235_cds.bigBed -29f4bb4aa841e754e6ad90a95c51a8ac cds/Gae_host.Gae_cds.bigBed -55e02bdabcbd4c03413d42026ac9e34a custom/software_versions.yml -d41d8cd98f00b204e9800998ecf8427e gap/Oscheius_DF5033_gaplen.bed -efa3906048c52a26a3b762142b138df2 gen/CaenorhabditisElegans.WBcel235_cdna.bigBed -6a1f75afdc99390c150a9abe204e856b generate/my.genome -ab841e49f59ff1dd51ed87191c2d7562 gen/Gae_host.Gae_cdna.bigBed -8b277d209db8bf97c46f118562c4b9b5 gen/OscheiusTipulae.ASM1342590v1_cdna.bigBed -1d1846bbab542500504b19bfc56cb9b2 insilico/BSPQI.bigBed -008e29071b2574e2ed50a2887f4a7fc5 insilico/BSSSI.bigBed -5f58843218b373c5addd38bc91e0d74d insilico/DLE1.bigBed -08d932ddcb01866d9cfa76dbcaf8c5f5 longread/Oscheius_DF5033.bigWig -36e4493afcd46a6c89d762fee08b2aa8 longread/Oscheius_DF5033_halfdepth.bed -7bd5f463e6cd75e876f648dce93411fc longread/Oscheius_DF5033_maxdepth.bed -82d251d88ee7d9bdbb29b68d3136b7ea longread/Oscheius_DF5033_zerodepth.bed -cf6a4dc883979ac9cafd75382aa16bdc pep/CaenorhabditisElegans.WBcel235_pep.gff.gz -84c1ad1989c7e9bcf13258b2774f4a25 pep/CaenorhabditisElegans.WBcel235_pep.gff.gz.tbi -c2cccc5ab38b0e6b4e12fea2c1151569 pep/Gae_host.Gae_pep.gff.gz -6a6522a6176761172a6313df9fc5b210 pep/Gae_host.Gae_pep.gff.gz.tbi -e012da1d0c2ea40171785ead8a294289 punchlist/CaenorhabditisElegans.WBcel235_cdna_punchlist.bed -d9da11fc3f6170a1c37c38765718ab47 punchlist/CaenorhabditisElegans.WBcel235_cds_punchlist.bed -31d4e0cec6ef4ec92d51336393a923be punchlist/CaenorhabditisElegans.WBcel235_rna_punchlist.bed -1ae4cbf700ff5b6d02c96631351f7eb8 punchlist/Gae_host.Gae_cdna_punchlist.bed -50f76662114c8a77e8604a5a539e1e9c punchlist/Gae_host.Gae_cds_punchlist.bed -c269f93c3a43697116b5aa75314e5e07 punchlist/Gae_host.Gae_rna_punchlist.bed -e5fed140728b0f0d088d983a34868d8d punchlist/OscheiusTipulae.ASM1342590v1_cdna_punchlist.bed -779ad07ceefaca4657090c9f0322ddfd repeat/Oscheius_DF5033.bigWig -9d2cca3997c9a60f66516af739eb3719 repeat/Oscheius_DF5033_renamed.bed -bb92039394cc0f2e9e6809e78be4bc9e rna/CaenorhabditisElegans.WBcel235_rna.bigBed -4254dcb32d0aed160e03d3f6c02cf636 rna/Gae_host.Gae_rna.bigBed -b2d9bea322639d2b0954a0ccc7eed800 selfcomp/Oscheius_DF5033_selfcomp.bigBed diff --git a/assets/local_testing/nxOscDF5033.yaml b/assets/local_testing/nxOscDF5033.yaml old mode 100644 new mode 100755 index 388fdf34..69d61dc7 --- a/assets/local_testing/nxOscDF5033.yaml +++ b/assets/local_testing/nxOscDF5033.yaml @@ -4,8 +4,7 @@ assembly: sample_id: Oscheius_DF5033 latin_name: to_provide_taxonomic_rank classT: nematode - asmVersion: Oscheius_DF5033_1 - dbVersion: "1" + asmVersion: 1 gevalType: DTOL reference_file: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta #/lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta diff --git a/assets/local_testing/nxOscSUBSET.yaml b/assets/local_testing/nxOscSUBSET.yaml old mode 100644 new mode 100755 index d313a189..64082b6e --- a/assets/local_testing/nxOscSUBSET.yaml +++ 
b/assets/local_testing/nxOscSUBSET.yaml @@ -4,8 +4,7 @@ assembly: sample_id: OscheiusSUBSET latin_name: to_provide_taxonomic_rank classT: nematode - asmVersion: OscheiusSUBSET_1 - dbVersion: "1" + asmVersion: 1 gevalType: DTOL reference_file: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_SUBSET/assembly/draft/SUBSET_genome/Oscheius_SUBSET.fasta #/lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml old mode 100644 new mode 100755 index b42a1ed9..6a3f02fd --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,8 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "sanger-tol/treeval Methods Description" section_href: "https://github.com/sanger-tol/treeval" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object data: |

<h4>Methods</h4>

<p>Data was processed using sanger-tol/treeval v${workflow.manifest.version} ${doi_text} of the sanger-tol collection of workflows, created using nf-core (Ewels <em>et al.</em>, 2020).</p>
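Further on, the reworked `assets/schema_input.json` validates a nested YAML rather than the template's flat samplesheet. As a rough sketch only — every path and value below is a placeholder, not tested data — a `treeval.yaml` conforming to the new schema looks broadly like this (see `assets/local_testing/nxOscDF5033.yaml` for a real example):

```bash
# Write a skeleton treeval.yaml matching the nested input schema.
# All paths and values are placeholders; substitute real data before running.
cat > treeval.yaml <<'EOF'
assembly:
  level: scaffold
  sample_id: MySample_1
  latin_name: Genus_species
  classT: nematode
  asmVersion: 1
  gevalType: DTOL
reference_file: /path/to/assembly.fasta
assem_reads:
  pacbio: /path/to/pacbio/fasta/
  hic: /path/to/hic/cram/
  supplementary: path
alignment:
  data_dir: /path/to/gene_alignment_data/
  geneset: "Gae_host.Gae"
self_comp:
  motif_len: 0
  mummer_chunk: 10
synteny:
  synteny_genome_path: /path/to/syntenic_genomes/
intron:
  size: "50k"
telomere:
  teloseq: TTAGGG
busco:
  lineage_path: /path/to/busco/lineages/
  lineage: nematoda_odb10
EOF
```

The nine blocks shown match the schema's `required` list exactly, so omitting any of them should fail validation.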

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml old mode 100644 new mode 100755 diff --git a/assets/nematode/csv_data/s3_Gae_Host.Gae-data.csv b/assets/nematode/csv_data/s3_Gae_Host.Gae-data.csv old mode 100644 new mode 100755 index b967de78..dca2b3dd --- a/assets/nematode/csv_data/s3_Gae_Host.Gae-data.csv +++ b/assets/nematode/csv_data/s3_Gae_Host.Gae-data.csv @@ -1,5 +1,5 @@ -org,type,data_file -Gae_host.Gae,cdna,https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host5000cdna.MOD.fa -Gae_host.Gae,cds,https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host12003cds.MOD.fa -Gae_host.Gae,pep,https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host12005pep.MOD.fa -Gae_host.Gae,rna,https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host18005rna.MOD.fa +org, type, data_file +Gae_host.Gae, cdna, https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host5000cdna.MOD.fa +Gae_host.Gae, cds, https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host12003cds.MOD.fa +Gae_host.Gae, pep, https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host12005pep.MOD.fa +Gae_host.Gae, rna, https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host18005rna.MOD.fa diff --git a/assets/nf-core-treeval_logo_light.png b/assets/nf-core-treeval_logo_light.png deleted file mode 100644 index 074f9c6a..00000000 Binary files a/assets/nf-core-treeval_logo_light.png and /dev/null differ diff --git a/assets/s3_treeval_test.yaml b/assets/s3_treeval_test.yaml old mode 100644 new mode 100755 index cc80c4bd..d3ddb32a --- a/assets/s3_treeval_test.yaml +++ b/assets/s3_treeval_test.yaml @@ -1,5 +1,4 @@ assembly: - sizeClass: "" # S if {genome => 4Gb} else L level: scaffold sample_id: nxOscDoli1 classT: nematode diff --git a/assets/schema_input.json b/assets/schema_input.json old mode 100644 new mode 100755 index 7240ae81..5b387a7d --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,30 +7,134 @@ "items": { "type": "object", "properties": { - "sample": { + "assembly": { + "type": "object", + "properties": { + "sample_id": { + "type": "string", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "latin_name": { + "type": "string", + "errorMessage": "The scientific name for the assembly" + }, + "classT": { + "type": "string", + "errorMessage": "The Clade of the assembly. Used as the syntenic group and to complete the gene_alignment data dir." + }, + "TicketType": { + "type": "string", + "errorMessage": "Not currently in use. Single word description of associated project." + } + } + }, + "reference_file": { "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "pattern": "^\\S+\\.f(ast)a$", + "errorMessage": "Assembly input file, decompressed" }, - "fastq_1": { + "assem_reads": { + "type": "object", + "properties": { + "pacbio": { + "type": "string", + "errorMessage": "Path to folder containing fasta.gz files" + }, + "hic": { + "type": "string", + "errorMessage": "Path to folder containing .cram and .crai files" + }, + "supplementary": { + "type": "string", + "errorMessage": "Not currently in use. 
Placeholder for future use" + } + } + }, + "alignment": { + "type": "object", + "properties": { + "data_dir": { + "type": "string", + "errorMessage": "Gene Alignment data directory" + }, + "common_name": { + "type": "string", + "errorMessage": "Not currently in use. Common identifier for group (adding bee, wasp, ant as sub division for clade)" + }, + "geneset": { + "type": "string", + "errorMessage": "A csv list of organisms to run against." + } + } + }, + "self_comp": { + "type": "object", + "properties": { + "motif_len": { + "type": "integer", + "errorMessage": "Length of motif to be used in self comp search" + }, + "mummer_chunk": { + "type": "integer", + "errorMessage": "Size of chunks to be used by Mummer" + } + } + }, + "synteny": { + "type": "object", + "properties": { + "synteny_genome_path": { + "type": "string", + "errorMessage": "Syntenic Genome Directory Path" + } + } + }, + "outdir": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "Output directory path, can be changed via CLI" + }, + "intron": { + "type": "object", + "properties": { + "size": { + "type": "string", + "errorMessage": "Base pair size of introns, defaults to 50k" + } + } + }, + "telomere": { + "type": "object", + "properties": { + "teloseq": { + "type": "string", + "errorMessage": "Expected telomeric motif" + } + } }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { + "busco": { + "type": "object", + "properties": { + "lineage_path": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" + "errorMessage": "Path to directory containing lineages folder" }, - { + "lineage": { "type": "string", - "maxLength": 0 + "errorMessage": "busco lineage to run" } - ] + } } }, - "required": ["sample", "fastq_1"] + "required": [ + "busco", + "telomere", + "intron", + "synteny", + "self_comp", + "alignment", + "assem_reads", + "reference_file", + "assembly" + ] } } diff --git a/assets/self_comp/selfcomp.as b/assets/self_comp/selfcomp.as old mode 100644 new mode 100755 diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt old mode 100644 new mode 100755 index 7652fd11..3d299c57 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -26,7 +26,7 @@ Content-Disposition: inline; filename="sanger-tol-treeval_logo_light.png" join( '\n' ) %> <% -if (mqcFile){ +if ( mqcFile ){ def mqcFileObj = new File("$mqcFile") if (mqcFileObj.length() < mqcMaxSize){ out << """ diff --git a/assets/slackreport.json b/assets/slackreport.json old mode 100644 new mode 100755 index 1fcea494..bdc3b859 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -2,7 +2,7 @@ "attachments": [ { "fallback": "Plain-text summary of the attachment.", - "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "color": "<% if ( success ) { %>good<% } else { %>danger<%} %>", "author_name": "sanger-tol/treeval v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", diff --git a/assets/treeval_test.yaml b/assets/treeval_test.yaml deleted file mode 100644 index 53f38c44..00000000 --- a/assets/treeval_test.yaml +++ /dev/null @@ -1,27 +0,0 @@ -assembly: - sizeClass: { S if (genome => 4Gb) else L } - sample_id: ToLID -
latin_name: to_provide_taxonomic_rank - classT: { nematode | insect | mammal | etc } - TicketType: { DTOL | VGP | ERGA | FACULTY | OTHER } -reference_file: path_to_input_genome -assem_reads: - pacbio: path_to_fasta - hic: path_to_cram - supplementary: path -alignment: - data_dir: gene_alignment_data - geneset: "csv of genesets" -self_comp: - motif_len: 0 - mummer_chunk: 10 -telomere: - teloseq: TTAGGG -synteny: - synteny_genome_path: syntenic_genome_path -busco: - lineages_path: path_to_lineages_folder - lineage: lineage -outdir: "NEEDS TESTING" -intron: - size: "50k" diff --git a/bin/gff_to_bed.sh b/bin/extract_cov_iden.sh similarity index 88% rename from bin/gff_to_bed.sh rename to bin/extract_cov_iden.sh index 12f96690..977a388b 100755 --- a/bin/gff_to_bed.sh +++ b/bin/extract_cov_iden.sh @@ -1,9 +1,9 @@ #!/bin/bash -# gff_to_bed.sh +# extract_cov_iden.sh # ------------------- # A shell script to convert a -# gff into bed format by stripping the +# extract coverage information from # PAF header and reorganising the data # ------------------- # Author = yy5 diff --git a/conf/base.config b/conf/base.config old mode 100644 new mode 100755 index c14bf74c..4e7932ac --- a/conf/base.config +++ b/conf/base.config @@ -17,27 +17,94 @@ process { maxRetries = 1 maxErrors = '-1' + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + + withLabel:error_ignore { + errorStrategy = 'ignore' + } + + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + + // CUSTOM CONFIGS + // TODO: add process.tiny + + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } + withName:SAMTOOLS_MERGE { cpus = { check_max( 16 * 1, 'cpus' ) } memory = { check_max( 50.GB * task.attempt, 'memory') } } // RESOURCES: MEMORY INTENSIVE STEPS, SOFTWARE TO BE UPDATED TO COMBAT THIS - withName: '.*:.*:SELFCOMP:(SELFCOMP_ALIGNMENTBLOCKS|SELFCOMP_MAPIDS|SELFCOMP_MUMMER2BED|SELFCOMP_SPLITFASTA|BEDTOOLS_MERGE)' { + withName: '.*:.*:SELFCOMP:(SELFCOMP_MAPIDS|SELFCOMP_MUMMER2BED|SELFCOMP_SPLITFASTA|BEDTOOLS_MERGE)' { cpus = { check_max( 10 * task.attempt, 'cpus' ) } memory = { check_max( 120.GB * task.attempt, 'memory' ) } time = { check_max( 12.h * task.attempt, 'time' ) } } + withName: SELFCOMP_ALIGNMENTBLOCKS { + cpus = 
{ check_max( 20 * task.attempt, 'cpus' ) } + memory = { check_max( 120.GB * task.attempt, 'memory' ) } + time = { check_max( 18.h * task.attempt, 'time' ) } + } + + // RESOURCES: CHANGES TO FREQUENT FAILURES BELOW THIS MEM POINT - withName: '(MINIPROT_ALIGN|MINIMAP2_ALIGN)' { + withName: '.*:.*:GENE_ALIGNMENT:.*:(MINIPROT_ALIGN|MINIMAP2_ALIGN)' { memory = { check_max( 100.GB * Math.ceil( task.attempt * 1.5 ) , 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + time = { check_max( 12.h * task.attempt, 'time' ) } } withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { - cpus = { check_max( 16 * 1, 'cpus' ) } - memory = { check_max( 100.GB * task.attempt, 'memory' ) } + cpus = { check_max( 16 * 1, 'cpus' ) } + memory = { check_max( 300.GB * task.attempt, 'memory' ) } + time = { check_max( 36.h * task.attempt, 'time' ) } + } + + withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_SORT' { + cpus = { check_max( 8 * 1, 'cpus' ) } } // 25GB * (task attempt * 2) = 50GB, 100GB, 150GB @@ -66,6 +133,11 @@ process { memory = { check_max( 16.GB * task.attempt, 'memory' ) } } + withName: SNAPSHOT_HRES { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 50.GB * task.attempt, 'memory' ) } + } + withName: JUICER_TOOLS_PRE { cpus = { check_max( 20 * task.attempt, 'cpus' ) } memory = { check_max( 100.GB * task.attempt, 'memory' ) } @@ -77,64 +149,18 @@ process { // add a cpus 16 if bam.size() >= 50GB withName: '(SAMTOOLS_MARKDUP|BAMTOBED_SORT)' { - cpus = { check_max( 16 * 1, 'cpus' ) } - memory = { check_max( 75.GB * task.attempt, 'memory' ) } + cpus = { check_max( 12 * 1, 'cpus' ) } + memory = { check_max( 100.GB * task.attempt, 'memory' ) } } withName: COOLER_CLOAD { - cpus = { check_max( 16 * 1, 'cpus' ) } + cpus = { check_max( 16 * 1, 'cpus' ) } memory = { check_max( 100.GB * task.attempt, 'memory' ) } } - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. 
- // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - - withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withLabel:process_medium { + withName: BUSCO { cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - - withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } - - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - - withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } - } - - withLabel:error_ignore { - errorStrategy = 'ignore' - } - - withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 - } - - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 12.h * task.attempt, 'time' ) } } } diff --git a/conf/digest.config b/conf/digest.config deleted file mode 100644 index 9cd3d209..00000000 --- a/conf/digest.config +++ /dev/null @@ -1,30 +0,0 @@ -params { - outdir = "output/" - publish_dir_mode = "copy" - singularity_pull_docker_container = false -} - -process { - cpus = 2 - memory = 3.GB - time = 2.h -} - -if ("$PROFILE" == "singularity") { - singularity.enabled = true - singularity.autoMounts = true -} else { - docker.enabled = true - docker.userEmulation = true - docker.runOptions = "--platform linux/x86_64" -} - -// Increase time available to build Conda environment -conda { createTimeout = "120 min" } - -// Load test_data.config containing paths to test data -//includeConfig 'test_data.config' - -manifest { - nextflowVersion = '!>=21.10.0' -} diff --git a/conf/full_s3_test.config b/conf/full_s3_test.config old mode 100644 new mode 100755 index 1674f7ca..89d7cecf --- a/conf/full_s3_test.config +++ b/conf/full_s3_test.config @@ -15,10 +15,10 @@ params { config_profile_description = 'Minimal Test Data for GitHub Actions test' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' // Input data - input = "${projectDir}/assets/full_s3_treeval_test.yaml" + input = "${projectDir}/assets/full_s3_treeval_test.yaml" } diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index 7a1b3ac6..00000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,432 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines reference genomes using iGenome paths. 
- Can be used by any config that customises the base path using: - $params.igenomes_base / --igenomes_base ----------------------------------------------------------------------------------------- -*/ - -params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" - } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_5.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" - 
mito_name = "MT" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - 
fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - 
mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } - } -} diff --git a/conf/modules.config b/conf/modules.config old mode 100644 new mode 100755 index 548ea8f0..40c44751 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,7 +22,7 @@ process { // Files to be uploaded to the TreeVal JBrowse2 instance // .genome, .gz.{tbi|csi}, .bigBed, .bigWig, .paf - withName: 'GENERATE_GENOME_FILE|TABIX_BGZIPTABIX|UCSC_BEDTOBIGBED|UCSC_BEDGRAPHTOBIGWIG|.*:.*:SYNTENY:MINIMAP2_ALIGN' { + withName: 'GENERATE_GENOME_FILE|TABIX_BGZIPTABIX|UCSC_BEDTOBIGBED|UCSC_BEDGRAPHTOBIGWIG|.*:.*:SYNTENY:MINIMAP2_ALIGN|.*:.*:GENERATE_GENOME:CUSTOM_GETCHROMSIZES' { publishDir = [ path: { "${params.outdir}/treeval_upload" }, mode: params.publish_dir_mode, @@ -32,7 +32,7 @@ process { // Files to be stored along side the TreeVal files for access by curators // all are .bed - withName: 'PAF2BED|GFF_TO_BED|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MAX|BEDTOOLS_MERGE_MIN' { + withName: 'PAF2BED|EXTRACT_COV_IDEN|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MAX|BEDTOOLS_MERGE_MIN' { publishDir = [ path: { "${params.outdir}/treeval_upload/punchlists" }, mode: params.publish_dir_mode, @@ -42,7 +42,7 @@ process { // Files to be used for pretext, likely to be deleted once the hic workflow is complete. 
// .bed, .hr.pretext, .lr.pretext, needs centromere} - withName: 'GAP_LENGTH|PRETEXTMAP_HIGHRES|PRETEXTMAP_STANDRD|COOLER_ZOOMIFY|.*:.*:REPEAT_DENSITY:UCSC_BEDGRAPHTOBIGWIG|EXTRACT_TELO|JUICER_TOOLS_PRE' { + withName: 'SEQTK_CUTN|GAP_LENGTH|PRETEXTMAP_HIGHRES|PRETEXTMAP_STANDRD|COOLER_ZOOMIFY|COV_FOLDER|UCSC_BEDGRAPHTOBIGWIG|EXTRACT_TELO|JUICER_TOOLS_PRE|SNAPSHOT_SRES|SNAPSHOT_HRES|GET_PAIRED_CONTACT_BED' { publishDir = [ path: { "${params.outdir}/hic_files" }, mode: params.publish_dir_mode, @@ -61,7 +61,7 @@ process { withName: GNU_SORT_B { ext.args = { "-k1,1 -k2,2n" } - ext.suffix = { "genome" } + ext.suffix = { "sorted.genome" } } withName: GNU_SORT_C { @@ -84,6 +84,7 @@ process { withName: SEQTK_CUTN { ext.args = "-n 1" + ext.prefix = { "${meta.id}_gap" } } withName: MINIPROT_ALIGN { @@ -134,6 +135,9 @@ process { ext.args = "-n -b -c -L -l 400" } + // + // LONGREAD BLOCK + // withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN' { ext.args = "--MD -t 8" ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } @@ -148,6 +152,10 @@ process { ext.prefix = { "${meta.id}_merge" } } + withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}_sorted" } + } + withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_VIEW' { ext.args = "-b -hF 256" ext.prefix = { "${meta.id}_view" } @@ -174,9 +182,12 @@ process { } withName: '.*:.*:LONGREAD_COVERAGE:UCSC_BEDGRAPHTOBIGWIG' { - ext.prefix = 'coverage' + ext.prefix = { "${meta.id}_coverage" } } + // + // TELOMERE BLOCK + // withName: 'FIND_TELOMERE_REGIONS' { ext.find_telomere = 'find_telomere' } @@ -190,6 +201,9 @@ process { ext.prefix = { "telo_${meta.id}" } } + // + // BUSCO BLOCK + // withName: '.*:.*:BUSCO_ANNOTATION:UCSC_BEDTOBIGBED' { ext.args = { "-type=bed3+4 -extraIndex=name,OrthoDBurl" } ext.prefix = { "${meta.id}_buscogene" } @@ -212,6 +226,9 @@ process { ext.args = "--mode genome" } + // + // HIC MAPPING BLOCK + // withName: PRETEXTMAP_STANDRD { ext.args = "--sortby length --mapq 0" ext.prefix = { "${meta.id}_normal" } @@ -222,6 +239,16 @@ process { ext.prefix = { "${meta.id}_hr" } } + withName: 'SNAPSHOT_SRES' { + ext.args = "--sequences '=full' --resolution 1440" + ext.prefix = { "${meta.id}_normal" } + } + + withName: 'SNAPSHOT_HRES' { + ext.args = "--sequences '=full' --resolution 1440" + ext.prefix = { "${meta.id}_hr" } + } + withName: JUICER_TOOLS_PRE { ext.juicer_tools_jar = 'juicer_tools.1.8.9_jcuda.0.8.jar' ext.juicer_jvm_params = '-Xms36g -Xmx36g' diff --git a/conf/s3_test.config b/conf/s3_test.config old mode 100644 new mode 100755 index bb684f90..65bc1f09 --- a/conf/s3_test.config +++ b/conf/s3_test.config @@ -15,10 +15,10 @@ params { config_profile_description = 'Minimal Test Data for GitHub Actions test' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' // Input data - input = "${projectDir}/assets/s3_treeval_test.yaml" + input = "${projectDir}/assets/s3_treeval_test.yaml" } diff --git a/conf/selfcomp.config b/conf/selfcomp.config deleted file mode 100644 index 9cd3d209..00000000 --- a/conf/selfcomp.config +++ /dev/null @@ -1,30 +0,0 @@ -params { - outdir = "output/" - publish_dir_mode = "copy" - singularity_pull_docker_container = false -} - -process { - cpus = 2 - memory = 3.GB - time = 2.h -} - -if ("$PROFILE" == "singularity") { - singularity.enabled = true - singularity.autoMounts = true -} else { - docker.enabled = true - docker.userEmulation = true - 
docker.runOptions = "--platform linux/x86_64" -} - -// Increase time available to build Conda environment -conda { createTimeout = "120 min" } - -// Load test_data.config containing paths to test data -//includeConfig 'test_data.config' - -manifest { - nextflowVersion = '!>=21.10.0' -} diff --git a/conf/test.config b/conf/test.config old mode 100644 new mode 100755 index 087f0165..c3a6bb5e --- a/conf/test.config +++ b/conf/test.config @@ -18,9 +18,9 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 6 - max_memory = '12.GB' - max_time = '6.h' + max_cpus = 6 + max_memory = '12.GB' + max_time = '6.h' - input = 'assets/local_testing/nxOscSUBSET.yaml' + input = 'assets/local_testing/nxOscSUBSET.yaml' } diff --git a/conf/test_full.config b/conf/test_full.config old mode 100644 new mode 100755 index e6e1025a..5d6a9617 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -16,8 +16,8 @@ cleanup = true params { - config_profile_name = 'Full local test profile' - config_profile_description = 'Full test dataset to check pipeline function, using a current full local dataset' + config_profile_name = 'Full local test profile' + config_profile_description = 'Full test dataset to check pipeline function, using a current full local dataset' - input = 'assets/local_testing/nxOscDF5033.yaml' + input = 'assets/local_testing/nxOscDF5033.yaml' } diff --git a/docs/README.md b/docs/README.md old mode 100644 new mode 100755 diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e47..00000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb8..00000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf5..00000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/images/nf-core-treeval_logo_dark.png b/docs/images/nf-core-treeval_logo_dark.png deleted file mode 100644 index 9d2e6bb9..00000000 Binary files a/docs/images/nf-core-treeval_logo_dark.png and /dev/null differ diff --git a/docs/images/nf-core-treeval_logo_light.png b/docs/images/nf-core-treeval_logo_light.png deleted file mode 100644 index 4bd3b871..00000000 Binary files a/docs/images/nf-core-treeval_logo_light.png and /dev/null differ diff --git a/docs/images/treeval_1_0_gap_finder.jpeg b/docs/images/treeval_1_0_gap_finder.jpeg old mode 100644 new mode 100755 diff --git a/docs/images/treeval_1_0_gene_alignment.jpeg b/docs/images/treeval_1_0_gene_alignment.jpeg old mode 100644 new mode 100755 diff --git a/docs/images/treeval_1_0_hic_mapping.jpeg b/docs/images/treeval_1_0_hic_mapping.jpeg old mode 100644 new mode 100755 diff --git a/docs/images/treeval_1_0_legend.jpeg b/docs/images/treeval_1_0_legend.jpeg old mode 100644 new mode 100755 diff --git a/docs/images/treeval_1_0_longread_coverage.jpeg b/docs/images/treeval_1_0_longread_coverage.jpeg old mode 100644 new mode 100755 diff --git a/docs/images/treeval_1_0_repeat_density.jpeg b/docs/images/treeval_1_0_repeat_density.jpeg old mode 100644 new mode 100755 diff --git a/docs/images/treeval_1_0_synteny.jpeg b/docs/images/treeval_1_0_synteny.jpeg old mode 100644 new mode 100755 diff --git 
a/docs/images/treeval_1_0_telo_finder.jpeg b/docs/images/treeval_1_0_telo_finder.jpeg old mode 100644 new mode 100755 diff --git a/docs/images/treeval_genealignment_workflow.jpeg b/docs/images/treeval_genealignment_workflow.jpeg deleted file mode 100644 index a81fa73a..00000000 Binary files a/docs/images/treeval_genealignment_workflow.jpeg and /dev/null differ diff --git a/docs/images/treeval_generategenome_workflow.jpeg b/docs/images/treeval_generategenome_workflow.jpeg deleted file mode 100644 index 82ded04e..00000000 Binary files a/docs/images/treeval_generategenome_workflow.jpeg and /dev/null differ diff --git a/docs/images/treeval_insilicodigest_workflow.jpeg b/docs/images/treeval_insilicodigest_workflow.jpeg deleted file mode 100644 index b525990f..00000000 Binary files a/docs/images/treeval_insilicodigest_workflow.jpeg and /dev/null differ diff --git a/docs/images/treeval_selfcomp_workflow.jpeg b/docs/images/treeval_selfcomp_workflow.jpeg deleted file mode 100644 index 47593de4..00000000 Binary files a/docs/images/treeval_selfcomp_workflow.jpeg and /dev/null differ diff --git a/docs/images/treeval_synteny_workflow.jpeg b/docs/images/treeval_synteny_workflow.jpeg deleted file mode 100644 index 01168b36..00000000 Binary files a/docs/images/treeval_synteny_workflow.jpeg and /dev/null differ diff --git a/docs/output.md b/docs/output.md index 75100bc3..e760d706 100755 --- a/docs/output.md +++ b/docs/output.md @@ -1,35 +1,32 @@ # sanger-tol/treeval: Output -## Introduction +# Introduction This document describes the output produced by the pipeline. The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. -## Pipeline overview +# Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following workflows: -- [YAML_INPUT](#yamlinput) - Reads the input yaml and generates parameters used by other workflows. -- [GENERATE_GENOME](#generategenome) - Builds genome description file of the reference genome. -- [LONGREAD_COVERAGE](#longreadcoverage) - Produces read coverage based on pacbio long read fasta file. -- [GAP_FINDER](#gapfinder) - Identifies contig gaps in the input genome. -- [REPEAT_DENSITY](#repeatdensity) - Reports the intensity of regional repeats within an input assembly. -- [HIC_MAPPING](#hicmapping) - Aligns illumina HiC short reads to the input genome, generates mapping file in three format for visualisation: .pretext, .hic and .mcool -- [TELO_FINDER](#telofinder) - . -- [GENE_ALIGNMENT](#genealignment) - Aligns the peptide and nuclear data from assemblies of related species to the input genome. -- [INSILICO_DIGEST](#insilicodigest) - Generates a map of enzymatic digests using 3 Bionano enzymes. -- [SELFCOMP](#selfcomp) - Identifies regions of self-complementary sequence. -- [SYNTENY](#synteny) - Generates syntenic alignments between other high quality genomes. -- [BUSCO_ANALYSIS](#buscoanalysis) - Uses BUSCO to identify ancestral elements. Also use to identify ancestral Lepidopteran genes (merian units). +- [generate-genome](#generate-genome) - Builds genome description file of the reference genome. +- [longread-coverage](#longread-coverage) - Produces read coverage based on pacbio long read fasta file. +- [gap-finder](#gap-finder) - Identifies contig gaps in the input genome. +- [repeat-density](#repeat-density) - Reports the intensity of regional repeats within an input assembly. 
+- [hic-mapping](#hic-mapping) - Aligns Illumina HiC short reads to the input genome, generates mapping files in three formats for visualisation: .pretext, .hic and .mcool +- [telo-finder](#telo-finder) - Identifies regions of a user-given telomeric sequence. +- [gene-alignment](#gene-alignment) - Aligns the peptide and nuclear data from assemblies of related species to the input genome. +- [insilico-digest](#insilico-digest) - Generates a map of enzymatic digests using 3 Bionano enzymes. +- [selfcomp](#selfcomp) - Identifies regions of self-complementary sequence. +- [synteny](#synteny) - Generates syntenic alignments between other high quality genomes. +- [busco-analysis](#busco-analysis) - Uses BUSCO to identify ancestral elements. Also used to identify ancestral Lepidopteran genes (merian units). -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [pipeline-information](#pipeline-information) - Report metrics generated during the workflow execution -### YAML_INPUT +## generate-genome -This subworkflow reads the input .yaml via the use of the built-in snakeyaml.Yaml component, which converts the yaml into a nested list. Via some simple channel manipulation, each item in this nexted list is converted into a parameter for use in each of the other subworkflows. - -### GENERATE_GENOME +This workflow generates a .genome file which describes the base pair length of each scaffold in the reference genome. This file is then recycled into the workflow to be used by a number of other subworkflows.
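As a rough sketch of what this file contains, the .genome file is just the scaffold name and length columns of a samtools index, sorted as configured for GNU_SORT_B in this changeset (`-k1,1 -k2,2n`, suffix `sorted.genome`). Filenames here are placeholders, and the pipeline itself now derives the file via the nf-core CUSTOM_GETCHROMSIZES module added in this changeset:

```bash
# Index the assembly, then keep only the scaffold name and length columns.
samtools faidx assembly.fa
cut -f1,2 assembly.fa.fai | sort -k1,1 -k2,2n > assembly.sorted.genome
```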
Output files @@ -39,13 +36,13 @@ This subworkflow reads the input .yaml via the use of the built-in snakeyaml.Yam
-This workflow generates a .genome file which describes the base pair length of each scaffold in the reference genome. This is performed by [SAMTOOLS_FAIDX](https://nf-co.re/modules/samtools_faidx) to generate a .fai file. This index file is trimmed using local module [GENERATE_GENOME_FILE](../modules/local/generate_genome_file.nf) to output a .genome file. This file is then recycled into the workflow to be used by a number of other subworkflows. - ![Generate genome workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_generate_genome.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### LONGREAD_COVERAGE +## longread-coverage + +Longread Coverage uses PacBio long reads to generate a coverage bigWig as well as a trio of depth.bigbed files.
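A hedged shell sketch of the core steps, assembled from the module arguments visible in this changeset (`--MD -t 8` for MINIMAP2_ALIGN, `-b -hF 256` for SAMTOOLS_VIEW); the filenames and the `map-pb` preset are illustrative assumptions, not the pipeline's exact invocation:

```bash
# Align PacBio reads, keep primary alignments only, then derive a coverage track.
minimap2 --MD -t 8 -ax map-pb assembly.fa pacbio.fasta.gz | samtools sort -o sample_sorted.bam
samtools view -b -hF 256 sample_sorted.bam > sample_view.bam   # -F 256 drops secondary alignments
bedtools genomecov -ibam sample_view.bam -bga | sort -k1,1 -k2,2n > sample.bedgraph
bedGraphToBigWig sample.bedgraph assembly.sorted.genome sample_coverage.bigWig
```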
Output files @@ -59,7 +56,9 @@ This workflow generates a .genome file which describes the base pair length of e
-### GAP_FINDER +## gap-finder + +The gap-finder subworkflow generates a bed file containing the genomic locations of the gaps in the sequence. This file is injected into the hic_maps at a later stage. The output bed file is then BGzipped and indexed for display on JBrowse.
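Illustratively, the gap track reduces to the seqtk and awk calls configured in this changeset (SEQTK_CUTN runs with `-n 1`, and the length-appending awk comes verbatim from the GAP_LENGTH module); filenames are placeholders:

```bash
seqtk cutN -n 1 -g assembly.fa > sample_gap.bed    # -g reports each run of Ns as a BED interval
awk '{print $0"\t"sqrt(($3-$2)*($3-$2))}' sample_gap.bed > sample_gap.bedgraph   # append gap length for the HiC maps
bgzip sample_gap.bed && tabix -p bed sample_gap.bed.gz    # BGZip + index for JBrowse
```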
Output files @@ -72,13 +71,13 @@ This workflow generates a .genome file which describes the base pair length of e
-The GAP_FINDER subworkflow generates a bed file containing the genomic locations of the gaps in the sequence. This is performed by the use of [SEQTK_CUTN]() which cuts the input genome at sites of N (gaps). [GAP_LENGTH]() then calculates the lengths of gaps generates in the previous step, this file is injected into the hic_maps at a later stage. SEQTK's output bed file is then BGzipped and indexed by [TABIX_BGZIPTABIX](https://nf-co.re/modules/tabix_bgziptabix). - ![Gap Finder workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_gap_finder.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### REPEAT_DENSITY +## repeat-density + +This uses [WindowMasker](https://github.com/goeckslab/WindowMasker) to mark potential repeats on the genome. The genome is chunked into 10kb bins which move along the entire genome as sliding windows in order to profile the repeat intensity. The repeat fragments marked by WindowMasker are then intersected with these bins and mapped back to the original assembly for visualization purposes.
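A sketch of the idea under assumed flags and filenames (the subworkflow itself uses the windowmasker/mk_counts, windowmasker/ustat and bedtools modules listed in this changeset, plus the local extract_repeat.pl script to convert WindowMasker output to BED):

```bash
windowmasker -mk_counts -in assembly.fa -out assembly.counts              # stage 1: k-mer counts
windowmasker -ustat assembly.counts -in assembly.fa -out repeats.interval # stage 2: mark repeats
extract_repeat.pl repeats.interval > repeats.bed                          # local script from this repo
bedtools makewindows -g assembly.sorted.genome -w 10000 > bins_10kb.bed   # 10kb bins across the genome
bedtools intersect -a bins_10kb.bed -b repeats.bed -c > repeat_density.bedgraph  # repeat count per bin
```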
Output files @@ -87,13 +86,14 @@ The GAP_FINDER subworkflow generates a bed file containing the genomic locations - `*_repeat_density.bw`: Intersected read windows aligned to the reference genome in bigwig format.
-This uses [WindowMasker](https://github.com/goeckslab/WindowMasker) to mark potential repeats on the genome. The genome is chunked into 10kb bins which move along the entire genome as sliding windows in order to profile the repeat intensity. Bedtools is then used to intersect the bins and WindowMasker fragments. These fragments are then mapped back to the original assembly for visualization purposes. ![Repeat Density workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_repeat_density.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### HIC_MAPPING +## hic-mapping + +The hic-mapping subworkflow takes a set of HiC read files in .cram format as input and derives HiC mapping outputs in .pretext, .hic, and .mcool formats. These outputs are used for visualization on [PretextView](https://github.com/wtsi-hpag/PretextView), [Juicebox](https://github.com/aidenlab/Juicebox), and [Higlass](https://github.com/higlass/higlass) respectively.
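For instance, the .pretext output corresponds to streaming the merged alignment into PretextMap with the arguments set for PRETEXTMAP_STANDRD in this changeset (the input filename is a placeholder):

```bash
# Build the standard-resolution contact map from the merged HiC alignment.
samtools view -h sample_merged.bam | PretextMap --sortby length --mapq 0 -o sample_normal.pretext
```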
Output files @@ -104,13 +104,14 @@ This uses [WindowMasker](https://github.com/goeckslab/WindowMasker) to mark pote - `*.mcool`: HiC map required for HiGlass
-The HIC_MAPPING subworkflow takes a set of HiC read files in .cram format as input and derives HiC mapping outputs in .pretext, .hic, and .mcool formats. These outputs are used for visualization on [PretextView](https://github.com/wtsi-hpag/PretextView), [Juicebox](https://github.com/aidenlab/Juicebox), and [Higlass](https://github.com/higlass/higlass) respectively. ![Hic Mapping workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_hic_mapping.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### TELO_FINDER +## telo-finder + +The telo-finder subworkflow uses a supplied (by the .yaml) telomeric sequence to identify putative telomeric regions in the input genome. The BGZipped and indexed file is used in JBrowse and as supplementary data for HiGlass and PreText.
Output files @@ -123,13 +124,13 @@ The HIC_MAPPING subworkflow takes a set of HiC read files in .cram format as inp
-The TELO_FINDER subworkflow uses a supplied (by the .yaml) telomeric sequence to identify putative telomeric regions in the input genome. This is acheived via the use of [FIND_TELOMERE_REGIONS](../modules/local/find_telomere_regions.nf), the output of which is used to generate a telomere.windows file with [FIND_TELOMERE_WINDOWS](../modules/local/find_telomere_windows.nf) (Both of these modules utilise VGP derived telomere programs [found here](https://github.com/VGP/vgp-assembly/tree/master/pipeline/telomere)), data for each telomeric site is then extracted into bed format with [EXTRACT_TELO](../modules/local/extract_telo.nf) and finally BGZipped and indexed with [TABIX_BGZIPTABIX](https://nf-co.re/modules/tabix_bgziptabix/tabix_bgziptabix). - ![Telomere Finder workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_telo_finder.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### BUSCO_ANALYSIS +## busco-analysis + +The BUSCO annotation subworkflow takes an assembly genome as input and extracts a list of [BUSCO](https://gitlab.com/ezlab/busco) genes from the BUSCO results. Additionally, it provides an overlapping BUSCO gene set based on a list of lepidoptera ancestral genes (Wright et al., 2023), investigated by Charlotte Wright from Mark Blaxter's lab at the Sanger Institute.
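A hedged sketch of the underlying BUSCO call, combining the `--mode genome` argument set in this changeset with the offline database layout described in the usage docs; the paths, lineage and output name are illustrative:

```bash
# Run BUSCO offline against a locally downloaded lineage database.
busco --in assembly.fa --mode genome --lineage_dataset nematode_odb10 \
      --download_path /path/to/busco_db --offline --out sample_busco
```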
Output files @@ -140,13 +141,13 @@ The TELO_FINDER subworkflow uses a supplied (by the .yaml) telomeric sequence to
-The BUSCO_ANNOTATION subworkflow takes an assembly genome as input and extracts a list of [BUSCO](https://gitlab.com/ezlab/busco) genes based on the BUSCO results obtained from BUSCO. Additionally, it provides an overlap BUSCO gene set based on a list of lepidoptera ancestral genes((Wright et al., 2023), which has been investigated by Charlotte Wright from Mark Blaxter's lab at the Sanger Institute. - ![Busco analysis workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_busco_analysis.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### GENE_ALIGNMENT +## gene-alignment + +The gene alignment subworkflows load geneset (cdna, cds, rna, pep) data from a list of genomes detailed in the input .yaml and align these to the reference genome. It contains two subworkflows, one of which handles peptide data and the other of which handles RNA, nuclear and complementary DNA data. These produce files that can be displayed by JBrowse as tracks.
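Roughly, the two routes reduce to a miniprot call for peptide data and a minimap2 call for the nucleotide types; a sketch under assumed filenames (the `splice` preset is an assumption here, though the miniprot and minimap2 modules themselves are pinned in this changeset):

```bash
miniprot -d reference.mpi reference.fa                            # build the index once (MINIPROT_INDEX)
miniprot --gff reference.mpi organism_pep.fasta > pep.gff         # peptide route (MINIPROT_ALIGN)
minimap2 -ax splice reference.fa organism_cdna.fasta > cdna.sam   # cdna/cds/rna route (MINIMAP2_ALIGN)
```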
Output files @@ -165,23 +166,13 @@ The BUSCO_ANNOTATION subworkflow takes an assembly genome as input and extracts
-The gene alignment subworkflows loads genesets (cdna, cds, rna, pep) data from a given list of genomes detailed, in the input .yaml, and aligns these to the reference genome. It contains two subworkflows, one of which handles peptide data and the other of which handles RNA, nuclear and complementary DNA data. These produce files that can be displayed by JBrowse as tracks. - -NUC_ALIGNMENTS: Reference fasta and fai files are aligned with the above mentioned gene alignment query files by [MINIMAP2_ALIGN](https://nf-co.re/modules/minimap2_align). -These are merged with [SAMTOOLS_MERGE](https://nf-co.re/modules/samtools_merge), converted to .bed format through [BEDTOOLS_BAMTOBED](https://nf-co.re/modules/bedtools_bamtobed), sorted via [BEDTOOLS_SORT](https://nf-co.re/modules/bedtools_sort) and finally converted to .bigBed format [UCSC_BEDTOBIGBED](https://nf-co.re/modules/ucsc_bedtobigbed) with the use of an auto SQL file found in the /assets/gene_alignment folder. This process is performed per species per data type. - -PEP_ALIGNMENTS: Reference fasta is indexed with [MINIPROT_INDEX](https://nf-co.re/modules/miniprot_index) and aligned with peptide data [MINIPROT_ALIGN](https://nf-co.re/modules/miniprot_align). The output .gff file is merged with [CAT_CAT](https://nf-co.re/modules/cat_cat) per species, sorted with [BEDTOOLS_SORT](https://nf-co.re/modules/bedtools_sort) and indexed with [TABIX_BGZIPTABIX](https://nf-co.re/modules/tabix_bgziptabix/tabix_bgziptabix). +![Gene alignment workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_gene_alignment.jpeg) -PUNCHLIST: Punchlists contain information on genes found to be duplicated (fully and partially) in the input genome. This is generated differently dependent on whether the datatype is peptide or not. - -- NUC_ALIGNMENT:PUNCHLIST takes the merged.bam produced after the [SAMTOOLS_MERGE](https://nf-co.re/modules/samtools_merge) step. This is then converted into a .paf file with [PAFTOOLS_SAM2PAF](https://github.com/nf-core/modules/tree/master/modules/nf-core/paftools/sam2paf) and finally into bed with [PAF2BED](../modules/local/paf_to_bed.nf). -- PEP_ALIGNMENT:PUNCHLIST takes the merged.gff produced by [CAT_CAT](https://nf-co.re/modules/cat_cat) and converts it into .bed with [GFF_TO_BED](../modules/local/gff_to_bed.nf) - -![Gene alignment workflow](images/treeval_1_0_gene_alignment.jpeg) +![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -![Workflow Legend](images/treeval_1_0_legend.jpeg) +## insilico-digest -### INSILICO_DIGEST +The insilico-digest workflow is used to visualize the Bionano enzyme cutting sites for a genomic FASTA file. This procedure generates data tracks based on three digestion enzymes: BSPQ1, BSSS1, and DLE1.
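Per enzyme, the digest track follows the local makecmap scripts whose invocations appear verbatim in this changeset; the output prefixes below are placeholders:

```bash
fa2cmap_multi_color.pl -i assembly.fa -e BSPQ1 1           # enzyme recognition sites -> CMAP
grep -v '#' sample_BSPQ1.cmap > sample_BSPQ1_edited.cmap   # strip CMAP header lines
cmap2bed.py -t sample_BSPQ1_edited.cmap -z BSPQ1 | sort -k1,1 -k2,2n > BSPQ1.bed   # CMAP -> sorted BED
```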
Output files @@ -191,13 +182,13 @@ PUNCHLIST: Punchlists contain information on genes found to be duplicated (fully
-The INSILICO_DIGEST workflow is used to visualize the Bionano enzyme cutting sites for a genome FASTA file. It starts by identifying the recognition sequences of the labeling enzyme to create a CMAP file. This CMAP file is then converted into BED and BIGBED formats to provide visualizations of the Bionano enzyme cutting sites. This procedure generates data tracks based on three digestion enzymes: BSPQ1, BSSS1, and DLE1. - ![Insilico digest workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_insilico_digest.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### SELFCOMP +## selfcomp + +The selfcomp subworkflow is a comparative genomics analysis originally performed by the Ensembl project. It involves comparing the genes and genomic sequences within a single species. The goal of the analysis is mainly to identify haplotypic duplications in a particular genome assembly.
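At its core this is a MUMmer self-alignment run with the arguments configured in this changeset (`-n -b -c -L -l 400`); in the pipeline the assembly is first split into chunks (CHUNKFASTA, sized by `mummer_chunk` from the input .yaml), so the chunked query filename below is a placeholder:

```bash
# Both strands (-b), ACGT matches only (-n), minimum match length 400bp (-l 400), report lengths (-L).
mummer -n -b -c -L -l 400 assembly.fa assembly_chunk.fa > selfcomp.mums
```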
Output files @@ -207,13 +198,13 @@ The INSILICO_DIGEST workflow is used to visualize the Bionano enzyme cutting sit
-he SELFCOMP subworkflow is a comparative genomics analysis originally performed by the Ensembl project. It involves comparing the genes and genomic sequences within a single species. The goal of the analysis is mainly to identify haplotypic duplications in a particular genome assembly. - ![Selfcomp workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_selfcomp.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### SYNTENY +## synteny + +This workflow searches along a predetermined path for syntenic genome files based on clade, then aligns each to the reference genome with [MINIMAP2_ALIGN](https://nf-co.re/modules/minimap2_align), emitting an aligned .paf file for each.
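Minimally, each syntenic comparison is a single minimap2 alignment emitting PAF, minimap2's default output when run without `-a`; the filenames and thread count are illustrative:

```bash
# One PAF per syntenic genome found for the clade.
minimap2 -t 8 assembly.fa syntenic_genome.fa > sample_synteny.paf
```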
Output files @@ -223,22 +214,21 @@ he SELFCOMP subworkflow is a comparative genomics analysis originally performed
-This worflows searches along predetermined path for syntenic genome files based on clade and then aligns with [MINIMAP2_ALIGN](https://nf-co.re/modules/minimap2_align) each to the reference genome, emitting an aligned .paf file for each. - ![Synteny workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_synteny.jpeg) ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) -### Pipeline information +## pipeline-information + +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
Output files -- `pipeline_info/` +- `treeval_info/` + - Report generated by TreeValProject. - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
- -[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md old mode 100644 new mode 100755 index 321329bf..7769c18d --- a/docs/usage.md +++ b/docs/usage.md @@ -8,28 +8,32 @@ The TreeVal pipeline has three requirements before being able to run -First, it requires a .yaml file (described below). This will contain all of the information required for the running of the TreeVal pipeline. +Firstly, it requires a .yaml file (described below in Full Samplesheet). This will contain all of the information required for running the TreeVal pipeline. -The cram files must each be acompanied by an index files generated by SAMTOOLS. +The cram files must each be accompanied by an index file (.crai) generated by samtools index. The gene alignment data is also expected to follow a particular folder structure, such as (using data from the yaml below): ```bash geneset = "Gae_host.Gae,CSKR_v2.CSKR" -{gene_alignment_dir}{classT}/{i for i in geneset.split(',')}/{cds|cdna|rna|pep}/ +for organism in ${geneset//,/ }; do +    path={gene_alignment_dir}{classT}/${organism}/csv-data/${organism}-data.csv +done +``` + +Each csv file acts as a file of file names, containing one line per data file in a format such as: -/lustre/scratch123/tol/resources/treeval/gene_alignment_data/nematode/Gae_host.Gae/{cds|cdna|rna|pep}/Gae_host.Gae_100_cdna.fasta +```bash +{organism},{cdna|pep|cds|rna},/lustre/scratch123/tol/resources/treeval/gene_alignment_data/{classT}/{organism}/{cds|cdna|rna|pep}/{organism}_{cdna|pep|cds|rna}.fasta ``` -These folders will contain your fasta files. The data per organism will be described in a csv file found in: `{gene_alignment_dir}{classT}/csv_data/`, for example: `/lustre/scratch123/tol/resources/treeval/gene_alignment_data/nematode/csv_data` will contain two files, `Gae_host.Gae-data.csv and CSKR_v2.CSKR`. Each of these will contain the sample_id, data_type and absolute_path. For example, `CSKR_v2,cdna,/lustre/scratch123/tol/resources/treeval/gene_alignment_data/nematode/Gae_host.Gae/cdna/Gae_host.Gae_100_cdna.fasta` +It is advised that the fasta files be no larger than 50Mb; this allows the pipeline to run without wasting significant resources on large alignments. ## Full samplesheet -The samplesheet for this pipeline is as shown below. This yaml is parsed by the pipeline and converted into the relavent channels. +The samplesheet for this pipeline is as shown below. This yaml is parsed by the pipeline and converted into the relevant channels. - `assembly` - - `sizeClass`: {S | L} denotes the size of genome S = <4Gb. - `sample_id`: ToLID of the sample. - `latin_name`: Latin identification of species - `classT`: Clade name (as used to group synteny sequences and to complete alignment/data_dir). @@ -52,14 +56,14 @@ The samplesheet for this pipeline is as shown below. This yaml is parsed by the - `intron:` - `size`: base pair size of introns default is 50k - `telomere`: - - `teloseq`: Telomeric sequence + - `teloseq`: Telomeric motif - `busco` - `lineages_path`: path to folder above lineages folder - `lineage`: Example is nematode_odb10 -### BUSCO databases +### Note on using BUSCO -The pipeline requires the use of the BUSCO and AEA subworkflows.
+The pipeline requires the use of the BUSCO subworkflows. Create the database directory and move into the directory: ```bash @@ -69,7 +73,7 @@ mkdir -p $BUSCO cd $BUSCO ``` -Download BUSCO data and lineages to allow BUSCO to run in offline mode: +Download BUSCO data and lineages to allow BUSCO to run in offline mode. ## Subworkflows @@ -121,70 +125,7 @@ nextflow run sanger-tol/treeval --input assets/treeval.yaml --outdir -p With the `treeval.yaml` containing: -You will need to create a .yaml with information about the sample you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to follow the structure shown in the example below. - -- `assembly` - - `sizeClass`: {S | L} denotes the size of genome S = <4Gb. - - `sample_id`: ToLID of the sample. - - `latin_name`: Latin identification of species - - `classT`: Clade name (as used to group synteny sequences and to complete alignment/data_dir). - - `TicketType`: -- `reference_file`: Sample .fa file -- `assem_reads` - - `pacbio`: path to folder containing fasta.gz files. - - `hic`: path to folder containing cram files - - `supplementary`: #Will be required in future development. -- `alignment` - - `data_dir`: Gene alignment data path - - `common_name`: # For future implementation (adding bee, wasp, ant etc) - - `geneset`: a csv list of geneset data to be used -- `self_comp` - - `motif_len`: Length of motif to be used in self complementary sequence finding - - `mummer_chunk`: Size of chunks used by MUMMER module. -- `synteny` - - `synteny_genome_path`: Path to syntenic genomes grouped by clade. -- `outdir`: Will be required in future development. -- `intron:` - - `size`: base pair size of introns default is 50k -- `telomere`: - - `teloseq`: Telomeric sequence -- `busco` - - `lineages_path`: path to folder above lineages folder - - `lineage`: Example is nematode_odb10 - -An example is shown below: - -```bash -assembly: - sizeClass: S - level: scaffold - sample_id: nxOscDoli1 - classT: nematode - TicketType: DTOL -reference_file: /lustre/scratch123/tol/teams/grit/geval_pipeline/geval_runs/DTOL/nxOscDoli1_1/data/DTOL_nxOscDoli1_1_FULL.fa -assem_reads: - pacbio: path - hic: path - supplementary: path -alignment: - data_dir: /lustre/scratch123/tol/resources/treeval/gene_alignment_data/ - common_name: "" # For future implementation (adding bee, wasp, ant etc) - geneset: "Gae_host.Gae,CSKR_v2.CSKR" - #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv" -self_comp: - motif_len: 0 - mummer_chunk: 10 -telomere: - teloseq: TTAGGG -synteny: - synteny_genome_path: /lustre/scratch123/tol/resources/treeval/synteny/ -busco: - lineages_path: /lustre/scratch123/tol/resources/busco/v5 - lineage: nematode_odb10 -outdir: "NEEDS TESTING" -intron: - size: "50k" -``` +You will need to create a .yaml with information about the sample you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to follow the structure shown in the full samplesheet shown above. ### Updating the pipeline diff --git a/lib/TreeValProject.groovy b/lib/TreeValProject.groovy new file mode 100755 index 00000000..775835ef --- /dev/null +++ b/lib/TreeValProject.groovy @@ -0,0 +1,56 @@ +class TreeValProject { + // + // Generates a small summary containing context for the input files + // Creates a new file containing this context + pipeline_execution data + // Will be used for graph generation. 
+ // + + public static void summary(workflow, params) { + + def input_data = [:] + input_data['version'] = NfcoreTemplate.version( workflow ) + input_data['runName'] = workflow.runName + input_data['session_id'] = workflow.sessionId + input_data['duration'] = workflow.duration + input_data['DateStarted'] = workflow.start + input_data['DateCompleted'] = workflow.complete + + input_data['input_yaml'] = params.input + input_data['sample_name'] = params.sample_id.value + input_data['rf_data'] = params.rf_data.value + input_data['pb_data'] = params.pb_data.value + input_data['cm_data'] = params.cm_data.value + + if (workflow.success) { + + def output_directory = new File("${params.tracedir}/") + if (!output_directory.exists()) { + output_directory.mkdirs() + } + + def output_hf = new File(output_directory, "input_data_${params.trace_timestamp}.txt") + output_hf.write """\ + ---RUN_DATA--- + Pipeline_version: ${input_data.version} + Pipeline_runname: ${input_data.runName} + Pipeline_session: ${input_data.session_id} + Pipeline_duration: ${input_data.duration} + Pipeline_datastrt: ${input_data.DateStarted} + Pipeline_datecomp: ${input_data.DateCompleted} + ---INPUT_DATA--- + InputSampleID: ${input_data.sample_name} + InputYamlFile: ${input_data.input_yaml} + InputAssemblyData: ${input_data.rf_data} + Input_PacBio_Files: ${input_data.pb_data} + Input_Cram_Files: ${input_data.cm_data} + ---RESOURCES--- + """.stripIndent() + + def full_file = new File( output_directory, "TreeVal_run_${params.sample_id.value}_${params.trace_timestamp}.txt" ) + def file_locs = ["${params.tracedir}/input_data_${params.trace_timestamp}.txt", + "${params.tracedir}/pipeline_execution_${params.trace_timestamp}.txt"] + file_locs.each{ full_file.append( new File( it ).getText() ) } + + } + } +} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar old mode 100644 new mode 100755 diff --git a/main.nf b/main.nf old mode 100644 new mode 100755 index 0757da9d..5e412368 --- a/main.nf +++ b/main.nf @@ -14,7 +14,7 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +WorkflowMain.initialise( workflow, params, log ) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -27,11 +27,11 @@ include { TREEVAL_RAPID } from './workflows/treeval_rapid' // WORKFLOW: Run main sanger-tol/treeval analysis pipeline workflow SANGERTOL_TREEVAL { - TREEVAL () + TREEVAL () } workflow SANGERTOL_TREEVAL_RAPID { - TREEVAL_RAPID () + TREEVAL_RAPID () } /* @@ -44,11 +44,11 @@ workflow SANGERTOL_TREEVAL_RAPID { // WORKFLOW: Execute named workflow for the pipeline // workflow FULL { - SANGERTOL_TREEVAL () + SANGERTOL_TREEVAL () } workflow RAPID { - SANGERTOL_TREEVAL_RAPID () + SANGERTOL_TREEVAL_RAPID () } /* diff --git a/modules.json b/modules.json old mode 100644 new mode 100755 index 615bf148..f2689dc0 --- a/modules.json +++ b/modules.json @@ -7,42 +7,42 @@ "nf-core": { "bedtools/bamtobed": { "branch": "master", - "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "bedtools/genomecov": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "bedtools/intersect": { "branch": "master", - "git_sha": "da46ad4dfd38229e1514a81d3128ec7c30206f5c", + "git_sha": 
"c1532c77717ad7c64752b26b0fd9b4556bdef272", "installed_by": ["modules"] }, "bedtools/makewindows": { "branch": "master", - "git_sha": "007c99a9726dfd89198e788162db594cd29d426f", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "bedtools/map": { "branch": "master", - "git_sha": "d3c433828498c6881adcc2ea3a93260fff1fe942", + "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", "installed_by": ["modules"] }, "bedtools/merge": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "bedtools/sort": { "branch": "master", - "git_sha": "f1f473b21811b958d1317c4a97c56e16d3ee40f9", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "busco": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", "installed_by": ["modules"] }, "bwamem2/index": { @@ -52,17 +52,17 @@ }, "cat/cat": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "cooler/cload": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { @@ -70,100 +70,109 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": "d75b37fef175f241230ee25c485bd574c768e282", + "installed_by": ["modules"] + }, "gnu/sort": { "branch": "master", - "git_sha": "5427d51ca9aaf0b4c5919df6fa6c7a2f718ae2a8", + "git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", "installed_by": ["modules"] }, "minimap2/index": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "miniprot/align": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "8d737766e8f3c1417212b4b56acb959f3c356d26", "installed_by": ["modules"] }, "miniprot/index": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "8d737766e8f3c1417212b4b56acb959f3c356d26", "installed_by": ["modules"] }, "mummer": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "paftools/sam2paf": { "branch": "master", - "git_sha": "2d34b7ffd1e672521d5480f368028ddd1fa5ea21", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", "installed_by": ["modules"] }, "pretextmap": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "decfb802f2e573efb7b44ff06b11ecf16853054d", + "installed_by": ["modules"] + }, + "pretextsnapshot": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": 
"c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, "samtools/markdup": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", "installed_by": ["modules"] }, "seqtk/cutn": { "branch": "master", - "git_sha": "fc2e38dcf6b3cdbe858a83a9457c1b1e018a33b5", + "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", - "git_sha": "01b3b2509d76625b6d6cd613b349fb4777712a15", - "installed_by": ["modules"], - "patch": "modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff" + "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247", + "installed_by": ["modules"] }, "ucsc/bedgraphtobigwig": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "ucsc/bedtobigbed": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", "installed_by": ["modules"] }, "windowmasker/mk_counts": { "branch": "master", - "git_sha": "d33e1ba8b6806fa096071eb515622da753a4b8e5", + "git_sha": "30c3ed32e8bd5ddaf349ba2f4f99d38182fdc08c", "installed_by": ["modules"] }, "windowmasker/ustat": { "branch": "master", - "git_sha": "d33e1ba8b6806fa096071eb515622da753a4b8e5", + "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", "installed_by": ["modules"] } } diff --git a/modules/local/assign_ancestral.nf b/modules/local/assign_ancestral.nf old mode 100644 new mode 100755 index a7e7d3b5..43758c92 --- a/modules/local/assign_ancestral.nf +++ b/modules/local/assign_ancestral.nf @@ -1,6 +1,6 @@ process ASSIGN_ANCESTRAL { tag "$meta.id" - label "process_low" + label 'process_low' conda "conda-forge::python=3.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/bamtobed_sort.nf b/modules/local/bamtobed_sort.nf old mode 100644 new mode 100755 index 22c330a3..3af4fbc4 --- a/modules/local/bamtobed_sort.nf +++ b/modules/local/bamtobed_sort.nf @@ -14,10 +14,11 @@ process BAMTOBED_SORT { path "versions.yml" , emit: versions script: - def prefix = args.ext.prefix ?: "${meta.id}" - def st_cores = task.cpus > 4 ? 4 : "${task.cpus}" + def prefix = args.ext.prefix ?: "${meta.id}" + def st_cores = task.cpus > 4 ? 
4 : "${task.cpus}" + def buffer_mem = task.memory.toGiga() / 2 """ - samtools view -@${st_cores} -u -F0x400 ${bam} | bamToBed | sort -k4 --parallel=${task.cpus} -S ${task.memory.toGiga()}G > ${prefix}_merged_sorted.bed + samtools view -@${st_cores} -u -F0x400 ${bam} | bamToBed | sort -k4 --parallel=${task.cpus} -S ${buffer_mem}G > ${prefix}_merged_sorted.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/chunkfasta.nf b/modules/local/chunkfasta.nf index 2e9d785f..266f19be 100755 --- a/modules/local/chunkfasta.nf +++ b/modules/local/chunkfasta.nf @@ -1,6 +1,6 @@ process CHUNKFASTA { tag "${meta.id}" - label "process_low" + label 'process_low' conda "conda-forge::pyfasta=0.5.2-1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf old mode 100644 new mode 100755 index fe7dff81..f65298aa --- a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf +++ b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf @@ -32,7 +32,7 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g') END_VERSIONS """ - // temp removal staden_io_lib: \$(echo \$(staden_io_lib --version 2>&1) | sed 's/^.*staden_io_lib //; s/Using.*\$//') + // temp removal staden_io_lib: \$(echo \$(staden_io_lib --version 2>&1) | sed 's/^.*staden_io_lib //; s/Using.*\$//') CAUSES ERROR stub: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/extract_ancestral.nf b/modules/local/extract_ancestral.nf old mode 100644 new mode 100755 index 4f29462a..8c5c509c --- a/modules/local/extract_ancestral.nf +++ b/modules/local/extract_ancestral.nf @@ -1,6 +1,6 @@ process EXTRACT_ANCESTRAL { tag "$meta.id" - label "process_low" + label 'process_low' conda "conda-forge::python=3.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/extract_buscogene.nf b/modules/local/extract_buscogene.nf old mode 100644 new mode 100755 index 7ddc0d9b..42d6fb18 --- a/modules/local/extract_buscogene.nf +++ b/modules/local/extract_buscogene.nf @@ -1,6 +1,6 @@ process EXTRACT_BUSCOGENE { tag "$meta.id" - label "process_low" + label 'process_low' conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/gff_to_bed.nf b/modules/local/extract_cov_iden.nf old mode 100644 new mode 100755 similarity index 85% rename from modules/local/gff_to_bed.nf rename to modules/local/extract_cov_iden.nf index 687a4429..871d4ec4 --- a/modules/local/gff_to_bed.nf +++ b/modules/local/extract_cov_iden.nf @@ -1,6 +1,6 @@ -process GFF_TO_BED { +process EXTRACT_COV_IDEN { tag "${meta.id}" - label "process_low" + label 'process_low' conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -18,11 +18,11 @@ process GFF_TO_BED { def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" - gff_to_bed.sh ${file} ${prefix}.bed + extract_cov_iden.sh ${file} ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": - gff_to_bed: \$(gff_to_bed.sh -v) + extract_cov_iden: \$(extract_cov_iden.sh -v) coreutils: $VERSION END_VERSIONS """ @@ -35,7 +35,7 @@ process GFF_TO_BED { cat <<-END_VERSIONS > versions.yml "${task.process}": - gff_to_bed: \$(gff_to_bed.sh -v) + extract_cov_iden: \$(extract_cov_iden.sh -v) coreutils: $VERSION END_VERSIONS """ diff --git a/modules/local/extract_repeat.nf b/modules/local/extract_repeat.nf old mode 100644 new mode 100755 index 13f2898c..801c9b45 --- a/modules/local/extract_repeat.nf +++ b/modules/local/extract_repeat.nf @@ -18,7 +18,7 @@ process EXTRACT_REPEAT { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ extract_repeat.pl $file > ${prefix}_repeats.bed @@ -31,7 +31,7 @@ process EXTRACT_REPEAT { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch ${prefix}_repeats.bed diff --git a/modules/local/extract_telo.nf b/modules/local/extract_telo.nf old mode 100644 new mode 100755 index 8cd784d2..f78e0a36 --- a/modules/local/extract_telo.nf +++ b/modules/local/extract_telo.nf @@ -1,6 +1,6 @@ process EXTRACT_TELO { tag "${meta.id}" - label "process_low" + label 'process_low' conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/find_telomere_regions.nf b/modules/local/find_telomere_regions.nf old mode 100644 new mode 100755 index f0d7c669..6f0d56c7 --- a/modules/local/find_telomere_regions.nf +++ b/modules/local/find_telomere_regions.nf @@ -1,6 +1,6 @@ process FIND_TELOMERE_REGIONS { tag "${meta.id}" - label "process_low" + label 'process_low' container 'docker.io/library/gcc:7.1.0' diff --git a/modules/local/find_telomere_windows.nf b/modules/local/find_telomere_windows.nf old mode 100644 new mode 100755 index da99258c..ac9584e2 --- a/modules/local/find_telomere_windows.nf +++ b/modules/local/find_telomere_windows.nf @@ -1,6 +1,6 @@ process FIND_TELOMERE_WINDOWS { tag "${meta.id}" - label "process_low" + label 'process_low' conda "bioconda::java-jdk=8.0.112" container "${ workflow.containerEngine == 'singularity' && diff --git a/modules/local/gap_length.nf b/modules/local/gap_length.nf old mode 100644 new mode 100755 index 32737202..9240de0e --- a/modules/local/gap_length.nf +++ b/modules/local/gap_length.nf @@ -11,7 +11,7 @@ process GAP_LENGTH { tuple val( meta ), path( file ) output: - tuple val( meta ), file( "*bedgraph" ) , emit: bed + tuple val( meta ), file( "*bedgraph" ) , emit: bedgraph path "versions.yml" , emit: versions shell: @@ -19,7 +19,7 @@ process GAP_LENGTH { def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
$/ cat "${file}" \ - | awk '{print $0"\t"sqrt(($3-$2)*($3-$2))}' > pretext_${prefix}_gap.bedgraph + | awk '{print $0"\t"sqrt(($3-$2)*($3-$2))}' > ${prefix}_gap.bedgraph cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -31,7 +31,7 @@ process GAP_LENGTH { def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "9.1" """ - touch ${prefix}_gap.bed + touch ${prefix}_gap.bedgraph cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/generate_cram_csv.nf b/modules/local/generate_cram_csv.nf index 72f90a33..6b6fe62c 100755 --- a/modules/local/generate_cram_csv.nf +++ b/modules/local/generate_cram_csv.nf @@ -1,6 +1,6 @@ process GENERATE_CRAM_CSV { tag "${meta.id}" - label "process_low" + label 'process_low' conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/generate_genome_file.nf b/modules/local/generate_genome_file.nf deleted file mode 100644 index 74a0a962..00000000 --- a/modules/local/generate_genome_file.nf +++ /dev/null @@ -1,38 +0,0 @@ -process GENERATE_GENOME_FILE { - tag "${meta.id}" - label "process_low" - - conda "conda-forge::coreutils=9.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" - - input: - tuple val( meta ), path( fai ) - - output: - tuple val( meta ), file( "my.genome" ) , emit: dotgenome - path "versions.yml" , emit: versions - - script: - def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - awk -F"\t" '{print \$1"\t"\$2}' $fai |sort > my.genome - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - coreutils: $VERSION - END_VERSIONS - """ - - stub: - def VERSION = "9.1" - """ - touch my.genome - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - coreutils: $VERSION - END_VERSIONS - """ -} diff --git a/modules/local/get_largest_scaff.nf b/modules/local/get_largest_scaff.nf old mode 100644 new mode 100755 index 1819f4dd..3e3fc2d7 --- a/modules/local/get_largest_scaff.nf +++ b/modules/local/get_largest_scaff.nf @@ -16,7 +16,7 @@ process GET_LARGEST_SCAFF { path "versions.yml" , emit: versions shell: - def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. $/ largest_scaff=`head -n 1 "${file}" | cut -d$'\t' -f2` @@ -27,8 +27,8 @@ process GET_LARGEST_SCAFF { /$ stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" largest_scaff=1000000 diff --git a/modules/local/get_paired_contact_bed.nf b/modules/local/get_paired_contact_bed.nf index 8882e857..308ea84a 100755 --- a/modules/local/get_paired_contact_bed.nf +++ b/modules/local/get_paired_contact_bed.nf @@ -17,7 +17,7 @@ process GET_PAIRED_CONTACT_BED { script: def pulled = '-T sort_tmp' """ - bed_to_contacts.sh $file > ${meta.id}_paired.bed + bed_to_contacts.sh $file > pre.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/get_synteny_genomes.nf b/modules/local/get_synteny_genomes.nf old mode 100644 new mode 100755 index 8e2b4a5f..933ebb57 --- a/modules/local/get_synteny_genomes.nf +++ b/modules/local/get_synteny_genomes.nf @@ -16,7 +16,7 @@ process GET_SYNTENY_GENOMES { path "versions.yml" , emit: versions script: - def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ if [ ! -d ${synteny_path}${assembly_classT}/ ] || [ -z "\$(ls -A ${synteny_path}${assembly_classT}/)" ] then @@ -34,7 +34,7 @@ process GET_SYNTENY_GENOMES { """ stub: - def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch empty.fasta diff --git a/modules/local/makecmap_cmap2bed.nf b/modules/local/makecmap_cmap2bed.nf old mode 100644 new mode 100755 index 90addde9..533abd79 --- a/modules/local/makecmap_cmap2bed.nf +++ b/modules/local/makecmap_cmap2bed.nf @@ -19,8 +19,8 @@ process MAKECMAP_CMAP2BED { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ grep -v '#' $cmap > ${prefix}_${enzyme}_edited.cmap cmap2bed.py -t ${prefix}_${enzyme}_edited.cmap -z $enzyme | sort -k1,1 -k2,2n > ${enzyme}.bed @@ -33,7 +33,7 @@ process MAKECMAP_CMAP2BED { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_${enzyme}.bed diff --git a/modules/local/makecmap_fa2cmapmulticolor.nf b/modules/local/makecmap_fa2cmapmulticolor.nf old mode 100644 new mode 100755 index 0adeba13..6aa8e406 --- a/modules/local/makecmap_fa2cmapmulticolor.nf +++ b/modules/local/makecmap_fa2cmapmulticolor.nf @@ -20,7 +20,7 @@ process MAKECMAP_FA2CMAPMULTICOLOR { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' """ fa2cmap_multi_color.pl -i $fasta -e $enzyme 1 $args diff --git a/modules/local/makecmap_renamecmapids.nf b/modules/local/makecmap_renamecmapids.nf old mode 100644 new mode 100755 index 1f158a01..ef162457 --- a/modules/local/makecmap_renamecmapids.nf +++ b/modules/local/makecmap_renamecmapids.nf @@ -19,8 +19,8 @@ process MAKECMAP_RENAMECMAPIDS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ rename_cmapids.pl -cmapfile $cmap -idx_key $keys $args > ${prefix}_EDITED.cmap @@ -33,7 +33,7 @@ process MAKECMAP_RENAMECMAPIDS { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = 
task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_EDITED.cmap diff --git a/modules/local/paf_to_bed.nf b/modules/local/paf_to_bed.nf old mode 100644 new mode 100755 index 3e2e9db3..a980c2fd --- a/modules/local/paf_to_bed.nf +++ b/modules/local/paf_to_bed.nf @@ -1,6 +1,6 @@ process PAF2BED { tag "${meta.id}" - label "process_low" + label 'process_low' conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/reformat_intersect.nf b/modules/local/reformat_intersect.nf old mode 100644 new mode 100755 index fdd96954..0842faef --- a/modules/local/reformat_intersect.nf +++ b/modules/local/reformat_intersect.nf @@ -1,6 +1,6 @@ process REFORMAT_INTERSECT { tag "${meta.id}" - label "process_low" + label 'process_low' conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/rename_ids.nf b/modules/local/rename_ids.nf old mode 100644 new mode 100755 index 9e64f324..545d0c4a --- a/modules/local/rename_ids.nf +++ b/modules/local/rename_ids.nf @@ -1,6 +1,6 @@ process RENAME_IDS { tag "${meta.id}" - label "process_low" + label 'process_low' conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/replace_dots.nf b/modules/local/replace_dots.nf old mode 100644 new mode 100755 index fb0d48e8..0f266ca2 --- a/modules/local/replace_dots.nf +++ b/modules/local/replace_dots.nf @@ -1,6 +1,6 @@ process REPLACE_DOTS { tag "${meta.id}" - label "process_low" + label 'process_low' conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/local/selfcomp_alignmentblocks.nf b/modules/local/selfcomp_alignmentblocks.nf old mode 100644 new mode 100755 index d54dc8e8..5d23431a --- a/modules/local/selfcomp_alignmentblocks.nf +++ b/modules/local/selfcomp_alignmentblocks.nf @@ -18,8 +18,8 @@ process SELFCOMP_ALIGNMENTBLOCKS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ build_alignment_block.py $args -i $bedfile -o ${prefix}_chained.block @@ -33,7 +33,7 @@ process SELFCOMP_ALIGNMENTBLOCKS { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_chained.block diff --git a/modules/local/selfcomp_mapids.nf b/modules/local/selfcomp_mapids.nf old mode 100644 new mode 100755 index 42fc9b34..c19d7393 --- a/modules/local/selfcomp_mapids.nf +++ b/modules/local/selfcomp_mapids.nf @@ -19,8 +19,8 @@ process SELFCOMP_MAPIDS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ mapids.py -i $bed -r $agp > ${prefix}_mapped.bed @@ -32,8 +32,8 @@ process SELFCOMP_MAPIDS { """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_mapped.bed diff --git a/modules/local/selfcomp_mummer2bed.nf b/modules/local/selfcomp_mummer2bed.nf old mode 100644 new mode 100755 index 1087523c..c38f6d88 --- a/modules/local/selfcomp_mummer2bed.nf +++ b/modules/local/selfcomp_mummer2bed.nf @@ -19,8 +19,8 @@ process SELFCOMP_MUMMER2BED { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ mummer2bed.py $args -i $mummerfile -l $motiflen > ${prefix}.bed @@ -33,8 +33,8 @@ process SELFCOMP_MUMMER2BED { """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.bed diff --git a/modules/local/selfcomp_splitfasta.nf b/modules/local/selfcomp_splitfasta.nf old mode 100644 new mode 100755 index d1aa7edb..be4584b2 --- a/modules/local/selfcomp_splitfasta.nf +++ b/modules/local/selfcomp_splitfasta.nf @@ -19,9 +19,9 @@ process SELFCOMP_SPLITFASTA { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "1.7.8-1" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.7.8-1" """ split_genomes_for_ensembl.pl $fasta ${prefix}_split.fa ${prefix}_split.agp @@ -34,9 +34,9 @@ process SELFCOMP_SPLITFASTA { """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "1.7.8-1" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.7.8-1" """ touch ${prefix}_split.agp touch ${prefix}_split.fa diff --git a/modules/nf-core/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf old mode 100644 new mode 100755 index e9673571..ab8a6ffb --- a/modules/nf-core/bedtools/bamtobed/main.nf +++ b/modules/nf-core/bedtools/bamtobed/main.nf @@ -2,10 +2,10 @@ 
process BEDTOOLS_BAMTOBED { tag "$meta.id" label 'process_medium' - conda "bioconda::bedtools=2.30.0" + conda "bioconda::bedtools=2.31.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : + 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" input: tuple val(meta), path(bam) @@ -32,4 +32,15 @@ process BEDTOOLS_BAMTOBED { bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ } diff --git a/modules/nf-core/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml old mode 100644 new mode 100755 index 5a4ff73a..49cc83d9 --- a/modules/nf-core/bedtools/bamtobed/meta.yml +++ b/modules/nf-core/bedtools/bamtobed/meta.yml @@ -3,6 +3,9 @@ description: Converts a bam file to a bed12 file. keywords: - bam - bed + - bedtools + - bamtobed + - converter tools: - bedtools: description: | diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf old mode 100644 new mode 100755 index 17e38a8b..d2a2f206 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_GENOMECOV { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.30.0" + conda "bioconda::bedtools=2.31.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : + 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" input: tuple val(meta), path(intervals), val(scale) @@ -56,4 +56,15 @@ process BEDTOOLS_GENOMECOV { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ } diff --git a/modules/nf-core/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml old mode 100644 new mode 100755 index 83bfab98..efd6e129 --- a/modules/nf-core/bedtools/genomecov/meta.yml +++ b/modules/nf-core/bedtools/genomecov/meta.yml @@ -4,6 +4,8 @@ keywords: - bed - bam - genomecov + - bedtools + - histogram tools: - bedtools: description: | @@ -21,7 +23,7 @@ input: description: BAM/BED/GFF/VCF pattern: "*.{bam|bed|gff|vcf}" - scale: - type: value + type: integer description: Number containing the scale factor for the output. Set to 1 to disable. 
Setting to a value other than 1 will also get the -bg bedgraph output format as this is required for this command switch - sizes: type: file diff --git a/modules/nf-core/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf old mode 100644 new mode 100755 index f966baff..6805582e --- a/modules/nf-core/bedtools/intersect/main.nf +++ b/modules/nf-core/bedtools/intersect/main.nf @@ -5,7 +5,7 @@ process BEDTOOLS_INTERSECT { conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" input: tuple val(meta), path(intervals1), path(intervals2) @@ -22,7 +22,7 @@ process BEDTOOLS_INTERSECT { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" //Extension of the output file. It is set by the user via "ext.suffix" in the config. Corresponds to the file format which depends on arguments (e. g., ".bed", ".bam", ".txt", etc.). - extension = task.ext.suffix ?: "bed" + extension = task.ext.suffix ?: "${intervals1.extension}" def sizes = chrom_sizes ? "-g ${chrom_sizes}" : '' if ("$intervals1" == "${prefix}.${extension}" || "$intervals2" == "${prefix}.${extension}") diff --git a/modules/nf-core/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml old mode 100644 new mode 100755 index c796f7d5..f2848967 --- a/modules/nf-core/bedtools/intersect/meta.yml +++ b/modules/nf-core/bedtools/intersect/meta.yml @@ -3,6 +3,7 @@ description: Allows one to screen for overlaps between two sets of genomic featu keywords: - bed - intersect + - overlap tools: - bedtools: description: | @@ -29,7 +30,7 @@ input: Groovy Map containing reference chromosome sizes e.g. [ id:'test' ] - chrom_sizes: - type: optional file + type: file description: Chromosome sizes file pattern: "*{.sizes,.txt}" output: diff --git a/modules/nf-core/bedtools/makewindows/main.nf b/modules/nf-core/bedtools/makewindows/main.nf old mode 100644 new mode 100755 index 4c1246a0..96dcff15 --- a/modules/nf-core/bedtools/makewindows/main.nf +++ b/modules/nf-core/bedtools/makewindows/main.nf @@ -5,7 +5,7 @@ process BEDTOOLS_MAKEWINDOWS { conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--h7d7f7ad_1' : - 'quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_1' }" + 'biocontainers/bedtools:2.30.0--h7d7f7ad_1' }" input: tuple val(meta), path(regions) diff --git a/modules/nf-core/bedtools/makewindows/meta.yml b/modules/nf-core/bedtools/makewindows/meta.yml old mode 100644 new mode 100755 index 307f4cd2..f543da69 --- a/modules/nf-core/bedtools/makewindows/meta.yml +++ b/modules/nf-core/bedtools/makewindows/meta.yml @@ -32,10 +32,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - tab: + - bed: type: file description: BED file containing the windows - pattern: "*.tab" + pattern: "*.bed" authors: - "@kevbrick" - "@nvnieuwk" diff --git a/modules/nf-core/bedtools/map/main.nf b/modules/nf-core/bedtools/map/main.nf old mode 100644 new mode 100755 index c7dceb5c..846d5ba2 --- a/modules/nf-core/bedtools/map/main.nf +++ b/modules/nf-core/bedtools/map/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_MAP { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.30.0" + conda "bioconda::bedtools=2.31.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : + 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" input: tuple val(meta), path(intervals1), path(intervals2) diff --git a/modules/nf-core/bedtools/map/meta.yml b/modules/nf-core/bedtools/map/meta.yml old mode 100644 new mode 100755 index 71fe42e8..b0ce79d2 --- a/modules/nf-core/bedtools/map/meta.yml +++ b/modules/nf-core/bedtools/map/meta.yml @@ -2,7 +2,10 @@ name: bedtools_map description: Allows one to screen for overlaps between two sets of genomic features. keywords: - bed + - vcf + - gff - map + - bedtools tools: - bedtools: description: | diff --git a/modules/nf-core/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf old mode 100644 new mode 100755 index 21b2e645..6868d39f --- a/modules/nf-core/bedtools/merge/main.nf +++ b/modules/nf-core/bedtools/merge/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_MERGE { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.30.0" + conda "bioconda::bedtools=2.31.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : + 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" input: tuple val(meta), path(bed) @@ -33,4 +33,15 @@ process BEDTOOLS_MERGE { bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ } diff --git a/modules/nf-core/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml old mode 100644 new mode 100755 index 76743679..82248afe --- a/modules/nf-core/bedtools/merge/meta.yml +++ b/modules/nf-core/bedtools/merge/meta.yml @@ -3,6 +3,8 @@ description: combines overlapping or “book-ended” features in an interval fi keywords: - bed - merge + - bedtools + - overlapped bed tools: - bedtools: description: | diff --git a/modules/nf-core/bedtools/sort/main.nf b/modules/nf-core/bedtools/sort/main.nf old mode 100644 new mode 100755 index a0ddddd1..df372bc5 --- a/modules/nf-core/bedtools/sort/main.nf +++ b/modules/nf-core/bedtools/sort/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_SORT { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.30.0" + conda "bioconda::bedtools=2.31.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : + 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" input: tuple val(meta), path(intervals) @@ -39,4 +39,16 @@ process BEDTOOLS_SORT { bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: intervals.extension + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ } diff --git a/modules/nf-core/bedtools/sort/meta.yml b/modules/nf-core/bedtools/sort/meta.yml old mode 100644 new mode 100755 index 1b6ebbcb..3a3b4e4d --- a/modules/nf-core/bedtools/sort/meta.yml +++ b/modules/nf-core/bedtools/sort/meta.yml @@ -3,6 +3,8 @@ description: Sorts a feature file by chromosome and other criteria. keywords: - bed - sort + - bedtools + - chromosome tools: - bedtools: description: | diff --git a/modules/nf-core/busco/main.nf b/modules/nf-core/busco/main.nf old mode 100644 new mode 100755 diff --git a/modules/nf-core/busco/meta.yml b/modules/nf-core/busco/meta.yml old mode 100644 new mode 100755 index ef8c5245..cdc9dd46 --- a/modules/nf-core/busco/meta.yml +++ b/modules/nf-core/busco/meta.yml @@ -25,7 +25,7 @@ input: description: Nucleic or amino acid sequence file in FASTA format. 
pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" - lineage: - type: value + type: string description: The BUSCO lineage to use, or "auto" to automatically select lineage - busco_lineages_path: type: directory diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf old mode 100644 new mode 100755 diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf old mode 100644 new mode 100755 index 840af4b9..9f062219 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -5,7 +5,7 @@ process CAT_CAT { conda "conda-forge::pigz=2.3.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : - 'quay.io/biocontainers/pigz:2.3.4' }" + 'biocontainers/pigz:2.3.4' }" input: tuple val(meta), path(files_in) diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml old mode 100644 new mode 100755 index 5eeff5a6..8acc0bfa --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -7,9 +7,9 @@ keywords: tools: - cat: description: Just concatenation - homepage: None + documentation: https://man7.org/linux/man-pages/man1/cat.1.html - tool_dev_url: None + licence: ["GPL-3.0-or-later"] input: - meta: diff --git a/modules/nf-core/cooler/cload/main.nf b/modules/nf-core/cooler/cload/main.nf old mode 100644 new mode 100755 index 80109d48..4863cc63 --- a/modules/nf-core/cooler/cload/main.nf +++ b/modules/nf-core/cooler/cload/main.nf @@ -2,10 +2,10 @@ process COOLER_CLOAD { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "bioconda::cooler=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(pairs), path(index), val(cool_bin) @@ -36,4 +36,15 @@ process COOLER_CLOAD { cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ } diff --git a/modules/nf-core/cooler/cload/meta.yml b/modules/nf-core/cooler/cload/meta.yml old mode 100644 new mode 100755 index 8513aaec..953c8337 --- a/modules/nf-core/cooler/cload/meta.yml +++ b/modules/nf-core/cooler/cload/meta.yml @@ -2,6 +2,9 @@ name: cooler_cload description: Create a cooler from genomic pairs and bins keywords: - cool + - cooler + - cload + - hic tools: - cooler: description: Sparse binary format for genomic interaction matrices @@ -24,7 +27,7 @@ input: type: file description: Path to index file of the contacts. 
- cool_bin: - type: value + type: integer description: Bins size in bp - chromsizes: type: file @@ -45,7 +48,7 @@ output: description: Output COOL file path pattern: "*.cool" - cool_bin: - type: value + type: integer description: Bins size in bp authors: diff --git a/modules/nf-core/cooler/zoomify/main.nf b/modules/nf-core/cooler/zoomify/main.nf old mode 100644 new mode 100755 index 95e7daff..cd210cf7 --- a/modules/nf-core/cooler/zoomify/main.nf +++ b/modules/nf-core/cooler/zoomify/main.nf @@ -2,10 +2,10 @@ process COOLER_ZOOMIFY { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "bioconda::cooler=0.9.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(cool) @@ -32,4 +32,15 @@ process COOLER_ZOOMIFY { cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.mcool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ } diff --git a/modules/nf-core/cooler/zoomify/meta.yml b/modules/nf-core/cooler/zoomify/meta.yml old mode 100644 new mode 100755 index 57f55486..27dfe46a --- a/modules/nf-core/cooler/zoomify/meta.yml +++ b/modules/nf-core/cooler/zoomify/meta.yml @@ -2,6 +2,8 @@ name: cooler_zoomify description: Generate a multi-resolution cooler file by coarsening keywords: - mcool + - cool + - cooler tools: - cooler: description: Sparse binary format for genomic interaction matrices diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf old mode 100644 new mode 100755 diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf new file mode 100755 index 00000000..1fd1e768 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -0,0 +1,51 @@ +process CUSTOM_GETCHROMSIZES { + tag "$meta.id" + label 'process_single' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(fasta) + val suffix + + output: + tuple val(meta), path ("*.${suffix}") , emit: sizes + tuple val(meta), path ("*.fa") , emit: fasta + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools faidx $fasta -o ${prefix}.fa.fai + cut -f 1,2 ${prefix}.fa.fai > ${prefix}.${suffix} + + if [[ "${fasta}" != "${prefix}-ref.fa" ]]; then + mv ${fasta} ${prefix}-ref.fa + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fai + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml new file mode 100755 index 00000000..219ca1d8 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -0,0 +1,53 @@ +name: custom_getchromsizes +description: Generates a FASTA file of chromosome sizes and a fasta index file +keywords: + - fasta + - chromosome + - indexing +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + tool_dev_url: https://github.com/samtools/samtools + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta,fna,fas}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sizes: + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@tamara-hodgetts" + - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/gnu/sort/main.nf b/modules/nf-core/gnu/sort/main.nf old mode 100644 new mode 100755 index e2b0bc9e..b0a57fbb --- a/modules/nf-core/gnu/sort/main.nf +++ b/modules/nf-core/gnu/sort/main.nf @@ -2,10 +2,10 @@ process GNU_SORT { tag "${meta.id}" label "process_low" - conda "conda-forge::coreutils=9.1" + conda "bioconda::coreutils=8.25" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/coreutils:8.25--1' : + 'biocontainers/coreutils:8.25--1' }" input: tuple val(meta), path(input) @@ -23,7 +23,7 @@ process GNU_SORT { suffix = task.ext.suffix ?: "${input.extension}" output_file = "${prefix}.${suffix}" def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - + if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ sort ${args} ${input} > ${output_file} @@ -39,6 +39,8 @@ process GNU_SORT { suffix = task.ext.suffix ?: "${input.extension}" output_file = "${prefix}.${suffix}" def VERSION = "9.1" + + if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ sort ${args} ${input} > ${output_file} diff --git a/modules/nf-core/gnu/sort/meta.yml b/modules/nf-core/gnu/sort/meta.yml old mode 100644 new mode 100755 index d53317a6..e7fb0284 --- a/modules/nf-core/gnu/sort/meta.yml +++ b/modules/nf-core/gnu/sort/meta.yml @@ -4,9 +4,10 @@ description: | keywords: - GNU - sort + - merge compare tools: - sort: - description: "Writes a sorted consatenation of file/s" + description: "Writes a sorted concatenation of file/s" homepage: "https://github.com/vgl-hub/gfastats" documentation: "https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html" licence: ["GPL"] diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf old mode 100644 new mode 100755 index 430dbab9..4da47c18 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -6,7 +6,7 @@ process MINIMAP2_ALIGN { conda "bioconda::minimap2=2.24 bioconda::samtools=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : - 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf old mode 100644 new mode 100755 index 73dd4eef..7a1bb227 --- a/modules/nf-core/minimap2/index/main.nf +++ b/modules/nf-core/minimap2/index/main.nf @@ -5,7 +5,7 @@ process MINIMAP2_INDEX { conda "bioconda::minimap2=2.24" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/minimap2:2.24--h7132678_1' : - 'quay.io/biocontainers/minimap2:2.24--h7132678_1' }" + 'biocontainers/minimap2:2.24--h7132678_1' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/miniprot/align/main.nf b/modules/nf-core/miniprot/align/main.nf old mode 100644 new mode 100755 index 027dadf6..9a1f1184 --- a/modules/nf-core/miniprot/align/main.nf +++ b/modules/nf-core/miniprot/align/main.nf @@ -2,10 +2,10 @@ process MINIPROT_ALIGN { tag "$meta.id" label 'process_medium' - conda "bioconda::miniprot=0.5" + conda "bioconda::miniprot=0.11=he4a0461_2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/miniprot:0.5--h7132678_0': - 'quay.io/biocontainers/miniprot:0.5--h7132678_0' }" + 'https://depot.galaxyproject.org/singularity/miniprot:0.11--he4a0461_2': + 'biocontainers/miniprot:0.11--he4a0461_2' }" input: tuple val(meta), path(pep) @@ -36,4 +36,17 @@ process MINIPROT_ALIGN { miniprot: \$(miniprot --version 2>&1) END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--gff") ? "gff" : "paf" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + miniprot: \$(miniprot --version 2>&1) + END_VERSIONS + """ } diff --git a/modules/nf-core/miniprot/align/meta.yml b/modules/nf-core/miniprot/align/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/miniprot/index/main.nf b/modules/nf-core/miniprot/index/main.nf old mode 100644 new mode 100755 index 1c342abf..ee3757b6 --- a/modules/nf-core/miniprot/index/main.nf +++ b/modules/nf-core/miniprot/index/main.nf @@ -2,10 +2,10 @@ process MINIPROT_INDEX { tag "$meta.id" label 'process_medium' - conda "bioconda::miniprot=0.5" + conda "bioconda::miniprot=0.11=he4a0461_2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/miniprot:0.5--h7132678_0': - 'quay.io/biocontainers/miniprot:0.5--h7132678_0' }" + 'https://depot.galaxyproject.org/singularity/miniprot:0.11--he4a0461_2': + 'biocontainers/miniprot:0.11--he4a0461_2' }" input: tuple val(meta), path(fasta) @@ -19,7 +19,6 @@ process MINIPROT_INDEX { script: def args = task.ext.args ?: '' - """ miniprot \\ -t $task.cpus \\ @@ -32,4 +31,14 @@ process MINIPROT_INDEX { miniprot: \$(miniprot --version 2>&1) END_VERSIONS """ + + stub: + """ + touch ${fasta.baseName}.mpi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + miniprot: \$(miniprot --version 2>&1) + END_VERSIONS + """ } diff --git a/modules/nf-core/miniprot/index/meta.yml b/modules/nf-core/miniprot/index/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/mummer/main.nf b/modules/nf-core/mummer/main.nf old mode 100644 new mode 100755 index 9229f9d5..16387d1d --- a/modules/nf-core/mummer/main.nf +++ b/modules/nf-core/mummer/main.nf @@ -6,7 +6,7 @@ process MUMMER { conda "bioconda::mummer=3.23" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mummer:3.23--pl5262h1b792b2_12' : - 'quay.io/biocontainers/mummer:3.23--pl5262h1b792b2_12' }" + 'biocontainers/mummer:3.23--pl5262h1b792b2_12' }" input: tuple val(meta), path(ref), path(query) @@ -45,4 +45,16 @@ process MUMMER { mummer: $VERSION END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '3.23' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.coords + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mummer: $VERSION + END_VERSIONS + """ } diff --git a/modules/nf-core/mummer/meta.yml b/modules/nf-core/mummer/meta.yml old mode 100644 new mode 100755 index ec4f0c86..f03d483c --- a/modules/nf-core/mummer/meta.yml +++ b/modules/nf-core/mummer/meta.yml @@ -10,7 +10,7 @@ tools: homepage: http://mummer.sourceforge.net/ documentation: http://mummer.sourceforge.net/ tool_dev_url: http://mummer.sourceforge.net/ - doi: https://doi.org/10.1186/gb-2004-5-2-r12 + doi: 10.1186/gb-2004-5-2-r12 licence: ["The Artistic License"] input: diff --git a/modules/nf-core/paftools/sam2paf/main.nf b/modules/nf-core/paftools/sam2paf/main.nf old mode 100644 new mode 100755 index e881d771..ae9c0ff1 --- a/modules/nf-core/paftools/sam2paf/main.nf +++ b/modules/nf-core/paftools/sam2paf/main.nf @@ -6,7 +6,7 @@ process PAFTOOLS_SAM2PAF { conda "bioconda::minimap2=2.24 bioconda::samtools=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : - 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/paftools/sam2paf/meta.yml b/modules/nf-core/paftools/sam2paf/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/pretextmap/main.nf b/modules/nf-core/pretextmap/main.nf old mode 100644 new mode 100755 index a4171ab8..f7a5313d --- a/modules/nf-core/pretextmap/main.nf +++ b/modules/nf-core/pretextmap/main.nf @@ -3,10 +3,10 @@ process PRETEXTMAP { tag "$meta.id" label 'process_single' - conda "bioconda::pretextmap=0.1.9 bioconda::samtools=1.16.1" + conda "bioconda::pretextmap=0.1.9 bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61:c6242a6c1a522137de7a9e9ff90779ede11cf5c5-0': - 'biocontainers/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61:c6242a6c1a522137de7a9e9ff90779ede11cf5c5-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61%3A44321ab4d64f0b6d0c93abbd1406369d1b3da684-0': + 'biocontainers/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61:44321ab4d64f0b6d0c93abbd1406369d1b3da684-0' }" input: tuple val(meta), path(input) @@ -39,6 +39,18 @@ process PRETEXTMAP { -o ${prefix}.pretext fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pretextmap: \$(PretextMap | grep "Version" | sed 's/PretextMap Version //g') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.pretext + cat <<-END_VERSIONS > versions.yml "${task.process}": pretextmap: \$(PretextMap | grep "Version" | sed 's/PretextMap Version //g') diff --git a/modules/nf-core/pretextmap/meta.yml b/modules/nf-core/pretextmap/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/pretextsnapshot/main.nf b/modules/nf-core/pretextsnapshot/main.nf new file mode 100755 index 00000000..10425446 --- /dev/null +++ b/modules/nf-core/pretextsnapshot/main.nf @@ -0,0 +1,35 @@ +process PRETEXTSNAPSHOT { + tag "$meta.id" + label 'process_single' + + conda "bioconda::pretextsnapshot=0.0.4" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pretextsnapshot:0.0.4--h7d875b9_0': + 'biocontainers/pretextsnapshot:0.0.4--h7d875b9_0' }" + + input: + tuple val(meta), path(pretext_map) + + output: + tuple val(meta), path('*.{jpeg,png,bmp}'), emit: image + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + PretextSnapshot \\ + $args \\ + --map $pretext_map \\ + --prefix $prefix \\ + --folder . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pretextsnapshot: \$(echo \$(PretextSnapshot --version 2>&1) | sed 's/^.*PretextSnapshot Version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pretextsnapshot/meta.yml b/modules/nf-core/pretextsnapshot/meta.yml new file mode 100755 index 00000000..fe9cb17a --- /dev/null +++ b/modules/nf-core/pretextsnapshot/meta.yml @@ -0,0 +1,45 @@ +name: "pretextsnapshot" +description: a module to generate images from Pretext contact maps. +keywords: + - pretext + - image + - hic + - png + - jpg + - bmp + - contact maps +tools: + - "pretextsnapshot": + description: "Commandline image generator for Pretext Hi-C genome contact maps." + homepage: "https://github.com/wtsi-hpag/PretextSnapshot" + tool_dev_url: "https://github.com/wtsi-hpag/PretextSnapshot" + licence: "['https://github.com/wtsi-hpag/PretextSnapshot/blob/master/LICENSE']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pretext_map: + type: file + description: pretext hic map + pattern: "*.pretext" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - image: + type: file + description: image of a hic contact map + pattern: "*.{png,jpg,bmp}" + +authors: + - "@epaule" diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf old mode 100644 new mode 100755 index ce6580d2..59ed3088 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,18 +2,20 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(fasta) + tuple val(meta2), path(fai) output: - tuple val(meta), path ("*.fai"), emit: fai - tuple val(meta), path ("*.gzi"), emit: gzi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,8 +25,8 @@ process SAMTOOLS_FAIDX { """ samtools \\ faidx \\ - $args \\ - $fasta + $fasta \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,8 +35,12 @@ process SAMTOOLS_FAIDX { """ stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' """ + ${fastacmd} touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml old mode 100644 new mode 100755 index fe2fe9a1..957b25e5 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -3,6 +3,7 @@ description: Index FASTA file keywords: - index - fasta + - faidx tools: - samtools: description: | @@ -17,12 +18,21 @@ input: - meta: type: map description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: FASTA file pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" output: - meta: type: map diff --git a/modules/nf-core/samtools/markdup/main.nf b/modules/nf-core/samtools/markdup/main.nf old mode 100644 new mode 100755 index f459163b..218cf97b --- a/modules/nf-core/samtools/markdup/main.nf +++ b/modules/nf-core/samtools/markdup/main.nf @@ -44,4 +44,21 @@ process SAMTOOLS_MARKDUP { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/markdup/meta.yml b/modules/nf-core/samtools/markdup/meta.yml old mode 100644 new mode 100755 index 9ced7a0f..4207c93a --- a/modules/nf-core/samtools/markdup/meta.yml +++ b/modules/nf-core/samtools/markdup/meta.yml @@ -1,7 +1,10 @@ name: "samtools_markdup" description: mark duplicate alignments in a coordinate sorted file keywords: - - markdup + - bam + - duplicates + - markduplicates + - samtools tools: - "samtools": description: "Tools for dealing with SAM, BAM and CRAM files" diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf old mode 100644 new mode 100755 index a80ff3a2..b73b7cb2 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -2,15 +2,15 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml old mode 100644 new mode 100755 index 5bd84bc5..3a815f74 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -25,13 +25,23 @@ input: type: file description: BAM/CRAM file pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] - fai: - type: optional file - description: Index of the reference file the CRAM was created with + type: file + description: Index of the reference file the CRAM was created with (optional) pattern: "*.fai" output: - meta: @@ -60,3 +70,4 @@ authors: - "@yuukiiwa " - "@maxulysse" - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf old mode 100644 new mode 100755 index 84c167cd..2b7753fd --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam) @@ -23,7 +23,13 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + samtools sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf old mode 100644 new mode 100755 index 729c85e5..cb91facf --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,14 +2,14 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(index) - path fasta + tuple val(meta2), path(fasta) path qname output: diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml old mode 100644 new mode 100755 index 2e597d34..3b05450b --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -26,12 +26,17 @@ input: description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - index: - type: optional file - description: BAM.BAI/CRAM.CRAI file - pattern: "*.{.bai,.crai}" + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" - qname: type: file diff --git a/modules/nf-core/seqtk/cutn/main.nf b/modules/nf-core/seqtk/cutn/main.nf old mode 100644 new mode 100755 index c991c8b3..e2b90cf1 --- a/modules/nf-core/seqtk/cutn/main.nf +++ b/modules/nf-core/seqtk/cutn/main.nf @@ -2,10 +2,10 @@ process SEQTK_CUTN { tag "$meta.id" label 'process_low' - conda "bioconda::seqtk=1.3" + conda "bioconda::seqtk=1.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' : - 'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }" + 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : + 'biocontainers/seqtk:1.4--he4a0461_1' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/seqtk/cutn/meta.yml b/modules/nf-core/seqtk/cutn/meta.yml old mode 100644 new mode 100755 index 314e2d0f..4850df9d --- a/modules/nf-core/seqtk/cutn/meta.yml +++ b/modules/nf-core/seqtk/cutn/meta.yml @@ -3,6 +3,7 @@ description: Generates a BED file containing genomic locations of lengths of N. keywords: - cut - fasta + - seqtk tools: - seqtk: description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. Seqtk mergepe command merges pair-end reads into one interleaved file. diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf old mode 100644 new mode 100755 index 76267f79..b73ee0c9 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -5,7 +5,7 @@ process TABIX_BGZIPTABIX { conda "bioconda::tabix=1.11" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" + 'biocontainers/tabix:1.11--hdfd78af_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff b/modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff old mode 100644 new mode 100755 index 55016165..90e3d55b --- a/modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff +++ b/modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff @@ -14,5 +14,5 @@ Changes in module 'nf-core/tabix/bgziptabix' """ bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz tabix $args2 ${prefix}.${input.getExtension()}.gz - + ************************************************************ diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf old mode 100644 new mode 100755 index defda3ef..b4719dee --- a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf +++ b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf @@ -6,7 +6,7 @@ process UCSC_BEDGRAPHTOBIGWIG { conda "bioconda::ucsc-bedgraphtobigwig=377" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:377--h446ed27_1' : - 'quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h446ed27_1' }" + 'biocontainers/ucsc-bedgraphtobigwig:377--h446ed27_1' }" input: tuple val(meta), path(bedgraph) @@ -34,4 +34,16 @@ process UCSC_BEDGRAPHTOBIGWIG { ucsc: $VERSION END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.bigWig + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ } diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml index ba8915be..416c91e0 100755 --- a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml +++ b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml @@ -3,6 +3,9 @@ description: Convert a bedGraph file to bigWig format. keywords: - bedgraph - bigwig + - ucsc + - bedgraphtobigwig + - converter tools: - ucsc: description: Convert a bedGraph file to bigWig format. diff --git a/modules/nf-core/ucsc/bedtobigbed/main.nf b/modules/nf-core/ucsc/bedtobigbed/main.nf old mode 100644 new mode 100755 index efa62f9a..1e40375d --- a/modules/nf-core/ucsc/bedtobigbed/main.nf +++ b/modules/nf-core/ucsc/bedtobigbed/main.nf @@ -6,7 +6,7 @@ process UCSC_BEDTOBIGBED { conda "bioconda::ucsc-bedtobigbed=377" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ucsc-bedtobigbed:377--ha8a8165_3' : - 'quay.io/biocontainers/ucsc-bedtobigbed:377--ha8a8165_3' }" + 'biocontainers/ucsc-bedtobigbed:377--ha8a8165_3' }" input: tuple val(meta), path(bed) @@ -38,4 +38,16 @@ process UCSC_BEDTOBIGBED { ucsc: $VERSION END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.bigBed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ } diff --git a/modules/nf-core/ucsc/bedtobigbed/meta.yml b/modules/nf-core/ucsc/bedtobigbed/meta.yml index e8e08fa7..8e9e5291 100755 --- a/modules/nf-core/ucsc/bedtobigbed/meta.yml +++ b/modules/nf-core/ucsc/bedtobigbed/meta.yml @@ -3,13 +3,14 @@ description: Convert file from bed to bigBed format keywords: - bed - bigbed + - ucsc + - bedtobigbed + - converter tools: - ucsc: description: Convert file from bed to bigBed format - homepage: None - documentation: None - tool_dev_url: None - doi: "" + homepage: http://hgdownload.cse.ucsc.edu/admin/exe/ + documentation: https://genome.ucsc.edu/goldenPath/help/bigBed.html licence: ["varies; see http://genome.ucsc.edu/license"] input: diff --git a/modules/nf-core/windowmasker/mk_counts/main.nf b/modules/nf-core/windowmasker/mk_counts/main.nf old mode 100644 new mode 100755 index c3516bc0..bfa66f35 --- a/modules/nf-core/windowmasker/mk_counts/main.nf +++ b/modules/nf-core/windowmasker/mk_counts/main.nf @@ -2,10 +2,10 @@ process WINDOWMASKER_MKCOUNTS { tag "$meta.id" label 'process_low' - conda "bioconda::blast=2.13.0" + conda "bioconda::blast=2.14.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0': - 'quay.io/biocontainers/blast:2.13.0--hf3cf87c_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.14.0--h7d5a4b4_1': + 'biocontainers/blast:2.14.0--h7d5a4b4_1' }" input: tuple val(meta), path(ref) diff --git a/modules/nf-core/windowmasker/mk_counts/meta.yml b/modules/nf-core/windowmasker/mk_counts/meta.yml old mode 100644 new mode 100755 index e4140a73..788dc96c --- a/modules/nf-core/windowmasker/mk_counts/meta.yml +++ b/modules/nf-core/windowmasker/mk_counts/meta.yml @@ -3,6 +3,7 @@ description: A program to generate frequency counts of repetitive units. keywords: - fasta - interval + - windowmasker tools: - windowmasker: description: | diff --git a/modules/nf-core/windowmasker/ustat/main.nf b/modules/nf-core/windowmasker/ustat/main.nf old mode 100644 new mode 100755 index b288ad7b..72a19dbf --- a/modules/nf-core/windowmasker/ustat/main.nf +++ b/modules/nf-core/windowmasker/ustat/main.nf @@ -2,10 +2,10 @@ process WINDOWMASKER_USTAT { tag "$meta.id" label 'process_low' - conda "bioconda::blast=2.13.0" + conda "bioconda::blast=2.14.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0': - 'quay.io/biocontainers/blast:2.13.0--hf3cf87c_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.14.0--h7d5a4b4_1': + 'biocontainers/blast:2.14.0--h7d5a4b4_1' }" input: tuple val(meta) , path(counts) diff --git a/modules/nf-core/windowmasker/ustat/meta.yml b/modules/nf-core/windowmasker/ustat/meta.yml old mode 100644 new mode 100755 index fe0d8000..6acf2e50 --- a/modules/nf-core/windowmasker/ustat/meta.yml +++ b/modules/nf-core/windowmasker/ustat/meta.yml @@ -3,6 +3,7 @@ description: A program to take a counts file and creates a file of genomic co-or keywords: - fasta - interval + - windowmasker tools: - windowmasker: description: | diff --git a/nextflow.config b/nextflow.config old mode 100644 new mode 100755 index b62b481e..008981d4 --- a/nextflow.config +++ b/nextflow.config @@ -11,9 +11,6 @@ params { // Boilerplate options input = null - genome = null - igenomes_base = null - igenomes_ignore = null outdir = "./results" tracedir = "${params.outdir}/treeval_info" publish_dir_mode = 'copy' @@ -26,8 +23,7 @@ params { version = false validate_params = true show_hidden_params = false - schema_ignore_params = 'genomes,genome,igenomes_base,igenomes_ignore' - + schema_ignore_params = 'genomes' // Config options custom_config_version = 'master' @@ -177,6 +173,7 @@ env { process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +params.trace_timestamp = trace_timestamp timeline { enabled = true file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" @@ -187,7 +184,8 @@ report { } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.tracedir}/pipeline_execution_${trace_timestamp}.txt" + fields = 'name,status,module,cpus,memory,attempt,realtime,%cpu,%mem,peak_rss' } dag { enabled = true diff --git a/nextflow_schema.json b/nextflow_schema.json old mode 100644 new mode 100755 index 308ccc0d..f98570a5 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/sanger-tol/treeval/master/nextflow_schema.json", "title": "sanger-tol/treeval pipeline parameters", - 
"description": "A pipeline to generate supplimental data for genome curation", + "description": "A pipeline to generate supplemental data for genome curation", "type": "object", "definitions": { "input_output_options": { @@ -34,6 +34,12 @@ "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "trace_timestamp": { + "type": "string", + "description": "Not to be used, this passes data from the config into the pipeline", + "fa_icon": "fas fa-lock", + "help_text": "Don't use this param, required for TreeValProjects Summary functions" } } }, diff --git a/pipeline_template.yml b/pipeline_template.yml old mode 100644 new mode 100755 diff --git a/pyproject.toml b/pyproject.toml old mode 100644 new mode 100755 index 0d62beb6..6102010c --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. [tool.black] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target_version = ["py37", "py38", "py39", "py310" ] [tool.isort] profile = "black" diff --git a/subworkflows/local/ancestral_gene.nf b/subworkflows/local/ancestral_gene.nf index 625173cc..2bb6dd5d 100755 --- a/subworkflows/local/ancestral_gene.nf +++ b/subworkflows/local/ancestral_gene.nf @@ -23,7 +23,10 @@ workflow ANCESTRAL_GENE { // // MODULE: EXTRACTS ANCESTRALLY LINKED BUSCO GENES FROM FULL TABLE // - EXTRACT_ANCESTRAL(ch_grab, ancestral_table) + EXTRACT_ANCESTRAL( + ch_grab, + ancestral_table + ) ch_versions = ch_versions.mix(EXTRACT_ANCESTRAL.out.versions) // @@ -38,19 +41,29 @@ workflow ANCESTRAL_GENE { // // MODULE: ASSIGN EXTRACTED GENES TO ANCESTRAL GROUPS // - ASSIGN_ANCESTRAL(EXTRACT_ANCESTRAL.out.comp_location, assignanc_input ) + ASSIGN_ANCESTRAL( + EXTRACT_ANCESTRAL.out.comp_location, + assignanc_input + ) ch_versions = ch_versions.mix(EXTRACT_ANCESTRAL.out.versions) // // MODULES: SORT THE BED FILE // - BEDTOOLS_SORT(ASSIGN_ANCESTRAL.out.assigned_bed, []) + BEDTOOLS_SORT( + ASSIGN_ANCESTRAL.out.assigned_bed, + [] + ) ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) // // MODULES: CONVERT BED TO INDEXED BIGBED // - UCSC_BEDTOBIGBED(BEDTOOLS_SORT.out.sorted, dot_genome.map{it[1]}, buscogene_as) + UCSC_BEDTOBIGBED( + BEDTOOLS_SORT.out.sorted, + dot_genome.map{ it[1] }, // Pull file from tuple(meta, file) + buscogene_as + ) ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) emit: diff --git a/subworkflows/local/busco_annotation.nf b/subworkflows/local/busco_annotation.nf old mode 100644 new mode 100755 index 88306060..e527f741 --- a/subworkflows/local/busco_annotation.nf +++ b/subworkflows/local/busco_annotation.nf @@ -32,39 +32,50 @@ workflow BUSCO_ANNOTATION { main: ch_versions = Channel.empty() - // + // // MODULE: RUN BUSCO TO OBTAIN FULL_TABLE.CSV // EMITS FULL_TABLE.CSV // - BUSCO ( reference_tuple, - lineageinfo, - lineagespath, - [] ) - ch_versions = ch_versions.mix(BUSCO.out.versions.first()) + BUSCO ( + reference_tuple, + lineageinfo, + lineagespath, + [] + ) + ch_versions = ch_versions.mix( BUSCO.out.versions.first() ) - ch_grab = GrabFiles(BUSCO.out.busco_dir) + ch_grab = GrabFiles( BUSCO.out.busco_dir ) // // MODULE: EXTRACT THE BUSCO GENES FOUND IN REFERENCE // - 
EXTRACT_BUSCOGENE (ch_grab) - ch_versions = ch_versions.mix(EXTRACT_BUSCOGENE.out.versions) + EXTRACT_BUSCOGENE ( + ch_grab + ) + ch_versions = ch_versions.mix( EXTRACT_BUSCOGENE.out.versions ) // // MODULE: SORT THE EXTRACTED BUSCO GENE // - BEDTOOLS_SORT(EXTRACT_BUSCOGENE.out.genefile, []) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + BEDTOOLS_SORT( + EXTRACT_BUSCOGENE.out.genefile, + [] + ) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: CONVERT THE BED TO BIGBED // - UCSC_BEDTOBIGBED(BEDTOOLS_SORT.out.sorted, dot_genome.map{it[1]}, buscogene_as) - ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) + UCSC_BEDTOBIGBED( + BEDTOOLS_SORT.out.sorted, + dot_genome.map{it[1]}, // Gets file from tuple (meta, file) + buscogene_as + ) + ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) // // LOGIC: AGGREGATE DATA AND SORT BRANCH ON CLASS - // + // lineageinfo .combine( BUSCO.out.busco_dir ) .combine( ancestral_table ) @@ -72,27 +83,29 @@ workflow BUSCO_ANNOTATION { lep: it[0].split('_')[0] == "lepidoptera" general: it[0].split('_')[0] != "lepidoptera" } - .set{ch_busco_data} + .set{ ch_busco_data } // // LOGIC: BUILD NEW INPUT CHANNEL FOR ANCESTRAL ID - // + // ch_busco_data .lep - .multiMap { data -> - busco_dir: tuple(data[1], data[2]) - atable: data[3] + .multiMap { lineage, meta, busco_dir, ancestral_table -> + busco_dir: tuple( meta, busco_dir ) + atable: ancestral_table } - .set{ch_busco_lep_data} + .set{ ch_busco_lep_data } // // SUBWORKFLOW: RUN ANCESTRAL BUSCO ID (ONLY AVAILABLE FOR LEPIDOPTERA) - // - ANCESTRAL_GENE (ch_busco_lep_data.busco_dir, - dot_genome, - buscogene_as, - ch_busco_lep_data.atable) - ch_versions = ch_versions.mix(ANCESTRAL_GENE.out.versions) + // + ANCESTRAL_GENE ( + ch_busco_lep_data.busco_dir, + dot_genome, + buscogene_as, + ch_busco_lep_data.atable + ) + ch_versions = ch_versions.mix( ANCESTRAL_GENE.out.versions ) emit: ch_buscogene_bigbed = UCSC_BEDTOBIGBED.out.bigbed diff --git a/subworkflows/local/gap_finder.nf b/subworkflows/local/gap_finder.nf old mode 100644 new mode 100755 index 89c59d01..3c51e530 --- a/subworkflows/local/gap_finder.nf +++ b/subworkflows/local/gap_finder.nf @@ -33,11 +33,11 @@ workflow GAP_FINDER { // // LOGIC: Adding the largest scaffold size to the meta data so it can be used in the modules.config - // + // SEQTK_CUTN.out.bed .combine(max_scaff_size) - .map {meta, row, scaff -> - tuple([ id : meta.id, + .map {meta, row, scaff -> + tuple([ id : meta.id, max_scaff : scaff >= 500000000 ? 
'csi': '' ], file(row) @@ -53,7 +53,7 @@ workflow GAP_FINDER { ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) emit: - gap_file = GAP_LENGTH.out.bed + gap_file = GAP_LENGTH.out.bedgraph gap_tabix = TABIX_BGZIPTABIX.out.gz_csi versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/gene_alignment.nf b/subworkflows/local/gene_alignment.nf old mode 100644 new mode 100755 index 95d1d64c..cec8f2b9 --- a/subworkflows/local/gene_alignment.nf +++ b/subworkflows/local/gene_alignment.nf @@ -37,17 +37,19 @@ workflow GENE_ALIGNMENT { .splitCsv() .flatten() - ch_data - .combine( alignment_datadir ) - .combine( assembly_classT ) // - // LOGIC: CONVERTS THE ABOVE VALUES INTO A PATH AND DOWNLOADS IT, THEN TURNS IT TO A TUPLE OF + // LOGIC: COMBINE CH_DATA WITH ALIGNMENT_DIR AND ASSEMBLY_CLASS + // CONVERTS THESE VALUES INTO A PATH AND DOWNLOADS IT, THEN TURNS IT TO A TUPLE OF // [ [ META.ID, META.TYPE, META.ORG ], GENE_ALIGNMENT_FILE ] // DATA IS THEN BRANCHED BASED ON META.TYPE TO THE APPROPRIATE // SUBWORKFLOW // + ch_data + .combine( alignment_datadir ) + .combine( assembly_classT ) .map { - ch_org, data_dir, classT -> file("${data_dir}${classT}/csv_data/${ch_org}-data.csv") + ch_org, data_dir, classT -> + file("${data_dir}${classT}/csv_data/${ch_org}-data.csv") } .splitCsv( header: true, sep:',') .map( row -> @@ -79,7 +81,7 @@ workflow GENE_ALIGNMENT { ) ch_versions = ch_versions.mix(PEP_ALIGNMENTS.out.versions) - + // // SUBWORKFLOW: GENERATES GENE ALIGNMENTS FOR RNA, NUCLEAR AND COMPLEMENT_DNA DATA, EMITS BIGBED // @@ -90,7 +92,7 @@ workflow GENE_ALIGNMENT { intron_size ) ch_versions = ch_versions.mix(GEN_ALIGNMENTS.out.versions) - + CDS_ALIGNMENTS ( reference_tuple, reference_index, cds_files, @@ -98,7 +100,7 @@ workflow GENE_ALIGNMENT { intron_size ) ch_versions = ch_versions.mix(CDS_ALIGNMENTS.out.versions) - + RNA_ALIGNMENTS ( reference_tuple, reference_index, rna_files, diff --git a/subworkflows/local/generate_genome.nf b/subworkflows/local/generate_genome.nf index 61eb922a..97580e7d 100755 --- a/subworkflows/local/generate_genome.nf +++ b/subworkflows/local/generate_genome.nf @@ -4,7 +4,7 @@ // MODULE IMPORT BLOCK // include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' -include { GENERATE_GENOME_FILE } from '../../modules/local/generate_genome_file' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' include { GET_LARGEST_SCAFF } from '../../modules/local/get_largest_scaff' workflow GENERATE_GENOME { @@ -20,36 +20,36 @@ workflow GENERATE_GENOME { // reference_file .combine( assembly_id ) - .map { it -> - tuple ([id: it[1]], - it[0]) + .map { file, sample_id -> + tuple ([id: sample_id], + file) } - .set { to_samtools } + .set { to_chromsize } // // MODULE: GENERATE INDEX OF REFERENCE - // EMITS REFERENCE INDEX FILE + // EMITS REFERENCE INDEX FILE MODIFIED FOR SCAFF SIZES // - SAMTOOLS_FAIDX ( to_samtools ) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + CUSTOM_GETCHROMSIZES ( + to_chromsize, + "genome" + ) + ch_versions = ch_versions.mix( CUSTOM_GETCHROMSIZES.out.versions ) - // - // MODULE: TRIMS INDEX INTO A GENOME DESCRIPTION FILE - // EMITS REFERENCE GEOME FILE AND REFERENCE INDEX FILE - GENERATE_GENOME_FILE ( SAMTOOLS_FAIDX.out.fai ) - ch_versions = ch_versions.mix( GENERATE_GENOME_FILE.out.versions ) // // MODULE: Cut out the largest scaffold size and use as comparator against 512MB // This is the cut off for TABIX using tbi indexes // - GET_LARGEST_SCAFF ( 
GENERATE_GENOME_FILE.out.dotgenome )
+    GET_LARGEST_SCAFF (
+        CUSTOM_GETCHROMSIZES.out.sizes
+    )
     ch_versions     = ch_versions.mix( GET_LARGEST_SCAFF.out.versions )
-
+
     emit:
     max_scaff_size      = GET_LARGEST_SCAFF.out.scaff_size.toInteger()
-    dot_genome          = GENERATE_GENOME_FILE.out.dotgenome
-    ref_index           = SAMTOOLS_FAIDX.out.fai
-    reference_tuple     = to_samtools
+    dot_genome          = CUSTOM_GETCHROMSIZES.out.sizes
+    ref_index           = CUSTOM_GETCHROMSIZES.out.fai
+    reference_tuple     = to_chromsize
     versions            = ch_versions.ifEmpty(null)
 }
diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf
old mode 100644
new mode 100755
index bf8df5bf..cf249ca4
--- a/subworkflows/local/hic_mapping.nf
+++ b/subworkflows/local/hic_mapping.nf
@@ -13,6 +13,8 @@ include { COOLER_CLOAD } from '../../modules/nf-cor
 include { COOLER_ZOOMIFY } from '../../modules/nf-core/cooler/zoomify/main'
 include { PRETEXTMAP as PRETEXTMAP_STANDRD } from '../../modules/nf-core/pretextmap/main'
 include { PRETEXTMAP as PRETEXTMAP_HIGHRES } from '../../modules/nf-core/pretextmap/main'
+include { PRETEXTSNAPSHOT as SNAPSHOT_SRES } from '../../modules/nf-core/pretextsnapshot/main'
+include { PRETEXTSNAPSHOT as SNAPSHOT_HRES } from '../../modules/nf-core/pretextsnapshot/main'
 include { SAMTOOLS_MARKDUP } from '../../modules/nf-core/samtools/markdup/main'
 include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'
 include { BAMTOBED_SORT } from '../../modules/local/bamtobed_sort.nf'
@@ -26,20 +28,23 @@ workflow HIC_MAPPING {
     take:
     reference_tuple     // Channel [ val(meta), path(file) ]
     reference_index     // Channel [ val(meta), path(file) ]
-    dot_genome          // Channel: [val(meta), [ datafile ]]
+    dot_genome          // Channel [ val(meta), [ datafile ]]
     hic_reads_path      // Channel [ val(meta), path(directory) ]
+    assembly_id         // Channel val( id )
 
     main:
     ch_versions         = Channel.empty()
 
     // COMMENT: 1000bp BIN SIZE INTERVALS FOR CLOAD
-    ch_cool_bin = Channel.of(1000)
+    ch_cool_bin         = Channel.of( 1000 )
 
     //
     // MODULE: Indexing on reference output the folder of indexing files
     //
-    BWAMEM2_INDEX (reference_tuple)
-    ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions)
+    BWAMEM2_INDEX (
+        reference_tuple
+    )
+    ch_versions         = ch_versions.mix( BWAMEM2_INDEX.out.versions )
 
     //
     // LOGIC: make channel of hic reads as input for GENERATE_CRAM_CSV
     //
     reference_tuple
         .combine( hic_reads_path )
         .map { meta, ref, hic_reads_path ->
-            tuple([ id: meta.id, single_end: true], hic_reads_path) }
+            tuple(
+                [ id: meta.id, single_end: true],
+                hic_reads_path
+            )
+        }
         .set { get_reads_input }
 
     //
     // MODULE: generate a cram csv file containing the required parameters for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT
     //
-    GENERATE_CRAM_CSV ( get_reads_input )
-    ch_versions = ch_versions.mix(GENERATE_CRAM_CSV.out.versions)
+    GENERATE_CRAM_CSV (
+        get_reads_input
+    )
+    ch_versions         = ch_versions.mix( GENERATE_CRAM_CSV.out.versions )
 
     //
     // LOGIC: organise all parameters into a channel for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT
     //
-    ch_filtering_input = GENERATE_CRAM_CSV.out.csv
-        .splitCsv()
-        .combine (reference_tuple)
-        .combine (BWAMEM2_INDEX.out.index)
-        .map{ cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path ->
-            tuple([
-                    id: cram_id.id
-                ],
-                file(cram_info[0]),
-                cram_info[1],
-                cram_info[2],
-                cram_info[3],
-                cram_info[4],
-                cram_info[5],
-                cram_info[6],
-                bwa_path.toString() + '/' + ref_dir.toString().split('/')[-1])
-        }
+    GENERATE_CRAM_CSV.out.csv
+        .splitCsv()
+        .combine (reference_tuple)
+        .combine (BWAMEM2_INDEX.out.index)
+        .map{ cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path ->
+            tuple([
+                    id: cram_id.id
+                ],
+                file(cram_info[0]),
+                cram_info[1],
+                cram_info[2],
+                cram_info[3],
+                cram_info[4],
+                cram_info[5],
+                cram_info[6],
+                bwa_path.toString() + '/' + ref_dir.toString().split('/')[-1]
+            )
+        }
+        .set { ch_filtering_input }
 
     //
     // MODULE: parallel processing bwa-mem2 alignment by given interval of containers from cram files
     //
-    CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT ( ch_filtering_input )
-    ch_versions = ch_versions.mix(CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions)
+    CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT (
+        ch_filtering_input
+    )
+    ch_versions         = ch_versions.mix( CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions )
 
     //
     // LOGIC: PREPARING BAMS FOR MERGE
@@ -94,28 +109,21 @@ workflow HIC_MAPPING {
         .map { file ->
             tuple (
                 [
-                id: file[0].toString().split('/')[-1].split('_')[0] // Change to sample_id
+                id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1]
                 ],
                 file
             )
         }
-        .set { collected_files_for_merge }
-
-    //
-    // LOGIC: PREPARING MERGE INPUT
-    //
-    reference_tuple
-        .combine( reference_index )
-        .multiMap { ref_meta, ref_fa, ref_idx_meta, ref_idx ->
-            reference: ref_fa
-            ref_idx: ref_idx
-        }
-        .set { ref_files }
+        .set { collected_files_for_merge }
 
     //
     // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES
     //
-    SAMTOOLS_MERGE ( collected_files_for_merge, ref_files.reference, ref_files.ref_idx )
+    SAMTOOLS_MERGE (
+        collected_files_for_merge,
+        reference_tuple,
+        reference_index
+    )
     ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() )
 
     //
@@ -124,7 +132,10 @@ workflow HIC_MAPPING {
     SAMTOOLS_MERGE.out.bam
        .combine( reference_tuple )
        .multiMap { bam_meta, bam, ref_meta, ref_fa ->
-            input_bam: tuple(bam_meta, bam)
+            input_bam: tuple( [ id: bam_meta.id,
+                                sz: file( bam ).size() ],
+                                bam
+            )
             reference: ref_fa
        }
        .set { pretext_input }
 
     //
     // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR LOW RES
     //
-    PRETEXTMAP_STANDRD ( pretext_input.input_bam, pretext_input.reference )
-    ch_versions = ch_versions.mix(PRETEXTMAP_STANDRD.out.versions)
+    PRETEXTMAP_STANDRD (
+        pretext_input.input_bam,
+        pretext_input.reference
+    )
+    ch_versions         = ch_versions.mix( PRETEXTMAP_STANDRD.out.versions )
 
     //
     // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES
     //
-    PRETEXTMAP_HIGHRES ( pretext_input.input_bam, pretext_input.reference )
-    ch_versions = ch_versions.mix(PRETEXTMAP_HIGHRES.out.versions)
+    PRETEXTMAP_HIGHRES (
+        pretext_input.input_bam,
+        pretext_input.reference
+    )
+    ch_versions         = ch_versions.mix( PRETEXTMAP_HIGHRES.out.versions )
+
+    //
+    // MODULE: GENERATE PNG FROM STANDARD PRETEXT
+    //
+    SNAPSHOT_SRES (
+        PRETEXTMAP_STANDRD.out.pretext
+    )
+    ch_versions         = ch_versions.mix ( SNAPSHOT_SRES.out.versions )
+
+    // NOTE: CURRENTLY UNDER INVESTIGATION
+    //
+    // MODULE: GENERATE PNG FROM HIGHRES PRETEXT
+    //
+    // SNAPSHOT_HRES ( PRETEXTMAP_HIGHRES.out.pretext )
+    // ch_versions      = ch_versions.mix ( SNAPSHOT_HRES.out.versions )
 
     //
     // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES
     //
-    SAMTOOLS_MARKDUP ( pretext_input.input_bam, pretext_input.reference )
-    ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions.first() )
+    SAMTOOLS_MARKDUP (
+        pretext_input.input_bam,
+        pretext_input.reference
+    )
+    ch_versions         = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions )
 
     //
     // MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE
     //
-    BAMTOBED_SORT(
SAMTOOLS_MARKDUP.out.bam ) - ch_versions = ch_versions.mix(BAMTOBED_SORT.out.versions) + BAMTOBED_SORT( + SAMTOOLS_MARKDUP.out.bam + ) + ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions ) // // MODULE: GENERATE CONTACT PAIRS // - GET_PAIRED_CONTACT_BED(BAMTOBED_SORT.out.sorted_bed) - ch_versions = ch_versions.mix(GET_PAIRED_CONTACT_BED.out.versions) + GET_PAIRED_CONTACT_BED( BAMTOBED_SORT.out.sorted_bed ) + ch_versions = ch_versions.mix( GET_PAIRED_CONTACT_BED.out.versions ) // // LOGIC: PREPARE JUICER TOOLS INPUT - // + // GET_PAIRED_CONTACT_BED.out.bed .combine( dot_genome ) - .map { meta, paired_contacts, meta_my_genome, my_genome -> - tuple([ id: meta.id, single_end: true], paired_contacts, my_genome, meta.id) } + .multiMap { meta, paired_contacts, meta_my_genome, my_genome -> + paired : tuple([ id: meta.id, single_end: true], paired_contacts ) + genome : my_genome + id : meta.id + } .set { ch_juicer_input } // // MODULE: GENERATE HIC MAP // JUICER_TOOLS_PRE( - ch_juicer_input.map { [it[0], it[1]] }, - ch_juicer_input.map { it[2] }, - ch_juicer_input.map { it[3] } + ch_juicer_input.paired, + ch_juicer_input.genome, + ch_juicer_input.id ) - ch_versions = ch_versions.mix(JUICER_TOOLS_PRE.out.versions) + ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions ) // // LOGIC: BIN CONTACT PAIRS - // + // GET_PAIRED_CONTACT_BED.out.bed - .join(BAMTOBED_SORT.out.sorted_bed) - .combine(ch_cool_bin) + .join( BAMTOBED_SORT.out.sorted_bed ) + .combine( ch_cool_bin ) .set { ch_binned_pairs } // // LOGIC: PREPARE COOLER INPUT - // + // ch_binned_pairs .combine(dot_genome) - .map{ meta, pairs, bed, cool_bin, meta_my_genome, my_genome -> [meta, pairs, bed, cool_bin, my_genome]} - .set { ch_cooler_input } + .multiMap { meta, pairs, bed, cool_bin, meta_my_genome, my_genome -> + cooler_in : tuple ( meta, pairs, bed, cool_bin ) + genome_file : my_genome + } + .set { ch_cooler } // // MODULE: GENERATE A MULTI-RESOLUTION COOLER FILE BY COARSENING - // + // COOLER_CLOAD( - ch_cooler_input.map { [it[0], it[1], it[2], it[3]] }, - ch_cooler_input.map { it[4] } + ch_cooler.cooler_in, + ch_cooler.genome_file ) ch_versions = ch_versions.mix(COOLER_CLOAD.out.versions) - + // // LOGIC: REFACTOR CHANNEL FOR ZOOMIFY - // + // COOLER_CLOAD.out.cool - .map{ meta, cools, cool_bin -> [meta, cools]} + .map{ meta, cools, cool_bin -> + [meta, cools] + } .set{ch_cool} // // MODULE: ZOOM COOL TO MCOOL - // + // COOLER_ZOOMIFY(ch_cool) ch_versions = ch_versions.mix(COOLER_ZOOMIFY.out.versions) + // + // LOGIC: FOR REPORTING + // + + ch_cram_files = GrabFiles( get_reads_input ) + + ch_cram_files + .collect() + .map { meta, cram -> + tuple( [ id: 'cram', + sz: cram instanceof ArrayList ? 
cram.collect { it.size()} : cram.size() ], + cram + ) + } + .set { ch_reporting_cram } + emit: standrd_pretext = PRETEXTMAP_STANDRD.out.pretext + standrd_snpshot = SNAPSHOT_SRES.out.image highres_pretext = PRETEXTMAP_HIGHRES.out.pretext + //highres_snpshot = SNAPSHOT_HRES.out.image mcool = COOLER_ZOOMIFY.out.mcool hic = JUICER_TOOLS_PRE.out.hic + ch_reporting = ch_reporting_cram.collect() versions = ch_versions.ifEmpty(null) } + +process GrabFiles { + tag "${meta.id}" + executor 'local' + + input: + tuple val(meta), path("in") + + output: + tuple val(meta), path("in/*.cram") + + "true" +} diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf index 8233c8fa..2e2989dc 100755 --- a/subworkflows/local/insilico_digest.nf +++ b/subworkflows/local/insilico_digest.nf @@ -27,83 +27,113 @@ workflow INSILICO_DIGEST { // LOGIC: COMBINES REFERENCE TUPLE WITH ENZYME CHANNEL // MULTIMAP INTO TWO CHANNELS SO THERE IS REFERENCE * ENZYME CHANNELS // - input_fasta = sample.map { data -> - tuple([ - id : data[0].id, - single_end : false - ], - file(data[1]) - )} + sample + .map { meta, data -> + tuple( + [ id : meta.id, + single_end : false ], + file( data ) + ) + } + .set { input_fasta } input_fasta .combine(ch_enzyme) - .multiMap { data -> - fasta: tuple( data[0], - data[1] + .multiMap { meta, reference, enzyme_id -> + fasta : tuple( meta, + reference ) - enzyme: data[2] + enzyme : enzyme_id } - .set { fa2c_input } + .set { fa2c_input } // // MODULE: CONVERTS FASTA INTO A COLOUR-AWARE BIONANO CMAP FORMAT // EMITS FILES CONTAINING INDEX_IDs AND ORIGINAL_GENOMIC_LOCATIONS // - MAKECMAP_FA2CMAPMULTICOLOR ( fa2c_input.fasta, fa2c_input.enzyme ) - - ch_cmap = MAKECMAP_FA2CMAPMULTICOLOR.out.cmap - ch_cmapkey = MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey + MAKECMAP_FA2CMAPMULTICOLOR ( + fa2c_input.fasta, + fa2c_input.enzyme + ) ch_versions = ch_versions.mix(MAKECMAP_FA2CMAPMULTICOLOR.out.versions) // // LOGIC: CREATES A TUPLE CONTAINING THE CMAP AND ORIGINAL GENOMIC LOCATIONS // - ch_cmap_new = ch_cmap - .map{ meta, cfile -> tuple([ - id : cfile.toString().split('_')[-3] - ], cfile)} - - ch_cmapkey_new = ch_cmapkey - .map{ kfile -> tuple([ - id : kfile.toString().split('_')[-4] - ], kfile)} - - - ch_join = ch_cmap_new.join(ch_cmapkey_new) - .map { meta, cfile, kfile -> tuple ([ - meta, - cfile - ] , - kfile)} - + MAKECMAP_FA2CMAPMULTICOLOR.out.cmap + .map{ meta, cfile -> + tuple( + [ id : cfile.toString().split('_')[-3] ], + cfile + ) + } + .set { ch_cmap_new } + + MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey + .map{ kfile -> + tuple( + [ id : kfile.toString().split('_')[-4] ], + kfile + ) + } + .set { ch_cmapkey_new } + + + ch_cmap_new + .join(ch_cmapkey_new) + .multiMap { meta, cfile, kfile -> + cmap : tuple( meta, cfile) + key_file : kfile + } + + .set { ch_join } + // // MODULE: RENAME CMAP IDs FROM BIONANO IDX TO ORIGINAL GENOMIC LOCATIONS // EMITS RENAMED CMAP // - MAKECMAP_RENAMECMAPIDS ( ch_join.map { it[0] }, ch_join.map { it[1] } ) + MAKECMAP_RENAMECMAPIDS ( + ch_join.cmap, + ch_join.key_file + ) ch_versions = ch_versions.mix(MAKECMAP_RENAMECMAPIDS.out.versions) - ch_renamedcmap = MAKECMAP_RENAMECMAPIDS.out.renamedcmap + MAKECMAP_RENAMECMAPIDS.out.renamedcmap + .multiMap { meta, file -> + full : tuple ( meta, file ) + sample : meta.id + } + .set { ch_renamedcmap } // // MODULE: CONVERT CMAP FILE INTO BED FILE // EMITS BED FILE // - MAKECMAP_CMAP2BED ( ch_renamedcmap, ch_renamedcmap.map { it[0].id } ) + MAKECMAP_CMAP2BED ( + ch_renamedcmap.full, + ch_renamedcmap.sample + ) 
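    // NOTE: ILLUSTRATIVE SKETCH ONLY - NOT PART OF THE PATCH.
    // The multiMap refactor used just above (and throughout this PR) splits one
    // combined channel into named sub-channels, so module calls read
    // `ch.full` / `ch.sample` instead of positional `.map { it[n] }` indexing.
    // A minimal, self-contained example of the pattern; the meta map and file
    // name below are made up:
    //
    //    workflow {
    //        Channel
    //            .of( [ [ id: 'sample_1' ], file('renamed.cmap') ] )
    //            .multiMap { meta, cmap ->
    //                full   : tuple( meta, cmap )
    //                sample : meta.id
    //            }
    //            .set { ch_demo }
    //
    //        ch_demo.full.view()      // [ [id:sample_1], renamed.cmap ]
    //        ch_demo.sample.view()    // sample_1
    //    }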
ch_versions = ch_versions.mix(MAKECMAP_CMAP2BED.out.versions)
 
-    ch_bedfile      = MAKECMAP_CMAP2BED.out.bedfile
-    combined_ch = ch_bedfile
-        .combine(sizefile)
-        .combine(dot_as)
-
+    MAKECMAP_CMAP2BED.out.bedfile
+        .combine(sizefile)
+        .combine(dot_as)
+        .multiMap { meta, bed, meta_2, dot_genome, as_file ->
+            bed_tuple   : tuple( meta, bed )
+            genome_file : dot_genome
+            autosql     : as_file
+        }
+        .set { combined_ch }
+
     //
     // MODULE: CONVERT ABOVE BED INTO BIGBED WITH ADDITIONAL AS FILE
     // EMITS BIGBED FILE
     //
-    UCSC_BEDTOBIGBED (  combined_ch.map { [it[0], it[1]] },
-                        combined_ch.map { it[3] },
-                        combined_ch.map { it[4] })
+    UCSC_BEDTOBIGBED (
+        combined_ch.bed_tuple,
+        combined_ch.genome_file,
+        combined_ch.autosql
+    )
     ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions)
 
     emit:
diff --git a/subworkflows/local/longread_coverage.nf b/subworkflows/local/longread_coverage.nf
index 7af0214c..f1ca457c 100755
--- a/subworkflows/local/longread_coverage.nf
+++ b/subworkflows/local/longread_coverage.nf
@@ -24,10 +24,9 @@ include { FINDHALFCOVERAGE } from '../../modules/local/
 workflow LONGREAD_COVERAGE {
 
     take:
-    reference_tuple     // Channel: [ val(meta), path(reference_file) ]
-    dot_genome          // Channel: [ val(meta), [ path(datafile) ] ]
-    reads_path          // Channel: [ val(meta), val( str ) ]
-    size_class          // Channel: val( str )
+    reference_tuple     // Channel: [ val(meta), file( reference_file ) ]
+    dot_genome          // Channel: [ val(meta), [ file( datafile ) ] ]
+    reads_path          // Channel: [ val(meta), val( str ) ]
 
     main:
     ch_versions = Channel.empty()
@@ -35,62 +34,95 @@ workflow LONGREAD_COVERAGE {
     //
     // MODULE: CREATES INDEX OF REFERENCE FILE
     //
-    MINIMAP2_INDEX(reference_tuple)
-    ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions)
-    ch_ref_index = MINIMAP2_INDEX.out.index
+    MINIMAP2_INDEX(
+        reference_tuple
+    )
+    ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions )
 
     //
-    // LOGIC: PREPARE GET_READS_FROM_DIRECTORY INPUT
+    // LOGIC: PREPARE GET_READS_FROM_DIRECTORY INPUT
     //
     reference_tuple
         .combine( reads_path )
         .map { meta, ref, reads_path ->
-            tuple([ id: meta.id, single_end: true], reads_path) }
+            tuple(
+                [ id          : meta.id,
+                  single_end  : true ],
+                reads_path
+            )
+        }
         .set { get_reads_input }
 
     //
     // MODULE: GETS PACBIO READ PATHS FROM READS_PATH
     //
-    ch_grabbed_read_paths = GrabFiles(get_reads_input)
+    ch_grabbed_read_paths   = GrabFiles( get_reads_input )
 
     //
     // LOGIC: PACBIO READS FILES TO CHANNEL
     //
     ch_grabbed_read_paths
-        .map { meta, files ->
-            tuple(files)
-        }
+        .map { meta, files ->
+            tuple( files )
+        }
         .flatten()
         .set { ch_read_paths }
 
     //
     // LOGIC: COMBINE PACBIO READ PATHS WITH MINIMAP2_INDEX OUTPUT
     //
-    ch_ref_index
-        .combine(ch_read_paths)
-        .combine(size_class)
-        .map { meta, ref_mmi, read_path, size_class ->
-            tuple([ id: meta.id,
-                    single_end: true,
+    MINIMAP2_INDEX.out.index
+        .combine( ch_read_paths )
+        .combine( reference_tuple )
+        .map { meta, ref_mmi, read_path, ref_meta, reference ->
+            tuple(
+                [ id          : meta.id,
+                  single_end  : true,
                     split_prefix: read_path.toString().split('/')[-1].split('.fasta.gz')[0]
                 ],
-                read_path, ref_mmi, true, false, false, size_class)
-        }
+                read_path,
+                ref_mmi,
+                true,
+                false,
+                false,
+                file( reference ).size()
+            )
+        }
         .branch {
-            large: it[6] == 'L'
-            small: it[6] == 'S'
+            large   : it[6] > 3000000000
+            small   : it[6] < 3000000000
         }
         .set { mma_input }
 
+    mma_input.large
+        .multiMap { meta, read_path, ref_mmi, bam_output, cigar_paf, cigar_bam, file_size ->
+            read_tuple      : tuple( meta, read_path)
+            mmi_index       : ref_mmi
+            bool_bam_output : bam_output
+            bool_cigar_paf  : cigar_paf
+            bool_cigar_bam  : cigar_bam
+        }
+        .set { large }
+
+    mma_input.small
+        .multiMap { meta, read_path, ref_mmi, bam_output, cigar_paf, cigar_bam, file_size ->
+            read_tuple      : tuple( meta, read_path)
+            mmi_index       : ref_mmi
+            bool_bam_output : bam_output
+            bool_cigar_paf  : cigar_paf
+            bool_cigar_bam  : cigar_bam
+        }
+        .set { small }
+
     //
     // MODULE: ALIGN READS TO REFERENCE WHEN REFERENCE <5GB PER SCAFFOLD
-    //
+
     MINIMAP2_ALIGN (
-        mma_input.small.map { [it[0], it[1]] },
-        mma_input.small.map { it[2] },
-        mma_input.small.map { it[3] },
-        mma_input.small.map { it[4] },
-        mma_input.small.map { it[5] }
+        small.read_tuple,
+        small.mmi_index,
+        small.bool_bam_output,
+        small.bool_cigar_paf,
+        small.bool_cigar_bam
     )
     ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions)
     ch_align_bams = MINIMAP2_ALIGN.out.bam
@@ -99,20 +131,19 @@ workflow LONGREAD_COVERAGE {
     // MODULE: ALIGN READS TO REFERENCE WHEN REFERENCE >5GB PER SCAFFOLD
     //
     MINIMAP2_ALIGN_SPLIT (
-        mma_input.large.map { [it[0], it[1]] },
-        mma_input.large.map { it[2] },
-        mma_input.large.map { it[3] },
-        mma_input.large.map { it[4] },
-        mma_input.large.map { it[5] }
+        large.read_tuple,
+        large.mmi_index,
+        large.bool_bam_output,
+        large.bool_cigar_paf,
+        large.bool_cigar_bam
    )
    ch_versions = ch_versions.mix(MINIMAP2_ALIGN_SPLIT.out.versions)
-    ch_split_bams = MINIMAP2_ALIGN_SPLIT.out.bam
 
     //
     // LOGIC: COLLECT OUTPUTTED BAM FILES FROM BOTH PROCESSES
-    //
+
     ch_align_bams
-        .mix(ch_split_bams)
+        .mix( MINIMAP2_ALIGN_SPLIT.out.bam )
         .set { ch_bams }
 
     //
@@ -125,41 +156,55 @@ workflow LONGREAD_COVERAGE {
         .collect()
         .map { file ->
             tuple (
-                [
-                id: file[0].toString().split('/')[-1].split('_')[0] // Change to sample_id
-                ],
+                [ id    : file[0].toString().split('/')[-1].split('_')[0] ], // Change sample ID
                 file
             )
         }
-        .set { collected_files_for_merge }
+        .set { collected_files_for_merge }
 
     //
     // MODULE: MERGES THE BAM FILES IN REGARDS TO THE REFERENCE
     // EMITS A MERGED BAM
     SAMTOOLS_MERGE(
         collected_files_for_merge,
-        reference_tuple.map { it[1] },
-        MINIMAP2_INDEX.out.index.map { it[1] }
+        reference_tuple,
+        [[],[]]
     )
     ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions)
-    ch_merged_bam = SAMTOOLS_MERGE.out.bam
+
+    //
+    // MODULE: SORT THE MERGED BAM BEFORE CONVERSION
+    //
+    SAMTOOLS_SORT (
+        SAMTOOLS_MERGE.out.bam
+    )
+    ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions )
 
     //
     // LOGIC: PREPARING MERGE INPUT WITH REFERENCE GENOME AND REFERENCE INDEX
     //
-    ch_merged_bam
+    SAMTOOLS_SORT.out.bam
         .combine( reference_tuple )
-        .combine( ch_ref_index )
-        .map { meta, file, ref_meta, ref, ref_index_meta, ref_index ->
-            tuple([ id: meta.id, single_end: true], file, ref, ref_index) }
+        .multiMap { meta, bam, ref_meta, ref ->
+            bam_input   :   tuple(
+                                [ id          : meta.id,
+                                  sz          : bam.size(),
+                                  single_end  : true ],
+                                bam,
+                                []   // As we aren't using an index file here
+                            )
+            ref_input   :   tuple(
+                                ref_meta,
+                                ref
+                            )
+        }
         .set { view_input }
-
     //
     // MODULE: EXTRACT READS FOR PRIMARY ASSEMBLY
     //
     SAMTOOLS_VIEW(
-        view_input.map { [it[0], it[1], it[3]] },
-        view_input.map { it[2] },
+        view_input.bam_input,
+        view_input.ref_input,
         []
     )
     ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions)
@@ -167,42 +212,51 @@ workflow LONGREAD_COVERAGE {
     //
     // MODULE: BAM TO PRIMARY BED
     //
-    BEDTOOLS_BAMTOBED(SAMTOOLS_VIEW.out.bam)
+    BEDTOOLS_BAMTOBED(
+        SAMTOOLS_VIEW.out.bam
+    )
     ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions)
 
     //
     // LOGIC: PREPARING Genome2Cov INPUT
     //
     BEDTOOLS_BAMTOBED.out.bed
-        .combine(dot_genome)
-        .map { meta, file, my_genome_meta, my_genome
-> - tuple([ id: meta.id, single_end: true], file, 1, my_genome, 'bed') + .combine( dot_genome ) + .multiMap { meta, file, my_genome_meta, my_genome -> + input_tuple : tuple ( + [ id : meta.id, + single_end : true ], + file, + 1 + ) + dot_genome : my_genome + file_suffix : 'bed' } .set { genomecov_input } // // MODULE: Genome2Cov - // + // BEDTOOLS_GENOMECOV( - genomecov_input.map { [it[0], it[1], it[2]] }, - genomecov_input.map { it[3] }, - genomecov_input.map { it[4] } + genomecov_input.input_tuple, + genomecov_input.dot_genome, + genomecov_input.file_suffix ) ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions) - ch_coverage_unsorted_bed = BEDTOOLS_GENOMECOV.out.genomecov // // MODULE: SORT THE PRIMARY BED FILE // - GNU_SORT(ch_coverage_unsorted_bed) + GNU_SORT( + BEDTOOLS_GENOMECOV.out.genomecov + ) ch_versions = ch_versions.mix(GNU_SORT.out.versions) - ch_coverage_bed = GNU_SORT.out.sorted // // MODULE: get_minmax_punches // GETMINMAXPUNCHES( - ch_coverage_bed + GNU_SORT.out.sorted ) ch_versions = ch_versions.mix(GETMINMAXPUNCHES.out.versions) @@ -213,7 +267,6 @@ workflow LONGREAD_COVERAGE { GETMINMAXPUNCHES.out.max ) ch_versions = ch_versions.mix(BEDTOOLS_MERGE_MAX.out.versions) - ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed // // MODULE: get_minmax_punches @@ -222,13 +275,12 @@ workflow LONGREAD_COVERAGE { GETMINMAXPUNCHES.out.min ) ch_versions = ch_versions.mix(BEDTOOLS_MERGE_MIN.out.versions) - ch_minbed = BEDTOOLS_MERGE_MIN.out.bed // // MODULE: GENERATE DEPTHGRAPH // GRAPHOVERALLCOVERAGE( - ch_coverage_bed + GNU_SORT.out.sorted ) ch_versions = ch_versions.mix(GRAPHOVERALLCOVERAGE.out.versions) ch_depthgraph = GRAPHOVERALLCOVERAGE.out.part @@ -236,32 +288,35 @@ workflow LONGREAD_COVERAGE { // // LOGIC: PREPARING FINDHALFCOVERAGE INPUT // - ch_coverage_bed - .combine( ch_depthgraph ) + GNU_SORT.out.sorted + .combine( GRAPHOVERALLCOVERAGE.out.part ) .combine( dot_genome ) - .map { meta, file, meta_depthgraph, depthgraph, meta_my_genome, my_genome -> - tuple([ id: meta.id, single_end: true], file, my_genome, depthgraph) + .multiMap { meta, file, meta_depthgraph, depthgraph, meta_my_genome, my_genome -> + halfcov_bed : tuple( [ id : meta.id, single_end : true ], file ) + genome_file : my_genome + depthgraph_file : depthgraph } - .set { findhalfcov_input } + .set { halfcov_input } // - // MODULE: findHalfcoverage + // MODULE: FIND REGIONS OF HALF COVERAGE // FINDHALFCOVERAGE( - findhalfcov_input.map { [it[0], it[1]] }, - findhalfcov_input.map { it[2] }, - findhalfcov_input.map { it[3] } + halfcov_input.halfcov_bed, + halfcov_input.genome_file, + halfcov_input.depthgraph_file ) ch_versions = ch_versions.mix(FINDHALFCOVERAGE.out.versions) - ch_halfbed = FINDHALFCOVERAGE.out.bed // - // LOGIC: PREPARING FINDHALFCOVERAGE INPUT + // LOGIC: PREPARING COVERAGE INPUT // - ch_coverage_bed + GNU_SORT.out.sorted .combine( dot_genome ) - .map { meta, file, meta_my_genome, my_genome -> - tuple([ id: meta.id, single_end: true], file, my_genome) + .combine(reference_tuple) + .multiMap { meta, file, meta_my_genome, my_genome, ref_meta, ref -> + ch_coverage_bed : tuple ([ id: ref_meta.id, single_end: true], file) + genome_file : my_genome } .set { bed2bw_input } @@ -269,18 +324,33 @@ workflow LONGREAD_COVERAGE { // MODULE: CONVERT BEDGRAPH TO BIGWIG // UCSC_BEDGRAPHTOBIGWIG( - bed2bw_input.map { [it[0], it[1]] }, - bed2bw_input.map { it[2] } + bed2bw_input.ch_coverage_bed, + bed2bw_input.genome_file ) ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions) - ch_bigwig = 
UCSC_BEDGRAPHTOBIGWIG.out.bigwig + + // + // LOGIC: GENERATE A SUMMARY TUPLE FOR OUTPUT + // + ch_grabbed_read_paths.map{ it } + + ch_grabbed_read_paths + .collect() + .map { meta, fasta -> + tuple( [ id: 'pacbio', + sz: fasta instanceof ArrayList ? fasta.collect { it.size()} : fasta.size() ], + fasta + ) + } + .set { ch_reporting_pacbio } emit: - ch_minbed - ch_halfbed - ch_maxbed - ch_bigwig - versions = ch_versions + ch_minbed = BEDTOOLS_MERGE_MIN.out.bed + ch_halfbed = FINDHALFCOVERAGE.out.bed + ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed + ch_bigwig = UCSC_BEDGRAPHTOBIGWIG.out.bigwig + ch_reporting = ch_reporting_pacbio.collect() + versions = ch_versions } process GrabFiles { @@ -294,4 +364,4 @@ process GrabFiles { tuple val(meta), path("in/*.fasta.gz") "true" -} \ No newline at end of file +} diff --git a/subworkflows/local/nuc_alignments.nf b/subworkflows/local/nuc_alignments.nf old mode 100644 new mode 100755 index c774e464..8d13b8f1 --- a/subworkflows/local/nuc_alignments.nf +++ b/subworkflows/local/nuc_alignments.nf @@ -37,21 +37,28 @@ workflow NUC_ALIGNMENTS { .buffer( size: 2 ) .combine ( reference_tuple ) .combine( intron_size ) - .map ( it -> - tuple( [id: it[0].id, - type: it[0].type, - org: it[0].org, - intron_size: it[4], - split_prefix: it[1].toString().split('/')[-1].split('.fasta')[0], - single_end: true + .map { meta, nuc_file, ref_meta, ref, intron -> + tuple( [id: meta.id, + type: meta.type, + org: meta.org, + intron_size: intron, + split_prefix: nuc_file.toString().split('/')[-1].split('.fasta')[0], + single_end: true ], - it[1], - it[3], + nuc_file, + ref, true, false, false ) - ) + } + .multiMap { meta, nuc_file, reference, bool_1, bool_2, bool_3 -> + nuc : tuple( meta, nuc_file) + ref : reference + bool_bam_output : bool_1 + bool_cigar_paf : bool_2 + bool_cigar_bam : bool_3 + } .set { formatted_input } // @@ -59,39 +66,34 @@ workflow NUC_ALIGNMENTS { // EMITS ALIGNED BAM FILE // MINIMAP2_ALIGN ( - formatted_input.map { [it[0], it[1]] }, - formatted_input.map { it[2] }, - formatted_input.map { it[3] }, - formatted_input.map { it[4] }, - formatted_input.map { it[5] } + formatted_input.nuc, + formatted_input.ref, + formatted_input.bool_bam_output, + formatted_input.bool_cigar_paf, + formatted_input.bool_cigar_bam ) ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) // - // LOGIC: CONVERTS THE MINIMAP OUTPUT TUPLE INTO A GROUPED TUPLE PER INPUT QUERY ORGANISM + // LOGIC: CONVERTS THE MINIMAP OUTPUT TUPLE INTO A GROUPED TUPLE PER INPUT QUERY ORGANISM // AND DATA TYPE (RNA, CDS, DNA). 
- // EMITS THREE CHANNELS FOR THE GROUPED QUERY DATA REFERENCE AND REFERENCE INDEX // MINIMAP2_ALIGN.out.bam .map { meta, file -> - tuple([id: meta.org, type: meta.type], file) } - .groupTuple( by: [0] ) - .combine( reference_tuple ) - .combine( reference_index ) - .multiMap { it -> - nuc_grouped: tuple( it[0], it[1] ) - reference: it[-3] - ref_index: it[-1] - } + tuple( + [ id: meta.org, + type: meta.type ], + file) } + .groupTuple( by: [0] ) // group by meta list .set { merge_input } // // MODULE: MERGES THE BAM FILES FOUND IN THE GROUPED TUPLE IN REGARDS TO THE REFERENCE // EMITS A MERGED BAM SAMTOOLS_MERGE ( - merge_input.nuc_grouped, - merge_input.reference, - merge_input.ref_index + merge_input, + reference_tuple, + reference_index ) ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions) @@ -132,14 +134,14 @@ workflow NUC_ALIGNMENTS { file_size: file.size() ], file ) } - .filter { it[0].file_size >= 141 } + .filter { it[0].file_size >= 141 } // Take the first item in input (meta) and check if size is more than a symlink .combine( dot_genome ) - .multiMap { it -> - bed_file: tuple( [ id: it[0].id, - type: it[0].type, + .multiMap { meta, ref, genome_meta, genome -> + bed_file: tuple( [ id: meta.id, + type: meta.type, ], - it[1] ) - dot_genome: it[3] + ref ) + dot_genome: genome } .set { ucsc_input } diff --git a/subworkflows/local/pep_alignments.nf b/subworkflows/local/pep_alignments.nf old mode 100644 new mode 100755 index db31b354..8f12e91e --- a/subworkflows/local/pep_alignments.nf +++ b/subworkflows/local/pep_alignments.nf @@ -8,7 +8,7 @@ include { BEDTOOLS_SORT } from '../../modules/nf-core/bedtools/sort/main include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' include { MINIPROT_INDEX } from '../../modules/nf-core/miniprot/index/main' include { MINIPROT_ALIGN } from '../../modules/nf-core/miniprot/align/main' -include { GFF_TO_BED } from '../../modules/local/gff_to_bed' +include { EXTRACT_COV_IDEN } from '../../modules/local/extract_cov_iden' workflow PEP_ALIGNMENTS { take: @@ -34,15 +34,15 @@ workflow PEP_ALIGNMENTS { .flatten() .buffer( size: 2 ) .combine ( MINIPROT_INDEX.out.index ) - .multiMap { data -> - pep_tuple : tuple( [ id: data[0].id, - type: data[0].type, - org: data[0].org + .multiMap { pep_meta, pep_file, miniprot_meta, miniprot_index -> + pep_tuple : tuple( [ id: pep_meta.id, + type: pep_meta.type, + org: pep_meta.org ], - data[1] ) + pep_file ) index_file : tuple( [ id: "Reference", ], - data[3] ) + miniprot_index ) } .set { formatted_input } @@ -50,7 +50,7 @@ workflow PEP_ALIGNMENTS { // MODULE: ALIGNS PEP DATA WITH REFERENCE INDEX // EMITS GFF FILE // - MINIPROT_ALIGN ( + MINIPROT_ALIGN ( formatted_input.pep_tuple, formatted_input.index_file ) @@ -60,11 +60,12 @@ workflow PEP_ALIGNMENTS { // LOGIC: GROUPS OUTPUT GFFS BASED ON QUERY ORGANISMS AND DATA TYPE (PEP) // MINIPROT_ALIGN.out.gff - .map { it -> - tuple([ id: it[0].org + '_pep', - type: it[0].type - ], - it[1] ) + .map { meta, file -> + tuple( + [ id : meta.org + '_pep', + type : meta.type ], + file + ) } .groupTuple( by: [0] ) .set { grouped_tuple } @@ -72,42 +73,52 @@ workflow PEP_ALIGNMENTS { // // MODULE: AS ABOVE OUTPUT IS BED FORMAT, IT IS MERGED PER ORGANISM + TYPE // - CAT_CAT ( grouped_tuple ) + CAT_CAT ( + grouped_tuple + ) ch_versions = ch_versions.mix( CAT_CAT.out.versions ) // // MODULE: SORTS ABOVE OUTPUT AND RETAINS GFF SUFFIX // EMITS A MERGED GFF FILE // - BEDTOOLS_SORT ( CAT_CAT.out.file_out , [] ) + BEDTOOLS_SORT ( + CAT_CAT.out.file_out , + [] + ) 
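    // NOTE: ILLUSTRATIVE SKETCH ONLY - NOT PART OF THE PATCH.
    // The grouping convention feeding CAT_CAT above: per-query alignment
    // outputs are re-keyed on organism + data type, then groupTuple collects
    // every GFF under one key so a single merged file is produced per
    // organism. The meta fields and file names below are made up:
    //
    //    workflow {
    //        Channel
    //            .of( [ [ id: 'q1', type: 'pep', org: 'genus_species' ], file('q1.gff') ],
    //                 [ [ id: 'q2', type: 'pep', org: 'genus_species' ], file('q2.gff') ] )
    //            .map { meta, gff ->
    //                tuple( [ id: meta.org + '_pep', type: meta.type ], gff )
    //            }
    //            .groupTuple( by: [0] )
    //            .view()    // [ [id:genus_species_pep, type:pep], [q1.gff, q2.gff] ]
    //    }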
ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: CUTS GFF INTO PUNCHLIST // - GFF_TO_BED ( CAT_CAT.out.file_out ) - ch_versions = ch_versions.mix( GFF_TO_BED.out.versions ) + EXTRACT_COV_IDEN ( + CAT_CAT.out.file_out + ) + ch_versions = ch_versions.mix( EXTRACT_COV_IDEN.out.versions ) BEDTOOLS_SORT.out.sorted - .combine(max_scaff_size) - .map {meta, row, scaff -> - tuple([ id : meta.id, - max_scaff : scaff >= 500000000 ? 'csi': '' - ], - file(row) - )} + .combine( max_scaff_size ) + .map {meta, row, scaff -> + tuple( + [ id : meta.id, + max_scaff : scaff >= 500000000 ? 'csi': '' ], + file( row ) + ) + } .set { modified_bed_ch } // // MODULE: COMPRESS AND INDEX MERGED.GFF // EMITS A TBI FILE // - TABIX_BGZIPTABIX ( modified_bed_ch ) + TABIX_BGZIPTABIX ( + modified_bed_ch + ) ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) emit: gff_file = BEDTOOLS_SORT.out.sorted tbi_gff = TABIX_BGZIPTABIX.out.gz_tbi - pep_punch = GFF_TO_BED.out.punchlist + pep_punch = EXTRACT_COV_IDEN.out.punchlist versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/punchlist.nf b/subworkflows/local/punchlist.nf old mode 100644 new mode 100755 index 8750a73a..1db01a6e --- a/subworkflows/local/punchlist.nf +++ b/subworkflows/local/punchlist.nf @@ -17,14 +17,18 @@ workflow PUNCHLIST { // // MODULE: CONVERTS BAM INTO PAF FOR THE PUNCHLIST GENERATION // - PAFTOOLS_SAM2PAF ( merged_bam ) - ch_versions = ch_versions.mix(PAFTOOLS_SAM2PAF.out.versions) + PAFTOOLS_SAM2PAF ( + merged_bam + ) + ch_versions = ch_versions.mix( PAFTOOLS_SAM2PAF.out.versions ) // // MODULE: GENERATES PUNCHLIST FROM PAF FILE // - PAF2BED ( PAFTOOLS_SAM2PAF.out.paf ) - ch_versions = ch_versions.mix(PAF2BED.out.versions) + PAF2BED ( + PAFTOOLS_SAM2PAF.out.paf + ) + ch_versions = ch_versions.mix( PAF2BED.out.versions ) emit: punchlist = PAF2BED.out.punchlist diff --git a/subworkflows/local/repeat_density.nf b/subworkflows/local/repeat_density.nf old mode 100644 new mode 100755 index 94e82fdd..2445c89a --- a/subworkflows/local/repeat_density.nf +++ b/subworkflows/local/repeat_density.nf @@ -6,9 +6,9 @@ include { WINDOWMASKER_USTAT } from '../../modules/nf-core/windowmasker/ustat/main' include { WINDOWMASKER_MKCOUNTS } from '../../modules/nf-core/windowmasker/mk_counts/main' include { EXTRACT_REPEAT } from '../../modules/local/extract_repeat' -include { BEDTOOLS_INTERSECT } from '../../modules/nf-core/bedtools/intersect/main' -include { BEDTOOLS_MAKEWINDOWS } from '../../modules/nf-core/bedtools/makewindows/main' -include { BEDTOOLS_MAP } from '../../modules/nf-core/bedtools/map/main' +include { BEDTOOLS_INTERSECT } from '../../modules/nf-core/bedtools/intersect/main' +include { BEDTOOLS_MAKEWINDOWS } from '../../modules/nf-core/bedtools/makewindows/main' +include { BEDTOOLS_MAP } from '../../modules/nf-core/bedtools/map/main' include { RENAME_IDS } from '../../modules/local/rename_ids' include { UCSC_BEDGRAPHTOBIGWIG } from '../../modules/nf-core/ucsc/bedgraphtobigwig/main' include { GNU_SORT as GNU_SORT_A } from '../../modules/nf-core/gnu/sort/main' @@ -27,26 +27,34 @@ workflow REPEAT_DENSITY { // // MODULE: MARK UP THE REPEAT REGIONS OF THE REFERENCE GENOME // - WINDOWMASKER_MKCOUNTS ( reference_tuple ) + WINDOWMASKER_MKCOUNTS ( + reference_tuple + ) ch_versions = ch_versions.mix( WINDOWMASKER_MKCOUNTS.out.versions ) // // MODULE: CALCULATE THE STATISTICS OF THE MARKED UP REGIONS // - WINDOWMASKER_USTAT( WINDOWMASKER_MKCOUNTS.out.counts, - reference_tuple ) + WINDOWMASKER_USTAT( + 
WINDOWMASKER_MKCOUNTS.out.counts,
+        reference_tuple
+    )
     ch_versions         = ch_versions.mix( WINDOWMASKER_USTAT.out.versions )
 
     //
     // MODULE: USE USTAT OUTPUT TO EXTRACT REPEATS FROM FASTA
     //
-    EXTRACT_REPEAT( WINDOWMASKER_USTAT.out.intervals )
+    EXTRACT_REPEAT(
+        WINDOWMASKER_USTAT.out.intervals
+    )
     ch_versions         = ch_versions.mix( EXTRACT_REPEAT.out.versions )
 
     //
     // MODULE: CREATE WINDOWS FROM .GENOME FILE
     //
-    BEDTOOLS_MAKEWINDOWS( dot_genome )
+    BEDTOOLS_MAKEWINDOWS(
+        dot_genome
+    )
     ch_versions         = ch_versions.mix( BEDTOOLS_MAKEWINDOWS.out.versions )
 
     //
@@ -54,10 +62,11 @@ workflow REPEAT_DENSITY {
     //
     BEDTOOLS_MAKEWINDOWS.out.bed
         .combine( EXTRACT_REPEAT.out.bed )
-        .map{ data ->
-            tuple ( data[0],
-                    data[1],
-                    data[3]
+        .map{ meta, windows_file, repeat_meta, repeat_file ->
+            tuple (
+                meta,
+                windows_file,
+                repeat_file
             )
         }
         .set { intervals }
@@ -65,7 +74,7 @@ workflow REPEAT_DENSITY {
     //
     // MODULE: GENERATES THE REPEAT FILE FROM THE WINDOW FILE AND GENOME FILE
     //
-    BEDTOOLS_INTERSECT(
+    BEDTOOLS_INTERSECT(
         intervals,
         dot_genome
     )
@@ -74,37 +83,48 @@ workflow REPEAT_DENSITY {
     //
     // MODULE: FIXES IDS FOR REPEATS
     //
-    RENAME_IDS( BEDTOOLS_INTERSECT.out.intersect )
+    RENAME_IDS(
+        BEDTOOLS_INTERSECT.out.intersect
+    )
     ch_versions         = ch_versions.mix( RENAME_IDS.out.versions )
 
     //
     // MODULE: SORTS THE ABOVE BED FILES
     //
-    GNU_SORT_A ( RENAME_IDS.out.bed )           // Intersect file
+    GNU_SORT_A (
+        RENAME_IDS.out.bed          // Intersect file
+    )
     ch_versions         = ch_versions.mix( GNU_SORT_A.out.versions )
 
-    GNU_SORT_B ( dot_genome )                   // genome file
+    GNU_SORT_B (
+        dot_genome                  // Genome file - Will not run unless genome file is sorted too
+    )
     ch_versions         = ch_versions.mix( GNU_SORT_B.out.versions )
 
-    GNU_SORT_C ( BEDTOOLS_MAKEWINDOWS.out.bed ) // windows file
+    GNU_SORT_C (
+        BEDTOOLS_MAKEWINDOWS.out.bed // Windows file
+    )
     ch_versions         = ch_versions.mix( GNU_SORT_C.out.versions )
 
     //
     // MODULE: ADDS 4TH COLUMN TO BED FILE USED IN THE REPEAT DENSITY GRAPH
     //
-    REFORMAT_INTERSECT ( GNU_SORT_A.out.sorted )
-    ch_versions         = ch_versions.mix( GNU_SORT_C.out.versions )
+    REFORMAT_INTERSECT (
+        GNU_SORT_A.out.sorted
+    )
+    ch_versions         = ch_versions.mix( REFORMAT_INTERSECT.out.versions )
 
     //
-    // LOGIC: COMBINES THE REFORMATTED INTERSECT FILE AND WINDOWS FILE CHANNELS AND SORTS INTO
+    // LOGIC: COMBINES THE REFORMATTED INTERSECT FILE AND WINDOWS FILE CHANNELS AND SORTS INTO
     //        tuple(intersect_meta, windows file, intersect file)
     //
     REFORMAT_INTERSECT.out.bed
         .combine( GNU_SORT_C.out.sorted )
-        .map{ data ->
-            tuple ( data[0],
-                    data[3],
-                    data[1]
+        .map{ intersect_meta, bed, sorted_meta, windows_file ->
+            tuple (
+                intersect_meta,
+                windows_file,
+                bed
            )
         }
         .set { for_mapping }
@@ -112,7 +132,7 @@ workflow REPEAT_DENSITY {
     //
     // MODULE: MAPS THE REPEATS AGAINST THE REFERENCE GENOME
     //
-    BEDTOOLS_MAP(
+    BEDTOOLS_MAP(
         for_mapping,
         GNU_SORT_B.out.sorted
     )
@@ -131,7 +151,7 @@ workflow REPEAT_DENSITY {
     //
     UCSC_BEDGRAPHTOBIGWIG(
         REPLACE_DOTS.out.bed,
-        GNU_SORT_B.out.sorted.map { it[1] }
+        GNU_SORT_B.out.sorted.map { it[1] } // Pulls file from tuple of meta and file
     )
     ch_versions         = ch_versions.mix( UCSC_BEDGRAPHTOBIGWIG.out.versions )
diff --git a/subworkflows/local/selfcomp.nf b/subworkflows/local/selfcomp.nf
index 08be8731..985d9c6c 100755
--- a/subworkflows/local/selfcomp.nf
+++ b/subworkflows/local/selfcomp.nf
@@ -26,107 +26,133 @@ workflow SELFCOMP {
 
     main:
     ch_versions             = Channel.empty()
-
-    //
+
+    //
     // MODULE: SPLITS INPUT FASTA INTO 500KB CHUNKS
     //         EMITS CHUNKED FASTA
     //
-    SELFCOMP_SPLITFASTA(reference_tuple)
-    ch_versions =
ch_versions.mix(SELFCOMP_SPLITFASTA.out.versions) + SELFCOMP_SPLITFASTA( + reference_tuple + ) + ch_versions = ch_versions.mix( SELFCOMP_SPLITFASTA.out.versions ) // // MODULE: SPLIT INPUT FASTA INTO 1GB CHUNKS // EMITS CHUNKED FASTA // - CHUNKFASTA(SELFCOMP_SPLITFASTA.out.fa, mummer_chunk) - ch_versions = ch_versions.mix(CHUNKFASTA.out.versions) + CHUNKFASTA( + SELFCOMP_SPLITFASTA.out.fa, + mummer_chunk + ) + ch_versions = ch_versions.mix( CHUNKFASTA.out.versions ) // // LOGIC: CONVERTS ABOVE OUTPUTS INTO A SINGLE TUPLE // ch_query_tup = CHUNKFASTA.out.fas - .map{ meta, query -> - [query] + .map{ meta, query -> + [query] } .flatten() ch_ref = SELFCOMP_SPLITFASTA.out.fa - .map{ meta, ref -> - ref + .map{ meta, ref -> + ref } ch_mummer_input = ch_query_tup .combine(ch_ref) - .map{ query, ref -> - tuple([id: query.toString().split('/')[-1] ], - ref, - query - ) + .map{ query, ref -> + tuple([ id: query.toString().split('/')[-1] ], + ref, + query + ) } // // MODULE: ALIGNS 1GB CHUNKS TO 500KB CHUNKS // EMITS MUMMER ALIGNMENT FILE // - MUMMER( ch_mummer_input ) - ch_versions = ch_versions.mix(MUMMER.out.versions) + MUMMER( + ch_mummer_input + ) + ch_versions = ch_versions.mix( MUMMER.out.versions ) // // LOGIC: GROUPS OUTPUT INTO SINGLE TUPLE BASED ON REFERENCE META // MUMMER.out.coords - .combine(reference_tuple) - .map { coords_meta, coords, ref_meta, ref -> - tuple( ref_meta, - coords - ) + .combine( reference_tuple ) + .map { coords_meta, coords, ref_meta, ref -> + tuple( ref_meta, + coords + ) } - .groupTuple(by:[0]) + .groupTuple( by:[0] ) .set{ ch_mummer_files } // // MODULE: MERGES MUMMER ALIGNMENT FILES // - CONCATMUMMER(ch_mummer_files) - ch_versions = ch_versions.mix(CONCATMUMMER.out.versions) + CONCATMUMMER( + ch_mummer_files + ) + ch_versions = ch_versions.mix( CONCATMUMMER.out.versions ) // // MODULE: CONVERT THE MUMMER ALIGNMENTS INTO BED FORMAT // - SELFCOMP_MUMMER2BED(CONCATMUMMER.out.mummer, motif_len) - ch_versions = ch_versions.mix(SELFCOMP_MUMMER2BED.out.versions) + SELFCOMP_MUMMER2BED( + CONCATMUMMER.out.mummer, + motif_len + ) + ch_versions = ch_versions.mix( SELFCOMP_MUMMER2BED.out.versions ) // // MODULE: GENERATE A LIST OF IDs AND GENOMIC POSITIONS OF SELFCOMPLEMENTARY REGIONS // EMITS BED FILE // - SELFCOMP_MAPIDS(SELFCOMP_MUMMER2BED.out.bedfile, SELFCOMP_SPLITFASTA.out.agp) - ch_versions = ch_versions.mix(SELFCOMP_MAPIDS.out.versions) + SELFCOMP_MAPIDS( + SELFCOMP_MUMMER2BED.out.bedfile, + SELFCOMP_SPLITFASTA.out.agp + ) + ch_versions = ch_versions.mix( SELFCOMP_MAPIDS.out.versions ) // // MODULE: SORTS ABOVE OUTPUT BED FILE AND RETAINS BED SUFFIX // - BEDTOOLS_SORT(SELFCOMP_MAPIDS.out.bedfile, []) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + BEDTOOLS_SORT( + SELFCOMP_MAPIDS.out.bedfile, + [] + ) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: BUILD ALIGNMENT BLOCKS // - SELFCOMP_ALIGNMENTBLOCKS(BEDTOOLS_SORT.out.sorted) - ch_versions = ch_versions.mix(SELFCOMP_ALIGNMENTBLOCKS.out.versions) + SELFCOMP_ALIGNMENTBLOCKS( + BEDTOOLS_SORT.out.sorted + ) + ch_versions = ch_versions.mix( SELFCOMP_ALIGNMENTBLOCKS.out.versions ) // // MODULE: SORT BLOCKS FILES AND FILTER BY MOTIF LENGTH // - CONCATBLOCKS(SELFCOMP_ALIGNMENTBLOCKS.out.blockfile) - ch_versions = ch_versions.mix(CONCATBLOCKS.out.versions) + CONCATBLOCKS( + SELFCOMP_ALIGNMENTBLOCKS.out.blockfile + ) + ch_versions = ch_versions.mix( CONCATBLOCKS.out.versions ) // // MODULE: CONVERTS ABOVE OUTPUT INTO BIGBED FORMAT // - UCSC_BEDTOBIGBED(CONCATBLOCKS.out.chainfile, 
dot_genome.map{it[1]}, selfcomp_as)
-    ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions)
+    UCSC_BEDTOBIGBED(
+        CONCATBLOCKS.out.chainfile,
+        dot_genome.map{it[1]}, // Pulls file from tuple ( meta and file )
+        selfcomp_as
+    )
+    ch_versions             = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions )
 
     emit:
     ch_bigbed               = UCSC_BEDTOBIGBED.out.bigbed
diff --git a/subworkflows/local/synteny.nf b/subworkflows/local/synteny.nf
index 04b021a6..3c631887 100755
--- a/subworkflows/local/synteny.nf
+++ b/subworkflows/local/synteny.nf
@@ -13,23 +13,26 @@ workflow SYNTENY {
     assembly_classT     // Channel val(meta)
 
     main:
-    ch_versions = Channel.empty()
+    ch_versions         = Channel.empty()
 
     //
     // MODULE: SEARCHES PREDETERMINED PATH FOR SYNTENIC GENOME FILES BASED ON CLASS
     //         EMITS PATH LIST
     //
-    GET_SYNTENY_GENOMES(synteny_path, assembly_classT)
-    ch_versions = ch_versions.mix( GET_SYNTENY_GENOMES.out.versions )
+    GET_SYNTENY_GENOMES(
+        synteny_path,
+        assembly_classT
+    )
+    ch_versions         = ch_versions.mix( GET_SYNTENY_GENOMES.out.versions )
 
     //
     // LOGIC: GENERATES LIST OF GENOMES IN PATH AND BRANCHES ON WHETHER THERE IS DATA
     //
     GET_SYNTENY_GENOMES.out.genome_path
         .flatten()
-        .branch { data ->
-            run: !data.toString().contains("empty")
-            skip: data.toString().contains("empty")
+        .branch { data ->
+            run     : !data.toString().contains("empty")
+            skip    : data.toString().contains("empty")
         }
         .set { mm_intermediary }
 
@@ -37,26 +40,29 @@ workflow SYNTENY {
     // LOGIC: COMBINE WITH ABOVE .RUN CHANNEL, ADD BOOLEANS FOR MINIMAP
     //
     reference_tuple
-        .combine(mm_intermediary.run)
-        .map { meta, fa, ref ->
-            tuple([ id: meta.id,
-                    single_end: true],
-                fa, ref, false, true, false)
-        }
-        .set { mm_input }
+        .combine( mm_intermediary.run )
+        .multiMap { meta, syntenic_ref, ref ->
+            syntenic_tuple  : tuple( meta, syntenic_ref )
+            reference_fa    : ref
+            bool_bam_output : false
+            bool_cigar_paf  : true
+            bool_cigar_bam  : false
+        }
+        .set { mm_input }
 
     //
     // MODULE: ALIGNS THE SYNTENIC GENOMES TO THE REFERENCE GENOME
     //         EMITS ALIGNED PAF FILE
     //
-    MINIMAP2_ALIGN( mm_input.map { [it[0], it[1]] },
-                    mm_input.map { it[2] },
-                    mm_input.map { it[3] },
-                    mm_input.map { it[4] },
-                    mm_input.map { it[5] }
+    MINIMAP2_ALIGN(
+        mm_input.syntenic_tuple,
+        mm_input.reference_fa,
+        mm_input.bool_bam_output,
+        mm_input.bool_cigar_paf,
+        mm_input.bool_cigar_bam
     )
     ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions )
-
+
     emit:
     ch_paf      = MINIMAP2_ALIGN.out.paf
     versions    = ch_versions.ifEmpty(null)
diff --git a/subworkflows/local/telo_finder.nf b/subworkflows/local/telo_finder.nf
old mode 100644
new mode 100755
index 56e95e76..3ecd3048
--- a/subworkflows/local/telo_finder.nf
+++ b/subworkflows/local/telo_finder.nf
@@ -45,15 +45,16 @@ workflow TELO_FINDER {
 
     //
     // LOGIC: Adding the largest scaffold size to the meta data so it can be used in the modules.config
-    //
+    //
     EXTRACT_TELO.out.bed
         .combine(max_scaff_size)
-        .map {meta, row, scaff ->
-            tuple([ id          : meta.id,
-                    max_scaff   : scaff >= 500000000 ? 'csi': ''
-                ],
-                file(row)
-            )}
+        .map {meta, row, scaff ->
+            tuple(
+                [ id        : meta.id,
+                  max_scaff : scaff >= 500000000 ?
diff --git a/subworkflows/local/telo_finder.nf b/subworkflows/local/telo_finder.nf
old mode 100644
new mode 100755
index 56e95e76..3ecd3048
--- a/subworkflows/local/telo_finder.nf
+++ b/subworkflows/local/telo_finder.nf
@@ -45,15 +45,16 @@ workflow TELO_FINDER {

     //
     // LOGIC: Adding the largest scaffold size to the meta data so it can be used in the modules.config
-    //
+    //
     EXTRACT_TELO.out.bed
         .combine(max_scaff_size)
-        .map {meta, row, scaff ->
-            tuple([ id        : meta.id,
-                    max_scaff : scaff >= 500000000 ? 'csi': ''
-                ],
-                file(row)
-            )}
+        .map {meta, row, scaff ->
+            tuple(
+                [ id        : meta.id,
+                  max_scaff : scaff >= 500000000 ? 'csi': '' ],
+                file( row )
+            )
+        }
         .set { modified_bed_ch }

     //
@@ -62,6 +63,7 @@ workflow TELO_FINDER {
     TABIX_BGZIPTABIX (
         modified_bed_ch
     )
+    ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions )

     emit:
     bedgraph_file   = EXTRACT_TELO.out.bed
diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf
old mode 100644
new mode 100755
index 84f66c95..ffe02e71
--- a/subworkflows/local/yaml_input.nf
+++ b/subworkflows/local/yaml_input.nf
@@ -13,12 +13,12 @@ workflow YAML_INPUT {
         .map { file -> readYAML(file) }
         .set { yamlfile }

-    //
+    //
     // LOGIC: PARSES THE TOP LEVEL OF YAML VALUES
-    //
+    //
     yamlfile
         .flatten()
-        .multiMap { data ->
+        .multiMap { data ->
                 assembly:           ( data.assembly )
                 assembly_reads:     ( data.assem_reads )
                 reference:          ( file(data.reference_file) )
@@ -38,7 +38,6 @@ workflow YAML_INPUT {
         .assembly
         .multiMap { data ->
                     level:      data.level
-                    size_c:     data.sizeClass
                     sample_id:  data.sample_id
                     classT:     data.classT
                     asmVersion: data.asmVersion
@@ -49,7 +48,7 @@ workflow YAML_INPUT {

     group
         .assembly_reads
-        .multiMap { data ->
+        .multiMap { data ->
                     pacbio: data.pacbio
                     hic: data.hic
                     supplement: data.supplementary
@@ -59,7 +58,7 @@ workflow YAML_INPUT {
     group
         .alignment
         .multiMap { data ->
-                    data_dir: data.data_dir
+                    data_dir:    data.data_dir
                     common_name: data.common_name
                     geneset: data.geneset
                 }
@@ -75,7 +74,7 @@ workflow YAML_INPUT {

     group
         .synteny
-        .multiMap { data ->
+        .multiMap { data ->
                     synteny_genome: data.synteny_genome_path
                 }
         .set{ synteny_data }
@@ -102,14 +101,19 @@ workflow YAML_INPUT {
                 }
         .set { busco_lineage }

+    assembly_data.sample_id
+        .combine( assembly_data.asmVersion )
+        .map { it1, it2 ->
+            ("${it1}_${it2}") }
+        .set { tolid_version }
+
     emit:
-    assembly_id                      = assembly_data.sample_id
-    assembly_sizeClass               = assembly_data.size_c
+    assembly_id                      = tolid_version
     assembly_classT                  = assembly_data.classT
     assembly_level                   = assembly_data.level
     assembly_asmVer                  = assembly_data.asmVersion
     assembly_dbVer                   = assembly_data.dbVersion
-    assembly_gtype                   = assembly_data.gevalType
+    assembly_ttype                   = assembly_data.gevalType

     pacbio_reads                     = assem_reads.pacbio
     hic_reads                        = assem_reads.hic
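Editor's note: this is a behavioural change, not just a tidy-up — assembly_id emitted by YAML_INPUT is now "<sample_id>_<asmVersion>" rather than the bare sample id, and assembly_sizeClass is removed (LONGREAD_COVERAGE no longer takes it; see the workflow diffs below). A toy sketch of the new combine, with a made-up ToLID:

// tolid_version_demo.nf -- illustrative sketch only; values are invented
workflow {
    sample_id  = Channel.of( 'nxOscDoli1' )     // hypothetical ToLID
    asmVersion = Channel.of( 1 )

    sample_id
        .combine( asmVersion )
        .map { it1, it2 -> ("${it1}_${it2}") }
        .view()                                  // prints: nxOscDoli1_1
}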
diff --git a/tower.yml b/tower.yml
old mode 100644
new mode 100755
diff --git a/workflows/treeval.nf b/workflows/treeval.nf
old mode 100644
new mode 100755
index 6a707e8f..ce3b2137
--- a/workflows/treeval.nf
+++ b/workflows/treeval.nf
@@ -10,7 +10,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
 WorkflowTreeval.initialise(params, log)

 // Check input path parameters to see if they exist
-// param.fasta removed from here
+// params.input is the treeval yaml
 def checkPathParamList = [ params.input ]
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

@@ -59,18 +59,18 @@ workflow TREEVAL {
     //
     // PRE-PIPELINE CHANNEL SETTING - channel setting for required files
     //
-    ch_versions = Channel.empty()
+    ch_versions     = Channel.empty()

-    input_ch = Channel.fromPath(params.input, checkIfExists: true)
+    input_ch        = Channel.fromPath(params.input, checkIfExists: true)

     Channel
         .fromPath( "${projectDir}/assets/gene_alignment/assm_*.as", checkIfExists: true)
-        .map { it ->
+        .map { it ->
             tuple ([ type    :   it.toString().split('/')[-1].split('_')[-1].split('.as')[0] ],
                 file(it)
             )}
         .set { gene_alignment_asfiles }
-
+
     Channel
         .fromPath( "${projectDir}/assets/digest/digest.as", checkIfExists: true )
         .set { digest_asfile }
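Editor's note: the gene_alignment_asfiles mapping above derives each .as file's type key from the substring between the final "_" and ".as" of the filename. Since String.split takes a regex in Groovy, the "." in split('.as') matches any character; that is harmless for these names, but worth knowing. A toy check with invented filenames:

// asfile_key_demo.nf -- illustrative sketch only; paths are hypothetical
def paths = [
    '/assets/gene_alignment/assm_cdna.as',
    '/assets/gene_alignment/assm_pep.as',
]
paths.each { p ->
    // same expression as the channel map above
    def type = p.toString().split('/')[-1].split('_')[-1].split('.as')[0]
    println "${p} -> type: ${type}"     // e.g. assm_cdna.as -> type: cdna
}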
@@ -90,113 +90,127 @@ workflow TREEVAL {
     //
     // SUBWORKFLOW: reads the yaml and pushes out a channel per yaml field
     //
-    YAML_INPUT ( input_ch )
+    YAML_INPUT (
+        input_ch
+    )

     //
     // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file
-    //
-    GENERATE_GENOME ( YAML_INPUT.out.assembly_id,
-                      YAML_INPUT.out.reference
+    //
+    GENERATE_GENOME (
+        YAML_INPUT.out.assembly_id,
+        YAML_INPUT.out.reference
     )
-    ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions)
+    ch_versions = ch_versions.mix( GENERATE_GENOME.out.versions )

     //
     // SUBWORKFLOW: Takes reference, channel of enzymes, my.genome, assembly_id and as file to generate
     //              file with enzymatic digest sites.
     //
-    ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" )
+    ch_enzyme       = Channel.of( "bspq1","bsss1","DLE1" )

-    INSILICO_DIGEST ( YAML_INPUT.out.assembly_id,
-                      GENERATE_GENOME.out.dot_genome,
-                      GENERATE_GENOME.out.reference_tuple,
-                      ch_enzyme,
-                      digest_asfile
+    INSILICO_DIGEST (
+        YAML_INPUT.out.assembly_id,
+        GENERATE_GENOME.out.dot_genome,
+        GENERATE_GENOME.out.reference_tuple,
+        ch_enzyme,
+        digest_asfile
     )
-    ch_versions = ch_versions.mix(INSILICO_DIGEST.out.versions)
+    ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions )

     //
     // SUBWORKFLOW: FOR SPLITTING THE REF GENOME INTO SCAFFOLD CHUNKS AND RUNNING SOME SUBWORKFLOWS
     //              ON THOSE CHUNKS
+    //              THIS WILL BE REQUIRED FOR LARGER GENOMES EST > 6GB
     //
     //              REFERENCE_GENOME_SPLIT --> SELFCOMP
     //                                     --> GENE_ALIGNMENT
     //              BOTH WOULD REQUIRE A POST SUBWORKFLOW MERGE STEP TO MERGE TOGETHER THE SCAFFOLD
     //              BASED ALIGNMENTS/SELFCOMPS INTO A GENOME REPRESENTATIVE ONE.
     //              FOR GENE ALIGNMENT WOULD THIS REQUIRE A .GENOME FILE AND INDEX PER SCAFFOLD?
-
+
     //
     // SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data
     //
-    GENE_ALIGNMENT ( GENERATE_GENOME.out.dot_genome,
-                     GENERATE_GENOME.out.reference_tuple,
-                     GENERATE_GENOME.out.ref_index,
-                     GENERATE_GENOME.out.max_scaff_size,
-                     YAML_INPUT.out.assembly_classT,
-                     YAML_INPUT.out.align_data_dir,
-                     YAML_INPUT.out.align_geneset,
-                     YAML_INPUT.out.align_common,
-                     YAML_INPUT.out.intron_size,
-                     gene_alignment_asfiles
+    GENE_ALIGNMENT (
+        GENERATE_GENOME.out.dot_genome,
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.ref_index,
+        GENERATE_GENOME.out.max_scaff_size,
+        YAML_INPUT.out.assembly_classT,
+        YAML_INPUT.out.align_data_dir,
+        YAML_INPUT.out.align_geneset,
+        YAML_INPUT.out.align_common,
+        YAML_INPUT.out.intron_size,
+        gene_alignment_asfiles
     )
-    ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions)
+    ch_versions = ch_versions.mix( GENE_ALIGNMENT.out.versions )

     //
     // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK
     //
-    REPEAT_DENSITY ( GENERATE_GENOME.out.reference_tuple,
-                     GENERATE_GENOME.out.dot_genome
+    REPEAT_DENSITY (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.dot_genome
     )
-    ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions)
+    ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions)

     //
     // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS
     //
-    GAP_FINDER ( GENERATE_GENOME.out.reference_tuple,
-                 GENERATE_GENOME.out.max_scaff_size
+    GAP_FINDER (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.max_scaff_size
     )
-    ch_versions = ch_versions.mix(GAP_FINDER.out.versions)
+    ch_versions = ch_versions.mix(GAP_FINDER.out.versions)

     //
     // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as
     //              file to generate a file containing sites of self-complementary sequence.
     //
-    SELFCOMP ( GENERATE_GENOME.out.reference_tuple,
-               GENERATE_GENOME.out.dot_genome,
-               YAML_INPUT.out.mummer_chunk,
-               YAML_INPUT.out.motif_len,
-               selfcomp_asfile )
-    ch_versions = ch_versions.mix(SELFCOMP.out.versions)
-
+    SELFCOMP (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.dot_genome,
+        YAML_INPUT.out.mummer_chunk,
+        YAML_INPUT.out.motif_len,
+        selfcomp_asfile
+    )
+    ch_versions = ch_versions.mix(SELFCOMP.out.versions)
+
     //
     // SUBWORKFLOW: Takes reference, the directory of syntenic genomes and order/clade of sequence
     //              and generates a file of syntenic blocks.
     //
-    SYNTENY ( GENERATE_GENOME.out.reference_tuple,
-              YAML_INPUT.out.synteny_path,
-              YAML_INPUT.out.assembly_classT
+    SYNTENY (
+        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.synteny_path,
+        YAML_INPUT.out.assembly_classT
     )
-    ch_versions = ch_versions.mix(SYNTENY.out.versions)
+    ch_versions = ch_versions.mix(SYNTENY.out.versions)

     //
-    // SUBWORKFLOW: Takes reference, pacbio reads
+    // SUBWORKFLOW: Takes reference, pacbio reads
     //
-    LONGREAD_COVERAGE ( GENERATE_GENOME.out.reference_tuple,
-                        GENERATE_GENOME.out.dot_genome,
-                        YAML_INPUT.out.pacbio_reads,
-                        YAML_INPUT.out.assembly_sizeClass
+    LONGREAD_COVERAGE (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.dot_genome,
+        YAML_INPUT.out.pacbio_reads
     )
-    ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions)
+    ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions)

     //
     // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX
     //
-    HIC_MAPPING ( GENERATE_GENOME.out.reference_tuple,
-                  GENERATE_GENOME.out.ref_index,
-                  GENERATE_GENOME.out.dot_genome,
-                  YAML_INPUT.out.hic_reads)
-    ch_versions = ch_versions.mix(HIC_MAPPING.out.versions)
+    HIC_MAPPING (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.ref_index,
+        GENERATE_GENOME.out.dot_genome,
+        YAML_INPUT.out.hic_reads,
+        YAML_INPUT.out.assembly_id
+    )
+    ch_versions = ch_versions.mix(HIC_MAPPING.out.versions)

     //
     // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE
@@ -205,18 +219,19 @@ workflow TREEVAL {
                   GENERATE_GENOME.out.reference_tuple,
                   YAML_INPUT.out.teloseq
     )
-    ch_versions = ch_versions.mix(TELO_FINDER.out.versions)
+    ch_versions = ch_versions.mix(TELO_FINDER.out.versions)

     //
     // SUBWORKFLOW: GENERATE BUSCO ANNOTATION FOR ANCESTRAL UNITS
     //
-    BUSCO_ANNOTATION ( GENERATE_GENOME.out.dot_genome,
-                       GENERATE_GENOME.out.reference_tuple,
-                       YAML_INPUT.out.assembly_classT,
-                       YAML_INPUT.out.lineageinfo,
-                       YAML_INPUT.out.lineagespath,
-                       buscogene_asfile,
-                       ancestral_table
+    BUSCO_ANNOTATION (
+        GENERATE_GENOME.out.dot_genome,
+        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.assembly_classT,
+        YAML_INPUT.out.lineageinfo,
+        YAML_INPUT.out.lineagespath,
+        buscogene_asfile,
+        ancestral_table
     )
     ch_versions = ch_versions.mix(BUSCO_ANNOTATION.out.versions)

@@ -227,9 +242,31 @@ workflow TREEVAL {
         ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )

+    //
+    // LOGIC: GENERATE SOME CHANNELS FOR REPORTING
+    //
+    GENERATE_GENOME.out.reference_tuple
+        .combine( YAML_INPUT.out.assembly_classT )
+        .combine( YAML_INPUT.out.assembly_ttype )
+        .map { meta, reference, lineage, ticket ->
+            tuple(
+                [ id: meta.id,
+                  sz: file(reference).size(),
+                  ln: lineage,
+                  tk: ticket ],
+                reference
+            )
+        }
+        .set { rf_data }
+
+    params.sample_id = YAML_INPUT.out.assembly_id.collect()
+    params.rf_data   = rf_data.collect()                            // reference data tuple( [ id, size, lineage, ticket ], file )
+    params.pb_data   = LONGREAD_COVERAGE.out.ch_reporting.collect() // merged pacbio.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw fasta
+    params.cm_data   = HIC_MAPPING.out.ch_reporting.collect()       // merged cram.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw cram
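Editor's note: the reporting block stashes collected channels on params so that workflow.onComplete, which has no access to channels, can hand them to TreeValProject.summary. A toy view of the tuple shape rf_data carries — every value below is invented, and the constant stands in for file(reference).size():

// rf_data_demo.nf -- illustrative sketch only; all values are made up
workflow {
    Channel.of( [ [ id:'nxOscDoli1_1' ], '/refs/nxOscDoli1_1.fa' ] )
        .combine( Channel.of( 'nematode' ) )        // stand-in for assembly_classT
        .combine( Channel.of( 'grit-ticket-1' ) )   // stand-in for assembly_ttype
        .map { meta, reference, lineage, ticket ->
            tuple( [ id: meta.id, sz: 123456789, ln: lineage, tk: ticket ], reference )
        }
        .view()  // [[id:nxOscDoli1_1, sz:123456789, ln:nematode, tk:grit-ticket-1], /refs/nxOscDoli1_1.fa]
}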
+
     emit:
-    software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml
-    versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions
+    software_ch     = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml
+    versions_ch     = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions
 }

 /*
@@ -242,10 +279,14 @@ workflow.onComplete {
     if (params.email || params.email_on_fail) {
         NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
     }
+
     NfcoreTemplate.summary(workflow, params, log)
     if (params.hook_url) {
         NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
     }
+
+    TreeValProject.summary(workflow, params)
+
 }

 /*
diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf
old mode 100644
new mode 100755
index dfea9bea..b7606031
--- a/workflows/treeval_rapid.nf
+++ b/workflows/treeval_rapid.nf
@@ -48,11 +48,11 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft
 */

 workflow TREEVAL_RAPID {
-
+
     main:
-    ch_versions = Channel.empty()
+    ch_versions     = Channel.empty()

-    input_ch = Channel.fromPath(params.input, checkIfExists: true)
+    input_ch        = Channel.fromPath(params.input, checkIfExists: true)
     //
     // SUBWORKFLOW: reads the yaml and pushes out a channel per yaml field
     //
@@ -60,56 +60,63 @@ workflow TREEVAL_RAPID {

     //
     // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file
-    //
-    GENERATE_GENOME ( YAML_INPUT.out.assembly_id,
-                      YAML_INPUT.out.reference
+    //
+    GENERATE_GENOME (
+        YAML_INPUT.out.assembly_id,
+        YAML_INPUT.out.reference
     )
-    ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions)
+    ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions)

     //
     // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK
     //
-    REPEAT_DENSITY ( GENERATE_GENOME.out.reference_tuple,
-                     GENERATE_GENOME.out.dot_genome
+    REPEAT_DENSITY (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.dot_genome
     )
-    ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions)
+    ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions)

     //
     // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS
     //
-    GAP_FINDER ( GENERATE_GENOME.out.reference_tuple,
-                 GENERATE_GENOME.out.max_scaff_size
+    GAP_FINDER (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.max_scaff_size
    )
-    ch_versions = ch_versions.mix(GAP_FINDER.out.versions)
+    ch_versions = ch_versions.mix(GAP_FINDER.out.versions)

     //
     // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE
     //
-    TELO_FINDER ( GENERATE_GENOME.out.max_scaff_size,
-                  GENERATE_GENOME.out.reference_tuple,
-                  YAML_INPUT.out.teloseq
+    TELO_FINDER (
+        GENERATE_GENOME.out.max_scaff_size,
+        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.teloseq
     )
-    ch_versions = ch_versions.mix(TELO_FINDER.out.versions)
+    ch_versions = ch_versions.mix(TELO_FINDER.out.versions)

     //
     // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX
     //
-    HIC_MAPPING ( GENERATE_GENOME.out.reference_tuple,
-                  GENERATE_GENOME.out.ref_index,
-                  GENERATE_GENOME.out.dot_genome,
-                  YAML_INPUT.out.hic_reads)
-    ch_versions = ch_versions.mix(HIC_MAPPING.out.versions)
+    HIC_MAPPING (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.ref_index,
+        GENERATE_GENOME.out.dot_genome,
+        YAML_INPUT.out.hic_reads,
+        YAML_INPUT.out.assembly_id
+    )
+    ch_versions = ch_versions.mix(HIC_MAPPING.out.versions)

     //
-    // SUBWORKFLOW: Takes reference, pacbio reads
+    // SUBWORKFLOW: Takes reference, pacbio reads
     //
-    LONGREAD_COVERAGE ( GENERATE_GENOME.out.reference_tuple,
-                        GENERATE_GENOME.out.dot_genome,
-                        YAML_INPUT.out.pacbio_reads,
-                        YAML_INPUT.out.assembly_sizeClass
+    LONGREAD_COVERAGE (
+        GENERATE_GENOME.out.reference_tuple,
+        GENERATE_GENOME.out.dot_genome,
+        YAML_INPUT.out.pacbio_reads
     )
-    ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions)
-
+    ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions)
+
     //
     // SUBWORKFLOW: Collates version data from prior subworkflows
     //
@@ -117,9 +124,31 @@ workflow TREEVAL_RAPID {
         ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )

+    //
+    // LOGIC: GENERATE SOME CHANNELS FOR REPORTING
+    //
+    GENERATE_GENOME.out.reference_tuple
+        .combine( YAML_INPUT.out.assembly_classT )
+        .combine( YAML_INPUT.out.assembly_ttype )
+        .map { meta, reference, lineage, ticket ->
+            tuple(
+                [ id: meta.id,
+                  sz: file(reference).size(),
+                  ln: lineage,
+                  tk: ticket ],
+                reference
+            )
+        }
+        .set { rf_data }
+
+    params.sample_id = YAML_INPUT.out.assembly_id.collect()
+    params.rf_data   = rf_data.collect()                            // reference data tuple( [ id, size, lineage, ticket ], file )
+    params.pb_data   = LONGREAD_COVERAGE.out.ch_reporting.collect() // merged pacbio.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw fasta
+    params.cm_data   = HIC_MAPPING.out.ch_reporting.collect()       // merged cram.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw cram
+
     emit:
-    software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml
-    versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions
+    software_ch     = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml
+    versions_ch     = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions
 }

 /*
@@ -133,6 +162,9 @@ workflow.onComplete {
         NfcoreTemplate.email(workflow, params, summary_params, projectDir, log)
     }
     NfcoreTemplate.summary(workflow, params, log)
+
+    TreeValProject.summary(workflow, params)
+
 }

 /*
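Editor's note: TreeValProject.summary(workflow, params) is called from both onComplete handlers, but the class itself is outside this diff (it would live under lib/). Purely to orient readers, a minimal sketch of a Groovy class that would satisfy these call sites — everything below is assumed, not taken from the patch, and the real implementation will differ:

// lib/TreeValProject.groovy -- hypothetical sketch only; not part of this patch
class TreeValProject {
    public static void summary( workflow, params ) {
        // Reads the reporting values the workflows stashed on params above.
        def fields = [
            sample_id : params.sample_id,
            rf_data   : params.rf_data,   // tuple( [ id, size, lineage, ticket ], file )
            pb_data   : params.pb_data,   // tuple( [ id, size ], file )
            cm_data   : params.cm_data,   // tuple( [ id, size ], file )
            duration  : workflow.duration,
            success   : workflow.success,
        ]
        // A real implementation would format and persist this, e.g. into params.outdir.
        fields.each { k, v -> println "TreeValProject.summary | ${k}: ${v}" }
    }
}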