Skip to content

Commit

Permalink
Merge pull request #102 from tkchafin/dev
Browse files Browse the repository at this point in the history
merge bwamem_index and remove unneeded multiqc options
  • Loading branch information
tkchafin authored Aug 12, 2024
2 parents c07a1dd + 4e220da commit eb95288
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 93 deletions.
2 changes: 2 additions & 0 deletions .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ nf_core_version: 2.14.1
repository_type: pipeline
template:
name: readmapping
prefix: sanger-tol
lint:
files_exist:
- assets/multiqc_config.yml
- assets/nf-core-readmapping_logo_light.png
- assets/methods_description_template.yml
- conf/igenomes.config
- docs/images/nf-core-readmapping_logo_dark.png
- docs/images/nf-core-readmapping_logo_light.png
Expand Down
29 changes: 0 additions & 29 deletions assets/methods_description_template.yml

This file was deleted.

3 changes: 1 addition & 2 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
"meta": ["datatype"]
},
"datafile": {
"format": "file-path",
"exists": true,
"format": "string",
"pattern": "^\\S+$",
"errorMessage": "Data file for reads cannot contain spaces and must have extension 'cram', 'bam', '.fq.gz' or '.fastq.gz'",
"meta": ["datafile"]
Expand Down
10 changes: 2 additions & 8 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,8 @@ params {
help = false
version = false
validate_params = true
schema_ignore_params = 'genomes'
schema_ignore_params = ''

// MultiQC options
multiqc_config = null
multiqc_title = null
multiqc_logo = null
max_multiqc_email_size = '25.MB'
multiqc_methods_description = null

// Config options
config_profile_name = null
Expand All @@ -58,7 +52,7 @@ params {
// Schema validation default options
validationFailUnrecognisedParams = false
validationLenientMode = false
validationSchemaIgnoreParams = 'genomes,igenomes_base'
validationSchemaIgnoreParams = ''
validationShowHiddenParams = false
validate_params = true

Expand Down
33 changes: 0 additions & 33 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@
"fa_icon": "fas fa-envelope",
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
},
"multiqc_title": {
"type": "string",
"description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
"fa_icon": "fas fa-file-signature"
}
}
},
Expand Down Expand Up @@ -219,14 +214,6 @@
"fa_icon": "fas fa-remove-format",
"hidden": true
},
"max_multiqc_email_size": {
"type": "string",
"description": "File size limit when attaching MultiQC reports to summary emails.",
"pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
"default": "25.MB",
"fa_icon": "fas fa-file-upload",
"hidden": true
},
"monochrome_logs": {
"type": "boolean",
"description": "Do not use coloured log outputs.",
Expand All @@ -244,25 +231,6 @@
"help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
"hidden": true
},
"multiqc_config": {
"type": "string",
"format": "file-path",
"description": "Custom config file to supply to MultiQC.",
"fa_icon": "fas fa-cog",
"hidden": true
},
"multiqc_logo": {
"type": "string",
"description": "Directory to keep pipeline Nextflow logs and reports.",
"fa_icon": "fas fa-cogs",
"hidden": true
},
"multiqc_methods_description": {
"type": "string",
"description": "Custom MultiQC yaml file containing HTML including a methods description.",
"fa_icon": "fas fa-cog",
"hidden": true
},
"validate_params": {
"type": "boolean",
"description": "Boolean whether to validate parameters against the schema at runtime",
Expand Down Expand Up @@ -293,7 +261,6 @@
},
"schema_ignore_params": {
"type": "string",
"default": "genomes",
"hidden": true
}
}
Expand Down
61 changes: 46 additions & 15 deletions subworkflows/local/prepare_genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ workflow PREPARE_GENOME {
take:
fasta // channel: [ meta, /path/to/fasta ]


main:
ch_versions = Channel.empty()

Expand All @@ -33,24 +32,27 @@ workflow PREPARE_GENOME {
UNMASK ( ch_fasta )
ch_versions = ch_versions.mix ( UNMASK.out.versions.first() )


// Generate BWA index
if ( params.bwamem2_index ) {
Channel.fromPath ( params.bwamem2_index )
| combine ( ch_fasta )
| map { bwa, meta, fa -> [ meta, bwa ] }
| set { ch_bwamem }

if ( params.bwamem2_index.endsWith('.tar.gz') ) {
ch_bwamem2_index = UNTAR ( ch_bwamem ).untar
ch_versions = ch_versions.mix ( UNTAR.out.versions.first() )
if ( checkShortReads( params.input ) ) {
if ( params.bwamem2_index ) {
Channel.fromPath ( params.bwamem2_index )
| combine ( ch_fasta )
| map { bwa, meta, fa -> [ meta, bwa ] }
| set { ch_bwamem }

if ( params.bwamem2_index.endsWith('.tar.gz') ) {
ch_bwamem2_index = UNTAR ( ch_bwamem ).untar
ch_versions = ch_versions.mix ( UNTAR.out.versions.first() )
} else {
ch_bwamem2_index = ch_bwamem
}

} else {
ch_bwamem2_index = ch_bwamem
ch_bwamem2_index = BWAMEM2_INDEX ( UNMASK.out.fasta ).index
ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions.first() )
}

} else {
ch_bwamem2_index = BWAMEM2_INDEX ( UNMASK.out.fasta ).index
ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions.first() )
ch_bwamem2_index = Channel.empty()
}


Expand All @@ -59,3 +61,32 @@ workflow PREPARE_GENOME {
bwaidx = ch_bwamem2_index.first() // channel: [ meta, /path/to/bwamem2/index_dir/ ]
versions = ch_versions // channel: [ versions.yml ]
}

//
// Check for short reads in the samplesheet
//
/**
 * Report whether the samplesheet contains at least one short-read row.
 *
 * Reads the CSV at `filePath`, locates `columnToCheck` in the header row and
 * returns true when any data row holds 'illumina' or 'hic' (compared
 * case-insensitively, ignoring surrounding whitespace) in that column.
 *
 * @param filePath       path to the samplesheet CSV; opened with java.io.File
 * @param columnToCheck  header name of the column to inspect (default "datatype")
 * @return true if any data row matches; false otherwise, including for a
 *         samplesheet that has a header but no data rows
 *
 * Calls error() when the file has no lines at all or when the requested
 * column is absent from the header.
 */
def checkShortReads(filePath, columnToCheck="datatype") {
    // Data types that count as short reads
    def valuesToCheck = ['illumina', 'hic']

    // Read the CSV file
    // NOTE(review): java.io.File only resolves local paths — confirm whether
    // remote samplesheets (e.g. URLs) need to be supported here.
    def csvLines = new File(filePath).readLines()

    // An empty file has no header; fail with a clear message instead of a
    // NullPointerException on csvLines[0]
    if (csvLines.isEmpty()) {
        error("Samplesheet '${filePath}' is empty.")
    }

    // Extract the header and find the index of the column.
    // Limit -1 keeps trailing empty fields; trim tolerates "a, b" style headers.
    def header = csvLines[0].split(',', -1).collect { it.trim() }
    def columnIndex = header.findIndexOf { it == columnToCheck }

    // Check if the column index was found
    if (columnIndex == -1) {
        error("Column '${columnToCheck}' not found in the CSV header.")
    }

    // drop(1) instead of [1..-1]: the range form throws on a header-only file,
    // whereas drop(1) simply yields an empty list.
    return csvLines.drop(1).any { line ->
        def columns = line.split(',', -1)
        // Blank or short rows carry no value for this column; skip them
        // rather than indexing past the end of the array.
        columnIndex < columns.length &&
            valuesToCheck.contains(columns[columnIndex].trim().toLowerCase())
    }
}

13 changes: 11 additions & 2 deletions subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,20 @@ def validateInputParameters() {
def validateInputSamplesheet(channel) {
def seen = [:].withDefault { 0 }
def uniquePairs = new HashSet()
def validFormats = [".fq.gz", ".fastq.gz", ".cram", ".bam"]

// Use map to process each item in the channel
return channel.map { sample ->
def (meta, file) = sample
def pair = [meta.sample, meta.datafile].toString()

// Replace spaces with underscores in sample names
meta.sample = meta.sample.replace(" ", "_")

// Validate that the file path is non-empty and has a valid format
if (!file || !validFormats.any { file.toString().endsWith(it) }) {
error("Data file is required and must have a valid extension: ${file}")
}

def pair = [meta.sample, file.toString()].toString()

if (!uniquePairs.add(pair)) {
error("The pair of sample name and read file must be unique: ${pair}")
Expand Down
4 changes: 0 additions & 4 deletions workflows/readmapping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,6 @@ workflow READMAPPING {

ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions )


//
// SUBWORKFLOW: Uncompress and prepare reference genome files
//
ch_fasta
| map { [ [ id: it.baseName ], it ] }
| set { ch_genome }
Expand Down

0 comments on commit eb95288

Please sign in to comment.