Skip to content

Commit

Permalink
modify custom/filterdifferentialtable to allow >=/<= (#7258)
Browse files Browse the repository at this point in the history
* modify custom/filterdifferentialtable to allow custom setting of >=/<= comparisons through ext.args

* update tests and snapshots

* simplified the cardinality declaration

* modified the config for propd_basic.config

* modify meta.yml

* modify test for custom/filterdifferentialtable

* Update meta.yml

* modified padj_* into stat_* in custom/filterdifferentialtable

* update abundance_differential_filter with stat_* instead of padj_*
  • Loading branch information
suzannejin authored Jan 10, 2025
1 parent f349d93 commit 045f0fb
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 74 deletions.
29 changes: 20 additions & 9 deletions modules/nf-core/custom/filterdifferentialtable/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@ process CUSTOM_FILTERDIFFERENTIALTABLE {

input:
tuple val(meta), path(input_file)
val(logFC_column)
val(FC_threshold)
val(padj_column)
val(padj_threshold)
tuple val(logfc_column), val(fc_threshold), val(fc_cardinality)
tuple val(stat_column), val(stat_threshold), val(stat_cardinality)

output:
tuple val(meta), path("*_filtered.tsv"), emit: filtered
Expand Down Expand Up @@ -41,14 +39,27 @@ process CUSTOM_FILTERDIFFERENTIALTABLE {
table = pd.read_csv("${input_file}", sep=sep)
# Calculate log2 fold change threshold
logFC_threshold = log2(float("${FC_threshold}"))
logfc_threshold = log2(float("${fc_threshold}"))
# define evaluation
def evaluate_condition(x, threshold, cardinality):
    """Compare ``x`` against ``threshold`` using the operator named by ``cardinality``.

    Parameters:
        x: Value to test (left-hand side of the comparison).
        threshold: Value to compare against (right-hand side).
        cardinality: Comparison operator as a string; one of
            ">=", "<=", ">" or "<".

    Returns:
        The result of ``x <op> threshold``.

    Raises:
        ValueError: If ``cardinality`` is not one of the supported operators.
    """
    # Imported locally so the helper does not depend on the script's
    # top-level import block.
    import operator

    # Lookup table from the operator's textual form to its implementation.
    comparators = {
        ">=": operator.ge,
        "<=": operator.le,
        ">": operator.gt,
        "<": operator.lt,
    }
    if cardinality not in comparators:
        raise ValueError(f"Invalid cardinality: {cardinality}")
    return comparators[cardinality](x, threshold)
# Apply filters
mask = (
table["${logFC_column}"].notna() &
table["${padj_column}"].notna() &
(table["${logFC_column}"].abs() >= logFC_threshold) &
(table["${padj_column}"] <= float("${padj_threshold}"))
table["${logfc_column}"].notna() &
table["${stat_column}"].notna() &
table["${logfc_column}"].abs().apply(lambda x: evaluate_condition(x, logfc_threshold, "${fc_cardinality}")) &
table["${stat_column}"].apply(lambda x: evaluate_condition(x, float("${stat_threshold}"), "${stat_cardinality}"))
)
filtered_table = table[mask]
Expand Down
25 changes: 19 additions & 6 deletions modules/nf-core/custom/filterdifferentialtable/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ keywords:
- filter
- differential expression
- logFC
- significance statistic
- p-value
tools:
- "pandas":
Expand All @@ -26,18 +27,30 @@ input:
type: file
description: Input differential expression table (CSV, TSV, or TXT format)
pattern: "*.{csv,tsv,txt}"
- - logFC_column:
- - logfc_column:
type: string
description: Name of the column containing log fold change values
- - FC_threshold:
- fc_threshold:
type: float
description: Fold change threshold for filtering
- - padj_column:
- fc_cardinality:
type: string
description: Name of the column containing adjusted p-values
- - padj_threshold:
description: |
Operator to compare the fold change values with the threshold.
Valid values are: ">=", "<=", ">", "<".
- - stat_column:
type: string
description: |
Name of the column containing the significance statistic values
(e.g. adjusted p-values).
- stat_threshold:
type: float
description: Adjusted p-value threshold for filtering
description: Statistic threshold for filtering
- stat_cardinality:
type: string
description: |
Operator to compare the column values with the threshold.
Valid values are: ">=", "<=", ">", "<".
output:
- filtered:
- meta:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@ nextflow_process {
process {
"""
input[0] = [ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.deseq2.results.tsv", checkIfExists: true) ]
input[1] = 'log2FoldChange'
input[2] = 2
input[3] = 'padj'
input[4] = 0.05
input[1] = Channel.of(['log2FoldChange', 2, '>='])
input[2] = Channel.of(['padj', 0.05, '<='])
"""
}
}
Expand Down
41 changes: 27 additions & 14 deletions subworkflows/nf-core/abundance_differential_filter/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def mergeMaps(meta, meta2){
workflow ABUNDANCE_DIFFERENTIAL_FILTER {
take:
// Things we may need to iterate
ch_input // [[meta_input], counts, analysis method, fc_threshold, padj_threshold]
ch_input // [[meta_input], counts, analysis method, fc_threshold, stat_threshold]

// Workflow-wide things, we don't need to iterate
ch_samplesheet // [ meta_exp, samplesheet ]
Expand All @@ -30,7 +30,7 @@ workflow ABUNDANCE_DIFFERENTIAL_FILTER {
main:

// Set up how the channels crossed below will be used to generate channels for processing
def criteria = multiMapCriteria { meta_input, abundance, analysis_method, fc_threshold, padj_threshold, meta_exp, samplesheet, meta_contrasts, variable, reference, target ->
def criteria = multiMapCriteria { meta_input, abundance, analysis_method, fc_threshold, stat_threshold, meta_exp, samplesheet, meta_contrasts, variable, reference, target ->
samples_and_matrix:
meta_map = meta_input + [ 'method': analysis_method ]
[meta_map, samplesheet, abundance]
Expand All @@ -39,7 +39,7 @@ workflow ABUNDANCE_DIFFERENTIAL_FILTER {
[ meta_map, variable, reference, target ]
filter_params:
meta_map = mergeMaps(meta_contrasts, meta_input) + [ 'method': analysis_method ]
[meta_map, [ 'fc_threshold': fc_threshold, 'padj_threshold': padj_threshold ]]
[meta_map, [ 'fc_threshold': fc_threshold, 'stat_threshold': stat_threshold ]]
}

// For DIFFERENTIAL modules we need to cross the things we're iterating so we
Expand Down Expand Up @@ -142,24 +142,37 @@ workflow ABUNDANCE_DIFFERENTIAL_FILTER {
.join(inputs.filter_params)
.multiMap { meta, results, filter_meta ->
def method_params = [
'deseq2': [fc_column: 'log2FoldChange', padj_column: 'padj'],
'limma' : [fc_column: 'logFC', padj_column: 'adj.P.Val'],
'propd' : [fc_column: 'lfc', padj_column: 'weighted_connectivity']
'deseq2': [
fc_column: 'log2FoldChange', fc_cardinality: '>=',
stat_column: 'padj', stat_cardinality: '<='
],
'limma' : [
fc_column: 'logFC', fc_cardinality: '>=',
stat_column: 'adj.P.Val', stat_cardinality: '<='
],
'propd' : [
fc_column: 'lfc', fc_cardinality: '>=',
stat_column: 'weighted_connectivity', stat_cardinality: '>='
]
]
filter_input: [meta + filter_meta, results]
fc_column: method_params[meta.method].fc_column
padj_column: method_params[meta.method].padj_column
fc_threshold: filter_meta.fc_threshold
padj_threshold: filter_meta.padj_threshold
fc_input: [
method_params[meta.method].fc_column,
filter_meta.fc_threshold,
method_params[meta.method].fc_cardinality
]
stat_input: [
method_params[meta.method].stat_column,
filter_meta.stat_threshold,
method_params[meta.method].stat_cardinality
]
}

// Filter differential results
CUSTOM_FILTERDIFFERENTIALTABLE(
ch_diff_filter_params.filter_input,
ch_diff_filter_params.fc_column,
ch_diff_filter_params.fc_threshold,
ch_diff_filter_params.padj_column,
ch_diff_filter_params.padj_threshold
ch_diff_filter_params.fc_input,
ch_diff_filter_params.stat_input
)

emit:
Expand Down
9 changes: 6 additions & 3 deletions subworkflows/nf-core/abundance_differential_filter/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@ input:
description: Count matrix file
- analysis_method:
type: value
description: Analysis method (deseq2 or limma)
description: Analysis method (deseq2, limma, or propd)
- fc_threshold:
type: value
description: Fold change threshold for filtering
- padj_threshold:
- stat_threshold:
type: value
description: Adjusted p-value threshold for filtering
description: |
Threshold for filtering the significance statistics
(e.g. adjusted p-values in the case of deseq2 or limma,
weighted connectivity in the case of propd)
- ch_samplesheet:
description: Channel with sample information
structure:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ nextflow_workflow {

test("deseq2 - mouse - basic") {
config './deseq2_basic.config'
tag "deseq2"

when {
workflow {
Expand Down Expand Up @@ -45,7 +46,7 @@ nextflow_workflow {
file(testData.expression_test_data_dir + testData.abundance_file),
'deseq2', // analysis method
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
])
input[0] = ch_input
Expand Down Expand Up @@ -73,6 +74,7 @@ nextflow_workflow {

test("limma - basic - microarray") {
config './limma_basic_microarray.config'
tag "limma"

setup {
run("UNTAR") {
Expand Down Expand Up @@ -128,7 +130,7 @@ nextflow_workflow {
file,
'limma',
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
]}
input[0] = ch_input
Expand All @@ -155,6 +157,7 @@ nextflow_workflow {

test("limma - voom") {
config './limma_voom.config'
tag "limma_voom"

when {
workflow {
Expand Down Expand Up @@ -184,7 +187,7 @@ nextflow_workflow {
file(testData.expression_test_data_dir + testData.abundance_file),
'limma',
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
])
input[0] = ch_input
Expand All @@ -211,6 +214,7 @@ nextflow_workflow {

test("deseq2 - with transcript lengths") {
config './deseq2_basic.config'
tag "deseq2_with_lengths"

when {
workflow {
Expand Down Expand Up @@ -244,7 +248,7 @@ nextflow_workflow {
file(testData.expression_test_data_dir + testData.abundance_file),
'deseq2',
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
])
input[0] = ch_input
Expand Down Expand Up @@ -302,7 +306,7 @@ nextflow_workflow {
file(testData.expression_test_data_dir + testData.abundance_file),
'propd', // analysis method
1.5, // FC threshold
50 // weighted connectivity threshold
100 // stat (weighted connectivity) threshold
])
input[0] = ch_input
Expand All @@ -328,6 +332,7 @@ nextflow_workflow {

test("deseq2 and limma - mouse - basic") {
config './deseq2_limmavoom_basic.config'
tag "deseq2+limmavoom"

when {
workflow {
Expand Down Expand Up @@ -358,14 +363,14 @@ nextflow_workflow {
file(testData.expression_test_data_dir + testData.abundance_file),
'limma',
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
],
[
[ id:'test' ],
file(testData.expression_test_data_dir + testData.abundance_file),
'deseq2',
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
]
)
Expand Down Expand Up @@ -425,21 +430,21 @@ nextflow_workflow {
file(testData.expression_test_data_dir + testData.abundance_file),
'limma',
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
],
[
[ id:'test' ],
file(testData.expression_test_data_dir + testData.abundance_file),
'deseq2',
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
],
[
[ id:'test' ],
file(testData.expression_test_data_dir + testData.abundance_file),
'propd',
1.5, // FC threshold
50 // weighted connectivity threshold
100 // stat (weighted connectivity) threshold
]
)
Expand Down Expand Up @@ -469,6 +474,7 @@ nextflow_workflow {

test("stub") {
config './deseq2_basic.config'
tag "stub"

options "-stub"

Expand Down Expand Up @@ -500,7 +506,7 @@ nextflow_workflow {
file(testData.expression_test_data_dir + testData.abundance_file),
'deseq2',
1.5, // FC threshold
0.05 // padj threshold
0.05 // stat (adjusted p-value) threshold
])
input[0] = ch_input
Expand Down
Loading

0 comments on commit 045f0fb

Please sign in to comment.