From c5528c23684fd558c6d97a18c810054f1401a331 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 16 Jun 2024 12:58:34 +0200 Subject: [PATCH 1/2] Start singleR implementation --- conf/test_full.config | 3 +- .../local/celltypes/singler/environment.yml | 10 ++++ modules/local/celltypes/singler/main.nf | 25 +++++++++ .../celltypes/singler/templates/singleR.py | 55 +++++++++++++++++++ nextflow.config | 1 + nextflow_schema.json | 7 +++ subworkflows/local/celltype_assignment.nf | 9 +++ 7 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 modules/local/celltypes/singler/environment.yml create mode 100644 modules/local/celltypes/singler/main.nf create mode 100644 modules/local/celltypes/singler/templates/singleR.py diff --git a/conf/test_full.config b/conf/test_full.config index c25988a..6b246e1 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -21,7 +21,8 @@ params { // Input data for full size test input = params.pipelines_testdata_base_path + 'scdownstream/samplesheet.csv' - integration_methods = 'scvi,harmony,bbknn,combat,seurat' + integration_methods = 'scvi,harmony,bbknn,combat' doublet_detection = 'scrublet,doubletdetection,scds' celltypist_model = 'Adult_Human_Skin' + celldex_reference = 'hpca' } diff --git a/modules/local/celltypes/singler/environment.yml b/modules/local/celltypes/singler/environment.yml new file mode 100644 index 0000000..b1ae95b --- /dev/null +++ b/modules/local/celltypes/singler/environment.yml @@ -0,0 +1,10 @@ +name: celltypes_singler +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::anndata2ri=1.3.1 + - bioconda::bioconductor-celldex=1.12.0 + - bioconda::bioconductor-singlecellexperiment=1.24.0 + - bioconda::bioconductor-singler=2.4.0 + - conda-forge::anndata=0.10.7 diff --git a/modules/local/celltypes/singler/main.nf b/modules/local/celltypes/singler/main.nf new file mode 100644 index 0000000..0f6262b --- /dev/null +++ b/modules/local/celltypes/singler/main.nf @@ -0,0 +1,25 @@ +process CELLTYPES_SINGLER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/anndata2ri_bioconductor-celldex_bioconductor-singlecellexperiment_bioconductor-singler_anndata:d0dfcaede2417581': + 'community.wave.seqera.io/library/anndata2ri_bioconductor-celldex_bioconductor-singlecellexperiment_bioconductor-singler_anndata:d6a21ee363999d21' }" + + input: + tuple val(meta), path(h5ad) + val(reference) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path("*.pkl") , emit: obs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + template 'singleR.py' +} diff --git a/modules/local/celltypes/singler/templates/singleR.py b/modules/local/celltypes/singler/templates/singleR.py new file mode 100644 index 0000000..308bd25 --- /dev/null +++ b/modules/local/celltypes/singler/templates/singleR.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + +import anndata as ad +import anndata2ri +import rpy2 +import rpy2.robjects as ro +import platform +celldex = ro.packages.importr('celldex') +singler = ro.packages.importr('SingleR') + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +adata = ad.read_h5ad("${h5ad}") +sce = anndata2ri.py2rpy(adata) + +get_counts = ro.r("function(sce) { assay(sce, 'X') }") +reference = celldex.fetchReference("${reference}") +predictions = singler.singleR(get_counts(sce), reference) + +# TODO: Save the predictions + +adata.write_h5ad("${prefix}.h5ad") + +# Versions + +versions = { + "${task.process}": { + "python": platform.python_version(), + "anndata": ad.__version__, + "anndata2ri": anndata2ri.__version__, + "rpy2": rpy2.__version__, + "celldex": celldex.__version__, + "singler": singler.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/nextflow.config b/nextflow.config index 35d5c41..75d07a7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,7 @@ params { integration_methods = 'scvi' clustering_resolutions = '0.5,1.0' celltypist_model = '' + celldex_reference = '' // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index b582262..2ee407a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -86,6 +86,13 @@ "description": "Specify the models to use for the celltypist cell type annotation", "help_text": "If you want to use multiple models, separate them with a comma. Available models can be found [here](https://www.celltypist.org/models).", "pattern": "^([a-zA-Z0-9_]*(,[a-zA-Z0-9_]*)*)?$" + }, + "celldex_reference": { + "type": "string", + "default": "human", + "description": "Specify the reference to use for the singleR cell type annotation", + "help_text": "Existing references can be found using the surveyReferences function in the celldex package.", + "pattern": "^([a-zA-Z0-9_]*(,[a-zA-Z0-9_]*)*)?$" } } }, diff --git a/subworkflows/local/celltype_assignment.nf b/subworkflows/local/celltype_assignment.nf index b54df32..c3a6852 100644 --- a/subworkflows/local/celltype_assignment.nf +++ b/subworkflows/local/celltype_assignment.nf @@ -1,4 +1,5 @@ include { CELLTYPES_CELLTYPIST } from '../../modules/local/celltypes/celltypist' +include { CELLTYPES_SINGLER } from '../../modules/local/celltypes/singler' workflow CELLTYPE_ASSIGNMENT { take: @@ -16,6 +17,14 @@ workflow CELLTYPE_ASSIGNMENT { ch_versions = ch_versions.mix(CELLTYPES_CELLTYPIST.out.versions) } + if (params.celldex_reference) { + celldex_references = Channel.from(params.celldex_reference.split(',')) + + CELLTYPES_SINGLER(ch_h5ad, celldex_references) + ch_obs = ch_obs.mix(CELLTYPES_SINGLER.out.obs) + ch_versions = ch_versions.mix(CELLTYPES_SINGLER.out.versions) + } + emit: obs = ch_obs From 1419fa75051304de41682cfb5b1ff260f31d0422 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 16 Jun 2024 13:04:31 +0200 Subject: [PATCH 2/2] Remove wrong default celldex_reference --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2ee407a..ceb6fa5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -89,7 +89,7 @@ }, "celldex_reference": { "type": "string", - "default": "human", + "default": "", "description": "Specify the reference to use for the singleR cell type annotation", "help_text": "Existing references can be found using the surveyReferences function in the celldex package.", "pattern": "^([a-zA-Z0-9_]*(,[a-zA-Z0-9_]*)*)?$"