diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index 001e2ccd..d5751084 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -26,14 +26,16 @@ process STAR_ALIGN { val other_10x_parameters output: - tuple val(meta), path('*d.out.bam') , emit: bam - tuple val(meta), path('*.Solo.out') , emit: counts - tuple val(meta), path ("*.Solo.out/Gene*/raw") , emit: raw_counts - tuple val(meta), path ("*.Solo.out/Gene*/filtered"), emit: filtered_counts - tuple val(meta), path('*Log.final.out') , emit: log_final - tuple val(meta), path('*Log.out') , emit: log_out - tuple val(meta), path('*Log.progress.out') , emit: log_progress - path "versions.yml" , emit: versions + tuple val(meta), path('*d.out.bam') , emit: bam + tuple val(meta), path('*.Solo.out') , emit: counts + tuple val(meta), path ("*.Solo.out/Gene*/raw") , emit: raw_counts + tuple val(meta), path ("*.Solo.out/Gene*/filtered") , emit: filtered_counts + tuple val(meta), path ("*.Solo.out/Velocyto/velocyto_raw") , emit: raw_velocyto, optional:true + tuple val(meta), path ("*.Solo.out/Velocyto/velocyto_filtered"), emit: filtered_velocyto, optional:true + tuple val(meta), path('*Log.final.out') , emit: log_final + tuple val(meta), path('*Log.out') , emit: log_out + tuple val(meta), path('*Log.progress.out') , emit: log_progress + path "versions.yml" , emit: versions tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript @@ -96,6 +98,11 @@ process STAR_ALIGN { find ${prefix}.Solo.out \\( -name "*.tsv" -o -name "*.mtx" \\) -exec gzip {} \\; fi + if [ -d ${prefix}.Solo.out/Velocyto ]; then + mv ${prefix}.Solo.out/Velocyto/raw ${prefix}.Solo.out/Velocyto/velocyto_raw + mv ${prefix}.Solo.out/Velocyto/filtered ${prefix}.Solo.out/Velocyto/velocyto_filtered + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": star: \$(STAR --version | sed -e "s/STAR_//g") diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py index d90a7d50..0ce9e650 100755 --- a/modules/local/templates/mtx_to_h5ad_star.py +++ b/modules/local/templates/mtx_to_h5ad_star.py @@ -9,8 +9,9 @@ import pandas as pd import argparse import anndata -from anndata import AnnData +from anndata import AnnData, concat import platform +from scipy.sparse import csr_matrix def _mtx_to_adata( input: str, @@ -18,9 +19,31 @@ def _mtx_to_adata( ): adata = sc.read_10x_mtx(input) adata.obs["sample"] = sample + adata.layers["count"] = adata.X.copy() - return adata + velocyto_dir = f"velocyto_{input}" + if os.path.exists(velocyto_dir): + barcodes = os.path.join(velocyto_dir, "barcodes.tsv.gz") + features = os.path.join(velocyto_dir, "features.tsv.gz") + + for matrix in ["ambiguous", "spliced", "unspliced"]: + adata_state = sc.read_mtx(os.path.join(velocyto_dir, f"{matrix}.mtx.gz")).T + + adata_state.obs_names = pd.read_csv(barcodes, header=None, sep="\\t")[0].values + adata_state.var_names = pd.read_csv(features, header=None, sep="\\t")[0].values + missing_obs = adata.obs_names[~adata.obs_names.isin(adata_state.obs_names)] + adata_missing = AnnData( + X=csr_matrix((len(missing_obs), adata.shape[1])), + obs=pd.DataFrame(index=missing_obs), + var=adata_state.var + ) + adata_state = concat([adata_state, adata_missing], join="outer") + adata_state = adata_state[adata.obs_names, adata.var["gene_ids"]].copy() + + adata.layers[matrix] = adata_state.X + + return adata def format_yaml_like(data: dict, indent: int = 0) -> str: """Formats a dictionary to a YAML-like string. diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf index aadda6b6..d4307569 100644 --- a/subworkflows/local/starsolo.nf +++ b/subworkflows/local/starsolo.nf @@ -54,12 +54,26 @@ workflow STARSOLO { ) ch_versions = ch_versions.mix(STAR_ALIGN.out.versions) + raw_counts = STAR_ALIGN.out.raw_counts + .join(STAR_ALIGN.out.raw_velocyto, remainder: true) + .map{ meta, count, velocity -> { + return [meta + [input_type: 'raw'], velocity ? [count, velocity] : [count]] + } + } + + filtered_counts = STAR_ALIGN.out.filtered_counts + .join(STAR_ALIGN.out.filtered_velocyto, remainder: true) + .map{ meta, count, velocity -> { + return [meta + [input_type: 'filtered'], velocity ? [count, velocity] : [count]] + } + } + emit: ch_versions // get rid of meta for star index star_result = STAR_ALIGN.out.tab star_counts = STAR_ALIGN.out.counts - raw_counts = STAR_ALIGN.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] } - filtered_counts = STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } + raw_counts = raw_counts + filtered_counts = filtered_counts for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it } } diff --git a/tests/main_pipeline_star.nf.test.snap b/tests/main_pipeline_star.nf.test.snap index e13a570a..7c9d8c8e 100644 --- a/tests/main_pipeline_star.nf.test.snap +++ b/tests/main_pipeline_star.nf.test.snap @@ -11,14 +11,14 @@ "matrix.mtx.gz:md5,0ae080bd0002e350531a5816e159345e", "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", "barcodes.tsv.gz:md5,9b695b0b91bcb146ec9c4688ca10a690", - "Sample_X_raw_matrix.seurat.rds:md5,51a863d56f6d4c9df7161d574fecfd33", - "Sample_X_raw_matrix.sce.rds:md5,2135e075bfb5043b78841de9bd261a3c", - "Sample_Y_raw_matrix.seurat.rds:md5,f177854d779169f6f0e1c628f154a656", - "Sample_Y_raw_matrix.sce.rds:md5,86fc59316bc9083c510aa0d3a0a24ffb", - "Sample_X_filtered_matrix.seurat.rds:md5,a035c9ead72baa36f2ef298dc02d5e1b", - "Sample_X_filtered_matrix.sce.rds:md5,8fd8b3a602a00578e5a72f1fd6792e05", - "Sample_Y_filtered_matrix.seurat.rds:md5,42823b941a1c375473f454980eafbf0b", - "Sample_Y_filtered_matrix.sce.rds:md5,254fd8a73aa0e0ca4bee57855c9cde30" + "Sample_X_raw_matrix.seurat.rds:md5,34fd07749de1f1ee49e885782cd7c6c5", + "Sample_X_raw_matrix.sce.rds:md5,ef88d8a6bb48eb8f555a0c255d9b03bb", + "Sample_Y_raw_matrix.seurat.rds:md5,5b8361f36fec65107e7652751c80fdb5", + "Sample_Y_raw_matrix.sce.rds:md5,d9d6c934d2193681ab58a88d437bf138", + "Sample_X_filtered_matrix.seurat.rds:md5,05780c38d8deb9d06384da2c12cbdefd", + "Sample_X_filtered_matrix.sce.rds:md5,41f1959bd1c577305915fdfcb797bf0b", + "Sample_Y_filtered_matrix.seurat.rds:md5,d27d6360cf500bf4438f650b8796f8ce", + "Sample_Y_filtered_matrix.sce.rds:md5,2c63e5563a037d2834a6eef107b34f67" ], "meta": { "nf-test": "0.9.0", @@ -26,4 +26,4 @@ }, "timestamp": "2024-11-29T15:32:12.524479228" } -} \ No newline at end of file +}