Skip to content

Commit

Permalink
Merge pull request #417 from nictru/velocity
Browse files Browse the repository at this point in the history
Finalize STAR velocity implementation
  • Loading branch information
nictru authored Jan 15, 2025
2 parents f92d28d + 4e2bf76 commit ce6ff07
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 21 deletions.
23 changes: 15 additions & 8 deletions modules/local/star_align.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@ process STAR_ALIGN {
val other_10x_parameters

output:
tuple val(meta), path('*d.out.bam') , emit: bam
tuple val(meta), path('*.Solo.out') , emit: counts
tuple val(meta), path ("*.Solo.out/Gene*/raw") , emit: raw_counts
tuple val(meta), path ("*.Solo.out/Gene*/filtered"), emit: filtered_counts
tuple val(meta), path('*Log.final.out') , emit: log_final
tuple val(meta), path('*Log.out') , emit: log_out
tuple val(meta), path('*Log.progress.out') , emit: log_progress
path "versions.yml" , emit: versions
tuple val(meta), path('*d.out.bam') , emit: bam
tuple val(meta), path('*.Solo.out') , emit: counts
tuple val(meta), path ("*.Solo.out/Gene*/raw") , emit: raw_counts
tuple val(meta), path ("*.Solo.out/Gene*/filtered") , emit: filtered_counts
tuple val(meta), path ("*.Solo.out/Velocyto/velocyto_raw") , emit: raw_velocyto, optional:true
tuple val(meta), path ("*.Solo.out/Velocyto/velocyto_filtered"), emit: filtered_velocyto, optional:true
tuple val(meta), path('*Log.final.out') , emit: log_final
tuple val(meta), path('*Log.out') , emit: log_out
tuple val(meta), path('*Log.progress.out') , emit: log_progress
path "versions.yml" , emit: versions

tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted
tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript
Expand Down Expand Up @@ -96,6 +98,11 @@ process STAR_ALIGN {
find ${prefix}.Solo.out \\( -name "*.tsv" -o -name "*.mtx" \\) -exec gzip {} \\;
fi
if [ -d ${prefix}.Solo.out/Velocyto ]; then
mv ${prefix}.Solo.out/Velocyto/raw ${prefix}.Solo.out/Velocyto/velocyto_raw
mv ${prefix}.Solo.out/Velocyto/filtered ${prefix}.Solo.out/Velocyto/velocyto_filtered
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
star: \$(STAR --version | sed -e "s/STAR_//g")
Expand Down
27 changes: 25 additions & 2 deletions modules/local/templates/mtx_to_h5ad_star.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,41 @@
import pandas as pd
import argparse
import anndata
from anndata import AnnData
from anndata import AnnData, concat
import platform
from scipy.sparse import csr_matrix

def _mtx_to_adata(
input: str,
sample: str,
):
adata = sc.read_10x_mtx(input)
adata.obs["sample"] = sample
adata.layers["count"] = adata.X.copy()

return adata
velocyto_dir = f"velocyto_{input}"
if os.path.exists(velocyto_dir):
barcodes = os.path.join(velocyto_dir, "barcodes.tsv.gz")
features = os.path.join(velocyto_dir, "features.tsv.gz")

for matrix in ["ambiguous", "spliced", "unspliced"]:
adata_state = sc.read_mtx(os.path.join(velocyto_dir, f"{matrix}.mtx.gz")).T

adata_state.obs_names = pd.read_csv(barcodes, header=None, sep="\\t")[0].values
adata_state.var_names = pd.read_csv(features, header=None, sep="\\t")[0].values

missing_obs = adata.obs_names[~adata.obs_names.isin(adata_state.obs_names)]
adata_missing = AnnData(
X=csr_matrix((len(missing_obs), adata.shape[1])),
obs=pd.DataFrame(index=missing_obs),
var=adata_state.var
)
adata_state = concat([adata_state, adata_missing], join="outer")
adata_state = adata_state[adata.obs_names, adata.var["gene_ids"]].copy()

adata.layers[matrix] = adata_state.X

return adata

def format_yaml_like(data: dict, indent: int = 0) -> str:
"""Formats a dictionary to a YAML-like string.
Expand Down
18 changes: 16 additions & 2 deletions subworkflows/local/starsolo.nf
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,26 @@ workflow STARSOLO {
)
ch_versions = ch_versions.mix(STAR_ALIGN.out.versions)

raw_counts = STAR_ALIGN.out.raw_counts
.join(STAR_ALIGN.out.raw_velocyto, remainder: true)
.map{ meta, count, velocity -> {
return [meta + [input_type: 'raw'], velocity ? [count, velocity] : [count]]
}
}

filtered_counts = STAR_ALIGN.out.filtered_counts
.join(STAR_ALIGN.out.filtered_velocyto, remainder: true)
.map{ meta, count, velocity -> {
return [meta + [input_type: 'filtered'], velocity ? [count, velocity] : [count]]
}
}

emit:
ch_versions
// get rid of meta for star index
star_result = STAR_ALIGN.out.tab
star_counts = STAR_ALIGN.out.counts
raw_counts = STAR_ALIGN.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] }
filtered_counts = STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] }
raw_counts = raw_counts
filtered_counts = filtered_counts
for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it }
}
18 changes: 9 additions & 9 deletions tests/main_pipeline_star.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,19 @@
"matrix.mtx.gz:md5,0ae080bd0002e350531a5816e159345e",
"features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e",
"barcodes.tsv.gz:md5,9b695b0b91bcb146ec9c4688ca10a690",
"Sample_X_raw_matrix.seurat.rds:md5,51a863d56f6d4c9df7161d574fecfd33",
"Sample_X_raw_matrix.sce.rds:md5,2135e075bfb5043b78841de9bd261a3c",
"Sample_Y_raw_matrix.seurat.rds:md5,f177854d779169f6f0e1c628f154a656",
"Sample_Y_raw_matrix.sce.rds:md5,86fc59316bc9083c510aa0d3a0a24ffb",
"Sample_X_filtered_matrix.seurat.rds:md5,a035c9ead72baa36f2ef298dc02d5e1b",
"Sample_X_filtered_matrix.sce.rds:md5,8fd8b3a602a00578e5a72f1fd6792e05",
"Sample_Y_filtered_matrix.seurat.rds:md5,42823b941a1c375473f454980eafbf0b",
"Sample_Y_filtered_matrix.sce.rds:md5,254fd8a73aa0e0ca4bee57855c9cde30"
"Sample_X_raw_matrix.seurat.rds:md5,34fd07749de1f1ee49e885782cd7c6c5",
"Sample_X_raw_matrix.sce.rds:md5,ef88d8a6bb48eb8f555a0c255d9b03bb",
"Sample_Y_raw_matrix.seurat.rds:md5,5b8361f36fec65107e7652751c80fdb5",
"Sample_Y_raw_matrix.sce.rds:md5,d9d6c934d2193681ab58a88d437bf138",
"Sample_X_filtered_matrix.seurat.rds:md5,05780c38d8deb9d06384da2c12cbdefd",
"Sample_X_filtered_matrix.sce.rds:md5,41f1959bd1c577305915fdfcb797bf0b",
"Sample_Y_filtered_matrix.seurat.rds:md5,d27d6360cf500bf4438f650b8796f8ce",
"Sample_Y_filtered_matrix.sce.rds:md5,2c63e5563a037d2834a6eef107b34f67"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
},
"timestamp": "2024-11-29T15:32:12.524479228"
}
}
}

0 comments on commit ce6ff07

Please sign in to comment.