Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adapt Azimuth code for Wilms label transfer #843

Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9a28271
add functions for label transfer
sjspielman Oct 17, 2024
70e75ba
move functions to utils folder
sjspielman Oct 17, 2024
de20e42
Update 02a notebook to use new functions, and style
sjspielman Oct 17, 2024
2eabc3d
use 0/1 for testing variable
sjspielman Oct 17, 2024
c3fc4c1
update param usage for label transfer notebooks
sjspielman Oct 17, 2024
9abbee9
Update 02b notebook to use new functions, and style
sjspielman Oct 17, 2024
7693571
label transfer does not need to be skipped in CI anymore
sjspielman Oct 17, 2024
6cee2f5
merge base
sjspielman Oct 18, 2024
788927f
remove install fetusref code from script
sjspielman Oct 18, 2024
1778edd
fix notebook name
sjspielman Oct 18, 2024
4aa0eec
use separate query variable to prevent feature loss, and rm when done…
sjspielman Oct 28, 2024
78dfcdc
parameter fixes
sjspielman Oct 29, 2024
d9a6feb
use is_ci
sjspielman Oct 29, 2024
c9b33fc
revert testing code
sjspielman Oct 29, 2024
f7f9686
remove outdated comments
sjspielman Oct 29, 2024
d4a20f5
Add supplemental notebook with results comparing azimuth to adapted a…
sjspielman Oct 29, 2024
76d9ace
little regex tweak
sjspielman Oct 29, 2024
0433245
add query assay arguments to functions
sjspielman Nov 1, 2024
90694c4
Update notebook using RNA as the query assay
sjspielman Nov 1, 2024
69a8a7a
Specify RNA assay in the actual label transfer notebooks
sjspielman Nov 1, 2024
6b3d7ed
why was s still there? and fix a typo
sjspielman Nov 1, 2024
813090a
Merge remote-tracking branch 'upstream/feature/wilms-tumor-06-azimuth…
sjspielman Nov 4, 2024
3a0ba1e
fix typo - output for 02b needs to be 02b
sjspielman Nov 4, 2024
7522c56
names are hard
sjspielman Nov 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitleaks.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ regexes = [
# skip base64 encoded images, which might have substrings that look like tokens
'''(?i)<img src="data:image\/.+;base64,.+?".+\/>''',
# skip jQuery definition function
'''^!function\(.+jQuery'''
'''^!function\(.+?jQuery'''
]
27 changes: 14 additions & 13 deletions analyses/cell-type-wilms-tumor-06/00_run_workflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,22 +69,23 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do
output_file = '01_seurat_processing_${sample_id}.html',
output_dir = '${sample_notebook_dir}')"

# Label transfer from the Cao (full fetal) reference.
# `testing` receives ${IS_CI}; the notebook uses it to pick settings for CI runs.
Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Cao.Rmd',
                params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${IS_CI}),
                output_format = 'html_document',
                output_file = '02a_fetal_all_reference_Cao_${sample_id}.html',
                output_dir = '${sample_notebook_dir}')"

# Label transfer from the Stewart (fetal kidney) reference.
# The rendered report carries the same `02b` prefix as the notebook it comes from.
Rscript -e "rmarkdown::render('${notebook_template_dir}/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd',
                params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${IS_CI}),
                output_format = 'html_document',
                output_file = '02b_fetal_kidney_reference_Stewart_${sample_id}.html',
                output_dir = '${sample_notebook_dir}')"

# Temporarily this code is not run in CI.
if [[ $IS_CI -eq 0 ]]; then

# Label transfer from the Cao reference using Azimuth
Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Cao.Rmd',
params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}'),
output_format = 'html_document',
output_file = '02a_fetal_all_reference_Cao_${sample_id}.html',
output_dir = '${sample_notebook_dir}')"

# Label transfer from the Stewart reference using Seurat
Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Stewart.Rmd',
params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}'),
output_format = 'html_document',
output_file = '02a_fetal_all_reference_Stewart_${sample_id}.html',
output_dir = '${sample_notebook_dir}')"

# Cluster exploration
Rscript -e "rmarkdown::render('${notebook_template_dir}/03_clustering_exploration.Rmd',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ params:
scpca_project_id: "SCPCP000006"
sample_id: "SCPCS000176"
seed: 12345
homologs_file: "../scratch/homologs.rds"
testing: 0
output:
html_document:
toc: yes
Expand All @@ -16,9 +18,11 @@ output:
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,
message=FALSE,
warnings=FALSE)
# Default chunk options for the whole notebook: show the code, but keep
# messages and warnings out of the rendered report.
# The knitr option is `warning` (singular); an unrecognised name such as
# `warnings` is not applied, so warnings would still be printed.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
```


Expand Down Expand Up @@ -66,10 +70,9 @@ Load required packages in the following chunk, if needed.
Do not install packages here; only load them with the `library()` function.

```{r packages, message=FALSE, warning=FALSE}
library("Seurat")
library(Seurat)
library(SeuratData)
library(sctransform)
library(Azimuth)
library(SCpubr)
library(tidyverse)
library(patchwork)
Expand All @@ -89,20 +92,41 @@ repository_base <- rprojroot::find_root(rprojroot::is_git_root)
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")
```

```{r}
# Source the helper functions used for label transfer in this notebook
label_transfer_functions <- file.path(
  module_base, "notebook_template", "utils", "label-transfer-functions.R"
)
source(label_transfer_functions)
```

### Input files

#### Reference

We install and load the reference using `Azimuth`.
```{r path_to_reference}
#Check the names of the Azimuth available data and reference
AvailableData()

# Install the fetal reference
# InstallData("fetusref")
Load the Azimuth reference which has been prepared for label transfer.

ref <- SeuratData::LoadData("fetusref", type = "azimuth")
```{r path_to_reference}
# The formatted Cao reference is expected under the module's results directory
path_to_ref <- file.path(module_base, "results", "references", "cao_formatted_ref.rds")

# Stop right away, pointing at the preparation script, if the file is missing
reference_found <- file.exists(path_to_ref)
if (!reference_found) {
  stop("Reference file could not be found. Make sure `scripts/prepare-fetal-references.R` has been run first.")
}

ref <- readRDS(path_to_ref)

# Unpack the pieces of the reference object that label transfer needs
annotation_levels <- ref$annotation_levels
dims <- ref$dims
refdata <- ref$refdata
reference <- ref$reference
```

#### Query
Expand All @@ -129,65 +153,93 @@ output_dir <- file.path(module_base, "results", params$sample_id)

```{r load, message=FALSE, warning=FALSE}
# open the processed rds object
srat <- readRDS(file.path(data_dir, paste0("01-Seurat_", params$sample_id,".Rds")))
# Read in the processed Seurat object for this sample
processed_file <- file.path(
  data_dir,
  paste0("01-Seurat_", params$sample_id, ".Rds")
)
srat <- readRDS(processed_file)

# Build a separate query object for label transfer instead of overwriting
# `srat`: `prepare_query` removes features that are not present in the reference
DefaultAssay(srat) <- "RNA"
reference_features <- rownames(reference)
query <- prepare_query(srat, reference_features, params$homologs_file)
```


### Label transfer from fetal kidney reference using Azimuth

```{r run_azimuth, message=FALSE, warning=FALSE}
# Run Azimuth on the RNA assay against the `fetusref` reference
DefaultAssay(srat) <- "RNA"
# Raise the `future` globals size limit before running Azimuth
options(future.globals.maxSize = 891289600000000)
s <- Azimuth::RunAzimuth(srat, reference = "fetusref")

# We transfer the annotation to the pre-processed `Seurat` object as we don't want to keep changes done on the query by `RunAzimuth`
metadata_vec <- c(
  "predicted.annotation.l1.score",
  "predicted.annotation.l1",
  "predicted.annotation.l2.score",
  "predicted.annotation.l2",
  "predicted.organ.score",
  "predicted.organ"
)

# Pull the prediction columns out of the Azimuth result's cell metadata
metadata_to_transfer <- s@meta.data[, metadata_vec]

# Store them on `srat` under a `fetal_full_` prefix
srat <- AddMetaData(srat, metadata_to_transfer, col.name = paste0("fetal_full_", metadata_vec))
# Raise the `future` globals size limit before running label transfer
options(future.globals.maxSize = 891289600000000)

# Choose k.weight depending on whether this is a CI/testing run
if (params$testing) {
  k.weight <- 10 # only for test datasets
} else {
  k.weight <- 50 # Azimuth default
}

# Transfer labels from the reference onto the prepared query
query_labeled <- transfer_labels(
  query,
  reference,
  dims,
  refdata,
  k.weight = k.weight
)

# We transfer the annotation to the pre-processed `Seurat` object as we don't want to keep changes done on the query by Azimuth
# Each annotation level contributes a predicted label column and a score column
annotation_columns <- c(
  glue::glue("predicted.{annotation_levels}"),
  glue::glue("predicted.{annotation_levels}.score")
)
metadata_to_transfer <- query_labeled@meta.data[, annotation_columns]

# Store the predictions on `srat` under a `fetal_full_` prefix
srat <- AddMetaData(srat, metadata_to_transfer, col.name = paste0("fetal_full_", annotation_columns))

# The query objects are no longer needed once the labels are on `srat`
rm(query, query_labeled)
```

```{r plot_azimuth, fig.height=15, fig.width=8, warning=FALSE}

d1 <- DimPlot(srat, reduction = "umap", dims = c(1,2), group.by = "fetal_full_predicted.organ", label = TRUE, repel = TRUE) + NoLegend()
d2 <- DimPlot(srat, reduction = "umap", dims = c(1,2), group.by = "fetal_full_predicted.annotation.l1", label = TRUE, repel = TRUE) + NoLegend()
d3 <- DimPlot(srat, reduction = "umap", dims = c(1,2), group.by = "fetal_full_predicted.annotation.l2", label = TRUE, repel = TRUE) + NoLegend()

f1 <- SCpubr::do_BarPlot(sample = srat,
group.by = "fetal_full_predicted.organ",
split.by = "seurat_clusters",
position = "fill",
font.size = 10,
legend.ncol = 4) +
ggtitle("% cells")+
xlab(params$sample_id)

f2 <- SCpubr::do_BarPlot(sample = srat,
group.by = "fetal_full_predicted.annotation.l1",
split.by = "seurat_clusters",
position = "fill",
font.size = 10,
legend.ncol = 2) +
ggtitle("% cells")+
xlab(params$sample_id)

f3 <- SCpubr::do_BarPlot(sample = srat,
group.by = "fetal_full_predicted.annotation.l2",
split.by = "seurat_clusters",
position = "fill",
font.size = 10,
legend.ncol = 2) +
ggtitle("% cells")+
xlab(params$sample_id)

((d1/f1) | (d2/f2) )
# UMAP of `srat` grouped by one transferred annotation column, with the
# groups labelled on the plot and the legend removed
umap_by <- function(annotation_column) {
  DimPlot(
    srat,
    reduction = "umap",
    dims = c(1, 2),
    group.by = annotation_column,
    label = TRUE,
    repel = TRUE
  ) + NoLegend()
}

# Filled bar plot of one annotation column, split by `seurat_clusters`
barplot_by <- function(annotation_column, legend_columns) {
  SCpubr::do_BarPlot(
    sample = srat,
    group.by = annotation_column,
    split.by = "seurat_clusters",
    position = "fill",
    font.size = 10,
    legend.ncol = legend_columns
  ) +
    ggtitle("% cells") +
    xlab(params$sample_id)
}

# One UMAP per annotation column
d1 <- umap_by("fetal_full_predicted.organ")
d2 <- umap_by("fetal_full_predicted.annotation.l1")
d3 <- umap_by("fetal_full_predicted.annotation.l2")

# One bar plot per annotation column
f1 <- barplot_by("fetal_full_predicted.organ", legend_columns = 4)
f2 <- barplot_by("fetal_full_predicted.annotation.l1", legend_columns = 2)
f3 <- barplot_by("fetal_full_predicted.annotation.l2", legend_columns = 2)

# Show the organ and level-1 panels side by side, UMAP above bar plot;
# d3 and f3 are created but are not part of this layout
(d1 / f1) | (d2 / f2)
```

## Save the `Seurat` object

```{r save}
saveRDS(object = srat, file = file.path(output_dir, paste0("02a-fetal_full_label-transfer_",params$sample_id,".Rds")))
# Write the annotated Seurat object to this sample's results directory
labeled_srat_file <- file.path(
  output_dir,
  paste0("02a-fetal_full_label-transfer_", params$sample_id, ".Rds")
)
saveRDS(object = srat, file = labeled_srat_file)
```

## Session info
Expand Down
Loading