From 9a28271f783aa5a526be094b1baddce7fb619375 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 17 Oct 2024 13:13:10 -0400 Subject: [PATCH 01/22] add functions for label transfer --- .../label-transfer-functions.R | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 analyses/cell-type-wilms-tumor-06/notebook_template/label-transfer-functions.R diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/label-transfer-functions.R b/analyses/cell-type-wilms-tumor-06/notebook_template/label-transfer-functions.R new file mode 100644 index 000000000..c8eae1013 --- /dev/null +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/label-transfer-functions.R @@ -0,0 +1,142 @@ +# This script contains functions used for label transfer in: +# - "02a_label-transfer_fetal_full_reference_Cao.Rmd" +# - "02b_label-transfer_fetal_full_reference_Stewart.Rmd" +# +# All code was adapted from the RunAzimuth function (https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R), retaining only the specific components of +# this function used for label transfer. For example, additional aspects of this function project +# the query data onto the reference UMAP and perform associated calculations, but this is beyond the +# scope of label transfer for this project, so that code was not imported here. + + + +#' Prepare query Seurat object for label transfer +#' +#' This function prepares Seurat objects by: +#' - Converting gene names to gene symbols and subsetting to only shared features with the reference +#' - Ensuring nCount_RNA and nFeature_RNA are present in the Seurat object +#' - Ensuring the percentage of mitochondrial genes is present in the Seurat object +#' +#' This code was adapted from the RunAzimuth function: +#' https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R +#' +#' @param query The Seurat object which will undergo label transfer +#' @param reference_rownames The rownames (aka, features) in the reference object +#' @param homolog_file Path to the homologs.rds file obtained from Seurat +#' +#' @return Seurat object prepared for label transfer +prepare_query <- function(query, reference_rownames, homolog_file = homologs_file) { + # Convert the query (sample) row names from ensembl IDs to gene names to match what + # the Azimuth reference uses + # Source: https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R#L99-L104 + query <- ConvertGeneNames( + object = query, + reference.names = rownames(x = reference), + homolog.table = "https://seurat.nygenome.org/azimuth/references/homologs.rds" + ) + query <- Azimuth::ConvertGeneNames( + object = query, + reference.names = reference_rownames, + homolog.table = homolog_file + ) + + # Calculate nCount_RNA and nFeature_RNA if the query does not + # contain them already + # Source: https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R#L106-L120 + if (!all(c("nCount_RNA", "nFeature_RNA") %in% c(colnames(x = query[[]])))) { + calcn <- as.data.frame(x = Seurat:::CalcN(object = query[["RNA"]])) + colnames(x = calcn) <- paste( + colnames(x = calcn), + "RNA", + sep = "_" + ) + query <- AddMetaData( + object = query, + metadata = calcn + ) + rm(calcn) + } + + # Calculate percent mitochondrial genes if the query contains genes + # matching the regular expression "^MT-" + # Source: https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R#L122-L131 + if (any(grepl(pattern = "^MT-", x = rownames(x = query)))) { + query_symbols <- PercentageFeatureSet( + object = query, + pattern = "^MT-", + col.name = "percent.mt" + ) + } + + return(query) +} + +#' Perform label transfer using an Azimuth-adapted approach +#' +#' This function adapts code from the RunAzimuth function to perform label transfer: +#' https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R +#' +#' Note that several params were documented below based on their descriptions here: +#' https://github.com/satijalab/seurat/blob/1549dcb3075eaeac01c925c4b4bb73c73450fc50/R/integration.R +#' +#' @param query The Seurat object which will undergo label transfer +#' @param reference The reference dataset +#' @param reference_dims Dimensions calculated from the reference dataset +#' @param refdata object used by Azimuth +#' @param reference_dims Number of dimensions to use in the anchor weighting procedure +#' @param k.weight Number of neighbors to consider when weighting anchors. This should be +#' <=15 when running on OpenScPCA test data +#' @param n.trees More trees gives higher precision when using annoy approximate +#' nearest neighbor search +#' @param mapping.score.k Compute and store nearest k query neighbors in the +#' AnchorSet object that is returned. +#' @param ksmooth Number of cells to average over when computing transition +#' probabilities +#' @param verbose Display messages/progress +#' +#' @return Seurat object containing metadata from label transfer +transfer_labels <- function( + query, + reference, + reference_dims, + refdata, + k.weight = 10, + n.trees = 20, + mapping.score.k = 80, + ksmooth = 80, + verbose = FALSE) { + # Find anchors between query and reference + # Source: https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R#L132-L147 + anchors <- FindTransferAnchors( + reference = reference, + query = query, + k.filter = NA, + reference.neighbors = "refdr.annoy.neighbors", + reference.assay = "refAssay", + query.assay = "RNA", + reference.reduction = "refDR", + normalization.method = "SCT", + features = rownames(Loadings(reference[["refDR"]])), + dims = 1:reference_dims, + n.trees = n.trees, + mapping.score.k = mapping.score.k, + verbose = verbose + ) + + # Perform label transfer + # Source: https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R#L164-L175 + query <- TransferData( + reference = reference, + query = query, + query.assay = "RNA", + dims = 1:reference_dims, + anchorset = anchors, + refdata = refdata, + n.trees = n.trees, + store.weights = TRUE, + k.weight = k.weight, + verbose = verbose + ) + + + return(query) +} From 70e75ba56b085b73f08759f720afad9065d4d26a Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 17 Oct 2024 15:30:00 -0400 Subject: [PATCH 02/22] move functions to utils folder --- .../notebook_template/{ => utils}/label-transfer-functions.R | 5 ----- 1 file changed, 5 deletions(-) rename analyses/cell-type-wilms-tumor-06/notebook_template/{ => utils}/label-transfer-functions.R (96%) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/label-transfer-functions.R b/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R similarity index 96% rename from analyses/cell-type-wilms-tumor-06/notebook_template/label-transfer-functions.R rename to analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R index c8eae1013..576c6c5e8 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/label-transfer-functions.R +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R @@ -28,11 +28,6 @@ prepare_query <- function(query, reference_rownames, homolog_file = homologs_fil # Convert the query (sample) row names from ensembl IDs to gene names to match what # the Azimuth reference uses # Source: https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R#L99-L104 - query <- ConvertGeneNames( - object = query, - reference.names = rownames(x = reference), - homolog.table = "https://seurat.nygenome.org/azimuth/references/homologs.rds" - ) query <- Azimuth::ConvertGeneNames( object = query, reference.names = reference_rownames, From de20e42bfe542fb5f307ed98d0ff082e4d9b097b Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 17 Oct 2024 15:30:47 -0400 Subject: [PATCH 03/22] Update 02a notebook to use new functions, and style --- ...abel-transfer_fetal_full_reference_Cao.Rmd | 163 +++++++++++------- 1 file changed, 105 insertions(+), 58 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd index 69a9c7c0e..0d0a0af64 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd @@ -6,6 +6,8 @@ params: scpca_project_id: "SCPCP000006" sample_id: "SCPCS000176" seed: 12345 + homologs_file: "../scratch/homologs.rds" + testing: FALSE output: html_document: toc: yes @@ -16,9 +18,11 @@ output: --- ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE, - message=FALSE, - warnings=FALSE) +knitr::opts_chunk$set( + echo = TRUE, + message = FALSE, + warnings = FALSE +) ``` @@ -66,10 +70,9 @@ Load required packages in the following chunk, if needed. Do not install packages here; only load them with the `library()` function. ```{r packages, message=FALSE, warning=FALSE} -library("Seurat") +library(Seurat) library(SeuratData) library(sctransform) -library(Azimuth) library(SCpubr) library(tidyverse) library(patchwork) @@ -89,20 +92,41 @@ repository_base <- rprojroot::find_root(rprojroot::is_git_root) module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06") ``` +```{r} +# load functions for label transfer +source( + file.path( + module_base, + "notebook_template", + "utils", + "label-transfer-functions.R" + ) +) +``` ### Input files #### Reference -We install and load the reference using `Azimuth`. -```{r path_to_reference} -#Check the names of the Azimuth available data and reference -AvailableData() - -# Install the fetal reference -InstallData("fetusref") +Load the Azimuth reference which has been prepared for label transfer. -ref <- SeuratData::LoadData("fetusref", type = "azimuth") +```{r path_to_reference} +path_to_ref <- file.path( + module_base, + "results", + "references", + "cao_formatted_ref.rds" +) +if (!file.exists(path_to_ref)) { + stop("Reference file could not be found. Make sure `scripts/prepare-fetal-references.R` has been run first.") +} +ref <- readRDS(path_to_ref) + +# Pull out information from the reference object we need for label transfer +reference <- ref$reference +refdata <- ref$refdata +dims <- ref$dims +annotation_levels <- ref$annotation_levels ``` #### Query @@ -129,65 +153,88 @@ output_dir <- file.path(module_base, "results", params$sample_id) ```{r load, message=FALSE, warning=FALSE} # open the processed rds object -srat <- readRDS(file.path(data_dir, paste0("01-Seurat_", params$sample_id,".Rds"))) +srat <- readRDS(file.path(data_dir, paste0("01-Seurat_", params$sample_id, ".Rds"))) + +# prepare the query for label transfer +DefaultAssay(srat) <- "RNA" +srat <- prepare_query(srat, rownames(reference), params$homologs_file) ``` ### Label transfer from fetal kidney reference using Azimuth ```{r run_azimuth, message=FALSE, warnings=FALSE} -DefaultAssay(srat) <- "RNA" -options(future.globals.maxSize= 891289600000000) -s <- Azimuth::RunAzimuth(srat, reference ="fetusref") - -# We transfer the annotation to the pre-processed `Seurat` object as we don't want to keep changes done on the query by `RunAzimuth` -metadata_vec <- c("predicted.annotation.l1.score", "predicted.annotation.l1", "predicted.annotation.l2.score", "predicted.annotation.l2", "predicted.organ.score", "predicted.organ") - -metadata_to_trasfer <- s@meta.data[, metadata_vec] - -srat <- AddMetaData(srat, metadata_to_trasfer, col.name = paste0("fetal_full_", metadata_vec)) +options(future.globals.maxSize = 891289600000000) + +# determine k.weight based CI +if (params$testing) { + k.weight <- 10 # only for test datasets +} else { + k.weight <- 50 # Azimuth default +} +s <- transfer_labels( + srat, + reference, + dims, + refdata, + k.weight = k.weight +) + +# We transfer the annotation to the pre-processed `Seurat` object as we don't want to keep changes done on the query by Azimuth +annotation_columns <- c( + glue::glue("predicted.{annotation_levels}"), + glue::glue("predicted.{annotation_levels}.score") +) +metadata_to_trasfer <- s@meta.data[, annotation_columns] + +srat <- AddMetaData(srat, metadata_to_trasfer, col.name = paste0("fetal_full_", annotation_columns)) ``` ```{r plot_azimuth, fig.height=15, fig.width=8, warnings=FALSE} - -d1 <- DimPlot(srat, reduction = "umap", dims = c(1,2), group.by = "fetal_full_predicted.organ", label = TRUE, repel = TRUE) + NoLegend() -d2 <- DimPlot(srat, reduction = "umap", dims = c(1,2), group.by = "fetal_full_predicted.annotation.l1", label = TRUE, repel = TRUE) + NoLegend() -d3 <- DimPlot(srat, reduction = "umap", dims = c(1,2), group.by = "fetal_full_predicted.annotation.l2", label = TRUE, repel = TRUE) + NoLegend() - -f1 <- SCpubr::do_BarPlot(sample = srat, - group.by = "fetal_full_predicted.organ", - split.by = "seurat_clusters", - position = "fill", - font.size = 10, - legend.ncol = 4) + - ggtitle("% cells")+ - xlab(params$sample_id) - -f2 <- SCpubr::do_BarPlot(sample = srat, - group.by = "fetal_full_predicted.annotation.l1", - split.by = "seurat_clusters", - position = "fill", - font.size = 10, - legend.ncol = 2) + - ggtitle("% cells")+ - xlab(params$sample_id) - -f3 <- SCpubr::do_BarPlot(sample = srat, - group.by = "fetal_full_predicted.annotation.l2", - split.by = "seurat_clusters", - position = "fill", - font.size = 10, - legend.ncol = 2) + - ggtitle("% cells")+ - xlab(params$sample_id) - -((d1/f1) | (d2/f2) ) +d1 <- DimPlot(srat, reduction = "umap", dims = c(1, 2), group.by = "fetal_full_predicted.organ", label = TRUE, repel = TRUE) + NoLegend() +d2 <- DimPlot(srat, reduction = "umap", dims = c(1, 2), group.by = "fetal_full_predicted.annotation.l1", label = TRUE, repel = TRUE) + NoLegend() +d3 <- DimPlot(srat, reduction = "umap", dims = c(1, 2), group.by = "fetal_full_predicted.annotation.l2", label = TRUE, repel = TRUE) + NoLegend() + +f1 <- SCpubr::do_BarPlot( + sample = srat, + group.by = "fetal_full_predicted.organ", + split.by = "seurat_clusters", + position = "fill", + font.size = 10, + legend.ncol = 4 +) + + ggtitle("% cells") + + xlab(params$sample_id) + +f2 <- SCpubr::do_BarPlot( + sample = srat, + group.by = "fetal_full_predicted.annotation.l1", + split.by = "seurat_clusters", + position = "fill", + font.size = 10, + legend.ncol = 2 +) + + ggtitle("% cells") + + xlab(params$sample_id) + +f3 <- SCpubr::do_BarPlot( + sample = srat, + group.by = "fetal_full_predicted.annotation.l2", + split.by = "seurat_clusters", + position = "fill", + font.size = 10, + legend.ncol = 2 +) + + ggtitle("% cells") + + xlab(params$sample_id) + +((d1 / f1) | (d2 / f2)) ``` ## Save the `Seurat`object ```{r save} -saveRDS(object = srat, file = file.path(output_dir, paste0("02a-fetal_full_label-transfer_",params$sample_id,".Rds"))) +saveRDS(object = srat, file = file.path(output_dir, paste0("02a-fetal_full_label-transfer_", params$sample_id, ".Rds"))) ``` ## Session info From 2eabc3d2e34ba343de7fbeab2b2915597884fd1d Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 17 Oct 2024 15:32:19 -0400 Subject: [PATCH 04/22] use 0/1 for testing variable --- analyses/cell-type-wilms-tumor-06/00_run_workflow.sh | 2 +- .../02a_label-transfer_fetal_full_reference_Cao.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index 00a18e8b9..509a57585 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -74,7 +74,7 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do # Label transfer from the Cao reference using Azimuth Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Cao.Rmd', - params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}'), + params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', testing = 1), output_format = 'html_document', output_file = '02a_fetal_all_reference_Cao_${sample_id}.html', output_dir = '${sample_notebook_dir}')" diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd index 0d0a0af64..8757a825d 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd @@ -7,7 +7,7 @@ params: sample_id: "SCPCS000176" seed: 12345 homologs_file: "../scratch/homologs.rds" - testing: FALSE + testing: 0 output: html_document: toc: yes From c3fc4c1f4a416a3e3e991af3d904ec3862883d02 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 17 Oct 2024 16:12:15 -0400 Subject: [PATCH 05/22] update param usage for label transfer notebooks --- analyses/cell-type-wilms-tumor-06/00_run_workflow.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index 509a57585..2e4d174f7 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -74,14 +74,14 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do # Label transfer from the Cao reference using Azimuth Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Cao.Rmd', - params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', testing = 1), + params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), output_format = 'html_document', output_file = '02a_fetal_all_reference_Cao_${sample_id}.html', output_dir = '${sample_notebook_dir}')" # Label transfer from the Stewart reference using Seurat Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Stewart.Rmd', - params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}'), + params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), output_format = 'html_document', output_file = '02a_fetal_all_reference_Stewart_${sample_id}.html', output_dir = '${sample_notebook_dir}')" From 9abbee95ff69ebe7573090647f005de955c06356 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 17 Oct 2024 16:12:45 -0400 Subject: [PATCH 06/22] Update 02b notebook to use new functions, and style --- ...ransfer_fetal_kidney_reference_Stewart.Rmd | 133 ++++++++++++------ 1 file changed, 92 insertions(+), 41 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd b/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd index cf247df36..abe006e64 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd @@ -6,6 +6,8 @@ params: scpca_project_id: "SCPCP000006" sample_id: "SCPCS000176" seed: 12345 + homologs_file: "../scratch/homologs.rds" + testing: 0 output: html_document: toc: yes @@ -16,10 +18,11 @@ output: --- ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE, - message=FALSE, - warnings=FALSE - ) +knitr::opts_chunk$set( + echo = TRUE, + message = FALSE, + warnings = FALSE +) ``` @@ -67,9 +70,8 @@ Load required packages in the following chunk, if needed. Do not install packages here; only load them with the `library()` function. ```{r packages, message=FALSE, warning=FALSE} -library("Seurat") +library(Seurat) library(sctransform) -library(Azimuth) library(SCpubr) library(tidyverse) library(patchwork) @@ -92,6 +94,17 @@ data_dir <- file.path(repository_base, "data", "current", params$scpca_project_i module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06") ``` +```{r} +# load functions for label transfer +source( + file.path( + module_base, + "notebook_template", + "utils", + "label-transfer-functions.R" + ) +) +``` ### Input files @@ -100,7 +113,22 @@ module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06" The reference has been download and pre-process in the `R Script` `download-and-create-fetal-kidney-ref.R` ```{r path_to_reference} -path_to_reference <- file.path(module_base, "results", "references") +path_to_ref <- file.path( + module_base, + "results", + "references", + "stewart_formatted_ref.rds" +) +if (!file.exists(path_to_ref)) { + stop("Reference file could not be found. Make sure `scripts/prepare-fetal-references.R` has been run first.") +} +ref <- readRDS(path_to_ref) + +# Pull out information from the reference object we need for label transfer +reference <- ref$reference +refdata <- ref$refdata +dims <- ref$dims +annotation_levels <- ref$annotation_levels ``` #### Query @@ -127,47 +155,70 @@ output_dir <- file.path(module_base, "results", params$sample_id) ### Load the pre-processed `Seurat` object ```{r load, message=FALSE, warning=FALSE} # open the processed rds object -srat <- readRDS(file.path(data_dir, paste0("02a-fetal_full_label-transfer_",params$sample_id,".Rds"))) +srat <- readRDS(file.path(data_dir, paste0("02a-fetal_full_label-transfer_", params$sample_id, ".Rds"))) + +# prepare the query for label transfer +DefaultAssay(srat) <- "RNA" +srat <- prepare_query(srat, rownames(reference), params$homologs_file) ``` ### Azimuth annotation from fetal kidney ```{r run_azimuth, message=FALSE, warnings=FALSE} -DefaultAssay(srat) <- "RNA" -options(future.globals.maxSize= 8912896000000) - -data <- RunAzimuth(srat, path_to_reference, assay = 'RNA') -# We transfer the annotation to the pre-processed `Seurat`object as we don't want to keep changes done on the query by `RunAzimuth` -metadata_vec <- c("predicted.compartment.score", "predicted.compartment", "predicted.cell_type.score", "predicted.cell_type") -metadata_to_trasfer <- data@meta.data[, metadata_vec] -srat <- AddMetaData(srat, metadata_to_trasfer, col.name = paste0("fetal_kidney_", metadata_vec)) +options(future.globals.maxSize = 8912896000000) + +# determine k.weight based CI +if (params$testing) { + k.weight <- 10 # only for test datasets +} else { + k.weight <- 50 # Azimuth default +} +s <- transfer_labels( + srat, + reference, + dims, + refdata, + k.weight = k.weight +) + +# We transfer the annotation to the pre-processed `Seurat` object as we don't want to keep changes done on the query by Azimuth +annotation_columns <- c( + glue::glue("predicted.{annotation_levels}"), + glue::glue("predicted.{annotation_levels}.score") +) +metadata_to_trasfer <- s@meta.data[, annotation_columns] + +srat <- AddMetaData(srat, metadata_to_trasfer, col.name = paste0("fetal_kidney_", annotation_columns)) ``` ```{r plot_azimuth, fig.height=8, fig.width=8, warnings=FALSE} - -d1 <- DimPlot(srat, reduction = "umap", dims = c(1,2), group.by = "fetal_kidney_predicted.compartment", label = TRUE, repel = TRUE) + NoLegend() -d2 <- DimPlot(srat, reduction = "umap", dims = c(1,2), group.by = "fetal_kidney_predicted.cell_type", label = TRUE, repel = TRUE) + NoLegend() - -f1 <- SCpubr::do_BarPlot(sample = srat, - group.by = "fetal_kidney_predicted.compartment", - split.by = "seurat_clusters", - position = "fill", - font.size = 10, - legend.ncol = 3) + - ggtitle("% cells")+ - xlab(params$sample_id) - -f2 <- SCpubr::do_BarPlot(sample = srat, - group.by = "fetal_kidney_predicted.cell_type", - split.by = "seurat_clusters", - position = "fill", - font.size = 10, - legend.ncol = 3) + - ggtitle("% cells")+ - xlab(params$sample_id) - -(d1/f1) | (d2/f2) +d1 <- DimPlot(srat, reduction = "umap", dims = c(1, 2), group.by = "fetal_kidney_predicted.compartment", label = TRUE, repel = TRUE) + NoLegend() +d2 <- DimPlot(srat, reduction = "umap", dims = c(1, 2), group.by = "fetal_kidney_predicted.cell_type", label = TRUE, repel = TRUE) + NoLegend() + +f1 <- SCpubr::do_BarPlot( + sample = srat, + group.by = "fetal_kidney_predicted.compartment", + split.by = "seurat_clusters", + position = "fill", + font.size = 10, + legend.ncol = 3 +) + + ggtitle("% cells") + + xlab(params$sample_id) + +f2 <- SCpubr::do_BarPlot( + sample = srat, + group.by = "fetal_kidney_predicted.cell_type", + split.by = "seurat_clusters", + position = "fill", + font.size = 10, + legend.ncol = 3 +) + + ggtitle("% cells") + + xlab(params$sample_id) + +(d1 / f1) | (d2 / f2) ``` Note: @@ -187,7 +238,7 @@ In our case, cap-mesenchyme contains blastema and primitive epitheliul cancer ce ## Save the `Seurat`object ```{r save} -saveRDS(object = srat, file = file.path(output_dir, paste0("02b-fetal_kidney_label-transfer_",params$sample_id,".Rds"))) +saveRDS(object = srat, file = file.path(output_dir, paste0("02b-fetal_kidney_label-transfer_", params$sample_id, ".Rds"))) ``` ## Session info @@ -204,4 +255,4 @@ sessionInfo() - [3] https://www.science.org/doi/10.1126/science.aat5031 -- [4] https://www.science.org/doi/10.1126/science.aba7721 \ No newline at end of file +- [4] https://www.science.org/doi/10.1126/science.aba7721 From 7693571f332696d511c10e588420d09f2d4ce59b Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 17 Oct 2024 16:13:34 -0400 Subject: [PATCH 07/22] label transfer does not need to be skipped in CI anymore --- .../00_run_workflow.sh | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index 2e4d174f7..62fe31ab5 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -69,22 +69,23 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do output_file = '01_seurat_processing_${sample_id}.html', output_dir = '${sample_notebook_dir}')" + # Label transfer from the Cao reference using Azimuth + Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Cao.Rmd', + params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), + output_format = 'html_document', + output_file = '02a_fetal_all_reference_Cao_${sample_id}.html', + output_dir = '${sample_notebook_dir}')" + + # Label transfer from the Stewart reference using Seurat + Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Stewart.Rmd', + params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), + output_format = 'html_document', + output_file = '02a_fetal_all_reference_Stewart_${sample_id}.html', + output_dir = '${sample_notebook_dir}')" + # Temporarily this code is not run in CI. if [[ $IS_CI -eq 0 ]]; then - # Label transfer from the Cao reference using Azimuth - Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Cao.Rmd', - params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), - output_format = 'html_document', - output_file = '02a_fetal_all_reference_Cao_${sample_id}.html', - output_dir = '${sample_notebook_dir}')" - - # Label transfer from the Stewart reference using Seurat - Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Stewart.Rmd', - params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), - output_format = 'html_document', - output_file = '02a_fetal_all_reference_Stewart_${sample_id}.html', - output_dir = '${sample_notebook_dir}')" # Cluster exploration Rscript -e "rmarkdown::render('${notebook_template_dir}/03_clustering_exploration.Rmd', From 788927fb31b47905b816c7febecb5d90dddcaf89 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Fri, 18 Oct 2024 10:24:09 -0400 Subject: [PATCH 08/22] remove install fetusref code from script --- .../scripts/prepare-fetal-references.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/scripts/prepare-fetal-references.R b/analyses/cell-type-wilms-tumor-06/scripts/prepare-fetal-references.R index 17144fa11..b4da25a10 100644 --- a/analyses/cell-type-wilms-tumor-06/scripts/prepare-fetal-references.R +++ b/analyses/cell-type-wilms-tumor-06/scripts/prepare-fetal-references.R @@ -168,12 +168,9 @@ saveRDS(stewart_ref_list, stewart_ref_file) # Prepare Cao (full fetal organ) reference ------------------------------ -# Install and load in the reference, keeping only the $map portion -options(timeout = 600) # often needed for SeuratData installs -SeuratData::InstallData("fetusref") +# Load in the reference, keeping only the $map portion fetus_ref <- SeuratData::LoadData("fetusref", type = "azimuth")$map - # format for label transfer cao_ref_list <- prepare_azimuth_reference(fetus_ref, cao_annotation_levels) From 1778edd3d0d20c26b10f4ffa003135eabce777a4 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Fri, 18 Oct 2024 10:59:47 -0400 Subject: [PATCH 09/22] fix notebook name --- analyses/cell-type-wilms-tumor-06/00_run_workflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index 62fe31ab5..678c4defc 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -77,7 +77,7 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do output_dir = '${sample_notebook_dir}')" # Label transfer from the Stewart reference using Seurat - Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Stewart.Rmd', + Rscript -e "rmarkdown::render('${notebook_template_dir}/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd', params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), output_format = 'html_document', output_file = '02a_fetal_all_reference_Stewart_${sample_id}.html', From 4aa0eecb22c3b76cd7755bd0507cf390b1dc9373 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 28 Oct 2024 11:10:19 -0400 Subject: [PATCH 10/22] use separate query variable to prevent feature loss, and rm when done for space --- .../02a_label-transfer_fetal_full_reference_Cao.Rmd | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd index f7b9492b4..8d3140341 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd @@ -156,8 +156,10 @@ output_dir <- file.path(module_base, "results", params$sample_id) srat <- readRDS(file.path(data_dir, paste0("01-Seurat_", params$sample_id, ".Rds"))) # prepare the query for label transfer +# we don't want to overwrite the srat object since `prepare_query` +# removes features that are not present in the reference DefaultAssay(srat) <- "RNA" -srat <- prepare_query(srat, rownames(reference), params$homologs_file) +query <- prepare_query(srat, rownames(reference), params$homologs_file) ``` @@ -172,8 +174,8 @@ if (params$testing) { } else { k.weight <- 50 # Azimuth default } -s <- transfer_labels( - srat, +query_labeled <- transfer_labels( + query, reference, dims, refdata, @@ -185,9 +187,12 @@ annotation_columns <- c( glue::glue("predicted.{annotation_levels}"), glue::glue("predicted.{annotation_levels}.score") ) -metadata_to_trasfer <- s@meta.data[, annotation_columns] +metadata_to_trasfer <- query_labeled@meta.data[, annotation_columns] srat <- AddMetaData(srat, metadata_to_trasfer, col.name = paste0("fetal_full_", annotation_columns)) + +rm(query) +rm(query_labeled) ``` ```{r plot_azimuth, fig.height=15, fig.width=8, warnings=FALSE} From 78dfcdc42ed37881c09e4d962ffba53d2d6db830 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 29 Oct 2024 12:40:37 -0400 Subject: [PATCH 11/22] parameter fixes --- .../utils/label-transfer-functions.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R b/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R index 576c6c5e8..af43b6454 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R @@ -41,7 +41,7 @@ prepare_query <- function(query, reference_rownames, homolog_file = homologs_fil calcn <- as.data.frame(x = Seurat:::CalcN(object = query[["RNA"]])) colnames(x = calcn) <- paste( colnames(x = calcn), - "RNA", + NULL, # assay sep = "_" ) query <- AddMetaData( @@ -58,7 +58,8 @@ prepare_query <- function(query, reference_rownames, homolog_file = homologs_fil query_symbols <- PercentageFeatureSet( object = query, pattern = "^MT-", - col.name = "percent.mt" + col.name = "percent.mt", + assay = NULL ) } @@ -94,7 +95,7 @@ transfer_labels <- function( reference, reference_dims, refdata, - k.weight = 10, + k.weight = 50, n.trees = 20, mapping.score.k = 80, ksmooth = 80, @@ -107,7 +108,7 @@ transfer_labels <- function( k.filter = NA, reference.neighbors = "refdr.annoy.neighbors", reference.assay = "refAssay", - query.assay = "RNA", + query.assay = NULL, reference.reduction = "refDR", normalization.method = "SCT", features = rownames(Loadings(reference[["refDR"]])), @@ -122,7 +123,7 @@ transfer_labels <- function( query <- TransferData( reference = reference, query = query, - query.assay = "RNA", + query.assay = NULL, dims = 1:reference_dims, anchorset = anchors, refdata = refdata, From d9a6feb0c7147f1daaae0a8ffcd7172d2dbf06d6 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 29 Oct 2024 15:02:45 -0400 Subject: [PATCH 12/22] use is_ci --- analyses/cell-type-wilms-tumor-06/00_run_workflow.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index 678c4defc..74bc50c85 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -50,8 +50,8 @@ Rscript -e "rmarkdown::render('${notebook_template_dir}/00b_characterize_fetal_k # Run the label transfer and cluster exploration for all samples in the project -for sample_dir in ${data_dir}/${project_id}/SCPCS*; do - sample_id=$(basename $sample_dir) +for sample_id in SCPCS000168; do #${data_dir}/${project_id}/SCPCS*; do + # sample_id=$(basename $sample_dir) # define and create sample-specific directories # directory for the pre-processed and labeled `Seurat` objects @@ -71,14 +71,14 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do # Label transfer from the Cao reference using Azimuth Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Cao.Rmd', - params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), + params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${IS_CI}), output_format = 'html_document', output_file = '02a_fetal_all_reference_Cao_${sample_id}.html', output_dir = '${sample_notebook_dir}')" # Label transfer from the Stewart reference using Seurat Rscript -e "rmarkdown::render('${notebook_template_dir}/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd', - params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = 1), + params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${IS_CI}), output_format = 'html_document', output_file = '02a_fetal_all_reference_Stewart_${sample_id}.html', output_dir = '${sample_notebook_dir}')" From c9b33fc1a7eca426d31e96f8d1db8cce5014e63d Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 29 Oct 2024 15:03:19 -0400 Subject: [PATCH 13/22] revert testing code --- analyses/cell-type-wilms-tumor-06/00_run_workflow.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index 74bc50c85..a6a6de19b 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -50,8 +50,8 @@ Rscript -e "rmarkdown::render('${notebook_template_dir}/00b_characterize_fetal_k # Run the label transfer and cluster exploration for all samples in the project -for sample_id in SCPCS000168; do #${data_dir}/${project_id}/SCPCS*; do - # sample_id=$(basename $sample_dir) +for sample_dir in ${data_dir}/${project_id}/SCPCS*; do + sample_id=$(basename $sample_dir) # define and create sample-specific directories # directory for the pre-processed and labeled `Seurat` objects From f7f96869434fa054ad3013a8b98f68edb8a64269 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 29 Oct 2024 15:11:17 -0400 Subject: [PATCH 14/22] remove outdated comments --- analyses/cell-type-wilms-tumor-06/00_run_workflow.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index a6a6de19b..8ee886647 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -69,14 +69,14 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do output_file = '01_seurat_processing_${sample_id}.html', output_dir = '${sample_notebook_dir}')" - # Label transfer from the Cao reference using Azimuth + # Label transfer from the Cao reference Rscript -e "rmarkdown::render('${notebook_template_dir}/02a_label-transfer_fetal_full_reference_Cao.Rmd', params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${IS_CI}), output_format = 'html_document', output_file = '02a_fetal_all_reference_Cao_${sample_id}.html', output_dir = '${sample_notebook_dir}')" - # Label transfer from the Stewart reference using Seurat + # Label transfer from the Stewart reference Rscript -e "rmarkdown::render('${notebook_template_dir}/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd', params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${IS_CI}), output_format = 'html_document', From d4a20f53b60f7e9ac2b63d42637de5d527b4edb0 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 29 Oct 2024 15:16:52 -0400 Subject: [PATCH 15/22] Add supplemental notebook with results comparing azimuth to adapted azimuth --- .../supplemental-notebooks/README.md | 6 + .../compare-label-transfer-approaches.Rmd | 393 ++ .../compare-label-transfer-approaches.nb.html | 3666 +++++++++++++++++ 3 files changed, 4065 insertions(+) create mode 100644 analyses/cell-type-wilms-tumor-06/supplemental-notebooks/README.md create mode 100644 analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.Rmd create mode 100644 analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.nb.html diff --git a/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/README.md b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/README.md new file mode 100644 index 000000000..1570f5933 --- /dev/null +++ b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/README.md @@ -0,0 +1,6 @@ +This directory contains supplementary notebooks that are not used as part of this module's workflow. + +- `compare-label-transfer-approaches.Rmd` compares label transfer results for 5 samples from two approaches: i) using Azimuth directly, and ii) using code adapted from Azimuth functions. + - This notebook was written because Azimuth was not able to be used with OpenScPCA test data (as described in ). + We therefore adapted code from Azimuth to perform label transfer directly. + This notebook compares label transfer results from this new approach to those obtained with Azimuth to ensure the labels are reasonably similar. \ No newline at end of file diff --git a/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.Rmd b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.Rmd new file mode 100644 index 000000000..e240bdb73 --- /dev/null +++ b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.Rmd @@ -0,0 +1,393 @@ +--- +title: "Compare label transfer results between Azimuth and Azimuth-adapted strategy" +author: Stephanie Spielman, Data Lab +output: + html_notebook: + toc: yes + toc_float: yes +params: + seed: 12345 +--- + + +The goal of this notebook is to compare label transfer results between: + +- Label transfer code with Azimuth currently in `main` at commit `6af112d`. These results are referred to as `"azimuth"`. +- Label transfer code adapted from Azimuth. These results are referred to as `"adapted_azimuth"`. + + +## Setup + +```{r setup} +knitr::opts_chunk$set(message = FALSE, warning = FALSE) +options(future.globals.maxSize = 891289600000000) + +suppressPackageStartupMessages({ + library(tidyverse) + library(patchwork) + library(Seurat) +}) + +repository_base <- rprojroot::find_root(rprojroot::is_git_root) +module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06") +result_dir <- file.path(module_base, "results") + + +# functions to perform label transfer with azimuth-adapted approach +source( + file.path(module_base, "notebook_template", "utils", "label-transfer-functions.R") +) + +# Output files +full_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-full.rds") +kidney_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-kidney.rds") +``` + +## Functions + +```{r functions} +# Make a heatmap of counts for label transfer strategies +plot_count_heatmap <- function(df, title, sample_id) { + all_preds <- union(df$azimuth, df$adapted_azimuth) + + plotme <- data.frame( + azimuth = all_preds, + adapted_azimuth = all_preds + ) |> + expand(azimuth, adapted_azimuth) |> + mutate(n = NA_integer_) |> + anti_join(distinct(df)) |> + bind_rows( + df |> count(azimuth, adapted_azimuth) + ) |> + arrange(azimuth) |> + mutate( + color = case_when( + is.na(n) ~ "white", + n <= 20 ~ "grey90", + n <= 50 ~ "lightblue", + n <= 100 ~ "cornflowerblue", + n <= 500 ~ "red", + n <= 1000 ~ "yellow2", + .default = "yellow" + ) + ) + + ggplot(plotme) + + aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) + + geom_tile(alpha = 0.5) + + geom_abline(color = "firebrick", alpha = 0.5) + + geom_text(size = 3.5) + + # scale_fill_viridis_c(name = "count", na.value = "grey90") + + scale_fill_identity() + + theme_bw() + + theme( + axis.text.y = element_text(size = 7), + axis.text.x = element_text(angle = 30, size = 7, hjust = 1), + legend.position = "bottom", + legend.title = element_text(size = 9), + legend.text = element_text(size = 8) + ) + + labs( + title = glue::glue("{sample_id}: {str_to_title(title)}") + ) +} + + +# Wrapper function to compare results between approaches +# Makes two plots: +# - heatmap comparing counts for cell labels between approaches +# - density plot of annotation scores for labels that agree and disagree between approaches +compare <- function(df, compare_column, score_column, title) { + spread_df <- df |> + select({{ compare_column }}, barcode, version) |> + pivot_wider(names_from = version, values_from = {{ compare_column }}) + + + heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id)) + + disagree_barcodes <- spread_df |> + filter(azimuth != adapted_azimuth) |> + pull(barcode) + + df2 <- df |> + mutate( + agree = ifelse(barcode %in% disagree_barcodes, "labels disagree", "labels agree"), + agree = fct_relevel(agree, "labels disagree", "labels agree") + ) + + density_plot <- ggplot(df2) + + aes(x = {{ score_column }}, fill = agree) + + geom_density(alpha = 0.6) + + theme_bw() + + ggtitle( + glue::glue("Disagree count: {length(disagree_barcodes)} out of {nrow(spread_df)}") + ) + + theme(legend.position = "bottom") + + print(heatmap + density_plot + plot_layout(widths = c(2, 1))) +} +``` + + +## Label transfer + +This section both: + +- Reads in existing Azimuth label transfer results +- Performs label transfer with Azimuth-adapted approach + +If results are already available, we read in the files rather than regenerating results. + +```{r} +# sample ids to process +sample_ids <- c("SCPCS000179", "SCPCS000184", "SCPCS000194", "SCPCS000205", "SCPCS000208") + +# read in seurat input objects, as needed +if ((!file.exists(full_results_file)) || (!file.exists(kidney_results_file))) { + srat_objects <- sample_ids |> + purrr::map( + \(id) { + srat <- readRDS( + file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds")) + ) + DefaultAssay(srat) <- "RNA" + + return(srat) + } + ) + names(srat_objects) <- sample_ids +} +``` + + +### Label transfer for fetal full + +```{r} +if (!file.exists(full_results_file)) { + # read reference + ref <- readRDS(file.path( + module_base, + "results", + "references", + "cao_formatted_ref.rds" + )) + full_reference <- ref$reference + full_refdata <- ref$refdata + full_dims <- ref$dims + full_annotation_columns <- c( + glue::glue("predicted.{ref$annotation_levels}"), + glue::glue("predicted.{ref$annotation_levels}.score") + ) + + + fetal_full <- srat_objects |> + purrr::imap( + \(srat, id) { + # Perform label transfer with new code + set.seed(params$seed) + query <- prepare_query(srat, rownames(full_reference), file.path(module_base, "scratch", "homologs.rds")) + query <- transfer_labels( + query, + full_reference, + full_dims, + full_refdata + ) + + # Read in results from existing Azimuth label transfer code + srat_02a <- readRDS( + file.path(result_dir, id, glue::glue("02a-fetal_full_label-transfer_{id}.Rds")) + ) + + # create final data frame with all annotations + query@meta.data[, full_annotation_columns] |> + tibble::rownames_to_column(var = "barcode") |> + mutate( + sample_id = id, + version = "adapted_azimuth" + ) |> + # existing results + bind_rows( + data.frame( + sample_id = id, + barcode = colnames(srat_02a), + version = "azimuth", + predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1, + predicted.annotation.l1.score = srat_02a$fetal_full_predicted.annotation.l1.score, + predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2, + predicted.annotation.l2.score = srat_02a$fetal_full_predicted.annotation.l2.score, + predicted.organ = srat_02a$fetal_full_predicted.organ, + predicted.organ.score = srat_02a$fetal_full_predicted.organ.score + ) + ) + } + ) + write_rds(fetal_full, full_results_file) +} else { + fetal_full <- read_rds(full_results_file) +} +``` + + +### Label transfer for fetal kidney + + +```{r} +if (!file.exists(kidney_results_file)) { + # read reference + ref <- readRDS(file.path( + module_base, + "results", + "references", + "stewart_formatted_ref.rds" + )) + + # Pull out information from the reference object we need for label transfer + kidney_reference <- ref$reference + kidney_refdata <- ref$refdata + kidney_dims <- ref$dims + kidney_annotation_columns <- c( + glue::glue("predicted.{ref$annotation_levels}"), + glue::glue("predicted.{ref$annotation_levels}.score") + ) + + + fetal_kidney <- srat_objects |> + purrr::imap( + \(srat, id) { + # Perform label transfer with new code + set.seed(params$seed) + query <- prepare_query(srat, rownames(kidney_reference), file.path(module_base, "scratch", "homologs.rds")) + query <- transfer_labels( + query, + kidney_reference, + kidney_dims, + kidney_refdata + ) + + # Read in results from existing Azimuth label transfer code + srat_02b <- readRDS( + file.path(result_dir, id, glue::glue("02b-fetal_kidney_label-transfer_{id}.Rds")) + ) + + # create final data frame with all annotations + query@meta.data[, kidney_annotation_columns] |> + tibble::rownames_to_column(var = "barcode") |> + mutate( + sample_id = id, + version = "adapted_azimuth" + ) |> + # existing results + bind_rows( + data.frame( + sample_id = id, + barcode = colnames(srat_02b), + version = "azimuth", + predicted.compartment = srat_02b$fetal_kidney_predicted.compartment, + predicted.compartment.score = srat_02b$fetal_kidney_predicted.compartment.score, + predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type, + predicted.cell_type.score = srat_02b$fetal_kidney_predicted.cell_type.score + ) + ) + } + ) + + write_rds(fetal_kidney, kidney_results_file) +} else { + fetal_kidney <- read_rds(kidney_results_file) +} +``` + + +## Compare results + +We expect: +- The majority of annotations match between approaches, with heatmap counts primarily falling along the diagonal +- Any annotations that disagree should have low scores + + +### Fetal full reference + +Note that results from the L2 reference are not plotted because they are not used in cell type annotation. + + +```{r fig.height=8, fig.width=14} +fetal_full |> + purrr::walk( + \(dat) { + compare(dat, predicted.annotation.l1, predicted.annotation.l1.score, "l1") + compare(dat, predicted.organ, predicted.organ.score, "organ") + } + ) +``` + + +### Fetal kidney reference + +```{r fig.height=8, fig.width=14} +fetal_kidney |> + purrr::walk( + \(dat) { + compare(dat, predicted.compartment, predicted.compartment.score, "compartment") + compare(dat, predicted.cell_type, predicted.cell_type.score, "cell_type") + } + ) +``` + + + +## Conclusions + +The vast majority of the time, labels agree. +Generally speaking, when labels do not agree, their annotation scores are much lower, which is as expected. + +Additional notable differences are shown in tables below: + +### Fetal full reference: + +- The Azimuth-adapted approach occasionally calls kidney or kidney-related cells as intestine or intestine epithelial +- Some other kidney-related differences are noted: + +| Sample | Reference | Count | Azimuth | Azimuth-adapted | +|--------|-----------|-------|---------|-----------------| +| SCPSC000179 | L1 | 70 | Metanephritic cells | Intestinal epithelial cells | +| SCPSC000179 | Organ | 64 | Kidney | Intestine | +| SCPSC000179 | Organ | 20 | Lung | Kidney | +| SCPSC000194 | L1 | 60 | Stromal cells | Mesangial cells | +| SCPSC000194 | Organ | 35 | Kidney | Intestine | +| SCPSC000194 | Organ | 36 | Lung | Kidney | +| SCPSC000205 | Organ | 56 | Kidney | Intestine | +| SCPSC000208 | L1 | 101 | Mesangial cells | Metanephritic cells | +| SCPSC000208 | L1 | 101 | Intestinal epithelial cells | Metanephritic cells | +| SCPSC000208 | Organ | 149 | Kidney | Intestine | + + +### Fetal kidney reference: + + +- There are a small but notable number of cells flipped between mesenchyme and kidney cells, in particular for sample SCPSC000184. +This is the main discrepancy. +- Most of the cell type differences are not necessarily biologically meaningful for our purposes, as listed below. These are not noted in the table. + - `kidney cell` vs `podocyte` + - `kidney epithelial cell` vs `kidney cell` + - `mesenchymal cell` vs `mesenchymal stem cell` +- There are a decent number of times when stroma and fetal nephron are flipped, but this makes sense given that we expect many of the stroma may be tumor. +- Disagreeing annotation scores when using the cell type reference were often higher, but many of the disagreements for this reference were not meaningful (e.g. `kidney epithelial cell` vs `kidney cell`). + + +| Sample | Reference | Count | Azimuth | Azimuth-adapted | +|--------|-----------|-------|---------|-----------------| +| SCPSC000179 | cell type | 202 | mesenchymal cell | kidney epithelial cell | +| SCPSC000184 | compartment | 111 | fetal nephron | stroma | +| SCPSC000184 | cell type | 536 | kidney epithelial cell | mesenchymal cell | +| SCPSC000194 | compartment | 565 | fetal nephron | stroma | +| SCPSC000194 | cell type | 89 | kidney epithelial cell | mesenchymal cell | +| SCPSC000205 | compartment | 684 | fetal nephron | stroma | +| SCPSC000208 | compartment | 2111 | fetal nephron | stroma | + + +## Session Info + +```{r} +sessionInfo() +``` diff --git a/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.nb.html b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.nb.html new file mode 100644 index 000000000..a6b0d19c5 --- /dev/null +++ b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.nb.html @@ -0,0 +1,3666 @@ + + + + + + + + + + + + + + +Compare label transfer results between Azimuth and Azimuth-adapted strategy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + +

The goal of this notebook is to compare label transfer results +between:

+
    +
  • Label transfer code with Azimuth currently in main at +commit 6af112d. These results are referred to as +"azimuth".
  • +
  • Label transfer code adapted from Azimuth. These results are referred +to as "adapted_azimuth".
  • +
+
+

Setup

+ + + +
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
+options(future.globals.maxSize = 891289600000000)
+
+suppressPackageStartupMessages({
+  library(tidyverse)
+  library(patchwork)
+  library(Seurat)
+})
+
+repository_base <- rprojroot::find_root(rprojroot::is_git_root)
+module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")  
+result_dir <- file.path(module_base, "results")
+
+
+# functions to perform label transfer with azimuth-adapted approach
+source(
+  file.path(module_base, "notebook_template", "utils", "label-transfer-functions.R")
+)
+
+# Output files
+full_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-full.rds")
+kidney_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-kidney.rds")
+ + + +
+
+

Functions

+ + + +
# Make a heatmap of counts for label transfer strategies
+plot_count_heatmap <- function(df, title, sample_id) {
+  all_preds <- union(df$azimuth, df$adapted_azimuth)
+  
+  plotme <- data.frame(
+    azimuth = all_preds, 
+    adapted_azimuth = all_preds
+  ) |> 
+    expand(azimuth, adapted_azimuth) |>
+    mutate(n = NA_integer_) |>
+    anti_join(distinct(df)) |>
+    bind_rows(
+      df |> count(azimuth, adapted_azimuth)
+    ) |> 
+    arrange(azimuth) |>
+    mutate(
+      color = case_when(
+        is.na(n) ~ "white", 
+        n <= 20 ~ "grey90",
+        n <= 50 ~ "lightblue",
+        n <= 100 ~ "cornflowerblue", 
+        n <= 500 ~ "red",
+        n <= 1000 ~ "yellow2",
+        .default = "yellow"
+      )
+    )
+
+    ggplot(plotme) + 
+      aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) + 
+      geom_tile(alpha = 0.5) + 
+      geom_abline(color = "firebrick", alpha = 0.5) +
+      geom_text(size = 3.5) + 
+      #scale_fill_viridis_c(name = "count", na.value = "grey90") +
+      scale_fill_identity() +
+      theme_bw() + 
+      theme(axis.text.y = element_text(size = 7), 
+            axis.text.x = element_text(angle = 30, size = 7, hjust=1), 
+            legend.position = "bottom", 
+            legend.title = element_text(size = 9), 
+            legend.text = element_text(size = 8)) +
+      labs(
+        title = glue::glue("{sample_id}: {str_to_title(title)}")
+      )
+}
+
+
+# Wrapper function to compare results between approaches
+# Makes two plots:
+# - heatmap comparing counts for cell labels between approaches
+# - density plot of annotation scores for labels that agree and disagree between approaches
+compare <- function(df, compare_column, score_column, title) {
+  
+  spread_df <- df |>
+    select({{compare_column}}, barcode, version) |>
+    pivot_wider(names_from = version, values_from = {{compare_column}})
+
+  
+  heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id))  
+  
+  disagree_barcodes <- spread_df |>
+    filter(azimuth != adapted_azimuth) |>
+    pull(barcode)
+
+  df2 <- df |>
+    mutate(
+      agree = ifelse(barcode %in% disagree_barcodes, "labels disagree", "labels agree"),
+      agree = fct_relevel(agree, "labels disagree", "labels agree")
+    ) 
+  
+  density_plot <- ggplot(df2) + 
+    aes(x = {{score_column}}, fill = agree) + 
+    geom_density(alpha = 0.6) + 
+    theme_bw() +
+    ggtitle(
+      glue::glue("Disagree count: {length(disagree_barcodes)} out of {nrow(spread_df)}")
+    ) +
+    theme(legend.position = "bottom")
+
+  print(heatmap + density_plot + plot_layout(widths = c(2, 1)))
+
+}
+ + + +
+
+

Label transfer

+

This section both:

+
    +
  • Reads in existing Azimuth label transfer results
  • +
  • Performs label transfer with Azimuth-adapted approach
  • +
+

If results are already available, we read in the files rather than +regenerating results.

+ + + +
# sample ids to process
+sample_ids <- c("SCPCS000179", "SCPCS000184", "SCPCS000194", "SCPCS000205", "SCPCS000208")
+
+# read in seurat input objects, as needed
+if ((!file.exists(full_results_file)) || (!file.exists(kidney_results_file))) {
+  srat_objects <- sample_ids |>
+    purrr::map(
+      \(id) {
+        srat <- readRDS(
+          file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds")
+        ))
+        DefaultAssay(srat) <- "RNA"
+        
+        return(srat)
+    })
+  names(srat_objects) <- sample_ids
+}
+ + + +
+

Label transfer for fetal full

+ + + +
if (!file.exists(full_results_file)) {
+  
+  # read reference
+  ref <- readRDS(file.path(
+  module_base,
+    "results",
+    "references",
+    "cao_formatted_ref.rds"
+  ))
+  full_reference <- ref$reference
+  full_refdata <- ref$refdata
+  full_dims <- ref$dims
+  full_annotation_columns <- c(
+    glue::glue("predicted.{ref$annotation_levels}"),
+    glue::glue("predicted.{ref$annotation_levels}.score")
+  )
+
+  
+  fetal_full <- srat_objects |>
+    purrr::imap(
+      \(srat, id) {
+        
+        # Perform label transfer with new code
+        set.seed(params$seed)
+        query <- prepare_query(srat, rownames(full_reference), file.path(module_base, "scratch", "homologs.rds"))
+        query <- transfer_labels(
+          query,
+          full_reference,
+          full_dims,
+          full_refdata
+        )
+        
+        # Read in results from existing Azimuth label transfer code
+        srat_02a <- readRDS(
+          file.path(result_dir, id, glue::glue("02a-fetal_full_label-transfer_{id}.Rds"))
+        )
+        
+        # create final data frame with all annotations
+        query@meta.data[, full_annotation_columns] |> 
+          tibble::rownames_to_column(var = "barcode") |>
+          mutate(
+            sample_id = id, 
+            version = "adapted_azimuth"
+          ) |>
+          # existing results
+          bind_rows(
+            data.frame(
+              sample_id = id,
+              barcode = colnames(srat_02a),
+              version = "azimuth", 
+              predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1, 
+              predicted.annotation.l1.score = srat_02a$fetal_full_predicted.annotation.l1.score,
+              predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2, 
+              predicted.annotation.l2.score = srat_02a$fetal_full_predicted.annotation.l2.score,
+              predicted.organ = srat_02a$fetal_full_predicted.organ, 
+              predicted.organ.score = srat_02a$fetal_full_predicted.organ.score
+            ) 
+          )
+      }
+    )
+  write_rds(fetal_full, full_results_file)
+} else {
+  fetal_full <- read_rds(full_results_file)
+}
+ + + +
+
+

Label transfer for fetal kidney

+ + + +
if (!file.exists(kidney_results_file)) {
+  
+  
+  # read reference
+  ref <- readRDS(file.path(
+    module_base,
+    "results",
+    "references",
+    "stewart_formatted_ref.rds"
+  ))
+  
+  # Pull out information from the reference object we need for label transfer
+  kidney_reference <- ref$reference
+  kidney_refdata <- ref$refdata
+  kidney_dims <- ref$dims
+  kidney_annotation_columns <- c(
+    glue::glue("predicted.{ref$annotation_levels}"),
+    glue::glue("predicted.{ref$annotation_levels}.score")
+  )
+  
+  
+  fetal_kidney <- srat_objects |>
+    purrr::imap(
+      \(srat, id) {
+        
+        # Perform label transfer with new code
+        set.seed(params$seed)
+        query <- prepare_query(srat, rownames(kidney_reference), file.path(module_base, "scratch", "homologs.rds"))
+        query <- transfer_labels(
+          query,
+          kidney_reference,
+          kidney_dims,
+          kidney_refdata
+        )
+        
+        # Read in results from existing Azimuth label transfer code
+        srat_02b <- readRDS(
+          file.path(result_dir, id, glue::glue("02b-fetal_kidney_label-transfer_{id}.Rds"))
+        )
+        
+        # create final data frame with all annotations
+        query@meta.data[, kidney_annotation_columns] |> 
+          tibble::rownames_to_column(var = "barcode") |>
+          mutate(
+            sample_id = id, 
+            version = "adapted_azimuth"
+          ) |>
+          # existing results
+          bind_rows(
+            data.frame(
+              sample_id = id,
+              barcode = colnames(srat_02b),
+              version = "azimuth", 
+              predicted.compartment = srat_02b$fetal_kidney_predicted.compartment, 
+              predicted.compartment.score = srat_02b$fetal_kidney_predicted.compartment.score,
+              predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type, 
+              predicted.cell_type.score = srat_02b$fetal_kidney_predicted.cell_type.score
+            ) 
+          )
+      }
+    )
+
+  write_rds(fetal_kidney, kidney_results_file)
+} else {
+  fetal_kidney <- read_rds(kidney_results_file)
+}
+ + + +
+
+
+

Compare results

+

We expect: - The majority of annotations match between approaches, +with heatmap counts primarily falling along the diagonal - Any +annotations that disagree should have low scores

+
+

Fetal full reference

+

Note that results from the L2 reference are not plotted because they +are not used in cell type annotation.

+ + + +
fetal_full |>
+  purrr::walk(
+    \(dat) {
+      compare(dat, predicted.annotation.l1, predicted.annotation.l1.score, "l1")
+      compare(dat, predicted.organ, predicted.organ.score, "organ")
+    }
+  )
+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + + +
+
+

Fetal kidney reference

+ + + +
fetal_kidney |>
+  purrr::walk(
+    \(dat) {
+      compare(dat, predicted.compartment, predicted.compartment.score, "compartment")
+      compare(dat, predicted.cell_type, predicted.cell_type.score, "cell_type")
+    }
+  )
+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + +

+ + + +
+
+
+

Conclusions

+

The vast majority of the time, labels agree. Generally speaking, when +labels do not agree, their annotation scores are much lower, which is as +expected.

+

Additional notable differences are shown in tables below:

+
+

Fetal full reference:

+
    +
  • The Azimuth-adapted approach occasionally calls kidney or +kidney-related cells as intestine or intestine epithelial
  • +
  • Some other kidney-related differences are noted:
  • +
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SampleReferenceCountAzimuthAzimuth-adapted
SCPSC000179L170Metanephritic cellsIntestinal epithelial cells
SCPSC000179Organ64KidneyIntestine
SCPSC000179Organ20LungKidney
SCPSC000194L160Stromal cellsMesangial cells
SCPSC000194Organ35KidneyIntestine
SCPSC000194Organ36LungKidney
SCPSC000205Organ56KidneyIntestine
SCPSC000208L1101Mesangial cellsMetanephritic cells
SCPSC000208L1101Intestinal epithelial cellsMetanephritic cells
SCPSC000208Organ149KidneyIntestine
+
+
+

Fetal kidney reference:

+
    +
  • There are a small but notable number of cells flipped between +mesenchyme and kidney cells, in particular for sample SCPSC000184. This +is the main discrepancy.
  • +
  • Most of the cell type differences are not necessarily biologically +meaningful for our purposes, as listed below. These are not noted in the +table. +
      +
    • kidney cell vs podocyte
    • +
    • kidney epithelial cell vs kidney cell
    • +
    • mesenchymal cell vs +mesenchymal stem cell
    • +
  • +
  • There are a decent number of times when stroma and fetal nephron are +flipped, but this makes sense given that we expect many of the stroma +may be tumor.
  • +
  • Disagreeing annotation scores when using the cell type reference +were often higher, but many of the disagreements for this reference were +not meaningful (e.g. kidney epithelial cell vs +kidney cell).
  • +
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SampleReferenceCountAzimuthAzimuth-adapted
SCPSC000179cell type202mesenchymal cellkidney epithelial cell
SCPSC000184compartment111fetal nephronstroma
SCPSC000184cell type536kidney epithelial cellmesenchymal cell
SCPSC000194compartment565fetal nephronstroma
SCPSC000194cell type89kidney epithelial cellmesenchymal cell
SCPSC000205compartment684fetal nephronstroma
SCPSC000208compartment2111fetal nephronstroma
+
+
+
+

Session Info

+ + + +
sessionInfo()
+ + +
R version 4.4.1 (2024-06-14)
+Platform: aarch64-apple-darwin20
+Running under: macOS 15.1
+
+Matrix products: default
+BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
+LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
+
+locale:
+[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+
+time zone: America/New_York
+tzcode source: internal
+
+attached base packages:
+[1] stats     graphics  grDevices datasets  utils     methods   base     
+
+other attached packages:
+ [1] Seurat_5.1.0       SeuratObject_5.0.2 sp_2.1-4           patchwork_1.2.0    lubridate_1.9.3    forcats_1.0.0      stringr_1.5.1     
+ [8] dplyr_1.1.4        purrr_1.0.2        readr_2.1.5        tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0   
+
+loaded via a namespace (and not attached):
+  [1] RColorBrewer_1.1-3     rstudioapi_0.16.0      jsonlite_1.8.8         magrittr_2.0.3         spatstat.utils_3.1-0   farver_2.1.2          
+  [7] rmarkdown_2.28         vctrs_0.6.5            ROCR_1.0-11            spatstat.explore_3.3-2 htmltools_0.5.8.1      sass_0.4.9            
+ [13] sctransform_0.4.1      parallelly_1.38.0      KernSmooth_2.23-24     bslib_0.8.0            htmlwidgets_1.6.4      ica_1.0-3             
+ [19] plyr_1.8.9             plotly_4.10.4          zoo_1.8-12             cachem_1.1.0           igraph_2.0.3           mime_0.12             
+ [25] lifecycle_1.0.4        pkgconfig_2.0.3        Matrix_1.7-0           R6_2.5.1               fastmap_1.2.0          fitdistrplus_1.2-1    
+ [31] future_1.34.0          shiny_1.9.1            digest_0.6.37          colorspace_2.1-1       rprojroot_2.0.4        tensor_1.5            
+ [37] RSpectra_0.16-2        irlba_2.3.5.1          labeling_0.4.3         progressr_0.14.0       fansi_1.0.6            spatstat.sparse_3.1-0 
+ [43] timechange_0.3.0       httr_1.4.7             polyclip_1.10-7        abind_1.4-5            compiler_4.4.1         withr_3.0.1           
+ [49] fastDummies_1.7.4      MASS_7.3-61            tools_4.4.1            lmtest_0.9-40          httpuv_1.6.15          future.apply_1.11.2   
+ [55] goftest_1.2-3          glue_1.7.0             nlme_3.1-166           promises_1.3.0         grid_4.4.1             Rtsne_0.17            
+ [61] cluster_2.1.6          reshape2_1.4.4         generics_0.1.3         gtable_0.3.5           spatstat.data_3.1-2    tzdb_0.4.0            
+ [67] data.table_1.16.0      hms_1.1.3              utf8_1.2.4             spatstat.geom_3.3-2    RcppAnnoy_0.0.22       ggrepel_0.9.5         
+ [73] RANN_2.6.2             pillar_1.9.0           spam_2.10-0            RcppHNSW_0.6.0         later_1.3.2            splines_4.4.1         
+ [79] lattice_0.22-6         renv_1.0.7             survival_3.7-0         deldir_2.0-4           tidyselect_1.2.1       miniUI_0.1.1.1        
+ [85] pbapply_1.7-2          knitr_1.48             gridExtra_2.3          scattermore_1.2        xfun_0.47              matrixStats_1.3.0     
+ [91] stringi_1.8.4          lazyeval_0.2.2         yaml_2.3.10            evaluate_0.24.0        codetools_0.2-20       BiocManager_1.30.25   
+ [97] cli_3.6.3              uwot_0.2.2             xtable_1.8-4           reticulate_1.38.0      munsell_0.5.1          jquerylib_0.1.4       
+[103] Rcpp_1.0.13            globals_0.16.3         spatstat.random_3.3-1  png_0.1-8              spatstat.univar_3.0-0  parallel_4.4.1        
+[109] dotCall64_1.1-1        listenv_0.9.1          viridisLite_0.4.2      scales_1.3.0           ggridges_0.5.6         leiden_0.4.3.1        
+[115] rlang_1.1.4            cowplot_1.1.3         
+ + +
+ +
---
title: "Compare label transfer results between Azimuth and Azimuth-adapted strategy"
author: Stephanie Spielman, Data Lab
output: 
  html_notebook: 
    toc: yes
    toc_float: yes
params:
  seed: 12345
---


The goal of this notebook is to compare label transfer results between:

- Label transfer code with Azimuth currently in `main` at commit `6af112d`. These results are referred to as `"azimuth"`.
- Label transfer code adapted from Azimuth. These results are referred to as `"adapted_azimuth"`.


## Setup

```{r setup}
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
options(future.globals.maxSize = 891289600000000)

suppressPackageStartupMessages({
  library(tidyverse)
  library(patchwork)
  library(Seurat)
})

repository_base <- rprojroot::find_root(rprojroot::is_git_root)
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")  
result_dir <- file.path(module_base, "results")


# functions to perform label transfer with azimuth-adapted approach
source(
  file.path(module_base, "notebook_template", "utils", "label-transfer-functions.R")
)

# Output files
full_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-full.rds")
kidney_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-kidney.rds")
```

## Functions

```{r functions}
# Make a heatmap of counts for label transfer strategies
plot_count_heatmap <- function(df, title, sample_id) {
  all_preds <- union(df$azimuth, df$adapted_azimuth)
  
  plotme <- data.frame(
    azimuth = all_preds, 
    adapted_azimuth = all_preds
  ) |> 
    expand(azimuth, adapted_azimuth) |>
    mutate(n = NA_integer_) |>
    anti_join(distinct(df)) |>
    bind_rows(
      df |> count(azimuth, adapted_azimuth)
    ) |> 
    arrange(azimuth) |>
    mutate(
      color = case_when(
        is.na(n) ~ "white", 
        n <= 20 ~ "grey90",
        n <= 50 ~ "lightblue",
        n <= 100 ~ "cornflowerblue", 
        n <= 500 ~ "red",
        n <= 1000 ~ "yellow2",
        .default = "yellow"
      )
    )

    ggplot(plotme) + 
      aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) + 
      geom_tile(alpha = 0.5) + 
      geom_abline(color = "firebrick", alpha = 0.5) +
      geom_text(size = 3.5) + 
      #scale_fill_viridis_c(name = "count", na.value = "grey90") +
      scale_fill_identity() +
      theme_bw() + 
      theme(axis.text.y = element_text(size = 7), 
            axis.text.x = element_text(angle = 30, size = 7, hjust=1), 
            legend.position = "bottom", 
            legend.title = element_text(size = 9), 
            legend.text = element_text(size = 8)) +
      labs(
        title = glue::glue("{sample_id}: {str_to_title(title)}")
      )
}


# Wrapper function to compare results between approaches
# Makes two plots:
# - heatmap comparing counts for cell labels between approaches
# - density plot of annotation scores for labels that agree and disagree between approaches
compare <- function(df, compare_column, score_column, title) {
  
  spread_df <- df |>
    select({{compare_column}}, barcode, version) |>
    pivot_wider(names_from = version, values_from = {{compare_column}})

  
  heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id))  
  
  disagree_barcodes <- spread_df |>
    filter(azimuth != adapted_azimuth) |>
    pull(barcode)

  df2 <- df |>
    mutate(
      agree = ifelse(barcode %in% disagree_barcodes, "labels disagree", "labels agree"),
      agree = fct_relevel(agree, "labels disagree", "labels agree")
    ) 
  
  density_plot <- ggplot(df2) + 
    aes(x = {{score_column}}, fill = agree) + 
    geom_density(alpha = 0.6) + 
    theme_bw() +
    ggtitle(
      glue::glue("Disagree count: {length(disagree_barcodes)} out of {nrow(spread_df)}")
    ) +
    theme(legend.position = "bottom")

  print(heatmap + density_plot + plot_layout(widths = c(2, 1)))

}
```


## Label transfer

This section both:

- Reads in existing Azimuth label transfer results
- Performs label transfer with Azimuth-adapted approach

If results are already available, we read in the files rather than regenerating results.

```{r}
# sample ids to process
sample_ids <- c("SCPCS000179", "SCPCS000184", "SCPCS000194", "SCPCS000205", "SCPCS000208")

# read in seurat input objects, as needed
if ((!file.exists(full_results_file)) || (!file.exists(kidney_results_file))) {
  srat_objects <- sample_ids |>
    purrr::map(
      \(id) {
        srat <- readRDS(
          file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds")
        ))
        DefaultAssay(srat) <- "RNA"
        
        return(srat)
    })
  names(srat_objects) <- sample_ids
}
```


### Label transfer for fetal full

```{r}
if (!file.exists(full_results_file)) {
  
  # read reference
  ref <- readRDS(file.path(
  module_base,
    "results",
    "references",
    "cao_formatted_ref.rds"
  ))
  full_reference <- ref$reference
  full_refdata <- ref$refdata
  full_dims <- ref$dims
  full_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )

  
  fetal_full <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        # Perform label transfer with new code
        set.seed(params$seed)
        query <- prepare_query(srat, rownames(full_reference), file.path(module_base, "scratch", "homologs.rds"))
        query <- transfer_labels(
          query,
          full_reference,
          full_dims,
          full_refdata
        )
        
        # Read in results from existing Azimuth label transfer code
        srat_02a <- readRDS(
          file.path(result_dir, id, glue::glue("02a-fetal_full_label-transfer_{id}.Rds"))
        )
        
        # create final data frame with all annotations
        query@meta.data[, full_annotation_columns] |> 
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id, 
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02a),
              version = "azimuth", 
              predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1, 
              predicted.annotation.l1.score = srat_02a$fetal_full_predicted.annotation.l1.score,
              predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2, 
              predicted.annotation.l2.score = srat_02a$fetal_full_predicted.annotation.l2.score,
              predicted.organ = srat_02a$fetal_full_predicted.organ, 
              predicted.organ.score = srat_02a$fetal_full_predicted.organ.score
            ) 
          )
      }
    )
  write_rds(fetal_full, full_results_file)
} else {
  fetal_full <- read_rds(full_results_file)
}
```


### Label transfer for fetal kidney


```{r}
if (!file.exists(kidney_results_file)) {
  
  
  # read reference
  ref <- readRDS(file.path(
    module_base,
    "results",
    "references",
    "stewart_formatted_ref.rds"
  ))
  
  # Pull out information from the reference object we need for label transfer
  kidney_reference <- ref$reference
  kidney_refdata <- ref$refdata
  kidney_dims <- ref$dims
  kidney_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )
  
  
  fetal_kidney <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        # Perform label transfer with new code
        set.seed(params$seed)
        query <- prepare_query(srat, rownames(kidney_reference), file.path(module_base, "scratch", "homologs.rds"))
        query <- transfer_labels(
          query,
          kidney_reference,
          kidney_dims,
          kidney_refdata
        )
        
        # Read in results from existing Azimuth label transfer code
        srat_02b <- readRDS(
          file.path(result_dir, id, glue::glue("02b-fetal_kidney_label-transfer_{id}.Rds"))
        )
        
        # create final data frame with all annotations
        query@meta.data[, kidney_annotation_columns] |> 
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id, 
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02b),
              version = "azimuth", 
              predicted.compartment = srat_02b$fetal_kidney_predicted.compartment, 
              predicted.compartment.score = srat_02b$fetal_kidney_predicted.compartment.score,
              predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type, 
              predicted.cell_type.score = srat_02b$fetal_kidney_predicted.cell_type.score
            ) 
          )
      }
    )

  write_rds(fetal_kidney, kidney_results_file)
} else {
  fetal_kidney <- read_rds(kidney_results_file)
}
```


## Compare results

We expect:
- The majority of annotations match between approaches, with heatmap counts primarily falling along the diagonal
- Any annotations that disagree should have low scores


### Fetal full reference

Note that results from the L2 reference are not plotted because they are not used in cell type annotation.


```{r fig.height=8, fig.width=14}
fetal_full |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.annotation.l1, predicted.annotation.l1.score, "l1")
      compare(dat, predicted.organ, predicted.organ.score, "organ")
    }
  )
```


### Fetal kidney reference

```{r fig.height=8, fig.width=14}
fetal_kidney |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.compartment, predicted.compartment.score, "compartment")
      compare(dat, predicted.cell_type, predicted.cell_type.score, "cell_type")
    }
  )
```



## Conclusions

The vast majority of the time, labels agree. 
Generally speaking, when labels do not agree, their annotation scores are much lower, which is as expected.
 
Additional notable differences are shown in tables below:
    
### Fetal full reference:

- The Azimuth-adapted approach occasionally calls kidney or kidney-related cells as intestine or intestine epithelial
- Some other kidney-related differences are noted:

| Sample | Reference | Count | Azimuth | Azimuth-adapted |
|--------|-----------|-------|---------|-----------------|
| SCPSC000179 | L1 | 70 | Metanephritic cells | Intestinal epithelial cells | 
| SCPSC000179 | Organ | 64 | Kidney | Intestine | 
| SCPSC000179 | Organ | 20 | Lung | Kidney | 
| SCPSC000194 | L1 | 60 | Stromal cells | Mesangial cells | 
| SCPSC000194 | Organ | 35 | Kidney | Intestine | 
| SCPSC000194 | Organ | 36 | Lung | Kidney | 
| SCPSC000205 | Organ | 56 | Kidney | Intestine |
| SCPSC000208 | L1 | 101 | Mesangial cells | Metanephritic cells |  
| SCPSC000208 | L1 | 101 | Intestinal epithelial cells | Metanephritic cells |  
| SCPSC000208 | Organ | 149 | Kidney | Intestine | 


### Fetal kidney reference:


- There are a small but notable number of cells flipped between mesenchyme and kidney cells, in particular for sample SCPSC000184.
This is the main discrepancy.
- Most of the cell type differences are not necessarily biologically meaningful for our purposes, as listed below. These are not noted in the table.
   - `kidney cell` vs `podocyte`
   - `kidney epithelial cell` vs `kidney cell` 
   - `mesenchymal cell` vs `mesenchymal stem cell` 
- There are a decent number of times when stroma and fetal nephron are flipped, but this makes sense given that we expect many of the stroma may be tumor.
- Disagreeing annotation scores when using the cell type reference were often higher, but many of the disagreements for this reference were not meaningful (e.g. `kidney epithelial cell` vs `kidney cell`).


| Sample | Reference | Count | Azimuth | Azimuth-adapted |
|--------|-----------|-------|---------|-----------------|
| SCPSC000179 | cell type | 202 | mesenchymal cell | kidney epithelial cell | 
| SCPSC000184 | compartment | 111 | fetal nephron |  stroma | 
| SCPSC000184 | cell type | 536 | kidney epithelial cell | mesenchymal cell | 
| SCPSC000194 | compartment | 565 | fetal nephron |  stroma | 
| SCPSC000194 | cell type  | 89 | kidney epithelial cell | mesenchymal cell | 
| SCPSC000205 | compartment  | 684 | fetal nephron |  stroma | 
| SCPSC000208 | compartment  | 2111 | fetal nephron |  stroma | 


## Session Info

```{r}
sessionInfo()
```

+ + +
+
+ +
+ + + + + + + + + + + + + + + + + From 76d9ace8420b1565e2912dc65ff66eabe33da67f Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 29 Oct 2024 15:19:17 -0400 Subject: [PATCH 16/22] little regex tweak --- .gitleaks.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitleaks.toml b/.gitleaks.toml index 7be6fcbc5..5c5d51d82 100644 --- a/.gitleaks.toml +++ b/.gitleaks.toml @@ -8,5 +8,5 @@ regexes = [ # skip base64 encoded images, which might have substrings that look like tokens '''(?i)''', # skip jQuery definition function - '''^!function\(.+jQuery''' + '''^!function\(.+?jQuery''' ] From 04332452bed33b21a63d922551f1b149fd33b994 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Fri, 1 Nov 2024 15:57:43 -0400 Subject: [PATCH 17/22] add query assay arguments to functions --- .../utils/label-transfer-functions.R | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R b/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R index af43b6454..b19b1a455 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/utils/label-transfer-functions.R @@ -21,10 +21,15 @@ #' #' @param query The Seurat object which will undergo label transfer #' @param reference_rownames The rownames (aka, features) in the reference object +#' @param assay Name of assay in query to prepare #' @param homolog_file Path to the homologs.rds file obtained from Seurat #' #' @return Seurat object prepared for label transfer -prepare_query <- function(query, reference_rownames, homolog_file = homologs_file) { +prepare_query <- function( + query, + reference_rownames, + assay = NULL, + homolog_file = homologs_file) { # Convert the query (sample) row names from ensembl IDs to gene names to match what # the Azimuth reference uses # Source: https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R#L99-L104 @@ -41,7 +46,7 @@ prepare_query <- function(query, reference_rownames, homolog_file = homologs_fil calcn <- as.data.frame(x = Seurat:::CalcN(object = query[["RNA"]])) colnames(x = calcn) <- paste( colnames(x = calcn), - NULL, # assay + assay, sep = "_" ) query <- AddMetaData( @@ -59,7 +64,7 @@ prepare_query <- function(query, reference_rownames, homolog_file = homologs_fil object = query, pattern = "^MT-", col.name = "percent.mt", - assay = NULL + assay = assay ) } @@ -77,8 +82,9 @@ prepare_query <- function(query, reference_rownames, homolog_file = homologs_fil #' @param query The Seurat object which will undergo label transfer #' @param reference The reference dataset #' @param reference_dims Dimensions calculated from the reference dataset -#' @param refdata object used by Azimuth +#' @param refdata Annotation information used by TransferData #' @param reference_dims Number of dimensions to use in the anchor weighting procedure +#' @param query.assay name of assay in query to use #' @param k.weight Number of neighbors to consider when weighting anchors. This should be #' <=15 when running on OpenScPCA test data #' @param n.trees More trees gives higher precision when using annoy approximate @@ -95,6 +101,7 @@ transfer_labels <- function( reference, reference_dims, refdata, + query.assay = NULL, k.weight = 50, n.trees = 20, mapping.score.k = 80, @@ -108,7 +115,7 @@ transfer_labels <- function( k.filter = NA, reference.neighbors = "refdr.annoy.neighbors", reference.assay = "refAssay", - query.assay = NULL, + query.assay = query.assay, reference.reduction = "refDR", normalization.method = "SCT", features = rownames(Loadings(reference[["refDR"]])), @@ -123,7 +130,7 @@ transfer_labels <- function( query <- TransferData( reference = reference, query = query, - query.assay = NULL, + query.assay = query.assay, dims = 1:reference_dims, anchorset = anchors, refdata = refdata, From 90694c4120141d7b57fe36e49807c3f00b195fa3 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Fri, 1 Nov 2024 16:00:38 -0400 Subject: [PATCH 18/22] Update notebook using RNA as the query assay --- .../compare-label-transfer-approaches.Rmd | 46 +- .../compare-label-transfer-approaches.nb.html | 407 ++++++++++-------- 2 files changed, 261 insertions(+), 192 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.Rmd b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.Rmd index e240bdb73..1e2bc5242 100644 --- a/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.Rmd +++ b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.Rmd @@ -181,17 +181,25 @@ if (!file.exists(full_results_file)) { ) + # Perform label transfer with new code + assay <- "RNA" fetal_full <- srat_objects |> purrr::imap( \(srat, id) { - # Perform label transfer with new code set.seed(params$seed) - query <- prepare_query(srat, rownames(full_reference), file.path(module_base, "scratch", "homologs.rds")) + + query <- prepare_query( + srat, + rownames(full_reference), + assay, + file.path(module_base, "scratch", "homologs.rds") + ) query <- transfer_labels( query, full_reference, full_dims, - full_refdata + full_refdata, + query.assay = assay ) # Read in results from existing Azimuth label transfer code @@ -252,17 +260,25 @@ if (!file.exists(kidney_results_file)) { ) + # Perform label transfer with new code + assay <- "RNA" fetal_kidney <- srat_objects |> purrr::imap( \(srat, id) { - # Perform label transfer with new code set.seed(params$seed) - query <- prepare_query(srat, rownames(kidney_reference), file.path(module_base, "scratch", "homologs.rds")) + + query <- prepare_query( + srat, + rownames(kidney_reference), + assay, + file.path(module_base, "scratch", "homologs.rds") + ) query <- transfer_labels( query, kidney_reference, kidney_dims, - kidney_refdata + kidney_refdata, + query.assay = assay ) # Read in results from existing Azimuth label transfer code @@ -358,32 +374,22 @@ Additional notable differences are shown in tables below: | SCPSC000194 | Organ | 36 | Lung | Kidney | | SCPSC000205 | Organ | 56 | Kidney | Intestine | | SCPSC000208 | L1 | 101 | Mesangial cells | Metanephritic cells | -| SCPSC000208 | L1 | 101 | Intestinal epithelial cells | Metanephritic cells | +| SCPSC000208 | L1 | 75 | Intestinal epithelial cells | Metanephritic cells | | SCPSC000208 | Organ | 149 | Kidney | Intestine | ### Fetal kidney reference: - -- There are a small but notable number of cells flipped between mesenchyme and kidney cells, in particular for sample SCPSC000184. -This is the main discrepancy. -- Most of the cell type differences are not necessarily biologically meaningful for our purposes, as listed below. These are not noted in the table. +- Most of the cell type differences are not in the table below because they are not necessarily biologically meaningful for our purposes: - `kidney cell` vs `podocyte` - `kidney epithelial cell` vs `kidney cell` - `mesenchymal cell` vs `mesenchymal stem cell` -- There are a decent number of times when stroma and fetal nephron are flipped, but this makes sense given that we expect many of the stroma may be tumor. -- Disagreeing annotation scores when using the cell type reference were often higher, but many of the disagreements for this reference were not meaningful (e.g. `kidney epithelial cell` vs `kidney cell`). | Sample | Reference | Count | Azimuth | Azimuth-adapted | |--------|-----------|-------|---------|-----------------| -| SCPSC000179 | cell type | 202 | mesenchymal cell | kidney epithelial cell | -| SCPSC000184 | compartment | 111 | fetal nephron | stroma | -| SCPSC000184 | cell type | 536 | kidney epithelial cell | mesenchymal cell | -| SCPSC000194 | compartment | 565 | fetal nephron | stroma | -| SCPSC000194 | cell type | 89 | kidney epithelial cell | mesenchymal cell | -| SCPSC000205 | compartment | 684 | fetal nephron | stroma | -| SCPSC000208 | compartment | 2111 | fetal nephron | stroma | +| SCPSC000179 | cell type | 94 | mesenchymal cell | kidney epithelial cell | +| SCPSC000205 | compartment | 52 | fetal nephron | stroma | ## Session Info diff --git a/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.nb.html b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.nb.html index a6b0d19c5..53e065d5c 100644 --- a/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.nb.html +++ b/analyses/cell-type-wilms-tumor-06/supplemental-notebooks/compare-label-transfer-approaches.nb.html @@ -2918,7 +2918,7 @@

Stephanie Spielman, Data Lab

Setup

- +
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
 options(future.globals.maxSize = 891289600000000)
 
@@ -2929,7 +2929,7 @@ 

Setup

}) repository_base <- rprojroot::find_root(rprojroot::is_git_root) -module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06") +module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06") result_dir <- file.path(module_base, "results") @@ -2949,50 +2949,52 @@

Setup

Functions

- +
# Make a heatmap of counts for label transfer strategies
 plot_count_heatmap <- function(df, title, sample_id) {
   all_preds <- union(df$azimuth, df$adapted_azimuth)
-  
+
   plotme <- data.frame(
-    azimuth = all_preds, 
+    azimuth = all_preds,
     adapted_azimuth = all_preds
-  ) |> 
+  ) |>
     expand(azimuth, adapted_azimuth) |>
     mutate(n = NA_integer_) |>
     anti_join(distinct(df)) |>
     bind_rows(
       df |> count(azimuth, adapted_azimuth)
-    ) |> 
+    ) |>
     arrange(azimuth) |>
     mutate(
       color = case_when(
-        is.na(n) ~ "white", 
+        is.na(n) ~ "white",
         n <= 20 ~ "grey90",
         n <= 50 ~ "lightblue",
-        n <= 100 ~ "cornflowerblue", 
+        n <= 100 ~ "cornflowerblue",
         n <= 500 ~ "red",
         n <= 1000 ~ "yellow2",
         .default = "yellow"
       )
     )
 
-    ggplot(plotme) + 
-      aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) + 
-      geom_tile(alpha = 0.5) + 
-      geom_abline(color = "firebrick", alpha = 0.5) +
-      geom_text(size = 3.5) + 
-      #scale_fill_viridis_c(name = "count", na.value = "grey90") +
-      scale_fill_identity() +
-      theme_bw() + 
-      theme(axis.text.y = element_text(size = 7), 
-            axis.text.x = element_text(angle = 30, size = 7, hjust=1), 
-            legend.position = "bottom", 
-            legend.title = element_text(size = 9), 
-            legend.text = element_text(size = 8)) +
-      labs(
-        title = glue::glue("{sample_id}: {str_to_title(title)}")
-      )
+  ggplot(plotme) +
+    aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) +
+    geom_tile(alpha = 0.5) +
+    geom_abline(color = "firebrick", alpha = 0.5) +
+    geom_text(size = 3.5) +
+    # scale_fill_viridis_c(name = "count", na.value = "grey90") +
+    scale_fill_identity() +
+    theme_bw() +
+    theme(
+      axis.text.y = element_text(size = 7),
+      axis.text.x = element_text(angle = 30, size = 7, hjust = 1),
+      legend.position = "bottom",
+      legend.title = element_text(size = 9),
+      legend.text = element_text(size = 8)
+    ) +
+    labs(
+      title = glue::glue("{sample_id}: {str_to_title(title)}")
+    )
 }
 
 
@@ -3001,14 +3003,13 @@ 

Functions

# - heatmap comparing counts for cell labels between approaches # - density plot of annotation scores for labels that agree and disagree between approaches compare <- function(df, compare_column, score_column, title) { - spread_df <- df |> - select({{compare_column}}, barcode, version) |> - pivot_wider(names_from = version, values_from = {{compare_column}}) + select({{ compare_column }}, barcode, version) |> + pivot_wider(names_from = version, values_from = {{ compare_column }}) + + + heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id)) - - heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id)) - disagree_barcodes <- spread_df |> filter(azimuth != adapted_azimuth) |> pull(barcode) @@ -3017,11 +3018,11 @@

Functions

mutate( agree = ifelse(barcode %in% disagree_barcodes, "labels disagree", "labels agree"), agree = fct_relevel(agree, "labels disagree", "labels agree") - ) - - density_plot <- ggplot(df2) + - aes(x = {{score_column}}, fill = agree) + - geom_density(alpha = 0.6) + + ) + + density_plot <- ggplot(df2) + + aes(x = {{ score_column }}, fill = agree) + + geom_density(alpha = 0.6) + theme_bw() + ggtitle( glue::glue("Disagree count: {length(disagree_barcodes)} out of {nrow(spread_df)}") @@ -3029,7 +3030,6 @@

Functions

theme(legend.position = "bottom") print(heatmap + density_plot + plot_layout(widths = c(2, 1))) - }
@@ -3046,7 +3046,7 @@

Label transfer

regenerating results.

- +
# sample ids to process
 sample_ids <- c("SCPCS000179", "SCPCS000184", "SCPCS000194", "SCPCS000205", "SCPCS000208")
 
@@ -3056,12 +3056,13 @@ 

Label transfer

purrr::map( \(id) { srat <- readRDS( - file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds") - )) + file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds")) + ) DefaultAssay(srat) <- "RNA" - + return(srat) - }) + } + ) names(srat_objects) <- sample_ids }
@@ -3071,12 +3072,11 @@

Label transfer

Label transfer for fetal full

- +
if (!file.exists(full_results_file)) {
-  
   # read reference
   ref <- readRDS(file.path(
-  module_base,
+    module_base,
     "results",
     "references",
     "cao_formatted_ref.rds"
@@ -3089,31 +3089,39 @@ 

Label transfer for fetal full

glue::glue("predicted.{ref$annotation_levels}.score") ) - + + # Perform label transfer with new code + assay <- "RNA" fetal_full <- srat_objects |> purrr::imap( \(srat, id) { - # Perform label transfer with new code set.seed(params$seed) - query <- prepare_query(srat, rownames(full_reference), file.path(module_base, "scratch", "homologs.rds")) + + query <- prepare_query( + srat, + rownames(full_reference), + assay, + file.path(module_base, "scratch", "homologs.rds") + ) query <- transfer_labels( query, full_reference, full_dims, - full_refdata + full_refdata, + query.assay = assay ) - + # Read in results from existing Azimuth label transfer code srat_02a <- readRDS( file.path(result_dir, id, glue::glue("02a-fetal_full_label-transfer_{id}.Rds")) ) - + # create final data frame with all annotations - query@meta.data[, full_annotation_columns] |> + query@meta.data[, full_annotation_columns] |> tibble::rownames_to_column(var = "barcode") |> mutate( - sample_id = id, + sample_id = id, version = "adapted_azimuth" ) |> # existing results @@ -3121,14 +3129,14 @@

Label transfer for fetal full

data.frame( sample_id = id, barcode = colnames(srat_02a), - version = "azimuth", - predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1, + version = "azimuth", + predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1, predicted.annotation.l1.score = srat_02a$fetal_full_predicted.annotation.l1.score, - predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2, + predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2, predicted.annotation.l2.score = srat_02a$fetal_full_predicted.annotation.l2.score, - predicted.organ = srat_02a$fetal_full_predicted.organ, + predicted.organ = srat_02a$fetal_full_predicted.organ, predicted.organ.score = srat_02a$fetal_full_predicted.organ.score - ) + ) ) } ) @@ -3144,10 +3152,8 @@

Label transfer for fetal full

Label transfer for fetal kidney

- +
if (!file.exists(kidney_results_file)) {
-  
-  
   # read reference
   ref <- readRDS(file.path(
     module_base,
@@ -3155,7 +3161,7 @@ 

Label transfer for fetal kidney

"references", "stewart_formatted_ref.rds" )) - + # Pull out information from the reference object we need for label transfer kidney_reference <- ref$reference kidney_refdata <- ref$refdata @@ -3164,32 +3170,39 @@

Label transfer for fetal kidney

glue::glue("predicted.{ref$annotation_levels}"), glue::glue("predicted.{ref$annotation_levels}.score") ) - - + + + # Perform label transfer with new code + assay <- "RNA" fetal_kidney <- srat_objects |> purrr::imap( \(srat, id) { - - # Perform label transfer with new code set.seed(params$seed) - query <- prepare_query(srat, rownames(kidney_reference), file.path(module_base, "scratch", "homologs.rds")) + + query <- prepare_query( + srat, + rownames(kidney_reference), + assay, + file.path(module_base, "scratch", "homologs.rds") + ) query <- transfer_labels( query, kidney_reference, kidney_dims, - kidney_refdata + kidney_refdata, + query.assay = assay ) - + # Read in results from existing Azimuth label transfer code srat_02b <- readRDS( file.path(result_dir, id, glue::glue("02b-fetal_kidney_label-transfer_{id}.Rds")) ) - + # create final data frame with all annotations - query@meta.data[, kidney_annotation_columns] |> + query@meta.data[, kidney_annotation_columns] |> tibble::rownames_to_column(var = "barcode") |> mutate( - sample_id = id, + sample_id = id, version = "adapted_azimuth" ) |> # existing results @@ -3197,12 +3210,12 @@

Label transfer for fetal kidney

data.frame( sample_id = id, barcode = colnames(srat_02b), - version = "azimuth", - predicted.compartment = srat_02b$fetal_kidney_predicted.compartment, + version = "azimuth", + predicted.compartment = srat_02b$fetal_kidney_predicted.compartment, predicted.compartment.score = srat_02b$fetal_kidney_predicted.compartment.score, - predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type, + predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type, predicted.cell_type.score = srat_02b$fetal_kidney_predicted.cell_type.score - ) + ) ) } ) @@ -3237,34 +3250,34 @@

Fetal full reference

)
-

+

-

+

-

+

-

+

-

+

-

+

-

+

-

+

-

+

-

+

@@ -3283,34 +3296,34 @@

Fetal kidney reference

)
-

+

-

+

-

+

-

+

-

+

-

+

-

+

-

+

-

+

-

+

@@ -3406,7 +3419,7 @@

Fetal full reference:

SCPSC000208 L1 -101 +75 Intestinal epithelial cells Metanephritic cells @@ -3423,25 +3436,14 @@

Fetal full reference:

Fetal kidney reference:

    -
  • There are a small but notable number of cells flipped between -mesenchyme and kidney cells, in particular for sample SCPSC000184. This -is the main discrepancy.
  • -
  • Most of the cell type differences are not necessarily biologically -meaningful for our purposes, as listed below. These are not noted in the -table. +
  • Most of the cell type differences are not in the table below because +they are not necessarily biologically meaningful for our purposes:
    • kidney cell vs podocyte
    • kidney epithelial cell vs kidney cell
    • mesenchymal cell vs mesenchymal stem cell
  • -
  • There are a decent number of times when stroma and fetal nephron are -flipped, but this makes sense given that we expect many of the stroma -may be tumor.
  • -
  • Disagreeing annotation scores when using the cell type reference -were often higher, but many of the disagreements for this reference were -not meaningful (e.g. kidney epithelial cell vs -kidney cell).
@@ -3464,49 +3466,14 @@

Fetal kidney reference:

- + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + @@ -3521,7 +3488,7 @@

Session Info

sessionInfo()
- +
R version 4.4.1 (2024-06-14)
 Platform: aarch64-apple-darwin20
 Running under: macOS 15.1
@@ -3537,38 +3504,134 @@ 

Session Info

tzcode source: internal attached base packages: -[1] stats graphics grDevices datasets utils methods base +[1] stats graphics grDevices datasets +[5] utils methods base other attached packages: - [1] Seurat_5.1.0 SeuratObject_5.0.2 sp_2.1-4 patchwork_1.2.0 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 - [8] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0 + [1] Seurat_5.1.0 SeuratObject_5.0.2 + [3] sp_2.1-4 patchwork_1.2.0 + [5] lubridate_1.9.3 forcats_1.0.0 + [7] stringr_1.5.1 dplyr_1.1.4 + [9] purrr_1.0.2 readr_2.1.5 +[11] tidyr_1.3.1 tibble_3.2.1 +[13] ggplot2_3.5.1 tidyverse_2.0.0 loaded via a namespace (and not attached): - [1] RColorBrewer_1.1-3 rstudioapi_0.16.0 jsonlite_1.8.8 magrittr_2.0.3 spatstat.utils_3.1-0 farver_2.1.2 - [7] rmarkdown_2.28 vctrs_0.6.5 ROCR_1.0-11 spatstat.explore_3.3-2 htmltools_0.5.8.1 sass_0.4.9 - [13] sctransform_0.4.1 parallelly_1.38.0 KernSmooth_2.23-24 bslib_0.8.0 htmlwidgets_1.6.4 ica_1.0-3 - [19] plyr_1.8.9 plotly_4.10.4 zoo_1.8-12 cachem_1.1.0 igraph_2.0.3 mime_0.12 - [25] lifecycle_1.0.4 pkgconfig_2.0.3 Matrix_1.7-0 R6_2.5.1 fastmap_1.2.0 fitdistrplus_1.2-1 - [31] future_1.34.0 shiny_1.9.1 digest_0.6.37 colorspace_2.1-1 rprojroot_2.0.4 tensor_1.5 - [37] RSpectra_0.16-2 irlba_2.3.5.1 labeling_0.4.3 progressr_0.14.0 fansi_1.0.6 spatstat.sparse_3.1-0 - [43] timechange_0.3.0 httr_1.4.7 polyclip_1.10-7 abind_1.4-5 compiler_4.4.1 withr_3.0.1 - [49] fastDummies_1.7.4 MASS_7.3-61 tools_4.4.1 lmtest_0.9-40 httpuv_1.6.15 future.apply_1.11.2 - [55] goftest_1.2-3 glue_1.7.0 nlme_3.1-166 promises_1.3.0 grid_4.4.1 Rtsne_0.17 - [61] cluster_2.1.6 reshape2_1.4.4 generics_0.1.3 gtable_0.3.5 spatstat.data_3.1-2 tzdb_0.4.0 - [67] data.table_1.16.0 hms_1.1.3 utf8_1.2.4 spatstat.geom_3.3-2 RcppAnnoy_0.0.22 ggrepel_0.9.5 - [73] RANN_2.6.2 pillar_1.9.0 spam_2.10-0 RcppHNSW_0.6.0 later_1.3.2 splines_4.4.1 - [79] lattice_0.22-6 renv_1.0.7 survival_3.7-0 deldir_2.0-4 tidyselect_1.2.1 miniUI_0.1.1.1 - [85] pbapply_1.7-2 knitr_1.48 gridExtra_2.3 scattermore_1.2 xfun_0.47 matrixStats_1.3.0 - [91] stringi_1.8.4 lazyeval_0.2.2 yaml_2.3.10 evaluate_0.24.0 codetools_0.2-20 BiocManager_1.30.25 - [97] cli_3.6.3 uwot_0.2.2 xtable_1.8-4 reticulate_1.38.0 munsell_0.5.1 jquerylib_0.1.4 -[103] Rcpp_1.0.13 globals_0.16.3 spatstat.random_3.3-1 png_0.1-8 spatstat.univar_3.0-0 parallel_4.4.1 -[109] dotCall64_1.1-1 listenv_0.9.1 viridisLite_0.4.2 scales_1.3.0 ggridges_0.5.6 leiden_0.4.3.1 -[115] rlang_1.1.4 cowplot_1.1.3
+ [1] deldir_2.0-4 + [2] pbapply_1.7-2 + [3] gridExtra_2.3 + [4] rlang_1.1.4 + [5] magrittr_2.0.3 + [6] RcppAnnoy_0.0.22 + [7] spatstat.geom_3.3-2 + [8] matrixStats_1.3.0 + [9] ggridges_0.5.6 + [10] compiler_4.4.1 + [11] png_0.1-8 + [12] vctrs_0.6.5 + [13] reshape2_1.4.4 + [14] pkgconfig_2.0.3 + [15] fastmap_1.2.0 + [16] labeling_0.4.3 + [17] utf8_1.2.4 + [18] promises_1.3.0 + [19] tzdb_0.4.0 + [20] xfun_0.47 + [21] jsonlite_1.8.8 + [22] goftest_1.2-3 + [23] later_1.3.2 + [24] spatstat.utils_3.1-0 + [25] irlba_2.3.5.1 + [26] parallel_4.4.1 + [27] cluster_2.1.6 + [28] R6_2.5.1 + [29] ica_1.0-3 + [30] spatstat.data_3.1-2 + [31] stringi_1.8.4 + [32] RColorBrewer_1.1-3 + [33] reticulate_1.38.0 + [34] spatstat.univar_3.0-0 + [35] parallelly_1.38.0 + [36] lmtest_0.9-40 + [37] scattermore_1.2 + [38] Rcpp_1.0.13 + [39] knitr_1.48 + [40] tensor_1.5 + [41] future.apply_1.11.2 + [42] zoo_1.8-12 + [43] sctransform_0.4.1 + [44] httpuv_1.6.15 + [45] Matrix_1.7-0 + [46] splines_4.4.1 + [47] igraph_2.0.3 + [48] timechange_0.3.0 + [49] tidyselect_1.2.1 + [50] abind_1.4-5 + [51] rstudioapi_0.16.0 + [52] yaml_2.3.10 + [53] spatstat.random_3.3-1 + [54] spatstat.explore_3.3-2 + [55] codetools_0.2-20 + [56] miniUI_0.1.1.1 + [57] listenv_0.9.1 + [58] lattice_0.22-6 + [59] plyr_1.8.9 + [60] shiny_1.9.1 + [61] withr_3.0.1 + [62] ROCR_1.0-11 + [63] Rtsne_0.17 + [64] future_1.34.0 + [65] fastDummies_1.7.4 + [66] survival_3.7-0 + [67] polyclip_1.10-7 + [68] fitdistrplus_1.2-1 + [69] pillar_1.9.0 + [70] BiocManager_1.30.25 + [71] KernSmooth_2.23-24 + [72] renv_1.0.7 + [73] plotly_4.10.4 + [74] generics_0.1.3 + [75] rprojroot_2.0.4 + [76] RcppHNSW_0.6.0 + [77] hms_1.1.3 + [78] munsell_0.5.1 + [79] scales_1.3.0 + [80] globals_0.16.3 + [81] xtable_1.8-4 + [82] glue_1.7.0 + [83] lazyeval_0.2.2 + [84] tools_4.4.1 + [85] data.table_1.16.0 + [86] RSpectra_0.16-2 + [87] RANN_2.6.2 + [88] leiden_0.4.3.1 + [89] dotCall64_1.1-1 + [90] cowplot_1.1.3 + [91] grid_4.4.1 + [92] colorspace_2.1-1 + [93] nlme_3.1-166 + [94] cli_3.6.3 + [95] spatstat.sparse_3.1-0 + [96] spam_2.10-0 + [97] fansi_1.0.6 + [98] viridisLite_0.4.2 + [99] uwot_0.2.2 +[100] gtable_0.3.5 +[101] digest_0.6.37 +[102] progressr_0.14.0 +[103] ggrepel_0.9.5 +[104] farver_2.1.2 +[105] htmlwidgets_1.6.4 +[106] htmltools_0.5.8.1 +[107] lifecycle_1.0.4 +[108] httr_1.4.7 +[109] mime_0.12 +[110] MASS_7.3-61 -
---
title: "Compare label transfer results between Azimuth and Azimuth-adapted strategy"
author: Stephanie Spielman, Data Lab
output: 
  html_notebook: 
    toc: yes
    toc_float: yes
params:
  seed: 12345
---


The goal of this notebook is to compare label transfer results between:

- Label transfer code with Azimuth currently in `main` at commit `6af112d`. These results are referred to as `"azimuth"`.
- Label transfer code adapted from Azimuth. These results are referred to as `"adapted_azimuth"`.


## Setup

```{r setup}
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
options(future.globals.maxSize = 891289600000000)

suppressPackageStartupMessages({
  library(tidyverse)
  library(patchwork)
  library(Seurat)
})

repository_base <- rprojroot::find_root(rprojroot::is_git_root)
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")  
result_dir <- file.path(module_base, "results")


# functions to perform label transfer with azimuth-adapted approach
source(
  file.path(module_base, "notebook_template", "utils", "label-transfer-functions.R")
)

# Output files
full_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-full.rds")
kidney_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-kidney.rds")
```

## Functions

```{r functions}
# Make a heatmap of counts for label transfer strategies
plot_count_heatmap <- function(df, title, sample_id) {
  all_preds <- union(df$azimuth, df$adapted_azimuth)
  
  plotme <- data.frame(
    azimuth = all_preds, 
    adapted_azimuth = all_preds
  ) |> 
    expand(azimuth, adapted_azimuth) |>
    mutate(n = NA_integer_) |>
    anti_join(distinct(df)) |>
    bind_rows(
      df |> count(azimuth, adapted_azimuth)
    ) |> 
    arrange(azimuth) |>
    mutate(
      color = case_when(
        is.na(n) ~ "white", 
        n <= 20 ~ "grey90",
        n <= 50 ~ "lightblue",
        n <= 100 ~ "cornflowerblue", 
        n <= 500 ~ "red",
        n <= 1000 ~ "yellow2",
        .default = "yellow"
      )
    )

    ggplot(plotme) + 
      aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) + 
      geom_tile(alpha = 0.5) + 
      geom_abline(color = "firebrick", alpha = 0.5) +
      geom_text(size = 3.5) + 
      #scale_fill_viridis_c(name = "count", na.value = "grey90") +
      scale_fill_identity() +
      theme_bw() + 
      theme(axis.text.y = element_text(size = 7), 
            axis.text.x = element_text(angle = 30, size = 7, hjust=1), 
            legend.position = "bottom", 
            legend.title = element_text(size = 9), 
            legend.text = element_text(size = 8)) +
      labs(
        title = glue::glue("{sample_id}: {str_to_title(title)}")
      )
}


# Wrapper function to compare results between approaches
# Makes two plots:
# - heatmap comparing counts for cell labels between approaches
# - density plot of annotation scores for labels that agree and disagree between approaches
compare <- function(df, compare_column, score_column, title) {
  
  spread_df <- df |>
    select({{compare_column}}, barcode, version) |>
    pivot_wider(names_from = version, values_from = {{compare_column}})

  
  heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id))  
  
  disagree_barcodes <- spread_df |>
    filter(azimuth != adapted_azimuth) |>
    pull(barcode)

  df2 <- df |>
    mutate(
      agree = ifelse(barcode %in% disagree_barcodes, "labels disagree", "labels agree"),
      agree = fct_relevel(agree, "labels disagree", "labels agree")
    ) 
  
  density_plot <- ggplot(df2) + 
    aes(x = {{score_column}}, fill = agree) + 
    geom_density(alpha = 0.6) + 
    theme_bw() +
    ggtitle(
      glue::glue("Disagree count: {length(disagree_barcodes)} out of {nrow(spread_df)}")
    ) +
    theme(legend.position = "bottom")

  print(heatmap + density_plot + plot_layout(widths = c(2, 1)))

}
```


## Label transfer

This section both:

- Reads in existing Azimuth label transfer results
- Performs label transfer with Azimuth-adapted approach

If results are already available, we read in the files rather than regenerating results.

```{r}
# sample ids to process
sample_ids <- c("SCPCS000179", "SCPCS000184", "SCPCS000194", "SCPCS000205", "SCPCS000208")

# read in seurat input objects, as needed
if ((!file.exists(full_results_file)) || (!file.exists(kidney_results_file))) {
  srat_objects <- sample_ids |>
    purrr::map(
      \(id) {
        srat <- readRDS(
          file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds")
        ))
        DefaultAssay(srat) <- "RNA"
        
        return(srat)
    })
  names(srat_objects) <- sample_ids
}
```


### Label transfer for fetal full

```{r}
if (!file.exists(full_results_file)) {
  
  # read reference
  ref <- readRDS(file.path(
  module_base,
    "results",
    "references",
    "cao_formatted_ref.rds"
  ))
  full_reference <- ref$reference
  full_refdata <- ref$refdata
  full_dims <- ref$dims
  full_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )

  
  fetal_full <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        # Perform label transfer with new code
        set.seed(params$seed)
        query <- prepare_query(srat, rownames(full_reference), file.path(module_base, "scratch", "homologs.rds"))
        query <- transfer_labels(
          query,
          full_reference,
          full_dims,
          full_refdata
        )
        
        # Read in results from existing Azimuth label transfer code
        srat_02a <- readRDS(
          file.path(result_dir, id, glue::glue("02a-fetal_full_label-transfer_{id}.Rds"))
        )
        
        # create final data frame with all annotations
        query@meta.data[, full_annotation_columns] |> 
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id, 
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02a),
              version = "azimuth", 
              predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1, 
              predicted.annotation.l1.score = srat_02a$fetal_full_predicted.annotation.l1.score,
              predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2, 
              predicted.annotation.l2.score = srat_02a$fetal_full_predicted.annotation.l2.score,
              predicted.organ = srat_02a$fetal_full_predicted.organ, 
              predicted.organ.score = srat_02a$fetal_full_predicted.organ.score
            ) 
          )
      }
    )
  write_rds(fetal_full, full_results_file)
} else {
  fetal_full <- read_rds(full_results_file)
}
```


### Label transfer for fetal kidney


```{r}
if (!file.exists(kidney_results_file)) {
  
  
  # read reference
  ref <- readRDS(file.path(
    module_base,
    "results",
    "references",
    "stewart_formatted_ref.rds"
  ))
  
  # Pull out information from the reference object we need for label transfer
  kidney_reference <- ref$reference
  kidney_refdata <- ref$refdata
  kidney_dims <- ref$dims
  kidney_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )
  
  
  fetal_kidney <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        # Perform label transfer with new code
        set.seed(params$seed)
        query <- prepare_query(srat, rownames(kidney_reference), file.path(module_base, "scratch", "homologs.rds"))
        query <- transfer_labels(
          query,
          kidney_reference,
          kidney_dims,
          kidney_refdata
        )
        
        # Read in results from existing Azimuth label transfer code
        srat_02b <- readRDS(
          file.path(result_dir, id, glue::glue("02b-fetal_kidney_label-transfer_{id}.Rds"))
        )
        
        # create final data frame with all annotations
        query@meta.data[, kidney_annotation_columns] |> 
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id, 
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02b),
              version = "azimuth", 
              predicted.compartment = srat_02b$fetal_kidney_predicted.compartment, 
              predicted.compartment.score = srat_02b$fetal_kidney_predicted.compartment.score,
              predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type, 
              predicted.cell_type.score = srat_02b$fetal_kidney_predicted.cell_type.score
            ) 
          )
      }
    )

  write_rds(fetal_kidney, kidney_results_file)
} else {
  fetal_kidney <- read_rds(kidney_results_file)
}
```


## Compare results

We expect:
- The majority of annotations match between approaches, with heatmap counts primarily falling along the diagonal
- Any annotations that disagree should have low scores


### Fetal full reference

Note that results from the L2 reference are not plotted because they are not used in cell type annotation.


```{r fig.height=8, fig.width=14}
fetal_full |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.annotation.l1, predicted.annotation.l1.score, "l1")
      compare(dat, predicted.organ, predicted.organ.score, "organ")
    }
  )
```


### Fetal kidney reference

```{r fig.height=8, fig.width=14}
fetal_kidney |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.compartment, predicted.compartment.score, "compartment")
      compare(dat, predicted.cell_type, predicted.cell_type.score, "cell_type")
    }
  )
```



## Conclusions

The vast majority of the time, labels agree. 
Generally speaking, when labels do not agree, their annotation scores are much lower, which is as expected.
 
Additional notable differences are shown in tables below:
    
### Fetal full reference:

- The Azimuth-adapted approach occasionally calls kidney or kidney-related cells as intestine or intestine epithelial
- Some other kidney-related differences are noted:

| Sample | Reference | Count | Azimuth | Azimuth-adapted |
|--------|-----------|-------|---------|-----------------|
| SCPSC000179 | L1 | 70 | Metanephritic cells | Intestinal epithelial cells | 
| SCPSC000179 | Organ | 64 | Kidney | Intestine | 
| SCPSC000179 | Organ | 20 | Lung | Kidney | 
| SCPSC000194 | L1 | 60 | Stromal cells | Mesangial cells | 
| SCPSC000194 | Organ | 35 | Kidney | Intestine | 
| SCPSC000194 | Organ | 36 | Lung | Kidney | 
| SCPSC000205 | Organ | 56 | Kidney | Intestine |
| SCPSC000208 | L1 | 101 | Mesangial cells | Metanephritic cells |  
| SCPSC000208 | L1 | 101 | Intestinal epithelial cells | Metanephritic cells |  
| SCPSC000208 | Organ | 149 | Kidney | Intestine | 


### Fetal kidney reference:


- There are a small but notable number of cells flipped between mesenchyme and kidney cells, in particular for sample SCPSC000184.
This is the main discrepancy.
- Most of the cell type differences are not necessarily biologically meaningful for our purposes, as listed below. These are not noted in the table.
   - `kidney cell` vs `podocyte`
   - `kidney epithelial cell` vs `kidney cell` 
   - `mesenchymal cell` vs `mesenchymal stem cell` 
- There are a decent number of times when stroma and fetal nephron are flipped, but this makes sense given that we expect many of the stroma may be tumor.
- Disagreeing annotation scores when using the cell type reference were often higher, but many of the disagreements for this reference were not meaningful (e.g. `kidney epithelial cell` vs `kidney cell`).


| Sample | Reference | Count | Azimuth | Azimuth-adapted |
|--------|-----------|-------|---------|-----------------|
| SCPSC000179 | cell type | 202 | mesenchymal cell | kidney epithelial cell | 
| SCPSC000184 | compartment | 111 | fetal nephron |  stroma | 
| SCPSC000184 | cell type | 536 | kidney epithelial cell | mesenchymal cell | 
| SCPSC000194 | compartment | 565 | fetal nephron |  stroma | 
| SCPSC000194 | cell type  | 89 | kidney epithelial cell | mesenchymal cell | 
| SCPSC000205 | compartment  | 684 | fetal nephron |  stroma | 
| SCPSC000208 | compartment  | 2111 | fetal nephron |  stroma | 


## Session Info

```{r}
sessionInfo()
```

+
---
title: "Compare label transfer results between Azimuth and Azimuth-adapted strategy"
author: Stephanie Spielman, Data Lab
output: 
  html_notebook: 
    toc: yes
    toc_float: yes
params:
  seed: 12345
---


The goal of this notebook is to compare label transfer results between:

- Label transfer code with Azimuth currently in `main` at commit `6af112d`. These results are referred to as `"azimuth"`.
- Label transfer code adapted from Azimuth. These results are referred to as `"adapted_azimuth"`.


## Setup

```{r setup}
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
options(future.globals.maxSize = 891289600000000)

suppressPackageStartupMessages({
  library(tidyverse)
  library(patchwork)
  library(Seurat)
})

repository_base <- rprojroot::find_root(rprojroot::is_git_root)
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")
result_dir <- file.path(module_base, "results")


# functions to perform label transfer with azimuth-adapted approach
source(
  file.path(module_base, "notebook_template", "utils", "label-transfer-functions.R")
)

# Output files
full_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-full.rds")
kidney_results_file <- file.path(module_base, "scratch", "compare-label-transfer_fetal-kidney.rds")
```

## Functions

```{r functions}
# Make a heatmap of counts for label transfer strategies
plot_count_heatmap <- function(df, title, sample_id) {
  all_preds <- union(df$azimuth, df$adapted_azimuth)

  plotme <- data.frame(
    azimuth = all_preds,
    adapted_azimuth = all_preds
  ) |>
    expand(azimuth, adapted_azimuth) |>
    mutate(n = NA_integer_) |>
    anti_join(distinct(df)) |>
    bind_rows(
      df |> count(azimuth, adapted_azimuth)
    ) |>
    arrange(azimuth) |>
    mutate(
      color = case_when(
        is.na(n) ~ "white",
        n <= 20 ~ "grey90",
        n <= 50 ~ "lightblue",
        n <= 100 ~ "cornflowerblue",
        n <= 500 ~ "red",
        n <= 1000 ~ "yellow2",
        .default = "yellow"
      )
    )

  ggplot(plotme) +
    aes(x = azimuth, y = adapted_azimuth, fill = color, label = n) +
    geom_tile(alpha = 0.5) +
    geom_abline(color = "firebrick", alpha = 0.5) +
    geom_text(size = 3.5) +
    # scale_fill_viridis_c(name = "count", na.value = "grey90") +
    scale_fill_identity() +
    theme_bw() +
    theme(
      axis.text.y = element_text(size = 7),
      axis.text.x = element_text(angle = 30, size = 7, hjust = 1),
      legend.position = "bottom",
      legend.title = element_text(size = 9),
      legend.text = element_text(size = 8)
    ) +
    labs(
      title = glue::glue("{sample_id}: {str_to_title(title)}")
    )
}


# Wrapper function to compare results between approaches
# Makes two plots:
# - heatmap comparing counts for cell labels between approaches
# - density plot of annotation scores for labels that agree and disagree between approaches
compare <- function(df, compare_column, score_column, title) {
  spread_df <- df |>
    select({{ compare_column }}, barcode, version) |>
    pivot_wider(names_from = version, values_from = {{ compare_column }})


  heatmap <- plot_count_heatmap(spread_df, title, unique(df$sample_id))

  disagree_barcodes <- spread_df |>
    filter(azimuth != adapted_azimuth) |>
    pull(barcode)

  df2 <- df |>
    mutate(
      agree = ifelse(barcode %in% disagree_barcodes, "labels disagree", "labels agree"),
      agree = fct_relevel(agree, "labels disagree", "labels agree")
    )

  density_plot <- ggplot(df2) +
    aes(x = {{ score_column }}, fill = agree) +
    geom_density(alpha = 0.6) +
    theme_bw() +
    ggtitle(
      glue::glue("Disagree count: {length(disagree_barcodes)} out of {nrow(spread_df)}")
    ) +
    theme(legend.position = "bottom")

  print(heatmap + density_plot + plot_layout(widths = c(2, 1)))
}
```


## Label transfer

This section both:

- Reads in existing Azimuth label transfer results
- Performs label transfer with Azimuth-adapted approach

If results are already available, we read in the files rather than regenerating results.

```{r}
# sample ids to process
sample_ids <- c("SCPCS000179", "SCPCS000184", "SCPCS000194", "SCPCS000205", "SCPCS000208")

# read in seurat input objects, as needed
if ((!file.exists(full_results_file)) || (!file.exists(kidney_results_file))) {
  srat_objects <- sample_ids |>
    purrr::map(
      \(id) {
        srat <- readRDS(
          file.path(result_dir, id, glue::glue("01-Seurat_{id}.Rds"))
        )
        DefaultAssay(srat) <- "RNA"

        return(srat)
      }
    )
  names(srat_objects) <- sample_ids
}
```


### Label transfer for fetal full

```{r}
if (!file.exists(full_results_file)) {
  # read reference
  ref <- readRDS(file.path(
    module_base,
    "results",
    "references",
    "cao_formatted_ref.rds"
  ))
  full_reference <- ref$reference
  full_refdata <- ref$refdata
  full_dims <- ref$dims
  full_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )


  # Perform label transfer with new code
  assay <- "RNA"
  fetal_full <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        
        set.seed(params$seed)

        query <- prepare_query(
          srat, 
          rownames(full_reference), 
          assay, 
          file.path(module_base, "scratch", "homologs.rds")
        )
        query <- transfer_labels(
          query,
          full_reference,
          full_dims,
          full_refdata, 
          query.assay = assay
        )

        # Read in results from existing Azimuth label transfer code
        srat_02a <- readRDS(
          file.path(result_dir, id, glue::glue("02a-fetal_full_label-transfer_{id}.Rds"))
        )

        # create final data frame with all annotations
        query@meta.data[, full_annotation_columns] |>
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id,
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02a),
              version = "azimuth",
              predicted.annotation.l1 = srat_02a$fetal_full_predicted.annotation.l1,
              predicted.annotation.l1.score = srat_02a$fetal_full_predicted.annotation.l1.score,
              predicted.annotation.l2 = srat_02a$fetal_full_predicted.annotation.l2,
              predicted.annotation.l2.score = srat_02a$fetal_full_predicted.annotation.l2.score,
              predicted.organ = srat_02a$fetal_full_predicted.organ,
              predicted.organ.score = srat_02a$fetal_full_predicted.organ.score
            )
          )
      }
    )
  write_rds(fetal_full, full_results_file)
} else {
  fetal_full <- read_rds(full_results_file)
}
```


### Label transfer for fetal kidney


```{r}
if (!file.exists(kidney_results_file)) {
  # read reference
  ref <- readRDS(file.path(
    module_base,
    "results",
    "references",
    "stewart_formatted_ref.rds"
  ))

  # Pull out information from the reference object we need for label transfer
  kidney_reference <- ref$reference
  kidney_refdata <- ref$refdata
  kidney_dims <- ref$dims
  kidney_annotation_columns <- c(
    glue::glue("predicted.{ref$annotation_levels}"),
    glue::glue("predicted.{ref$annotation_levels}.score")
  )


  # Perform label transfer with new code
  assay <- "RNA"
  fetal_kidney <- srat_objects |>
    purrr::imap(
      \(srat, id) {
        set.seed(params$seed)

        query <- prepare_query(
          srat, 
          rownames(kidney_reference), 
          assay, 
          file.path(module_base, "scratch", "homologs.rds")
        )
        query <- transfer_labels(
          query,
          kidney_reference,
          kidney_dims,
          kidney_refdata,
          query.assay = assay
        )

        # Read in results from existing Azimuth label transfer code
        srat_02b <- readRDS(
          file.path(result_dir, id, glue::glue("02b-fetal_kidney_label-transfer_{id}.Rds"))
        )

        # create final data frame with all annotations
        query@meta.data[, kidney_annotation_columns] |>
          tibble::rownames_to_column(var = "barcode") |>
          mutate(
            sample_id = id,
            version = "adapted_azimuth"
          ) |>
          # existing results
          bind_rows(
            data.frame(
              sample_id = id,
              barcode = colnames(srat_02b),
              version = "azimuth",
              predicted.compartment = srat_02b$fetal_kidney_predicted.compartment,
              predicted.compartment.score = srat_02b$fetal_kidney_predicted.compartment.score,
              predicted.cell_type = srat_02b$fetal_kidney_predicted.cell_type,
              predicted.cell_type.score = srat_02b$fetal_kidney_predicted.cell_type.score
            )
          )
      }
    )

  write_rds(fetal_kidney, kidney_results_file)
} else {
  fetal_kidney <- read_rds(kidney_results_file)
}
```


## Compare results

We expect:
- The majority of annotations match between approaches, with heatmap counts primarily falling along the diagonal
- Any annotations that disagree should have low scores


### Fetal full reference

Note that results from the L2 reference are not plotted because they are not used in cell type annotation.


```{r fig.height=8, fig.width=14}
fetal_full |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.annotation.l1, predicted.annotation.l1.score, "l1")
      compare(dat, predicted.organ, predicted.organ.score, "organ")
    }
  )
```


### Fetal kidney reference

```{r fig.height=8, fig.width=14}
fetal_kidney |>
  purrr::walk(
    \(dat) {
      compare(dat, predicted.compartment, predicted.compartment.score, "compartment")
      compare(dat, predicted.cell_type, predicted.cell_type.score, "cell_type")
    }
  )
```



## Conclusions

The vast majority of the time, labels agree. 
Generally speaking, when labels do not agree, their annotation scores are much lower, which is as expected.
 
Additional notable differences are shown in tables below:
    
### Fetal full reference:

- The Azimuth-adapted approach occasionally calls kidney or kidney-related cells as intestine or intestine epithelial
- Some other kidney-related differences are noted:

| Sample | Reference | Count | Azimuth | Azimuth-adapted |
|--------|-----------|-------|---------|-----------------|
| SCPSC000179 | L1 | 70 | Metanephritic cells | Intestinal epithelial cells | 
| SCPSC000179 | Organ | 64 | Kidney | Intestine | 
| SCPSC000179 | Organ | 20 | Lung | Kidney | 
| SCPSC000194 | L1 | 60 | Stromal cells | Mesangial cells | 
| SCPSC000194 | Organ | 35 | Kidney | Intestine | 
| SCPSC000194 | Organ | 36 | Lung | Kidney | 
| SCPSC000205 | Organ | 56 | Kidney | Intestine |
| SCPSC000208 | L1 | 101 | Mesangial cells | Metanephritic cells |  
| SCPSC000208 | L1 | 75 | Intestinal epithelial cells | Metanephritic cells |  
| SCPSC000208 | Organ | 149 | Kidney | Intestine | 


### Fetal kidney reference:

- Most of the cell type differences are not in the table below because they are not necessarily biologically meaningful for our purposes:
   - `kidney cell` vs `podocyte`
   - `kidney epithelial cell` vs `kidney cell` 
   - `mesenchymal cell` vs `mesenchymal stem cell` 


| Sample | Reference | Count | Azimuth | Azimuth-adapted |
|--------|-----------|-------|---------|-----------------|
| SCPSC000179 | cell type | 94 | mesenchymal cell | kidney epithelial cell | 
| SCPSC000205 | compartment  | 52 | fetal nephron |  stroma | 


## Session Info

```{r}
sessionInfo()
```

From 69a8a7a3f72709cf8202ccdcd6c293359e810739 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Fri, 1 Nov 2024 16:03:16 -0400 Subject: [PATCH 19/22] Specify RNA assay in the actual label transfer notebooks --- ...label-transfer_fetal_full_reference_Cao.Rmd | 11 +++++++++-- ...transfer_fetal_kidney_reference_Stewart.Rmd | 18 ++++++++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd index 8d3140341..391dd7782 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd @@ -154,12 +154,18 @@ output_dir <- file.path(module_base, "results", params$sample_id) ```{r load, message=FALSE, warning=FALSE} # open the processed rds object srat <- readRDS(file.path(data_dir, paste0("01-Seurat_", params$sample_id, ".Rds"))) +srat_assay <- "RNA" # prepare the query for label transfer # we don't want to overwrite the srat object since `prepare_query` # removes features that are not present in the reference -DefaultAssay(srat) <- "RNA" -query <- prepare_query(srat, rownames(reference), params$homologs_file) +DefaultAssay(srat) <- srat_assay +query <- prepare_query( + srat, + rownames(reference), + srat_assay, + params$homologs_file +) ``` @@ -179,6 +185,7 @@ query_labeled <- transfer_labels( reference, dims, refdata, + query.assay = srat_assay, k.weight = k.weight ) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd b/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd index abe006e64..e14420898 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd @@ -156,10 +156,18 @@ output_dir <- file.path(module_base, "results", params$sample_id) ```{r load, message=FALSE, warning=FALSE} # open the processed rds object srat <- readRDS(file.path(data_dir, paste0("02a-fetal_full_label-transfer_", params$sample_id, ".Rds"))) +srat_assay <- "RNA" # prepare the query for label transfer -DefaultAssay(srat) <- "RNA" -srat <- prepare_query(srat, rownames(reference), params$homologs_file) +# we don't want to overwrite the srat object since `prepare_query` +# removes features that are not present in the reference +DefaultAssay(srat) <- srat_assay +query <- prepare_query( + srat, + rownames(reference), + srat_assay, + params$homologs_file +) ``` @@ -174,14 +182,16 @@ if (params$testing) { } else { k.weight <- 50 # Azimuth default } -s <- transfer_labels( - srat, +query_labeled <- transfer_labels( + query, reference, dims, refdata, + query.assay = srat_assay, k.weight = k.weight ) + # We transfer the annotation to the pre-processed `Seurat` object as we don't want to keep changes done on the query by Azimuth annotation_columns <- c( glue::glue("predicted.{annotation_levels}"), From 6b3d7ed724c27cabc5c6c161de0c8df97273ec9a Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Fri, 1 Nov 2024 16:20:59 -0400 Subject: [PATCH 20/22] why was s still there? and fix a typo --- .../02a_label-transfer_fetal_full_reference_Cao.Rmd | 4 ++-- .../02b_label-transfer_fetal_kidney_reference_Stewart.Rmd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd index 391dd7782..d3683978d 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/02a_label-transfer_fetal_full_reference_Cao.Rmd @@ -194,9 +194,9 @@ annotation_columns <- c( glue::glue("predicted.{annotation_levels}"), glue::glue("predicted.{annotation_levels}.score") ) -metadata_to_trasfer <- query_labeled@meta.data[, annotation_columns] +metadata_to_transfer <- query_labeled@meta.data[, annotation_columns] -srat <- AddMetaData(srat, metadata_to_trasfer, col.name = paste0("fetal_full_", annotation_columns)) +srat <- AddMetaData(srat, metadata_to_transfer, col.name = paste0("fetal_full_", annotation_columns)) rm(query) rm(query_labeled) diff --git a/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd b/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd index e14420898..7ef0751c7 100644 --- a/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd +++ b/analyses/cell-type-wilms-tumor-06/notebook_template/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd @@ -197,9 +197,9 @@ annotation_columns <- c( glue::glue("predicted.{annotation_levels}"), glue::glue("predicted.{annotation_levels}.score") ) -metadata_to_trasfer <- s@meta.data[, annotation_columns] +metadata_to_transfer <- query_labeled@meta.data[, annotation_columns] -srat <- AddMetaData(srat, metadata_to_trasfer, col.name = paste0("fetal_kidney_", annotation_columns)) +srat <- AddMetaData(srat, metadata_to_transfer, col.name = paste0("fetal_kidney_", annotation_columns)) ``` ```{r plot_azimuth, fig.height=8, fig.width=8, warnings=FALSE} From 3a0ba1e9b8508a5f7e70bdb70b38599c4fedd0c8 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 4 Nov 2024 10:27:23 -0500 Subject: [PATCH 21/22] fix typo - output for 02b needs to be 02b --- analyses/cell-type-wilms-tumor-06/00_run_workflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index 8ee886647..5da478e1c 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -80,7 +80,7 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do Rscript -e "rmarkdown::render('${notebook_template_dir}/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd', params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${IS_CI}), output_format = 'html_document', - output_file = '02a_fetal_all_reference_Stewart_${sample_id}.html', + output_file = '02b_fetal_all_reference_Stewart_${sample_id}.html', output_dir = '${sample_notebook_dir}')" # Temporarily this code is not run in CI. From 7522c56fa7e5962cddc129c634846550364bd0eb Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 4 Nov 2024 11:17:12 -0500 Subject: [PATCH 22/22] names are hard --- analyses/cell-type-wilms-tumor-06/00_run_workflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh index 5da478e1c..181065898 100755 --- a/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh +++ b/analyses/cell-type-wilms-tumor-06/00_run_workflow.sh @@ -80,7 +80,7 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do Rscript -e "rmarkdown::render('${notebook_template_dir}/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd', params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${IS_CI}), output_format = 'html_document', - output_file = '02b_fetal_all_reference_Stewart_${sample_id}.html', + output_file = '02b_fetal_kidney_reference_Stewart_${sample_id}.html', output_dir = '${sample_notebook_dir}')" # Temporarily this code is not run in CI.
SCPSC000179 cell type20294 mesenchymal cell kidney epithelial cell
SCPSC000184compartment111fetal nephronstroma
SCPSC000184cell type536kidney epithelial cellmesenchymal cell
SCPSC000194compartment565fetal nephronstroma
SCPSC000194cell type89kidney epithelial cellmesenchymal cell
SCPSC000205 compartment684fetal nephronstroma
SCPSC000208compartment211152 fetal nephron stroma