From ad8f910f2088313a53b467d343b540a994a2993a Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 4 Nov 2024 12:19:24 -0500 Subject: [PATCH 1/3] #692: use fs::dir_create where not yet used --- scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd | 4 +--- scRNA-seq-advanced/02-dataset_integration.Rmd | 4 +--- scRNA-seq-advanced/03-differential_expression.Rmd | 4 +--- scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd | 4 +--- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd index 8e60af40..b9d87a79 100644 --- a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd +++ b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd @@ -93,9 +93,7 @@ mito_file <- file.path(ref_dir, "hs_mitochondrial_genes.tsv") normalized_dir <- file.path(data_dir, "normalized") # create the directory if it does not exist -if (!dir.exists(normalized_dir)) { - dir.create(normalized_dir, recursive = TRUE) -} +fs::dir_create(normalized_dir) # output RDS file for normalized data output_sce_file <- file.path(normalized_dir, diff --git a/scRNA-seq-advanced/02-dataset_integration.Rmd b/scRNA-seq-advanced/02-dataset_integration.Rmd index 3449abc3..4e6754ad 100644 --- a/scRNA-seq-advanced/02-dataset_integration.Rmd +++ b/scRNA-seq-advanced/02-dataset_integration.Rmd @@ -76,9 +76,7 @@ input_dir <- file.path("data", "rms", "processed") output_dir <- file.path("data", "rms", "integrated") # Create output directory if it doesn't exist -if (!(dir.exists(output_dir))) { - dir.create(output_dir) -} +fs::dir_create(output_dir) # Define output file name for the integrated object integrated_sce_file <- file.path(output_dir, "rms_integrated_subset.rds") diff --git a/scRNA-seq-advanced/03-differential_expression.Rmd b/scRNA-seq-advanced/03-differential_expression.Rmd index 58c05972..eac8050f 100644 --- a/scRNA-seq-advanced/03-differential_expression.Rmd +++ b/scRNA-seq-advanced/03-differential_expression.Rmd @@ -79,9 +79,7 @@ sample_metadata_file <- file.path(data_dir, # directory to store output deseq_dir <- file.path("analysis", "rms", "deseq") -if(!dir.exists(deseq_dir)){ - dir.create(deseq_dir, recursive = TRUE) -} +fs::dir_create(deseq_dir) # results file to output from DE analysis deseq_output_file <- file.path(deseq_dir, diff --git a/scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd b/scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd index 41bc9068..ce9a36b2 100644 --- a/scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd +++ b/scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd @@ -78,9 +78,7 @@ hodgkins_analysis_dir <- file.path("analysis", "hodgkins") # We'll create a directory to specifically hold the pathway results if it doesn't # exist yet results_dir <- file.path(hodgkins_analysis_dir, "pathway-analysis") -if (!dir.exists(results_dir)) { - dir.create(results_dir, recursive = TRUE) -} +fs::dir_create(results_dir) ``` #### Input files From 87037ac16f58d994e4383f9e1bfe06a0b20f6bf9 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 4 Nov 2024 12:35:45 -0500 Subject: [PATCH 2/3] #734: All colour->color, use brewer where appropriate, and make ggplot call order consistent in the integration notebook --- .../01-read_filter_normalize_scRNA.Rmd | 4 +- scRNA-seq-advanced/02-dataset_integration.Rmd | 44 +++++++++++-------- .../03-differential_expression.Rmd | 12 ++--- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd index b9d87a79..0f0844ff 100644 --- a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd +++ b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd @@ -495,7 +495,7 @@ We can also visualize the UMAP results using the `plotReducedDim()` function. scater::plotReducedDim(normalized_sce, "UMAP", # color by the most variable gene - colour_by = hv_genes[1]) + color_by = hv_genes[1]) ``` @@ -549,7 +549,7 @@ Here rather than the general `plotReducedDim()` function, we will use `plotUMAP( ```{r plot clusters, live=TRUE} # plot UMAP with assigned clusters scater::plotUMAP(normalized_sce, - colour_by = "nn_cluster") + color_by = "nn_cluster") ``` What do you see in these results? diff --git a/scRNA-seq-advanced/02-dataset_integration.Rmd b/scRNA-seq-advanced/02-dataset_integration.Rmd index 4e6754ad..576c0c45 100644 --- a/scRNA-seq-advanced/02-dataset_integration.Rmd +++ b/scRNA-seq-advanced/02-dataset_integration.Rmd @@ -464,11 +464,15 @@ To see how this looks, let's look at the UMAP when calculated from individual sa # Plot UMAP calculated from individual samples with separate scaling scater::plotReducedDim(merged_sce, dimred = "UMAP", - colour_by = "sample", + color_by = "sample", point_size = 0.5, point_alpha = 0.2) + - guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) + + # Use a CVD-friendly color scheme + scale_color_brewer(palette = "Dark2") + + # Modify the legend key so its points are larger and easier to see + guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + ggtitle("UMAP calculated on each sample separately") + ``` @@ -552,13 +556,12 @@ Now, let's see how this new `merged_UMAP` looks compared to the `UMAP` calculate # UMAPs scaled together when calculated from the merged SCE scater::plotReducedDim(merged_sce, dimred = "merged_UMAP", - colour_by = "sample", + color_by = "sample", # Some styling to help us see the points: point_size = 0.5, point_alpha = 0.2) + - # Modify the legend key so its points are larger and easier to see - guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) + - # Add a plot title + scale_color_brewer(palette = "Dark2") + + guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + ggtitle("UMAP calculated on merged_sce") ``` @@ -631,13 +634,12 @@ scater::plotReducedDim(merged_sce, # plot the fastMNN coordinates dimred = "fastmnn_UMAP", # color by sample - colour_by = "sample", + color_by = "sample", # Some styling to help us see the points: point_size = 0.5, point_alpha = 0.2) + - # Modify legend so they key is larger and easier to see - guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) + - # add plot title + scale_color_brewer(palette = "Dark2") + + guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + ggtitle("UMAP after integration with fastMNN") ``` @@ -677,10 +679,11 @@ Let's re-plot this UMAP to highlight cell types: scater::plotReducedDim(merged_sce, dimred = "fastmnn_UMAP", # color by broad celltypes - colour_by = "celltype_broad", + color_by = "celltype_broad", point_size = 0.5, point_alpha = 0.2) + - guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) + + scale_color_brewer(palette = "Dark2") + + guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + ggtitle("UMAP after integration with fastMNN") ``` @@ -693,12 +696,13 @@ One way we can see all the points a bit better is to facet the plot by sample, u ```{r plot fastmnn umap celltypes faceted} scater::plotReducedDim(merged_sce, dimred = "fastmnn_UMAP", - colour_by = "celltype_broad", + color_by = "celltype_broad", point_size = 0.5, point_alpha = 0.2, # Allow for faceting by a variable using `other_fields`: other_fields = "sample") + - guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) + + scale_color_brewer(palette = "Dark2") + + guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + ggtitle("UMAP after integration with fastMNN") + # Facet by sample facet_wrap(vars(sample)) + @@ -786,11 +790,12 @@ Let's see how the `harmony` UMAP, colored by sample, looks compared to the `fast ```{r plot harmony umap batches} scater::plotReducedDim(merged_sce, dimred = "harmony_UMAP", - colour_by = "sample", + color_by = "sample", point_size = 0.5, point_alpha = 0.2) + - ggtitle("UMAP after integration with harmony") + - guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) + scale_color_brewer(palette = "Dark2") + + guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + + ggtitle("UMAP after integration with harmony") ``` How do you think this `harmony` UMAP compares to that from `fastMNN` integration? @@ -800,13 +805,14 @@ Let's see how this UMAP looks colored by cell type, and faceted for visibility: ```{r plot harmony umap celltypes} scater::plotReducedDim(merged_sce, dimred = "harmony_UMAP", - colour_by = "celltype_broad", + color_by = "celltype_broad", point_size = 0.5, point_alpha = 0.2, # Specify variable for faceting other_fields = "sample") + + scale_color_brewer(palette = "Dark2", na.value = "grey80") + + guides(color = guide_legend(override.aes = list(size = 3))) + ggtitle("UMAP after integration with harmony") + - guides(colour = guide_legend(override.aes = list(size = 3))) + facet_wrap(vars(sample)) ``` diff --git a/scRNA-seq-advanced/03-differential_expression.Rmd b/scRNA-seq-advanced/03-differential_expression.Rmd index eac8050f..c702e3bf 100644 --- a/scRNA-seq-advanced/03-differential_expression.Rmd +++ b/scRNA-seq-advanced/03-differential_expression.Rmd @@ -206,7 +206,7 @@ In the chunk below we will start by taking a look at our integration results and # UMAP of all samples, separating by diagnosis group scater::plotReducedDim(integrated_sce, dimred = "fastmnn_UMAP", - colour_by = "diagnosis_group", + color_by = "diagnosis_group", point_size= 0.5, point_alpha = 0.2) ``` @@ -228,7 +228,7 @@ The samples which could be further classified have a mix of `Tumor_Mesoderm`, `T scater::plotReducedDim(integrated_sce, dimred = "fastmnn_UMAP", # color each point by cell type - colour_by = "celltype_broad", + color_by = "celltype_broad", point_size= 0.5, point_alpha = 0.4) ``` @@ -248,7 +248,7 @@ In the below plot we will color our cells by cell type while also using `facet_g scater::plotReducedDim(integrated_sce, dimred = "fastmnn_UMAP", # color each point by cell type - colour_by = "celltype_broad", + color_by = "celltype_broad", point_size= 0.5, point_alpha = 0.4, # tell scater to use diagnosis_group for plotting @@ -679,7 +679,7 @@ myoblast_combined_sce <- rms_sce[, which(rms_sce$celltype_broad == "Tumor_Myobla # plot PTPRT (ENSG00000196090) expression in ARMS vs. ERMS scater::plotReducedDim(myoblast_combined_sce, dimred = "fastmnn_UMAP", - colour_by = "ENSG00000196090", #PTPRT + color_by = "ENSG00000196090", #PTPRT point_size= 0.5, point_alpha = 0.4, other_fields = "diagnosis_group") + @@ -718,7 +718,7 @@ scater::plotExpression(tumor_sce, # a vector of genes to plot features = genes_to_plot, x = "diagnosis_group", - colour_by = "diagnosis_group", + color_by = "diagnosis_group", other_fields = "celltype_broad", point_size = 0.1) + # each celltype is its own column @@ -727,7 +727,7 @@ scater::plotExpression(tumor_sce, rows = vars(Feature)) + # change the font size of the facet labels theme(strip.text = element_text(size = 7)) + - guides(colour = guide_legend( + guides(color = guide_legend( title = "Subtype", # update the legend title # change the size of the legend colors override.aes = list(size = 3, alpha = 1)) From d954ed6513f451a187a6c26e1a7e5ad1404bafa7 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 4 Nov 2024 12:41:38 -0500 Subject: [PATCH 3/3] one more spot for dark2 --- scRNA-seq-advanced/03-differential_expression.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/scRNA-seq-advanced/03-differential_expression.Rmd b/scRNA-seq-advanced/03-differential_expression.Rmd index c702e3bf..fa6b3c82 100644 --- a/scRNA-seq-advanced/03-differential_expression.Rmd +++ b/scRNA-seq-advanced/03-differential_expression.Rmd @@ -275,6 +275,7 @@ ggplot(tumor_cells_df, aes(x = sample, fill = celltype_broad)) + y = "Proportion of cells", fill = "Cell type" ) + + scale_fill_brewer(palette = "Dark2") + theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+ # facet by diagnosis group