From ad8f910f2088313a53b467d343b540a994a2993a Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Mon, 4 Nov 2024 12:19:24 -0500
Subject: [PATCH 1/3] #692: use fs::dir_create where not yet used

---
 scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd  | 4 +---
 scRNA-seq-advanced/02-dataset_integration.Rmd          | 4 +---
 scRNA-seq-advanced/03-differential_expression.Rmd      | 4 +---
 scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd | 4 +---
 4 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd
index 8e60af40..b9d87a79 100644
--- a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd
+++ b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd
@@ -93,9 +93,7 @@ mito_file <- file.path(ref_dir, "hs_mitochondrial_genes.tsv")
 normalized_dir <- file.path(data_dir, "normalized")
 
 # create the directory if it does not exist
-if (!dir.exists(normalized_dir)) {
-  dir.create(normalized_dir, recursive = TRUE)
-}
+fs::dir_create(normalized_dir)
 
 # output RDS file for normalized data
 output_sce_file <- file.path(normalized_dir,
diff --git a/scRNA-seq-advanced/02-dataset_integration.Rmd b/scRNA-seq-advanced/02-dataset_integration.Rmd
index 3449abc3..4e6754ad 100644
--- a/scRNA-seq-advanced/02-dataset_integration.Rmd
+++ b/scRNA-seq-advanced/02-dataset_integration.Rmd
@@ -76,9 +76,7 @@ input_dir <- file.path("data", "rms", "processed")
 output_dir <- file.path("data", "rms", "integrated")
 
 # Create output directory if it doesn't exist
-if (!(dir.exists(output_dir))) {
-  dir.create(output_dir)
-}
+fs::dir_create(output_dir)
 
 # Define output file name for the integrated object
 integrated_sce_file <- file.path(output_dir, "rms_integrated_subset.rds")
diff --git a/scRNA-seq-advanced/03-differential_expression.Rmd b/scRNA-seq-advanced/03-differential_expression.Rmd
index 58c05972..eac8050f 100644
--- a/scRNA-seq-advanced/03-differential_expression.Rmd
+++ b/scRNA-seq-advanced/03-differential_expression.Rmd
@@ -79,9 +79,7 @@ sample_metadata_file <- file.path(data_dir,
 
 # directory to store output
 deseq_dir <- file.path("analysis", "rms", "deseq")
-if(!dir.exists(deseq_dir)){
-  dir.create(deseq_dir, recursive = TRUE)
-}
+fs::dir_create(deseq_dir)
 
 # results file to output from DE analysis
 deseq_output_file <- file.path(deseq_dir, 
diff --git a/scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd b/scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd
index 41bc9068..ce9a36b2 100644
--- a/scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd
+++ b/scRNA-seq-advanced/05-gene_set_enrichment_analysis.Rmd
@@ -78,9 +78,7 @@ hodgkins_analysis_dir <- file.path("analysis", "hodgkins")
 # We'll create a directory to specifically hold the pathway results if it doesn't
 # exist yet
 results_dir <- file.path(hodgkins_analysis_dir, "pathway-analysis")
-if (!dir.exists(results_dir)) {
-  dir.create(results_dir, recursive = TRUE)
-}
+fs::dir_create(results_dir)
 ```
 
 #### Input files

From 87037ac16f58d994e4383f9e1bfe06a0b20f6bf9 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Mon, 4 Nov 2024 12:35:45 -0500
Subject: [PATCH 2/3] #734: All colour->color, use brewer where appropriate,
 and make ggplot call order consistent in the integration notebook

---
 .../01-read_filter_normalize_scRNA.Rmd        |  4 +-
 scRNA-seq-advanced/02-dataset_integration.Rmd | 44 +++++++++++--------
 .../03-differential_expression.Rmd            | 12 ++---
 3 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd
index b9d87a79..0f0844ff 100644
--- a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd
+++ b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd
@@ -495,7 +495,7 @@ We can also visualize the UMAP results using the `plotReducedDim()` function.
 scater::plotReducedDim(normalized_sce,
                        "UMAP",
                        # color by the most variable gene
-                       colour_by = hv_genes[1])
+                       color_by = hv_genes[1])
 ```
 
 
@@ -549,7 +549,7 @@ Here rather than the general `plotReducedDim()` function, we will use `plotUMAP(
 ```{r plot clusters, live=TRUE}
 # plot UMAP with assigned clusters
 scater::plotUMAP(normalized_sce,
-                 colour_by = "nn_cluster")
+                 color_by = "nn_cluster")
 ```
 
 What do you see in these results?
diff --git a/scRNA-seq-advanced/02-dataset_integration.Rmd b/scRNA-seq-advanced/02-dataset_integration.Rmd
index 4e6754ad..576c0c45 100644
--- a/scRNA-seq-advanced/02-dataset_integration.Rmd
+++ b/scRNA-seq-advanced/02-dataset_integration.Rmd
@@ -464,11 +464,15 @@ To see how this looks, let's look at the UMAP when calculated from individual sa
 # Plot UMAP calculated from individual samples with separate scaling
 scater::plotReducedDim(merged_sce,
                        dimred = "UMAP",
-                       colour_by = "sample",
+                       color_by = "sample",
                        point_size = 0.5,
                        point_alpha = 0.2) +
-  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
+  # Use a CVD-friendly color scheme
+  scale_color_brewer(palette = "Dark2") +
+  # Modify the legend key so its points are larger and easier to see
+  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
   ggtitle("UMAP calculated on each sample separately")
+
 ```
 
 
@@ -552,13 +556,12 @@ Now, let's see how this new `merged_UMAP` looks compared to the `UMAP` calculate
 # UMAPs scaled together when calculated from the merged SCE
 scater::plotReducedDim(merged_sce,
                        dimred = "merged_UMAP",
-                       colour_by = "sample",
+                       color_by = "sample",
                        # Some styling to help us see the points:
                        point_size = 0.5,
                        point_alpha = 0.2) +
-  # Modify the legend key so its points are larger and easier to see
-  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
-  # Add a plot title
+  scale_color_brewer(palette = "Dark2") +
+  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
   ggtitle("UMAP calculated on merged_sce")
 ```
 
@@ -631,13 +634,12 @@ scater::plotReducedDim(merged_sce,
                        # plot the fastMNN coordinates
                        dimred = "fastmnn_UMAP",
                        # color by sample
-                       colour_by = "sample",
+                       color_by = "sample",
                        # Some styling to help us see the points:
                        point_size = 0.5,
                        point_alpha = 0.2) +
-  # Modify legend so they key is larger and easier to see
-  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
-  # add plot title
+  scale_color_brewer(palette = "Dark2") +
+  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
   ggtitle("UMAP after integration with fastMNN")
 ```
 
@@ -677,10 +679,11 @@ Let's re-plot this UMAP to highlight cell types:
 scater::plotReducedDim(merged_sce,
                        dimred = "fastmnn_UMAP",
                        # color by broad celltypes
-                       colour_by = "celltype_broad",
+                       color_by = "celltype_broad",
                        point_size = 0.5,
                        point_alpha = 0.2) +
-  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
+  scale_color_brewer(palette = "Dark2") +
+  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
   ggtitle("UMAP after integration with fastMNN")
 ```
 
@@ -693,12 +696,13 @@ One way we can see all the points a bit better is to facet the plot by sample, u
 ```{r plot fastmnn umap celltypes faceted}
 scater::plotReducedDim(merged_sce,
                        dimred = "fastmnn_UMAP",
-                       colour_by = "celltype_broad",
+                       color_by = "celltype_broad",
                        point_size = 0.5,
                        point_alpha = 0.2,
                        # Allow for faceting by a variable using `other_fields`:
                        other_fields = "sample") +
-  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
+  scale_color_brewer(palette = "Dark2") +
+  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
   ggtitle("UMAP after integration with fastMNN") +
   # Facet by sample
   facet_wrap(vars(sample)) +
@@ -786,11 +790,12 @@ Let's see how the `harmony` UMAP, colored by sample, looks compared to the `fast
 ```{r plot harmony umap batches}
 scater::plotReducedDim(merged_sce,
                        dimred = "harmony_UMAP",
-                       colour_by = "sample",
+                       color_by = "sample",
                        point_size = 0.5,
                        point_alpha = 0.2) +
-  ggtitle("UMAP after integration with harmony") +
-  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1)))
+  scale_color_brewer(palette = "Dark2") +
+  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
+  ggtitle("UMAP after integration with harmony")
 ```
 
 How do you think this `harmony` UMAP compares to that from `fastMNN` integration?
@@ -800,13 +805,14 @@ Let's see how this UMAP looks colored by cell type, and faceted for visibility:
 ```{r plot harmony umap celltypes}
 scater::plotReducedDim(merged_sce,
                        dimred = "harmony_UMAP",
-                       colour_by = "celltype_broad",
+                       color_by = "celltype_broad",
                        point_size = 0.5,
                        point_alpha = 0.2,
                        # Specify variable for faceting
                        other_fields = "sample") +
+  scale_color_brewer(palette = "Dark2", na.value = "grey80") +
+  guides(color = guide_legend(override.aes = list(size = 3))) +
   ggtitle("UMAP after integration with harmony") +
-  guides(colour = guide_legend(override.aes = list(size = 3))) +
   facet_wrap(vars(sample))
 ```
 
diff --git a/scRNA-seq-advanced/03-differential_expression.Rmd b/scRNA-seq-advanced/03-differential_expression.Rmd
index eac8050f..c702e3bf 100644
--- a/scRNA-seq-advanced/03-differential_expression.Rmd
+++ b/scRNA-seq-advanced/03-differential_expression.Rmd
@@ -206,7 +206,7 @@ In the chunk below we will start by taking a look at our integration results and
 # UMAP of all samples, separating by diagnosis group
 scater::plotReducedDim(integrated_sce,
                        dimred = "fastmnn_UMAP",
-                       colour_by = "diagnosis_group",
+                       color_by = "diagnosis_group",
                        point_size= 0.5,
                        point_alpha = 0.2) 
 ```
@@ -228,7 +228,7 @@ The samples which could be further classified have a mix of `Tumor_Mesoderm`, `T
 scater::plotReducedDim(integrated_sce,
                        dimred = "fastmnn_UMAP",
                        # color each point by cell type
-                       colour_by = "celltype_broad",
+                       color_by = "celltype_broad",
                        point_size= 0.5, 
                        point_alpha = 0.4)
 ```
@@ -248,7 +248,7 @@ In the below plot we will color our cells by cell type while also using `facet_g
 scater::plotReducedDim(integrated_sce,
                        dimred = "fastmnn_UMAP",
                        # color each point by cell type
-                       colour_by = "celltype_broad",
+                       color_by = "celltype_broad",
                        point_size= 0.5, 
                        point_alpha = 0.4,
                        # tell scater to use diagnosis_group for plotting
@@ -679,7 +679,7 @@ myoblast_combined_sce <- rms_sce[, which(rms_sce$celltype_broad == "Tumor_Myobla
 # plot PTPRT (ENSG00000196090) expression in ARMS vs. ERMS
 scater::plotReducedDim(myoblast_combined_sce,
                        dimred = "fastmnn_UMAP",
-                       colour_by = "ENSG00000196090", #PTPRT
+                       color_by = "ENSG00000196090", #PTPRT
                        point_size= 0.5,
                        point_alpha = 0.4,
                        other_fields = "diagnosis_group") +
@@ -718,7 +718,7 @@ scater::plotExpression(tumor_sce,
                        # a vector of genes to plot
                        features = genes_to_plot, 
                        x = "diagnosis_group", 
-                       colour_by = "diagnosis_group",
+                       color_by = "diagnosis_group",
                        other_fields = "celltype_broad",
                        point_size = 0.1) +
   # each celltype is its own column
@@ -727,7 +727,7 @@ scater::plotExpression(tumor_sce,
              rows = vars(Feature)) + 
   # change the font size of the facet labels
   theme(strip.text = element_text(size = 7)) + 
-  guides(colour = guide_legend(
+  guides(color = guide_legend(
     title = "Subtype", # update the legend title
     # change the size of the legend colors
     override.aes = list(size = 3, alpha = 1))

From d954ed6513f451a187a6c26e1a7e5ad1404bafa7 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Mon, 4 Nov 2024 12:41:38 -0500
Subject: [PATCH 3/3] Apply Dark2 fill palette to the cell type proportion plot

---
 scRNA-seq-advanced/03-differential_expression.Rmd | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scRNA-seq-advanced/03-differential_expression.Rmd b/scRNA-seq-advanced/03-differential_expression.Rmd
index c702e3bf..fa6b3c82 100644
--- a/scRNA-seq-advanced/03-differential_expression.Rmd
+++ b/scRNA-seq-advanced/03-differential_expression.Rmd
@@ -275,6 +275,7 @@ ggplot(tumor_cells_df, aes(x = sample, fill = celltype_broad)) +
       y = "Proportion of cells", 
       fill = "Cell type"
     ) +
+  scale_fill_brewer(palette = "Dark2") +
   theme_bw() +
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+
   # facet by diagnosis group