Merge pull request #359 from ncborcherding/dev

v2.0.3
BorchLab · May 3, 2024 · 93d31af · 93d31af
2 parents a416610 + 7b0d94e
commit 93d31af
Show file tree

Hide file tree

Showing 55 changed files with 1,460 additions and 317 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: scRepertoire
 Title: A toolkit for single-cell immune receptor profiling
-Version: 2.0.0
+Version: 2.0.3
 Authors@R: c(
     person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "[email protected]"), 
     person(given = "Qile", family = "Yang", role = c("aut"), email = "[email protected]"), 

diff --git a/NAMESPACE b/NAMESPACE
@@ -35,7 +35,6 @@ export(positionalEntropy)
 export(positionalProperty)
 export(subsetClones)
 export(vizGenes)
-import(dplyr)
 import(ggplot2)
 importFrom(Rcpp,sourceCpp)
 importFrom(S4Vectors,DataFrame)
@@ -48,15 +47,28 @@ importFrom(SummarizedExperiment,colData)
 importFrom(VGAM,dpareto)
 importFrom(cubature,adaptIntegrate)
 importFrom(dplyr,"%>%")
+importFrom(dplyr,across)
+importFrom(dplyr,all_of)
+importFrom(dplyr,arrange)
 importFrom(dplyr,bind_rows)
+importFrom(dplyr,coalesce)
 importFrom(dplyr,count)
+importFrom(dplyr,desc)
+importFrom(dplyr,filter)
 importFrom(dplyr,group_by)
+importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,mutate_at)
+importFrom(dplyr,mutate_if)
+importFrom(dplyr,n)
 importFrom(dplyr,sample_n)
 importFrom(dplyr,select)
+importFrom(dplyr,slice_max)
 importFrom(dplyr,summarise)
+importFrom(dplyr,summarize)
 importFrom(dplyr,summarize_all)
+importFrom(dplyr,transmute)
+importFrom(dplyr,ungroup)
 importFrom(evmix,dgpd)
 importFrom(evmix,fgpd)
 importFrom(evmix,pgpd)
@@ -86,6 +98,7 @@ importFrom(igraph,components)
 importFrom(igraph,graph_from_data_frame)
 importFrom(igraph,graph_from_edgelist)
 importFrom(igraph,set_vertex_attr)
+importFrom(igraph,union)
 importFrom(methods,slot)
 importFrom(plyr,join)
 importFrom(plyr,llply)
@@ -94,6 +107,8 @@ importFrom(reshape2,dcast)
 importFrom(reshape2,melt)
 importFrom(rjson,fromJSON)
 importFrom(rlang,"%||%")
+importFrom(rlang,":=")
+importFrom(rlang,sym)
 importFrom(stats,as.dist)
 importFrom(stats,hclust)
 importFrom(stats,mad)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,29 @@
+# scRepertoire VERSION 2.0.3
+
+## UNDERLYING CHANGES
+
+* Modified support for Omniscope format to allow for dual chains
+* Added ParseBio support in ```loadContigs()``` and testthat
+* Added support for productive variable to ```loadContigs()``` for BD, Omniscope, and Immcantation formats
+* Replace numerical indexing with name indexing for ```loadContigs()```
+* ```combineBCR()``` and ```combineTCR()``` now allow for unproductive contig inclusions with new **filterNonproductive** parameter.
+* ```combineBCR()``` will now prompt user if **samples** is not included instead of erroring.
+* Added base threshold by length for internal ```.lvCompare()```
+* Ensured internal ```.lvCompare()``` only looks at first set of sequences in multi-sequence chain.
+* Fixed bug in exporting graph for ```clonalCluster()```
+* Fixed conflict in functions between igraph and dplyr packages
+
+# scRepertoire VERSION 2.0.2
+
+## UNDERLYING CHANGES
+
+* ```clonalOccupy()``` rewrite of counting and NA handling
+
+# scRepertoire VERSION 2.0.1 
+
+## UNDERLYING CHANGES
+
+* ```clonalOverlay()``` arguments are now **cutpoint** and **cut.category**; use **cut.category** to select either clonalProportion or clonalFrequency
 
 # scRepertoire VERSION 2.0.0 (2024-01-10)
 

diff --git a/R/clonalCluster.R b/R/clonalCluster.R
@@ -6,6 +6,8 @@
 #' \code{\link{combineBCR}} or \code{\link{combineExpression}} and amends a 
 #' cluster to the data frame or meta data. If \strong{exportGraph} is set 
 #' to TRUE, the function returns an igraph object of the connected sequences. 
+#' If multiple sequences per chain are present, this function only compares
+#' the first sequence.
 #' 
 #' @examples
 #' # Getting the combined contigs
@@ -33,13 +35,15 @@
 #' sequences (\strong{TRUE}) or the amended input with a
 #' new cluster-based variable (\strong{FALSE}).
 #' @importFrom stringdist stringdist
-#' @importFrom igraph set_vertex_attr V
+#' @importFrom igraph set_vertex_attr V union
 #' @importFrom plyr join
-#' @importFrom dplyr bind_rows
+#' @importFrom dplyr bind_rows summarize
 #' @importFrom stringr str_split str_replace_all
 #' @importFrom rlang %||%
 #' @importFrom SummarizedExperiment colData<- colData
 #' @importFrom stats na.omit
+#' @importFrom S4Vectors DataFrame
+#' 
 #' @export
 #' @concept Visualizing_Clones
 #' @return Either amended input with edit-distanced clusters added 
@@ -95,13 +99,19 @@ clonalCluster <- function(input.data,
 
   if (!is.null(group.by)) {
     bound <- bind_rows(dat, .id = "group.by")
+    bound <- bound[!is.na(bound[,ref2]),]
+    retain.ref <- data.frame(old = bound[,ref2], new = str_split(bound[,ref2], ";", simplify = TRUE)[,1])
+    bound[,ref2] <- str_split(bound[,ref2], ";", simplify = TRUE)[,1]
     graph.variables <- bound %>%
                           group_by(bound[,ref2]) %>%
                           dplyr::summarize(sample_count = n(),
                                     unique_samples = paste0(unique(group.by), collapse = ","))
   } else {
     bound <- bind_rows(dat)
-    graph.variables <- bind_rows(dat) %>%
+    bound <- bound[!is.na(bound[,ref2]),]
+    retain.ref <- data.frame(old = bound[,ref2], new = str_split(bound[,ref2], ";", simplify = TRUE)[,1])
+    bound[,ref2] <- str_split(bound[,ref2], ";", simplify = TRUE)[,1]
+    graph.variables <- bound %>%
                           group_by(bound[,ref2]) %>%
                           dplyr::summarize(sample_count = n())
   }
@@ -118,12 +128,12 @@ clonalCluster <- function(input.data,
   #Grabbing column order for later return
   column.order <- colnames(bound)
 
-  #Returning the igraph object if eexportGraph = TRUE
+  #Returning the igraph object if exportGraph = TRUE
   if(exportGraph) {
-    cluster <- output.list[[1]]
+    cluster <- do.call(igraph::union, output.list)
     vertex <- names(V(cluster))
     data_df <- unique(data.frame(
-      id = V(cluster)$name
+      id = vertex
     ))
     data_df <- merge(data_df, graph.variables, by = 1)
     cluster <- set_vertex_attr(cluster, 
@@ -169,10 +179,11 @@ clonalCluster <- function(input.data,
       col.name <- names(PreMeta) %||% colnames(PreMeta)
       input.data[[col.name]] <- PreMeta
     } else {
-      rownames <- rownames(colData(input.data))
-      colData(input.data) <- cbind(colData(input.data), 
-                                   PreMeta[rownames,])[, union(colnames(colData(input.data)),  colnames(PreMeta))]
-      rownames(colData(input.data)) <- rownames 
+      combined_col_names <- unique(c(colnames(colData(sc.data)), colnames(PreMeta)))
+      full_data <- merge(colData(sc.data), PreMeta[rownames, , drop = FALSE], by = "row.names", all.x = TRUE)
+      rownames(full_data) <- full_data[, 1]
+      full_data  <- full_data[, -1]
+      colData(sc.data) <- DataFrame(full_data[, combined_col_names])
     }
   } else {
     #Reorder columns

diff --git a/R/clonalLength.R b/R/clonalLength.R
@@ -41,8 +41,6 @@ clonalLength <- function(input.data,
                          exportTable = FALSE, 
                          palette = "inferno") {
 
-
-
   input.data <- .data.wrangle(input.data, 
                               group.by, 
                               .theCall(input.data, cloneCall, check.df = FALSE), 

diff --git a/R/clonalNetwork.R b/R/clonalNetwork.R
@@ -45,7 +45,6 @@
 #' @param chain indicate if both or a specific chain should be used - 
 #' e.g. "both", "TRA", "TRG", "IGH", "IGL".
 #' @param exportTable Exports a table of the data into the global 
-#' environment in addition to the visualization.
 #' @param exportClones Exports a table of clones that are shared
 #' across multiple identity groups and ordered by the total number
 #' of clone copies.
@@ -55,7 +54,7 @@
 #' @import ggplot2
 #' @importFrom stringr str_sort
 #' @importFrom igraph graph_from_data_frame V `V<-`
-#' @importFrom dplyr %>% group_by select summarize_all count
+#' @importFrom dplyr %>% group_by select summarize_all count n across all_of desc
 #' @importFrom tidygraph as_tbl_graph activate
 #' @importFrom ggraph ggraph geom_edge_bend  geom_node_point scale_edge_colour_gradientn circle guide_edge_colourbar
 #' @importFrom stats setNames
@@ -72,8 +71,8 @@ clonalNetwork <- function(sc.data,
                           filter.graph = FALSE,
                           cloneCall = "strict", 
                           chain = "both", 
-                          exportTable = FALSE, 
                           exportClones = FALSE,
+                          exportTable = FALSE,
                           palette = "inferno") {
     to <- from <- weight <- y <- NULL
     meta <- .grabMeta(sc.data)
@@ -155,15 +154,15 @@ clonalNetwork <- function(sc.data,
       group_by(meta[,group.by]) %>%
       na.omit() %>%
       unique() %>%
-      summarise(n = n()) %>%
+      summarise(n = dplyr::n()) %>%
       {setNames(.$n, .$`meta[, group.by]`)} 
 
     #Total clones per group.by
     total.number <- meta %>%
       select(all_of(c(cloneCall, group.by))) %>%
       group_by(meta[,group.by]) %>%
       na.omit() %>%
-      summarise(n = n()) %>%
+      summarise(n = dplyr::n()) %>%
       {setNames(.$n, .$`meta[, group.by]`)} 
 
     edge.list <- NULL

diff --git a/R/clonalOccupy.R b/R/clonalOccupy.R
@@ -33,7 +33,7 @@
 #' environment in addition to the visualization.
 #' @param palette Colors to use in visualization - input any 
 #' \link[grDevices]{hcl.pals}.
-#' @importFrom dplyr %>% group_by mutate
+#' @importFrom dplyr %>% group_by mutate count
 #' @importFrom reshape2 melt
 #' @import ggplot2
 #' @export
@@ -50,20 +50,30 @@ clonalOccupy <- function(sc.data,
                          palette = "inferno") {
   .checkSingleObject(sc.data)
   meta <- .grabMeta(sc.data)
-  meta <- melt(table(meta[!is.na(meta[,"clonalFrequency"]), 
-                          c(x.axis, facet.by, "cloneSize")], useNA = "ifany"))
+
+  meta <- meta %>%
+            group_by(meta[,x.axis], meta[,facet.by], cloneSize) %>%
+            count() %>%
+            as.data.frame()
+  meta[,1] <- as.factor(meta[,1])
+
+  colnames(meta)[1] <- x.axis
+  if(!is.null(facet.by)) {
+    colnames(meta)[2] <- facet.by
+  }
+
   #Check for NAs
   if (!na.include) {
     meta <- na.omit(meta)
   }
-  meta <- meta[meta$value != 0,]
+  meta <- meta[meta$n != 0,]
 
   #Convert to proportion
   if(proportion) {
     meta <- meta %>%
       group_by(meta[,1]) %>%
-      mutate(total = sum(value), 
-             prop = value/total)
+      mutate(total = sum(n), 
+             prop = n/total)
     meta <- as.data.frame(meta)
   }
   if (exportTable) {
@@ -77,7 +87,7 @@ clonalOccupy <- function(sc.data,
     lab <- "Proportion of Cells"
 
   } else {
-    plot <- ggplot(meta, aes(x = meta[,x.axis], y = value, fill = cloneSize)) + 
+    plot <- ggplot(meta, aes(x = meta[,x.axis], y = n, fill = cloneSize)) + 
               geom_bar(stat = "identity", color = "black", lwd = 0.25) 
     lab <- "Single Cells"
 
@@ -94,7 +104,7 @@ clonalOccupy <- function(sc.data,
   }
   if (label) {
     plot <- plot + 
-              geom_text(aes(label = value), position = position_stack(vjust = 0.5))
+              geom_text(aes(label = n), position = position_stack(vjust = 0.5))
   }
   plot
 }
diff --git a/R/clonalOverlap.R b/R/clonalOverlap.R
@@ -77,10 +77,7 @@ clonalOverlap <- function(input.data,
                               .theCall(input.data, cloneCall, check.df = FALSE), 
                               chain)
     cloneCall <- .theCall(input.data, cloneCall)
-
-    input.data <- input.data[order(names(input.data))]
-    values <- str_sort(as.character(unique(names(input.data))), numeric = TRUE)
-    input.data <- input.data[values]
+
     num_samples <- length(input.data[])
     names_samples <- names(input.data)
     length <- seq_len(num_samples)

diff --git a/R/clonalOverlay.R b/R/clonalOverlay.R
@@ -20,13 +20,15 @@
 #' #Using clonalOverlay()
 #' clonalOverlay(scRep_example, 
 #'               reduction = "umap", 
-#'               freq.cutpoint = 0.3, 
+#'               cutpoint = 3, 
 #'               bins = 5) 
 #' 
 #' @param sc.data The single-cell object after \code{\link{combineExpression}}.
-#' @param reduction The dimensional reduction to visualize
-#' @param freq.cutpoint The overlay cut point to include, this corresponds to the 
-#' Frequency variable in the single-cell object
+#' @param reduction The dimensional reduction to visualize.
+#' @param cut.category Meta data variable of the single-cell object to use for 
+#' filtering.
+#' @param cutpoint The overlay cut point to include, this corresponds to the 
+#' cut.category variable in the meta data of the single-cell object.
 #' @param bins The number of contours to the overlay
 #' @param facet.by meta data variable to facet the comparison
 #' 
@@ -40,20 +42,28 @@
 
 clonalOverlay <- function(sc.data, 
                           reduction = NULL, 
-                          freq.cutpoint = 30, 
+                          cut.category = "clonalFrequency",
+                          cutpoint = 30, 
                           bins = 25, 
                           facet.by = NULL) {
   .checkSingleObject(sc.data)
 
   #Forming the data frame to plot
   tmp <- data.frame(.grabMeta(sc.data), .get.coord(sc.data, reduction))
+
+  if(cut.category %!in% colnames(tmp)) {
+    stop("If filtering the data using a cutpoint, ensure the cut.category correspond to a variable in the meta data.")
+  }
   #Add facet variable if present
   if (!is.null(facet.by)) { 
     facet.by <- tmp[,facet.by]
     tmp <- data.frame(facet.by, tmp)
   }
-  tmp$include <- ifelse(tmp[,"clonalFrequency"] >= freq.cutpoint, "Yes", NA)
-  tmp2 <- subset(tmp, include == "Yes")
+  #If using cut.category for filtering
+  if(!is.null(cut.category) & !is.null(cutpoint)) {
+    tmp$include <- ifelse(tmp[,cut.category] >= cutpoint, "Yes", NA)
+    tmp2 <- subset(tmp, include == "Yes")
+  }
 
   #Plotting
   plot <- ggplot(tmp2, mapping = aes(x = tmp2[,(ncol(tmp2)-2)], 

diff --git a/R/clonalProportion.R b/R/clonalProportion.R
@@ -32,7 +32,7 @@
 #' @import ggplot2
 #' @importFrom stringr str_sort
 #' @importFrom reshape2 melt
-#' @importFrom dplyr bind_rows
+#' @importFrom dplyr bind_rows n
 #'
 #' @export
 #' @concept Visualizing_Clones

diff --git a/R/clonalRarefaction.R b/R/clonalRarefaction.R
@@ -31,8 +31,8 @@
 #' coverage-based rarefaction/extrapolation curve (\code{type = 3}).   
 #' @param hill.numbers The Hill numbers to be plotted out 
 #' (0 - species richness, 1 - Shannon, 2 - Simpson)
-#' @param n.boots The number of bootstraps to downsample in o
-#' rder to get mean diversity.
+#' @param n.boots The number of bootstraps to downsample in order 
+#' to get mean diversity.
 #' @param exportTable Exports a table of the data into the global 
 #' environment in addition to the visualization.
 #' @param palette Colors to use in visualization - input any