diff --git a/DESCRIPTION b/DESCRIPTION index 019209ad..60f4d841 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: scRepertoire Title: A toolkit for single-cell immune receptor profiling -Version: 2.0.0 +Version: 2.0.3 Authors@R: c( person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com"), person(given = "Qile", family = "Yang", role = c("aut"), email = "qile0317@gmail.com"), diff --git a/NAMESPACE b/NAMESPACE index c1c0a76e..682a158c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,6 @@ export(positionalEntropy) export(positionalProperty) export(subsetClones) export(vizGenes) -import(dplyr) import(ggplot2) importFrom(Rcpp,sourceCpp) importFrom(S4Vectors,DataFrame) @@ -48,15 +47,28 @@ importFrom(SummarizedExperiment,colData) importFrom(VGAM,dpareto) importFrom(cubature,adaptIntegrate) importFrom(dplyr,"%>%") +importFrom(dplyr,across) +importFrom(dplyr,all_of) +importFrom(dplyr,arrange) importFrom(dplyr,bind_rows) +importFrom(dplyr,coalesce) importFrom(dplyr,count) +importFrom(dplyr,desc) +importFrom(dplyr,filter) importFrom(dplyr,group_by) +importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,mutate_at) +importFrom(dplyr,mutate_if) +importFrom(dplyr,n) importFrom(dplyr,sample_n) importFrom(dplyr,select) +importFrom(dplyr,slice_max) importFrom(dplyr,summarise) +importFrom(dplyr,summarize) importFrom(dplyr,summarize_all) +importFrom(dplyr,transmute) +importFrom(dplyr,ungroup) importFrom(evmix,dgpd) importFrom(evmix,fgpd) importFrom(evmix,pgpd) @@ -86,6 +98,7 @@ importFrom(igraph,components) importFrom(igraph,graph_from_data_frame) importFrom(igraph,graph_from_edgelist) importFrom(igraph,set_vertex_attr) +importFrom(igraph,union) importFrom(methods,slot) importFrom(plyr,join) importFrom(plyr,llply) @@ -94,6 +107,8 @@ importFrom(reshape2,dcast) importFrom(reshape2,melt) importFrom(rjson,fromJSON) importFrom(rlang,"%||%") +importFrom(rlang,":=") +importFrom(rlang,sym) 
importFrom(stats,as.dist) importFrom(stats,hclust) importFrom(stats,mad) diff --git a/NEWS.md b/NEWS.md index 3f587a87..b8ad2abc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,29 @@ +# scRepertoire VERSION 2.0.3 + +## UNDERLYING CHANGES + +* Modified support for Omniscope format to allow for dual chains +* Added ParseBio support in ```loadContigs()``` and testthat +* Added support for productive variable to ```loadContigs()``` for BD, Omniscope, and Immcantation formats +* Replace numerical indexing with name indexing for ```loadContigs()``` +* ```combineBCR()``` and ```combineTCR()``` now allow for unproductive contig inclusions with new **filterNonproductive** parameter. +* ```combineBCR()``` will now stop with an informative error message if **samples** is not provided. +* Added base threshold by length for internal ```.lvCompare()``` +* Ensured internal ```.lvCompare()``` only looks at first set of sequences in multi-sequence chain. +* Fixed bug in exporting graph for ```clonalCluster()``` +* Fixed conflict in functions between igraph and dplyr packages + +# scRepertoire VERSION 2.0.2 + +## UNDERLYING CHANGES + +* ```clonalOccupy()``` rewrite of counting and NA handling + +# scRepertoire VERSION 2.0.1 + +## UNDERLYING CHANGES + +* ```clonalOverlay()``` argument **freq.cutpoint** replaced by **cutpoint**; use **cut.category** to select either clonalProportion or clonalFrequency # scRepertoire VERSION 2.0.0 (2024-01-10) diff --git a/R/clonalCluster.R b/R/clonalCluster.R index 61b6cc4e..2b4af6f7 100644 --- a/R/clonalCluster.R +++ b/R/clonalCluster.R @@ -6,6 +6,8 @@ #' \code{\link{combineBCR}} or \code{\link{combineExpression}} and amends a #' cluster to the data frame or meta data. If \strong{exportGraph} is set #' to TRUE, the function returns an igraph object of the connected sequences. +#' If multiple sequences per chain are present, this function only compares +#' the first sequence. 
#' #' @examples #' # Getting the combined contigs @@ -33,13 +35,15 @@ #' sequences (\strong{TRUE}) or the amended input with a #' new cluster-based variable (\strong{FALSE}). #' @importFrom stringdist stringdist -#' @importFrom igraph set_vertex_attr V +#' @importFrom igraph set_vertex_attr V union #' @importFrom plyr join -#' @importFrom dplyr bind_rows +#' @importFrom dplyr bind_rows summarize #' @importFrom stringr str_split str_replace_all #' @importFrom rlang %||% #' @importFrom SummarizedExperiment colData<- colData #' @importFrom stats na.omit +#' @importFrom S4Vectors DataFrame +#' #' @export #' @concept Visualizing_Clones #' @return Either amended input with edit-distanced clusters added @@ -95,13 +99,19 @@ clonalCluster <- function(input.data, if (!is.null(group.by)) { bound <- bind_rows(dat, .id = "group.by") + bound <- bound[!is.na(bound[,ref2]),] + retain.ref <- data.frame(old = bound[,ref2], new = str_split(bound[,ref2], ";", simplify = TRUE)[,1]) + bound[,ref2] <- str_split(bound[,ref2], ";", simplify = TRUE)[,1] graph.variables <- bound %>% group_by(bound[,ref2]) %>% dplyr::summarize(sample_count = n(), unique_samples = paste0(unique(group.by), collapse = ",")) } else { bound <- bind_rows(dat) - graph.variables <- bind_rows(dat) %>% + bound <- bound[!is.na(bound[,ref2]),] + retain.ref <- data.frame(old = bound[,ref2], new = str_split(bound[,ref2], ";", simplify = TRUE)[,1]) + bound[,ref2] <- str_split(bound[,ref2], ";", simplify = TRUE)[,1] + graph.variables <- bound %>% group_by(bound[,ref2]) %>% dplyr::summarize(sample_count = n()) } @@ -118,12 +128,12 @@ clonalCluster <- function(input.data, #Grabbing column order for later return column.order <- colnames(bound) - #Returning the igraph object if eexportGraph = TRUE + #Returning the igraph object if exportGraph = TRUE if(exportGraph) { - cluster <- output.list[[1]] + cluster <- do.call(igraph::union, output.list) vertex <- names(V(cluster)) data_df <- unique(data.frame( - id = V(cluster)$name + id 
= vertex )) data_df <- merge(data_df, graph.variables, by = 1) cluster <- set_vertex_attr(cluster, @@ -169,10 +179,11 @@ clonalCluster <- function(input.data, col.name <- names(PreMeta) %||% colnames(PreMeta) input.data[[col.name]] <- PreMeta } else { - rownames <- rownames(colData(input.data)) - colData(input.data) <- cbind(colData(input.data), - PreMeta[rownames,])[, union(colnames(colData(input.data)), colnames(PreMeta))] - rownames(colData(input.data)) <- rownames + combined_col_names <- unique(c(colnames(colData(sc.data)), colnames(PreMeta))) + full_data <- merge(colData(sc.data), PreMeta[rownames, , drop = FALSE], by = "row.names", all.x = TRUE) + rownames(full_data) <- full_data[, 1] + full_data <- full_data[, -1] + colData(sc.data) <- DataFrame(full_data[, combined_col_names]) } } else { #Reorder columns diff --git a/R/clonalLength.R b/R/clonalLength.R index 48ecc604..e91703f8 100644 --- a/R/clonalLength.R +++ b/R/clonalLength.R @@ -41,8 +41,6 @@ clonalLength <- function(input.data, exportTable = FALSE, palette = "inferno") { - - input.data <- .data.wrangle(input.data, group.by, .theCall(input.data, cloneCall, check.df = FALSE), diff --git a/R/clonalNetwork.R b/R/clonalNetwork.R index 234176de..95a169c5 100644 --- a/R/clonalNetwork.R +++ b/R/clonalNetwork.R @@ -45,7 +45,6 @@ #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". #' @param exportTable Exports a table of the data into the global -#' environment in addition to the visualization. #' @param exportClones Exports a table of clones that are shared #' across multiple identity groups and ordered by the total number #' of clone copies. 
@@ -55,7 +54,7 @@ #' @import ggplot2 #' @importFrom stringr str_sort #' @importFrom igraph graph_from_data_frame V `V<-` -#' @importFrom dplyr %>% group_by select summarize_all count +#' @importFrom dplyr %>% group_by select summarize_all count n across all_of desc #' @importFrom tidygraph as_tbl_graph activate #' @importFrom ggraph ggraph geom_edge_bend geom_node_point scale_edge_colour_gradientn circle guide_edge_colourbar #' @importFrom stats setNames @@ -72,8 +71,8 @@ clonalNetwork <- function(sc.data, filter.graph = FALSE, cloneCall = "strict", chain = "both", - exportTable = FALSE, exportClones = FALSE, + exportTable = FALSE, palette = "inferno") { to <- from <- weight <- y <- NULL meta <- .grabMeta(sc.data) @@ -155,7 +154,7 @@ clonalNetwork <- function(sc.data, group_by(meta[,group.by]) %>% na.omit() %>% unique() %>% - summarise(n = n()) %>% + summarise(n = dplyr::n()) %>% {setNames(.$n, .$`meta[, group.by]`)} #Total clones per group.by @@ -163,7 +162,7 @@ clonalNetwork <- function(sc.data, select(all_of(c(cloneCall, group.by))) %>% group_by(meta[,group.by]) %>% na.omit() %>% - summarise(n = n()) %>% + summarise(n = dplyr::n()) %>% {setNames(.$n, .$`meta[, group.by]`)} edge.list <- NULL diff --git a/R/clonalOccupy.R b/R/clonalOccupy.R index 33171d8c..87000c0e 100644 --- a/R/clonalOccupy.R +++ b/R/clonalOccupy.R @@ -33,7 +33,7 @@ #' environment in addition to the visualization. #' @param palette Colors to use in visualization - input any #' \link[grDevices]{hcl.pals}. 
-#' @importFrom dplyr %>% group_by mutate +#' @importFrom dplyr %>% group_by mutate count #' @importFrom reshape2 melt #' @import ggplot2 #' @export @@ -50,20 +50,30 @@ clonalOccupy <- function(sc.data, palette = "inferno") { .checkSingleObject(sc.data) meta <- .grabMeta(sc.data) - meta <- melt(table(meta[!is.na(meta[,"clonalFrequency"]), - c(x.axis, facet.by, "cloneSize")], useNA = "ifany")) + + meta <- meta %>% + group_by(meta[,x.axis], meta[,facet.by], cloneSize) %>% + count() %>% + as.data.frame() + meta[,1] <- as.factor(meta[,1]) + + colnames(meta)[1] <- x.axis + if(!is.null(facet.by)) { + colnames(meta)[2] <- facet.by + } + #Check for NAs if (!na.include) { meta <- na.omit(meta) } - meta <- meta[meta$value != 0,] + meta <- meta[meta$n != 0,] #Convert to proportion if(proportion) { meta <- meta %>% group_by(meta[,1]) %>% - mutate(total = sum(value), - prop = value/total) + mutate(total = sum(n), + prop = n/total) meta <- as.data.frame(meta) } if (exportTable) { @@ -77,7 +87,7 @@ clonalOccupy <- function(sc.data, lab <- "Proportion of Cells" } else { - plot <- ggplot(meta, aes(x = meta[,x.axis], y = value, fill = cloneSize)) + + plot <- ggplot(meta, aes(x = meta[,x.axis], y = n, fill = cloneSize)) + geom_bar(stat = "identity", color = "black", lwd = 0.25) lab <- "Single Cells" @@ -94,7 +104,7 @@ clonalOccupy <- function(sc.data, } if (label) { plot <- plot + - geom_text(aes(label = value), position = position_stack(vjust = 0.5)) + geom_text(aes(label = n), position = position_stack(vjust = 0.5)) } plot } \ No newline at end of file diff --git a/R/clonalOverlap.R b/R/clonalOverlap.R index 9cedc284..7d26a405 100644 --- a/R/clonalOverlap.R +++ b/R/clonalOverlap.R @@ -77,10 +77,7 @@ clonalOverlap <- function(input.data, .theCall(input.data, cloneCall, check.df = FALSE), chain) cloneCall <- .theCall(input.data, cloneCall) - - input.data <- input.data[order(names(input.data))] - values <- str_sort(as.character(unique(names(input.data))), numeric = TRUE) - input.data 
<- input.data[values] + num_samples <- length(input.data[]) names_samples <- names(input.data) length <- seq_len(num_samples) diff --git a/R/clonalOverlay.R b/R/clonalOverlay.R index e130e1ea..85043dfd 100644 --- a/R/clonalOverlay.R +++ b/R/clonalOverlay.R @@ -20,13 +20,15 @@ #' #Using clonalOverlay() #' clonalOverlay(scRep_example, #' reduction = "umap", -#' freq.cutpoint = 0.3, +#' cutpoint = 3, #' bins = 5) #' #' @param sc.data The single-cell object after \code{\link{combineExpression}}. -#' @param reduction The dimensional reduction to visualize -#' @param freq.cutpoint The overlay cut point to include, this corresponds to the -#' Frequency variable in the single-cell object +#' @param reduction The dimensional reduction to visualize. +#' @param cut.category Meta data variable of the single-cell object to use for +#' filtering. +#' @param cutpoint The overlay cut point to include, this corresponds to the +#' cut.category variable in the meta data of the single-cell object. #' @param bins The number of contours to the overlay #' @param facet.by meta data variable to facet the comparison #' @@ -40,20 +42,28 @@ clonalOverlay <- function(sc.data, reduction = NULL, - freq.cutpoint = 30, + cut.category = "clonalFrequency", + cutpoint = 30, bins = 25, facet.by = NULL) { .checkSingleObject(sc.data) #Forming the data frame to plot tmp <- data.frame(.grabMeta(sc.data), .get.coord(sc.data, reduction)) + + if(cut.category %!in% colnames(tmp)) { + stop("If filtering the data using a cutpoint, ensure the cut.category correspond to a variable in the meta data.") + } #Add facet variable if present if (!is.null(facet.by)) { facet.by <- tmp[,facet.by] tmp <- data.frame(facet.by, tmp) } - tmp$include <- ifelse(tmp[,"clonalFrequency"] >= freq.cutpoint, "Yes", NA) - tmp2 <- subset(tmp, include == "Yes") + #If using cut.category for filtering + if(!is.null(cut.category) & !is.null(cutpoint)) { + tmp$include <- ifelse(tmp[,cut.category] >= cutpoint, "Yes", NA) + tmp2 <- subset(tmp, 
include == "Yes") + } #Plotting plot <- ggplot(tmp2, mapping = aes(x = tmp2[,(ncol(tmp2)-2)], diff --git a/R/clonalProportion.R b/R/clonalProportion.R index b44709a4..0aeb0f70 100644 --- a/R/clonalProportion.R +++ b/R/clonalProportion.R @@ -32,7 +32,7 @@ #' @import ggplot2 #' @importFrom stringr str_sort #' @importFrom reshape2 melt -#' @importFrom dplyr bind_rows +#' @importFrom dplyr bind_rows n #' #' @export #' @concept Visualizing_Clones diff --git a/R/clonalRarefaction.R b/R/clonalRarefaction.R index dcae9675..7ec5d02a 100644 --- a/R/clonalRarefaction.R +++ b/R/clonalRarefaction.R @@ -31,8 +31,8 @@ #' coverage-based rarefaction/extrapolation curve (\code{type = 3}). #' @param hill.numbers The Hill numbers to be plotted out #' (0 - species richness, 1 - Shannon, 2 - Simpson) -#' @param n.boots The number of bootstraps to downsample in o -#' rder to get mean diversity. +#' @param n.boots The number of bootstraps to downsample in order +#' to get mean diversity. #' @param exportTable Exports a table of the data into the global #' environment in addition to the visualization. #' @param palette Colors to use in visualization - input any diff --git a/R/combineContigs.R b/R/combineContigs.R index d491807c..6248f6b9 100644 --- a/R/combineContigs.R +++ b/R/combineContigs.R @@ -43,8 +43,10 @@ utils::globalVariables(c( #' @param removeMulti This will remove barcodes with greater than 2 chains. #' @param filterMulti This option will allow for the selection of the 2 #' corresponding chains with the highest expression for a single barcode. +#' @param filterNonproductive This option will allow for the removal of +#' nonproductive chains if the variable exists in the contig data. Default +#' is set to TRUE to remove nonproductive contigs. 
#' -#' @import dplyr #' @export #' @concept Loading_and_Processing_Contigs #' @return List of clones for individual cell barcodes @@ -54,7 +56,8 @@ combineTCR <- function(input.data, ID = NULL, removeNA = FALSE, removeMulti = FALSE, - filterMulti = FALSE) { + filterMulti = FALSE, + filterNonproductive = TRUE) { input.data <- .checkList(input.data) input.data <- .checkContigs(input.data) out <- NULL @@ -63,7 +66,7 @@ combineTCR <- function(input.data, if(c("chain") %in% colnames(input.data[[i]])) { input.data[[i]] <- subset(input.data[[i]], chain != "Multi") } - if(c("productive") %in% colnames(input.data[[i]])) { + if(c("productive") %in% colnames(input.data[[i]]) & filterNonproductive) { input.data[[i]] <- subset(input.data[[i]], productive %in% c(TRUE, "TRUE", "True", "true")) } input.data[[i]]$sample <- samples[i] @@ -95,13 +98,22 @@ combineTCR <- function(input.data, Con.df[Con.df == "NA_NA" | Con.df == "NA;NA_NA;NA"] <- NA data3 <- merge(data2[,-which(names(data2) %in% c("TCR1","TCR2"))], Con.df, by = "barcode") - if (!is.null(samples) && !is.null(ID)) { - data3 <- data3[, c("barcode", "sample", "ID", tcr1_lines, tcr2_lines, - CT_lines)] } - else if (!is.null(samples) & is.null(ID)) { - data3<-data3[,c("barcode","sample",tcr1_lines,tcr2_lines, - CT_lines)] + + columns_to_include <- c("barcode") + # Conditionally add columns based on user input + if (!is.null(samples)) { + columns_to_include <- c(columns_to_include, "sample") + } + if (!is.null(ID)) { + columns_to_include <- c(columns_to_include, "ID") } + + # Add TCR and CT lines which are presumably always needed + columns_to_include <- c(columns_to_include, tcr1_lines, tcr2_lines, CT_lines) + + # Subset the data frame based on the dynamically built list of columns + data3 <- data3[, columns_to_include] + final[[i]] <- data3 } name_vector <- character(length(samples)) @@ -158,7 +170,7 @@ combineTCR <- function(input.data, #' #' @param input.data List of filtered contig annotations or outputs from #' 
\code{\link{loadContigs}}. -#' @param samples The labels of samples +#' @param samples The labels of samples (required). #' @param ID The additional sample labeling (optional). #' @param call.related.clones Use the nucleotide sequence and V gene #' to call related clones. Default is set to TRUE. FALSE will return @@ -169,7 +181,10 @@ combineTCR <- function(input.data, #' @param removeMulti This will remove barcodes with greater than 2 chains. #' @param filterMulti This option will allow for the selection of the #' highest-expressing light and heavy chains, if not calling related clones. -#' @import dplyr +#' @param filterNonproductive This option will allow for the removal of +#' nonproductive chains if the variable exists in the contig data. Default +#' is set to TRUE to remove nonproductive contigs. +#' @importFrom dplyr %>% mutate #' @export #' @concept Loading_and_Processing_Contigs #' @return List of clones for individual cell barcodes @@ -180,7 +195,11 @@ combineBCR <- function(input.data, threshold = 0.85, removeNA = FALSE, removeMulti = FALSE, - filterMulti = TRUE) { + filterMulti = TRUE, + filterNonproductive = TRUE) { + if(is.null(samples)) { + stop("combineBCR() requires the samples paramter for the calculation of edit distance.") + } input.data <- .checkList(input.data) input.data <- .checkContigs(input.data) out <- NULL @@ -190,6 +209,9 @@ combineBCR <- function(input.data, for (i in seq_along(input.data)) { input.data[[i]] <- subset(input.data[[i]], chain %in% c("IGH", "IGK", "IGL")) input.data[[i]]$ID <- ID[i] + if(c("productive") %in% colnames(input.data[[i]]) & filterNonproductive) { + input.data[[i]] <- subset(input.data[[i]], productive %in% c(TRUE, "TRUE", "True", "true")) + } if (filterMulti) { # Keep IGH / IGK / IGL info in save_chain input.data[[i]]$save_chain <- input.data[[i]]$chain diff --git a/R/combineExpression.R b/R/combineExpression.R index 2aace59c..afcfebe4 100644 --- a/R/combineExpression.R +++ b/R/combineExpression.R @@ -45,9 
+45,10 @@ #' @param addLabel This will add a label to the frequency header, allowing #' the user to try multiple group.by variables or recalculate frequencies after #' subsetting the data. -#' @importFrom dplyr bind_rows %>% summarise -#' @importFrom rlang %||% +#' @importFrom dplyr bind_rows %>% summarise left_join mutate select n all_of coalesce +#' @importFrom rlang %||% sym := #' @importFrom SummarizedExperiment colData<- colData +#' @importFrom S4Vectors DataFrame #' @export #' @concept SC_Functions #' @return Single-cell object with clone information added to meta data @@ -72,6 +73,11 @@ combineExpression <- function(input.data, cloneCall <- .theCall(input.data, cloneCall) if (chain != "both") { + #Retain the full clone information + full.clone <- lapply(input.data, function(x) { + x[,c("barcode", cloneCall)] + full.clone <- bind_rows(full.clone) + }) for(i in seq_along(input.data)) { input.data[[i]] <- .off.the.chain(input.data[[i]], chain, cloneCall) } @@ -87,11 +93,12 @@ combineExpression <- function(input.data, data <- data.frame(input.data[[i]], stringsAsFactors = FALSE) data2 <- unique(data[,c("barcode", cloneCall)]) + #This ensures all calculations are based on the cells in the SCO data2 <- na.omit(data2[data2[,"barcode"] %in% cell.names,]) data2 <- data2 %>% group_by(data2[,cloneCall]) %>% - summarise(clonalProportion = n()/nrow(data2), - clonalFrequency = n()) + summarise(clonalProportion = dplyr::n()/nrow(data2), + clonalFrequency = dplyr::n()) colnames(data2)[1] <- cloneCall data <- merge(data, data2, by = cloneCall, all = TRUE) if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) { @@ -107,11 +114,12 @@ combineExpression <- function(input.data, } else if (group.by != "none" || !is.null(group.by)) { data <- data.frame(bind_rows(input.data), stringsAsFactors = FALSE) data2 <- na.omit(unique(data[,c("barcode", cloneCall, group.by)])) + #This ensures all calculations are based on the cells in the SCO data2 <- data2[data2[,"barcode"] %in% 
cell.names, ] data2 <- as.data.frame(data2 %>% group_by(data2[,cloneCall], data2[,group.by]) %>% - summarise(clonalProportion = n()/nrow(data2), - clonalFrequency = n()) + summarise(clonalProportion = dplyr::n()/nrow(data2), + clonalFrequency = dplyr::n()) ) colnames(data2)[c(1,2)] <- c(cloneCall, group.by) @@ -131,7 +139,6 @@ combineExpression <- function(input.data, cloneSize[length(cloneSize)] <- max(na.omit(Con.df[,"clonalFrequency"])) } - #Creating the bins for cloneSize Con.df$cloneSize <- NA for (x in seq_along(cloneSize)) { @@ -163,8 +170,18 @@ combineExpression <- function(input.data, "CTaa", "CTstrict", "clonalProportion", "clonalFrequency", "cloneSize")]) } + #Removing any duplicate barcodes, should not be an issue dup <- PreMeta$barcode[which(duplicated(PreMeta$barcode))] PreMeta <- PreMeta[PreMeta$barcode %!in% dup,] + + #Re-adding full clones + if (chain != "both") { + clone_sym <- sym(cloneCall) + PreMeta <- PreMeta %>% + left_join(full.clone, by = "barcode", suffix = c("", ".from_full_clones")) %>% + mutate(!!column_sym := coalesce(!!sym(paste0(cloneCall, ".from_full_clones")), !!column_sym)) %>% + select(-all_of(paste0(cloneCall, ".from_full_clones"))) + } barcodes <- PreMeta$barcode PreMeta <- PreMeta[,-1] rownames(PreMeta) <- barcodes @@ -187,7 +204,13 @@ combineExpression <- function(input.data, if (length(which(rownames(PreMeta) %in% rownames))/length(rownames) < 0.01) { warning(.warn_str) } - colData(sc.data) <- cbind(colData(sc.data), PreMeta[rownames,])[, union(colnames(colData(sc.data)), colnames(PreMeta))] + + combined_col_names <- unique(c(colnames(colData(sc.data)), colnames(PreMeta))) + full_data <- merge(colData(sc.data), PreMeta[rownames, , drop = FALSE], by = "row.names", all.x = TRUE) + rownames(full_data) <- full_data[, 1] + full_data <- full_data[, -1] + colData(sc.data) <- DataFrame(full_data[, combined_col_names]) + rownames(colData(sc.data)) <- rownames } if (filterNA) { diff --git a/R/exportClones.R b/R/exportClones.R index 
1de69e87..1069647a 100644 --- a/R/exportClones.R +++ b/R/exportClones.R @@ -58,6 +58,7 @@ exportClones <- function(input.data, write.csv(mat, file = filepath) } +#' @importFrom dplyr %>% group_by mutate .TCRmatchExport<- function(input.data) { input.data <- .data.wrangle(input.data, NULL, "CTgene", "TRB") diff --git a/R/global.R b/R/global.R index f4bc7bd2..ad831071 100644 --- a/R/global.R +++ b/R/global.R @@ -48,5 +48,6 @@ utils::globalVariables ("mat_melt") utils::globalVariables ("position") utils::globalVariables ("se") + utils::globalVariables ("column_sym") invisible () } diff --git a/R/loadContigs.R b/R/loadContigs.R index 40c74f9a..3044bbaf 100644 --- a/R/loadContigs.R +++ b/R/loadContigs.R @@ -12,6 +12,7 @@ #' \item BD = "Contigs_AIRR.tsv" #' \item Immcantation = "data.tsv" #' \item JSON = ".json" +#' \item ParseBio = "barcode_report.tsv" #' \item MiXCR = "clones.tsv" #' \item Omniscope = ".csv" #' \item TRUST4 = "barcode_report.tsv" @@ -30,7 +31,7 @@ #' #' @param input The directory in which contigs are located or a list with contig elements #' @param format The format of the single-cell contig, currently supporting: -#' "10X", "AIRR", "BD", "JSON", "MiXCR", "Omniscope", "TRUST4", and "WAT3R" +#' "10X", "AIRR", "BD", "JSON", "MiXCR", "ParseBio", "Omniscope", "TRUST4", and "WAT3R" #' @importFrom utils read.csv read.delim #' @importFrom rjson fromJSON #' @export @@ -49,7 +50,8 @@ loadContigs <- function(input, "JSON" = ".json", "TRUST4" = "barcode_report.tsv", "BD" = "Contigs_AIRR.tsv", - "Omniscope" =c("_OSB.csv", "_OST.csv")) + "Omniscope" =c("_OSB.csv", "_OST.csv"), + "ParseBio" = "barcode_report.tsv") file.pattern <- format.list[[format]] contig.files <- list.files(input, paste0(file.pattern, collapse = "|"), recursive = TRUE, full.names = TRUE) @@ -77,6 +79,7 @@ loadContigs <- function(input, "WAT3R" = .parseWAT3R, "Omniscope" = .parseOmniscope, "Immcantation" = .parseImmcantation, + "ParseBio" = .parseParse, stop("Invalid format provided")) df <- 
loadFunc(df) @@ -119,21 +122,20 @@ loadContigs <- function(input, #' @author Kyle Romine, Nick Borcherding .parseWAT3R <- function(df) { for (i in seq_along(df)) { - colnames(df[[i]])[1] <- "barcode" df[[i]][df[[i]] == ""] <- NA - chain2 <- df[[i]][,c(1,8,9,10,4,3,7,5)] + chain2 <- df[[i]][,c("BC","TRBV","TRBD","TRBJ","TRB_CDR3nuc","TRB_CDR3","TRB_nReads","TRB_CDR3_UMIcount")] chain2 <- data.frame(chain2[,1], chain = "TRB", chain2[,2:4], c_gene = NA, chain2[,5:8]) colnames(chain2) <- c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads", "umis") #TRA Chain 1 - chain1 <- df[[i]][,c(1,16,17,12,11,15,13)] + chain1 <- df[[i]][,c("BC","TRAV","TRAJ","TRA_CDR3nuc","TRA_CDR3","TRA_nReads","TRA_CDR3_UMIcount")] chain1 <- data.frame(chain1[,1], chain = "TRA",chain1[,2], d_gene = NA, chain1[,3], c_gene = NA, chain1[,4:7]) colnames(chain1) <- c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads", "umis") data2 <- rbind(chain1, chain2) data2[data2 == ""] <- NA #TRA Chain 2 - chain3 <- df[[i]][,c(1,23,24,19,18,22,20)] + chain3 <- df[[i]][,c("BC","TRAV.2","TRAJ.2","TRA.2_CDR3nuc","TRA.2_CDR3","TRA.2_nReads","TRA.2_CDR3_UMIcount")] chain3 <- data.frame(chain3[,1], chain = "TRA",chain3[,2], d_gene = NA, chain3[,3], c_gene = NA, chain3[,4:7]) colnames(chain3) <- c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads", "umis") data2 <- rbind(chain1, chain2, chain3) @@ -173,8 +175,8 @@ loadContigs <- function(input, #Loads BD AIRR .parseBD <- function(df) { for (i in seq_along(df)) { - df[[i]] <- df[[i]][,c(1,2,20,25,30, 35, 48,49,4)] - colnames(df[[i]]) <- c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads") + df[[i]] <- df[[i]][,c("cell_id","locus","v_call","d_call","j_call", "c_call", "cdr3","cdr3_aa","consensus_count", "productive")] + colnames(df[[i]]) <- c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", 
"reads", "productive") df[[i]] <- df[[i]][with(df[[i]], order(reads, chain)),] } return(df) @@ -191,11 +193,11 @@ loadContigs <- function(input, .parseOmniscope <- function(df) { for (i in seq_along(df)) { if("c_call" %in% colnames(df[[i]])) { - df[[i]] <- df[[i]][,c("contig_id", "locus", "umi_count", "v_call", "d_call", "j_call", "c_call", "cdr3", "cdr3_aa")] - colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3") + df[[i]] <- df[[i]][,c("cell_id", "locus", "umi_count", "v_call", "d_call", "j_call", "c_call", "cdr3", "cdr3_aa", "productive")] + colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "productive") } else { #TCR contigs do not include C gene - df[[i]] <- df[[i]][,c("contig_id", "locus", "umi_count", "v_call", "d_call", "j_call", "cdr3", "cdr3_aa")] - colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "cdr3_nt", "cdr3") + df[[i]] <- df[[i]][,c("cell_id", "locus", "umi_count", "v_call", "d_call", "j_call", "cdr3", "cdr3_aa", "productive")] + colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "cdr3_nt", "cdr3", "productive") df[[i]][,"c_gene"] <- NA df[[i]] <- df[[i]][with(df[[i]], order(reads, chain)),] } @@ -230,14 +232,46 @@ loadContigs <- function(input, df[[i]][df[[i]] == ""] <- NA df[[i]] <- as.data.frame(df[[i]]) if("c_call" %in% colnames(df[[i]])) { - df[[i]] <- df[[i]][,c("sequence_id", "locus", "consensus_count", "v_call", "d_call", "j_call", "c_call", "cdr3", "cdr3_aa")] - colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3") + df[[i]] <- df[[i]][,c("sequence_id", "locus", "consensus_count", "v_call", "d_call", "j_call", "c_call", "cdr3", "cdr3_aa", "productive")] + colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "productive") } else { - df[[i]] <- 
df[[i]][,c("sequence_id", "locus", "consensus_count", "v_call", "d_call", "j_call", "cdr3", "cdr3_aa")] - colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "cdr3_nt", "cdr3") + df[[i]] <- df[[i]][,c("sequence_id", "locus", "consensus_count", "v_call", "d_call", "j_call", "cdr3", "cdr3_aa", "productive")] + colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "cdr3_nt", "cdr3", "productive") df[[i]][,"c_gene"] <- NA } df[[i]]$barcode <- str_split(df[[i]][,"barcode"], "_", simplify = TRUE)[,1] } return(df) } + +.parseParse <- function(df) { + for (i in seq_along(df)) { + df[[i]][df[[i]] == ""] <- NA + df[[i]][df[[i]] == "NaN"] <- NA + df[[i]][df[[i]] == "nan"] <- NA + df[[i]] <- as.data.frame(df[[i]]) + TRA.1 <- df[[i]][,c("Barcode", "TRA_V", "TRA_D", "TRA_J", "TRA_C", "TRA_cdr3_aa", "TRA_read_count", "TRA_transcript_count")] + TRA.2 <- df[[i]][,c("Barcode", "secondary_TRA_V", "secondary_TRA_D", "secondary_TRA_J", "secondary_TRA_C", "secondary_TRA_cdr3_aa", "secondary_TRA_read_count", "secondary_TRA_transcript_count")] + colnames(TRA.1) <- 1:8 + colnames(TRA.2) <- 1:8 + TRA <- rbind(TRA.1, TRA.2) + TRA$chain <- "TRA" + + TRB.1 <- df[[i]][,c("Barcode", "TRB_V", "TRB_D", "TRB_J", "TRB_C", "TRB_cdr3_aa", "TRB_read_count", "TRB_transcript_count")] + TRB.2 <- df[[i]][,c("Barcode", "secondary_TRB_V", "secondary_TRB_D", "secondary_TRB_J", "secondary_TRB_C", "secondary_TRB_cdr3_aa", "secondary_TRB_read_count", "secondary_TRB_transcript_count")] + colnames(TRB.1) <- 1:8 + colnames(TRB.2) <- 1:8 + TRB <- rbind(TRB.1, TRB.2) + TRB$chain <- "TRB" + + data2 <- rbind(TRA, TRB) + data2 <- data2[rowSums(is.na(data2[2:8])) != 7, ] + colnames(data2) <- c("barcode", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3", "reads", "umis", "chain") + data2$cdr3_nt <- NA + data2 <- data2[,c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads", "umis")] + + df[[i]] <- data2 + df[[i]] <- 
df[[i]][with(df[[i]], order(reads, chain)),] + } + return(df) +} \ No newline at end of file diff --git a/R/percentAA.R b/R/percentAA.R index 5c50e85d..ab5942c3 100644 --- a/R/percentAA.R +++ b/R/percentAA.R @@ -21,7 +21,7 @@ #' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}. #' @import ggplot2 #' @importFrom reshape2 melt -#' @importFrom dplyr mutate_at %>% +#' @importFrom dplyr mutate_at %>% mutate_if #' @export #' @concept Summarize_Repertoire #' @return ggplot of stacked bar graphs of amino acid proportions diff --git a/R/percentGenes.R b/R/percentGenes.R index 0d6b4773..01194069 100644 --- a/R/percentGenes.R +++ b/R/percentGenes.R @@ -3,7 +3,7 @@ #' This function the proportion V or J genes used by #' grouping variables. This function only quantifies #' single gene loci for indicated \strong{chain}. For -#' examining VJ pairing, please see code{\link{percentVJ}} +#' examining VJ pairing, please see \code{\link{percentVJ}}. #' #' @examples #' #Making combined contig data diff --git a/R/startracDiversity.R b/R/startracDiversity.R index 6cce6301..bfcda0d1 100644 --- a/R/startracDiversity.R +++ b/R/startracDiversity.R @@ -40,6 +40,7 @@ #' @param exportTable Returns the data frame used for forming the graph. #' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}. 
#' @importFrom reshape2 melt +#' @importFrom dplyr %>% mutate group_by #' @import ggplot2 #' @export #' @concept SC_Functions diff --git a/R/utils.R b/R/utils.R index 2e02dcfb..e42df836 100644 --- a/R/utils.R +++ b/R/utils.R @@ -7,6 +7,7 @@ is_seurat_or_se_object <- function(obj) { } #Use to shuffle between chains Qile: the NA handling here *might* be related to the unnamed combineTCR bugs from the new rcpp con.df construction +#' @importFrom stringr str_split #' @keywords internal #' @author Ye-Lin Son Nick Borcherding .off.the.chain <- function(dat, chain, cloneCall, check = TRUE) { @@ -240,7 +241,7 @@ is_seurat_or_se_object <- function(obj) { } #Removing extra clones in barcodes with > 2 productive contigs -#' @import dplyr +#' @importFrom dplyr group_by %>% slice_max #' @keywords internal .filteringMulti <- function(x) { x <- x %>% @@ -266,7 +267,7 @@ is_seurat_or_se_object <- function(obj) { #Filtering NA contigs out of single-cell expression object -#' @import dplyr +#' @importFrom dplyr %>% transmute #' @importFrom SingleCellExperiment colData #' @keywords internal .filteringNA <- function(sc) { @@ -288,6 +289,7 @@ is_seurat_or_se_object <- function(obj) { #Organizing list of contigs for visualization #' @keywords internal +#' @importFrom dplyr %>% group_by n summarise .parseContigs <- function(df, i, names, cloneCall) { data <- df[[i]] data1 <- data %>% @@ -349,9 +351,10 @@ is_seurat_or_se_object <- function(obj) { # but now also constructs Con.df and runs the parseTCR algorithm on it, all in Rcpp #' @author Gloria Kraus, Nick Bormann, Nicky de Vrij, Nick Borcherding, Qile Yang #' @keywords internal +#' @importFrom dplyr %>% arrange .constructConDfAndParseTCR <- function(data2) { rcppConstructConDfAndParseTCR( - data2 %>% arrange(., chain, cdr3_nt), + data2 %>% dplyr::arrange(., chain, cdr3_nt), unique(data2[[1]]) # 1 is the index of the barcode column ) } @@ -459,7 +462,7 @@ is_seurat_or_se_object <- function(obj) { #Sorting the V/D/J/C gene sequences for 
T and B cells #' @importFrom stringr str_c str_replace_na -#' @importFrom dplyr bind_rows +#' @importFrom dplyr bind_rows mutate %>% #' @keywords internal .makeGenes <- function(cellType, data2, chain1, chain2) { if(cellType %in% c("T")) { @@ -544,9 +547,10 @@ is_df_or_list_of_df <- function(x) { # Calculates the normalized Levenshtein Distance between the contig # nucleotide sequence. +#' @importFrom stringr str_split str_sort #' @importFrom stringdist stringdist #' @importFrom igraph graph_from_data_frame components graph_from_edgelist -#' @importFrom dplyr bind_rows +#' @importFrom dplyr bind_rows filter #' @keywords internal .lvCompare <- function(dictionary, gene, chain, threshold, exportGraph = FALSE) { overlap <- NULL @@ -555,68 +559,47 @@ is_df_or_list_of_df <- function(x) { dictionary$v.gene <- stringr::str_split(dictionary[,gene], "[.]", simplify = TRUE)[,1] tmp <- na.omit(unique(dictionary[,c(chain, "v.gene")])) #chunking the sequences for distance by v.gene - unique.v <- na.omit(unique(tmp$v.gene)) - edge.list <- lapply(unique.v, function(y) { - #for(y in unique.v) { - secondary.list <- list() - filtered_df <- tmp %>% filter(v.gene == y) - filtered_df <- filtered_df[!is.na(filtered_df[,chain]),] - nucleotides <- unique(filtered_df[,chain]) - if (length(nucleotides) > 1) { - chain_col_number <- 1 - nucleotide_lengths <- nchar(nucleotides) - # Pre-allocate list - list <- vector("list", length = length(nucleotides)) - - for (i in seq_len((length(nucleotides) - 1))) { - temp_list <- vector("list", length = length(nucleotides) - i) + edge.list <- lapply(str_sort(na.omit(unique(dictionary$v.gene)), numeric = T), function(v_gene) { + filtered_df <- dplyr::filter(dictionary, v.gene == v_gene) + nucleotides <- filtered_df[[chain]] + nucleotides <- sort(unique(str_split(nucleotides, ";", simplify = TRUE)[,1])) + + if (length(nucleotides) <= 1) return(NULL) + + results <- list() + # Only iterate until the second last element to avoid the issue + for (i in 
1:(length(nucleotides) - 1)) { + for (j in (i + 1):length(nucleotides)) { + # Check based on length difference feasibility + if (abs(nchar(nucleotides[i]) - nchar(nucleotides[j])) > max(nchar(nucleotides[i]), nchar(nucleotides[j])) * (1 - threshold)) { + next + } - idx_i <- tmp[, chain_col_number] == nucleotides[i] & tmp[,2] == y - len_i <- nucleotide_lengths[i] + distance <- stringdist::stringdist(nucleotides[i], nucleotides[j], method = "lv") + normalized_distance <- 1 - distance / mean(c(nchar(nucleotides[i]), nchar(nucleotides[j]))) - for (j in (i + 1):length(nucleotides)) { - distance <- stringdist::stringdist(nucleotides[i], nucleotides[j], method = "lv") - distance_norm <- 1 - distance / ((len_i + nucleotide_lengths[j]) / 2) - - if (!is.na(distance_norm) & distance_norm >= threshold) { - idx_j <- tmp[, chain_col_number] == nucleotides[j] - stored_positions <- idx_j & !idx_i - - if(any(stored_positions)) { - # Store this pair in the edge list that is not the same chain - temp_list[[j - i]] <- data.frame(from = which(idx_i), - to = which(stored_positions)) - } - } + if (normalized_distance >= threshold) { + results[[length(results) + 1]] <- data.frame( + from = nucleotides[i], + to = nucleotides[j], + distance = normalized_distance + ) } - list[[i]] <- do.call(rbind, temp_list) # Collapsing all data.frames in temp_list - } - - #Remove any NULL or 0 list elements - if(length(list) > 0) { - list <- list[-which(unlist(lapply(list, is.null)))] - list <- list[lapply(list,length)>0] - list <- bind_rows(list) %>% as.data.frame() - secondary.list[[i]] <- list - } - #Remove any NULL or 0 list elements - if(length(secondary.list) > 0) { - secondary.list <- secondary.list[-which(unlist(lapply(secondary.list, is.null)))] - secondary.list <- secondary.list[lapply(secondary.list,length)>0] } } - secondary.list + + do.call(rbind, results) }) - edge.list <- bind_rows(edge.list) + + edge.list <- do.call(rbind, edge.list) + if(exportGraph) { - edge.list[,1] <- 
tmp[,1][edge.list[,1]] - edge.list[,2] <- tmp[,1][edge.list[,2]] - graph <- graph_from_edgelist(as.matrix(edge.list)) + graph <- graph_from_edgelist(as.matrix(edge.list)[,c(1,2)]) return(graph) } - if (nrow(edge.list) > 0) { + if (!is.null(dim(edge.list))) { edge.list <- unique(edge.list) g <- graph_from_data_frame(edge.list) components <- igraph::components(g, mode = c("weak")) @@ -626,7 +609,6 @@ is_df_or_list_of_df <- function(x) { out <- subset(out, cluster %in% filter) if(nrow(out) > 1) { out$cluster <- paste0(gene, ":Cluster", ".", out$cluster) - out$filtered <- tmp[,1][as.numeric(out$filtered)] uni_IG <- as.data.frame(unique(tmp[,1][tmp[,1] %!in% out$filtered])) } } else { diff --git a/R/vizGenes.R b/R/vizGenes.R index f846ad7d..caa0099e 100644 --- a/R/vizGenes.R +++ b/R/vizGenes.R @@ -35,7 +35,7 @@ #' @import ggplot2 #' @importFrom stringr str_split #' @importFrom stats sd -#' @importFrom dplyr bind_rows +#' @importFrom dplyr bind_rows %>% group_by mutate ungroup summarise #' @export #' @concept Visualizing_Clones #' @return ggplot bar diagram or heatmap of gene usage diff --git a/inst/WORDLIST b/inst/WORDLIST index 4208488a..40858001 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,6 +1,7 @@ ADT AE AIRR +AUCell Andreatta Atchley Autoencoder @@ -9,6 +10,10 @@ BCRs BD Barcode BioC +CAERGSGGSYIPTF +CARKVRDSSYKLIF +CASSDPSGRQGPRWDTQYF +CASSDSGYNEQFF CASSVRRERANTGELFF CDR CMD @@ -26,13 +31,19 @@ CoNGA Codecov CombineBCR CombineExpression +Commun Contig Contigs Corvino Drs FilteringMulti Francesco +GEX GSE +GSEA +GSEABase +GSVA +GeneSetCollection Gini Github Hyperexpanded @@ -54,25 +65,31 @@ Mazziotta McPAS MiXCR Morisita +Morisita's Multiomic +NG +Nebulosa Noonan OHE Omniscope PIRD PMID +ParseBio Pielou -Pre +Preprocessed ProjectTIL Readded Rebasing Rebumping -RenameCells Repo SCE +STARTRAC Satija's +SeuratObject SingleCellExperiment Startrac StartracDiversity +Steric TCR TCRA TCRB @@ -87,13 +104,17 @@ TRD TRG TUST Trex +UCell UMAP VAE VDJ VDJC VDJdb +VHSE 
Vgene +Vishwakarma VizGene +Voigt WAT WNN Zhang @@ -106,13 +127,13 @@ al alluvialClones alluvialClonotype alluvialClonotypes +asy autoencoder autoencoding barcode barcodes bioconductor bronchoalveolar -cav cdr chao checkBlanks @@ -124,6 +145,7 @@ clonalBias clonalCluster clonalCompare clonalDiversity +clonalFrequency clonalHomeostasis clonalLength clonalNetwork @@ -145,11 +167,9 @@ clonotype clonotypeBias clonotypeSizeDistribution clonotypes -clonotypic clusterTCR colData coldata -colorectal combineBCR combineContig combineExpression @@ -167,6 +187,7 @@ convolutional createHTOContigList csv customizable +densityEnrichment dev df dir @@ -175,6 +196,7 @@ downsample downsampling epitope epitopes +erroring et expa explainability @@ -182,11 +204,14 @@ exportClones exportGraph exportTable filterMulti +filterNonproductive filteringMulti +formated frac gd getCirclize getCoord +geyserEnrichment ggalluvial ggdendrogram ggfittext @@ -199,7 +224,8 @@ grey greyed groupBy hammingCompare -hcl +heatmapEnrichment +hexbin highlightClones highlightClonotypes hypermutation @@ -207,13 +233,16 @@ iNEXT iedb igraph incongruent +ingle inv io jaccard +jk json kmer kmers lapply +latform lengthContigs lengthDF ln @@ -229,7 +258,10 @@ migr migra morisita multisystem +nFeature na +nalysis +nrichment nt nucleotides occupiedClonotype @@ -238,32 +270,44 @@ occupiedscRepertoire parseAIRR parseBCR parseTCR +pca percentAA percentGenes percentKmer percentVJ +performNormalization +positionalEntropy +positionalProperty powerTCR quantContig -quantifications rda +rder readRDS +reclustering regressClonotype relevel removeMulti removeNA repo +ridgeEnrichment +runEscape runIbex +runPCA runTrex sc scRep -scRepetoire scater +scater's scatterClonotype scatterClonotypes +scatterEnrichment screp seurat shannon simpson +splitEnrichment +ssGSEA +stScales startrac str stripBarcodes @@ -271,6 +315,7 @@ subsetClones subtype subtypes summarise +tScales testthat theCall trackable diff --git a/man/clonalCluster.Rd 
b/man/clonalCluster.Rd index 2c5d167d..72af6aca 100644 --- a/man/clonalCluster.Rd +++ b/man/clonalCluster.Rd @@ -47,7 +47,9 @@ sequences of the CDR3 and V genes to cluster similar TCR/BCRs together. As a default, the function takes the input from \code{\link{combineTCR}}, \code{\link{combineBCR}} or \code{\link{combineExpression}} and amends a cluster to the data frame or meta data. If \strong{exportGraph} is set -to TRUE, the function returns an igraph object of the connected sequences. +to TRUE, the function returns an igraph object of the connected sequences. +If multiple sequences per chain are present, this function only compares +the first sequence. } \examples{ # Getting the combined contigs diff --git a/man/clonalNetwork.Rd b/man/clonalNetwork.Rd index 95d0d950..7b2be773 100644 --- a/man/clonalNetwork.Rd +++ b/man/clonalNetwork.Rd @@ -14,8 +14,8 @@ clonalNetwork( filter.graph = FALSE, cloneCall = "strict", chain = "both", - exportTable = FALSE, exportClones = FALSE, + exportTable = FALSE, palette = "inferno" ) } @@ -48,13 +48,12 @@ in the data.} \item{chain}{indicate if both or a specific chain should be used - e.g. 
"both", "TRA", "TRG", "IGH", "IGL".} -\item{exportTable}{Exports a table of the data into the global -environment in addition to the visualization.} - \item{exportClones}{Exports a table of clones that are shared across multiple identity groups and ordered by the total number of clone copies.} +\item{exportTable}{Exports a table of the data into the global} + \item{palette}{Colors to use in visualization - input any \link[grDevices]{hcl.pals}.} } diff --git a/man/clonalOverlay.Rd b/man/clonalOverlay.Rd index 285e686e..6e0810dd 100644 --- a/man/clonalOverlay.Rd +++ b/man/clonalOverlay.Rd @@ -7,7 +7,8 @@ clonalOverlay( sc.data, reduction = NULL, - freq.cutpoint = 30, + cut.category = "clonalFrequency", + cutpoint = 30, bins = 25, facet.by = NULL ) @@ -15,10 +16,13 @@ clonalOverlay( \arguments{ \item{sc.data}{The single-cell object after \code{\link{combineExpression}}.} -\item{reduction}{The dimensional reduction to visualize} +\item{reduction}{The dimensional reduction to visualize.} -\item{freq.cutpoint}{The overlay cut point to include, this corresponds to the -Frequency variable in the single-cell object} +\item{cut.category}{Meta data variable of the single-cell object to use for +filtering.} + +\item{cutpoint}{The overlay cut point to include, this corresponds to the +cut.category variable in the meta data of the single-cell object.} \item{bins}{The number of contours to the overlay} @@ -48,7 +52,7 @@ scRep_example <- combineExpression(combined, #Using clonalOverlay() clonalOverlay(scRep_example, reduction = "umap", - freq.cutpoint = 0.3, + cutpoint = 3, bins = 5) } diff --git a/man/clonalRarefaction.Rd b/man/clonalRarefaction.Rd index 0b1b49ee..9ca6d458 100644 --- a/man/clonalRarefaction.Rd +++ b/man/clonalRarefaction.Rd @@ -37,8 +37,8 @@ coverage-based rarefaction/extrapolation curve (\code{type = 3}).} \item{hill.numbers}{The Hill numbers to be plotted out (0 - species richness, 1 - Shannon, 2 - Simpson)} -\item{n.boots}{The number of bootstraps to 
downsample in o -rder to get mean diversity.} +\item{n.boots}{The number of bootstraps to downsample in order +to get mean diversity.} \item{exportTable}{Exports a table of the data into the global environment in addition to the visualization.} diff --git a/man/combineBCR.Rd b/man/combineBCR.Rd index 74909e16..073d4f80 100644 --- a/man/combineBCR.Rd +++ b/man/combineBCR.Rd @@ -12,14 +12,15 @@ combineBCR( threshold = 0.85, removeNA = FALSE, removeMulti = FALSE, - filterMulti = TRUE + filterMulti = TRUE, + filterNonproductive = TRUE ) } \arguments{ \item{input.data}{List of filtered contig annotations or outputs from \code{\link{loadContigs}}.} -\item{samples}{The labels of samples} +\item{samples}{The labels of samples (required).} \item{ID}{The additional sample labeling (optional).} @@ -36,6 +37,10 @@ the number the more similarity of sequence will be used for clustering.} \item{filterMulti}{This option will allow for the selection of the highest-expressing light and heavy chains, if not calling related clones.} + +\item{filterNonproductive}{This option will allow for the removal of +nonproductive chains if the variable exists in the contig data. Default +is set to TRUE to remove nonproductive contigs.} } \value{ List of clones for individual cell barcodes diff --git a/man/combineTCR.Rd b/man/combineTCR.Rd index 8f10161e..4702c537 100644 --- a/man/combineTCR.Rd +++ b/man/combineTCR.Rd @@ -10,7 +10,8 @@ combineTCR( ID = NULL, removeNA = FALSE, removeMulti = FALSE, - filterMulti = FALSE + filterMulti = FALSE, + filterNonproductive = TRUE ) } \arguments{ @@ -27,6 +28,10 @@ outputs from \code{\link{loadContigs}}.} \item{filterMulti}{This option will allow for the selection of the 2 corresponding chains with the highest expression for a single barcode.} + +\item{filterNonproductive}{This option will allow for the removal of +nonproductive chains if the variable exists in the contig data. 
Default +is set to TRUE to remove nonproductive contigs.} } \value{ List of clones for individual cell barcodes diff --git a/man/loadContigs.Rd b/man/loadContigs.Rd index 0135382e..740c0059 100644 --- a/man/loadContigs.Rd +++ b/man/loadContigs.Rd @@ -10,7 +10,7 @@ loadContigs(input, format = "10X") \item{input}{The directory in which contigs are located or a list with contig elements} \item{format}{The format of the single-cell contig, currently supporting: -"10X", "AIRR", "BD", "JSON", "MiXCR", "Omniscope", "TRUST4", and "WAT3R"} +"10X", "AIRR", "BD", "JSON", "MiXCR", "ParseBio", "Omniscope", "TRUST4", and "WAT3R"} } \value{ List of contigs for compatibility with \code{\link{combineTCR}} or @@ -30,6 +30,7 @@ The files that this function parses includes: \item BD = "Contigs_AIRR.tsv" \item Immcantation = "data.tsv" \item JSON = ".json" + \item ParseBio = "barcode_report.tsv" \item MiXCR = "clones.tsv" \item Omniscope = ".csv" \item TRUST4 = "barcode_report.tsv" diff --git a/man/percentGenes.Rd b/man/percentGenes.Rd index 579af053..1c7d3536 100644 --- a/man/percentGenes.Rd +++ b/man/percentGenes.Rd @@ -35,7 +35,7 @@ ggplot of percentage of indicated genes as a heatmap This function the proportion V or J genes used by grouping variables. This function only quantifies single gene loci for indicated \strong{chain}. For -examining VJ pairing, please see code{\link{percentVJ}} +examining VJ pairing, please see \code{\link{percentVJ}}. 
} \examples{ #Making combined contig data diff --git a/tests/testthat/_snaps/clonalAbundance/clonalabundance-scaled-plot.svg b/tests/testthat/_snaps/clonalAbundance/clonalabundance-scaled-plot.svg index 7b142e41..da00c7c8 100644 --- a/tests/testthat/_snaps/clonalAbundance/clonalabundance-scaled-plot.svg +++ b/tests/testthat/_snaps/clonalAbundance/clonalabundance-scaled-plot.svg @@ -27,22 +27,22 @@ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/clonalLength/clonallength-scaled-plot.svg b/tests/testthat/_snaps/clonalLength/clonallength-scaled-plot.svg index 49c7dfee..d7b0618e 100644 --- a/tests/testthat/_snaps/clonalLength/clonallength-scaled-plot.svg +++ b/tests/testthat/_snaps/clonalLength/clonallength-scaled-plot.svg @@ -27,22 +27,22 @@ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/clonalOccupy/clonaloccupy-default-plot.svg b/tests/testthat/_snaps/clonalOccupy/clonaloccupy-default-plot.svg index 3cb0c794..c04c8b9c 100644 --- a/tests/testthat/_snaps/clonalOccupy/clonaloccupy-default-plot.svg +++ b/tests/testthat/_snaps/clonalOccupy/clonaloccupy-default-plot.svg @@ -27,64 +27,64 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -1 -11 -1 -3 -34 -18 -34 -18 -23 -26 -25 -19 -5 -16 -4 -1 -12 -4 -6 -1 -41 -27 -5 -8 -11 -3 -1 -5 -2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +34 +41 +18 +27 +1 +34 +5 +11 +18 +8 +23 +11 +26 +3 +1 +25 +1 +19 +5 +5 +3 +16 +4 +1 +12 +4 +6 +1 +2 @@ -97,14 +97,38 @@ - - - - -0 -5 -10 -15 + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 Single Cells cloneSize diff --git a/tests/testthat/_snaps/clonalOccupy/clonaloccupy-proportion-plot.svg b/tests/testthat/_snaps/clonalOccupy/clonaloccupy-proportion-plot.svg index e1ec1560..635b1f4a 100644 --- a/tests/testthat/_snaps/clonalOccupy/clonaloccupy-proportion-plot.svg +++ 
b/tests/testthat/_snaps/clonalOccupy/clonaloccupy-proportion-plot.svg @@ -27,35 +27,35 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -70,14 +70,38 @@ - - - - -0 -5 -10 -15 + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 Proportion of Cells cloneSize diff --git a/tests/testthat/_snaps/clonalOverlap/clonaloverlap-reorder-plot.svg b/tests/testthat/_snaps/clonalOverlap/clonaloverlap-reorder-plot.svg new file mode 100644 index 00000000..bc31beaa --- /dev/null +++ b/tests/testthat/_snaps/clonalOverlap/clonaloverlap-reorder-plot.svg @@ -0,0 +1,209 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +26 +0 +0 +0 +0 +85 +0 +0 +0 +0 +0 +0 +0 +0 +23 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +251 + + + +P18B +P18L +P17B +P17L +P20B +P20L +P19B +P19L + + + + + + + + + + + + + + + + + +P18B +P18L +P17B +P17L +P20B +P20L +P19B +P19L + +Raw + + + + + + + + + + + + + +0 +50 +100 +150 +200 +250 +clonalOverlap_reorder_plot + + diff --git a/tests/testthat/_snaps/clonalOverlay/clonaloverlay-clonalproportion-plot.svg b/tests/testthat/_snaps/clonalOverlay/clonaloverlay-clonalproportion-plot.svg new file mode 100644 index 00000000..ff6a0981 --- /dev/null +++ b/tests/testthat/_snaps/clonalOverlay/clonaloverlay-clonalproportion-plot.svg @@ -0,0 +1,654 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +P19 + + + + + + + + + + +P20 + + + + + + + + + + +P17 + + + + + + + + + + +P18 + + + + + + + +-4 +0 +4 +8 + + + + + +-4 +0 +4 +8 + +-5 +0 +5 + + + + +-5 +0 +5 + + + +Dimension 1 +Dimension 2 +clonalOverlay_clonalProportion_plot + + diff --git a/tests/testthat/test-clonalOverlap.R b/tests/testthat/test-clonalOverlap.R index 8ec84691..79ff61e3 100644 --- a/tests/testthat/test-clonalOverlap.R +++ b/tests/testthat/test-clonalOverlap.R @@ -11,6 +11,13 @@ test_that("clonalOverlap works", { method = "raw") ) + expect_doppelganger( # warning from testthat: Removed 36 rows containing missing values (`geom_text()`). + "clonalOverlap_reorder_plot", + clonalOverlap( + combined[c(3,4,1,2,7,8,5,6)], + method = "raw") + ) + expect_doppelganger( # warning from testthat: Removed 36 rows containing missing values (`geom_text()`). 
"clonalOverlap_morisita_plot", clonalOverlap( diff --git a/tests/testthat/test-clonalOverlay.R b/tests/testthat/test-clonalOverlay.R index b341a7e4..fbff6c48 100644 --- a/tests/testthat/test-clonalOverlay.R +++ b/tests/testthat/test-clonalOverlay.R @@ -11,10 +11,22 @@ test_that("clonalOverlay works", { "clonalOverlay_plot", clonalOverlay(test_obj, reduction = "umap", - freq.cutpoint = 1, + cutpoint = 1, bins = 10, facet.by = "Patient") + guides(color = "none") ) + set.seed(42) + expect_doppelganger( + "clonalOverlay_clonalProportion_plot", + clonalOverlay(test_obj, + reduction = "umap", + cut.category = "clonalProportion", + cutpoint = 0.2, + bins = 10, + facet.by = "Patient") + + guides(color = "none") + ) + }) diff --git a/tests/testthat/test-combineContigs.R b/tests/testthat/test-combineContigs.R index af7bc490..0a9c5b73 100644 --- a/tests/testthat/test-combineContigs.R +++ b/tests/testthat/test-combineContigs.R @@ -33,16 +33,26 @@ test_that("combineTCR works", { ) expect_identical(trial4, getdata("combineContigs", "combineTCR_list_removeNA")) + + trial5 <- combineTCR( + input.data = contig_list[1:2], + samples = c("P17B", "P17L"), + filterNonproductive = FALSE + ) + + expect_identical(trial5, getdata("combineContigs", "combineTCR_list_nonproductive")) }) # TODO combineTCR & combineBCR (need more edge cases, different args, errors, etc.) 
+# TODO combineTCR for non-10x formats test_that("combineBCR works", { BCR <- read.csv("https://www.borch.dev/uploads/contigs/b_contigs.csv") - trial1 <- combineBCR(BCR, + bcr.trial1 <- combineBCR(BCR, samples = "Patient1") + bcr.trial1[[1]] <- bcr.trial1[[1]][order(bcr.trial1[[1]]$barcode),] - expect_identical(trial1, getdata("combineContigs", "combineBCR_list_expected")) + expect_identical(bcr.trial1, getdata("combineContigs", "combineBCR_list_expected")) }) diff --git a/tests/testthat/test-loadContigs.R b/tests/testthat/test-loadContigs.R index a538d72f..061e14ab 100644 --- a/tests/testthat/test-loadContigs.R +++ b/tests/testthat/test-loadContigs.R @@ -39,12 +39,18 @@ test_that("loadContigs works", { getdata("load", "loadContigs_Immcantation") ) - OS <- read.csv("https://www.borch.dev/uploads/contigs/OS_contigs.csv") + OS <- read.csv("https://www.borch.dev/uploads/contigs/OS_contigs2.csv") trial7 <- loadContigs(OS, format = "Omniscope") expect_identical(trial7, getdata("load", "loadContigs_Omniscope") ) + + Parse <- read.csv("https://www.borch.dev/uploads/contigs/Parse_contigs.csv") + trial8 <- loadContigs(Parse, format = "ParseBio") + expect_identical(trial8, + getdata("load", "loadContigs_Parse") + ) }) -# TODO Add tests for .json and AIRR and OS +# TODO Add tests for .json and AIRR # TODO Would be nice to have a dir option diff --git a/tests/testthat/testdata/clustering/clonalCluster_2sample_data.rds b/tests/testthat/testdata/clustering/clonalCluster_2sample_data.rds index f5d04a7f..2bceb237 100644 Binary files a/tests/testthat/testdata/clustering/clonalCluster_2sample_data.rds and b/tests/testthat/testdata/clustering/clonalCluster_2sample_data.rds differ diff --git a/tests/testthat/testdata/clustering/clonalCluster_IGHaa_data.rds b/tests/testthat/testdata/clustering/clonalCluster_IGHaa_data.rds index 09fc009e..1d66bf6d 100644 Binary files a/tests/testthat/testdata/clustering/clonalCluster_IGHaa_data.rds and 
b/tests/testthat/testdata/clustering/clonalCluster_IGHaa_data.rds differ diff --git a/tests/testthat/testdata/clustering/clonalCluster_TRBaa_data.rds b/tests/testthat/testdata/clustering/clonalCluster_TRBaa_data.rds index f8dc1631..a409c1cb 100644 Binary files a/tests/testthat/testdata/clustering/clonalCluster_TRBaa_data.rds and b/tests/testthat/testdata/clustering/clonalCluster_TRBaa_data.rds differ diff --git a/tests/testthat/testdata/clustering/clonalCluster_TRBaa_metadata.rds b/tests/testthat/testdata/clustering/clonalCluster_TRBaa_metadata.rds index 528720e9..0af9addb 100644 Binary files a/tests/testthat/testdata/clustering/clonalCluster_TRBaa_metadata.rds and b/tests/testthat/testdata/clustering/clonalCluster_TRBaa_metadata.rds differ diff --git a/tests/testthat/testdata/combineContigs/combineBCR_list_expected.rds b/tests/testthat/testdata/combineContigs/combineBCR_list_expected.rds index 0cd8b247..82d18f30 100644 Binary files a/tests/testthat/testdata/combineContigs/combineBCR_list_expected.rds and b/tests/testthat/testdata/combineContigs/combineBCR_list_expected.rds differ diff --git a/tests/testthat/testdata/combineContigs/combineTCR_list_nonproductive.rds b/tests/testthat/testdata/combineContigs/combineTCR_list_nonproductive.rds new file mode 100644 index 00000000..ef32ed71 Binary files /dev/null and b/tests/testthat/testdata/combineContigs/combineTCR_list_nonproductive.rds differ diff --git a/tests/testthat/testdata/load/loadContigs_BD.rds b/tests/testthat/testdata/load/loadContigs_BD.rds index bef773d5..fdbaceee 100644 Binary files a/tests/testthat/testdata/load/loadContigs_BD.rds and b/tests/testthat/testdata/load/loadContigs_BD.rds differ diff --git a/tests/testthat/testdata/load/loadContigs_Immcantation.rds b/tests/testthat/testdata/load/loadContigs_Immcantation.rds index 920e03d4..4ea67ef1 100644 Binary files a/tests/testthat/testdata/load/loadContigs_Immcantation.rds and b/tests/testthat/testdata/load/loadContigs_Immcantation.rds differ diff --git 
a/tests/testthat/testdata/load/loadContigs_Omniscope.rds b/tests/testthat/testdata/load/loadContigs_Omniscope.rds index 747e8100..70d5a149 100644 Binary files a/tests/testthat/testdata/load/loadContigs_Omniscope.rds and b/tests/testthat/testdata/load/loadContigs_Omniscope.rds differ diff --git a/tests/testthat/testdata/load/loadContigs_Parse.rds b/tests/testthat/testdata/load/loadContigs_Parse.rds new file mode 100644 index 00000000..c7b5e220 Binary files /dev/null and b/tests/testthat/testdata/load/loadContigs_Parse.rds differ diff --git a/tests/testthat/testdata/seuratFunctions/clonalOccupy_exportTable.rds b/tests/testthat/testdata/seuratFunctions/clonalOccupy_exportTable.rds index 529524e6..c0d7d65a 100644 Binary files a/tests/testthat/testdata/seuratFunctions/clonalOccupy_exportTable.rds and b/tests/testthat/testdata/seuratFunctions/clonalOccupy_exportTable.rds differ diff --git a/vignettes/articles/Repertoire_Summary.Rmd b/vignettes/articles/Repertoire_Summary.Rmd index 91d724c0..4b246883 100644 --- a/vignettes/articles/Repertoire_Summary.Rmd +++ b/vignettes/articles/Repertoire_Summary.Rmd @@ -69,6 +69,7 @@ positionalEntropy(combined.TCR, chain = "TRB", aa.length = 20) ``` + ## positionalProperty Like ```positionalEntropy()```, we can also examine a series of amino acid properties along the cdr3 sequences using ```positionalProperty()```. Important differences from the above function for ```positionalProperty()``` is dropping NA values as they would void the mean calculation. ```positionalProperty()``` also display a ribbon with the 95% confidence interval surrounding the mean value for the selected properties. diff --git a/vignettes/articles/Running_Escape.Rmd b/vignettes/articles/Running_Escape.Rmd index 368da6c4..32390e7d 100644 --- a/vignettes/articles/Running_Escape.Rmd +++ b/vignettes/articles/Running_Escape.Rmd @@ -297,7 +297,7 @@ Most of the visualizations in *escape* have a defined set of parameters. 
**scale** * **TRUE** - z-transform the enrichment values. -* **FALSE** - leave raw values (**DEFAULT**). +* **FALSE** - leave raw values (**default**). In addition, ```heatmapEnrichment()``` allows for the reclustering of rows and columns using Euclidean distance of the enrichment scores and the Ward2 methods for clustering using **cluster.rows** and **cluster.columns**. @@ -311,7 +311,7 @@ heatmapEnrichment(scRep_example, cluster.columns = TRUE) ``` -Each visualization has an additional argument called **palette that supplies the coloring scheme to be used - available color palettes can be viewed with ```hcl.pals()```. +Each visualization has an additional argument called **palette** that supplies the coloring scheme to be used - available color palettes can be viewed with ```hcl.pals()```. ```{r} hcl.pals() diff --git a/vignettes/articles/SC_Visualizations.Rmd b/vignettes/articles/SC_Visualizations.Rmd index 07fac896..d9b8aea0 100644 --- a/vignettes/articles/SC_Visualizations.Rmd +++ b/vignettes/articles/SC_Visualizations.Rmd @@ -69,10 +69,13 @@ Using the dimensional reduction graphs as a reference, we can also generate an o * The dimensional reduction for the visualization, **(default = "pca")** -**freq.cutpoint** +**cut.category** -* lowest clonal frequency or proportion to generate the contour plot +* Use of "clonalFrequency" or "clonalProportion" to filter the overlay + +**cutpoint** +* lowest clonal frequency or proportion to generate the contour plot **bins** @@ -85,7 +88,7 @@ This visualization was authored by Dr. Francesco Mazziotta and inspired by Drs. ```{r tidy = FALSE} clonalOverlay(scRep_example, reduction = "umap", - freq.cutpoint = 1, + cutpoint = 1, bins = 10, facet.by = "Patient") + guides(color = "none") @@ -126,7 +129,6 @@ clonalNetwork(scRep_example, filter.clones = NULL, filter.identity = NULL, cloneCall = "aa") - ``` We can look at the clonal relationships relative to a single cluster using the **filter.identity** parameter. 
@@ -182,9 +184,9 @@ clonalOccupy(scRep_example, x.axis = "seurat_clusters") clonalOccupy(scRep_example, - x.axis = "ident", - proportion = TRUE, - label = FALSE) + x.axis = "ident", + proportion = TRUE, + label = FALSE) ``` ## alluvialClones diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd index 92541694..7924c76e 100644 --- a/vignettes/vignette.Rmd +++ b/vignettes/vignette.Rmd @@ -877,10 +877,13 @@ Using the dimensional reduction graphs as a reference, we can also generate an o * The dimensional reduction for the visualization, **(default = "pca")** -**freq.cutpoint** +**cut.category** -* lowest clonal frequency or proportion to generate the contour plot +* Use of "clonalFrequency" or "clonalProportion" to filter the overlay + +**cutpoint** +* lowest clonal frequency or proportion to generate the contour plot **bins** @@ -896,7 +899,7 @@ scRep_example$Patient <- substr(scRep_example$orig.ident, 1,3) clonalOverlay(scRep_example, reduction = "umap", - freq.cutpoint = 1, + cutpoint = 1, bins = 10, facet.by = "Patient") + guides(color = "none") @@ -993,9 +996,9 @@ clonalOccupy(scRep_example, x.axis = "seurat_clusters") clonalOccupy(scRep_example, - x.axis = "ident", - proportion = TRUE, - label = FALSE) + x.axis = "ident", + proportion = TRUE, + label = FALSE) ``` ## alluvialClones