Skip to content

Commit

Permalink
Merge pull request #87 from calico/issue-86
Browse files Browse the repository at this point in the history
Issue 86 - added sample malahanobis distance fxn based on svd lowD & allow for diff input in get_design_tbl()
  • Loading branch information
shackett authored Sep 9, 2024
2 parents 51a055a + 5cc2f44 commit fbdd47a
Show file tree
Hide file tree
Showing 8 changed files with 140 additions and 9 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: romic
Type: Package
Title: R for High-Dimensional Omic Data
Version: 1.2.2
Version: 1.2.3
Authors@R: c(
person(
given = "Sean",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export(add_pcs)
export(app_flow)
export(app_heatmap)
export(app_pcs)
export(calculate_sample_mahalanobis_distances)
export(center_tomic)
export(check_design)
export(check_tomic)
Expand Down
16 changes: 13 additions & 3 deletions R/design.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,25 @@
#'
#' Get a tabular summary of all variables.
#'
#' @inheritParams tomic_to
#' @param tomic_or_design Either a \code{tomic} object or its embedded design list
#'
#' @returns a tibble reflecting the \code{tomic} object's design.
#'
#' @examples
#' get_design_tbl(brauer_2008_triple)
#' get_design_tbl(brauer_2008_triple$design)
#'
#' @export
get_design_tbl <- function(tomic) {
tomic$design[c("features", "samples", "measurements")] %>%
get_design_tbl <- function(tomic_or_design) {

if (inherits(tomic_or_design, "tomic")) {
design <- tomic_or_design$design
} else {
check_design(tomic_or_design)
design <- tomic_or_design
}

design[c("features", "samples", "measurements")] %>%
{
purrr::map2(unname(.), names(.), function(x, y) {
x %>%
Expand Down
74 changes: 74 additions & 0 deletions R/dim_reduction.R
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,77 @@ find_triple_omic_missing_values <- function(

return(output)
}

#' Calculate Sample Mahalanobis Distances
#'
#' Determine each samples distance from the center of the data using Mahalanobis distance.
#'
#' @inheritParams tomic_to
#' @param value_var the measurement variable to use for calculating distances
#' @param max_pcs the maximum number of principal components to used for
#' representing the covariance matrix.
#' @param scale if TRUE then the data will be scaled before calculating distances
#'
#' @returns The samples tibble with a new column `pc_distance` which contains the
#' Mahalanobis distances of individual samples from the PC elipsoid
#'
#' @details
#' Since `romic` is built around using tall data where there are more features than
#' samples calculating Mahalanobis distance off of the covariance matrix is not
#' possible. Instead, we use SVD to create a low-dimensional representation of the
#' covariance matrix and calculate distances from the center of the data in this
#' space. This essentially involves weighting the principal components by their
#' loadings.
#'
#' @examples
#' calculate_sample_mahalanobis_distances(brauer_2008_tidy)
#' @export
calculate_sample_mahalanobis_distances <- function (
tomic,
value_var = NULL,
max_pcs = 10,
scale = FALSE
) {

checkmate::assertClass(tomic, "tomic")
value_var <- value_var_handler(value_var, tomic$design)
checkmate::assertIntegerish(max_pcs, len = 1)

n_features <- nrow(romic::get_tomic_table(tomic, "features"))
n_samples <- nrow(romic::get_tomic_table(tomic, "samples"))
npcs <- pmin(max_pcs, n_features, n_samples)

X_std <- tomic %>%
# remove features with missing values
romic::remove_missing_values(value_var, "drop_features", verbose = FALSE) %>%
# convert to a matrix
romic::tomic_to_matrix(value_var) %>%
{scale(t(.), scale = scale) %>% t()}

# if X is constant for a feature (such all reads are 0) then
# scaling will put it to NaN. We need to remove these features
valid_X_std <- X_std[rowSums(is.nan(X_std)) == 0,]

# perform a singular value decomposition
svd_results <- svd(valid_X_std, nv = npcs)

# re-weight principal components by eigenvalues
X_std_v <- svd_results$v * matrix(svd_results$d[1:npcs], ncol = npcs, nrow = ncol(valid_X_std), byrow = TRUE)
rownames(X_std_v) <- colnames(valid_X_std)

pc_distances_by_sample <- tibble::tibble(
!!rlang::sym(tomic$design$sample_pk) := rownames(X_std_v),
pc_distance = rowSums(X_std_v^2)
) %>%
dplyr::arrange(dplyr::desc(pc_distance))

# validate calculation
varex_leading_components <- sum(svd_results$d^2) * sum((svd_results$d^2 / sum(svd_results$d^2))[1:npcs])

stopifnot((sum(pc_distances_by_sample$pc_distance) - varex_leading_components)/varex_leading_components < 0.01)

samples_w_pc_distances <- romic::get_tomic_table(tomic, "samples") %>%
dplyr::left_join(pc_distances_by_sample, by = tomic$design$sample_pk)

return(samples_w_pc_distances)
}
8 changes: 5 additions & 3 deletions R/mutates.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,11 @@ center <- function(x) {
#' select(-DR)
#' new_variable_tables <- c("new_sample_var" = "samples")
#' @export
update_tidy_omic <- function(tidy_omic,
updated_tidy_data,
new_variable_tables = c()) {
update_tidy_omic <- function(
tidy_omic,
updated_tidy_data,
new_variable_tables = c()
) {
checkmate::assertClass(tidy_omic, "tomic")
checkmate::assertClass(tidy_omic, "tidy_omic")
checkmate::assertDataFrame(updated_tidy_data)
Expand Down
1 change: 1 addition & 0 deletions R/romic-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ utils::globalVariables(c(
"sample_label",
"ordered_sampleId",
"orderedId",
"pc_distance",
"valid_tables",
"NAME",
"name",
Expand Down
41 changes: 41 additions & 0 deletions man/calculate_sample_mahalanobis_distances.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/get_design_tbl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit fbdd47a

Please sign in to comment.