Skip to content

Commit

Permalink
more tests of hclust
Browse files Browse the repository at this point in the history
  • Loading branch information
shackett committed Sep 24, 2024
1 parent bfc54eb commit 205ab5a
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 29 deletions.
38 changes: 21 additions & 17 deletions R/hclust.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
hclust_tidy_omic <- function(
tidy_omic,
feature_var,
sample_var,
value_var,
cluster_dim,
distance_measure = "dist",
hclust_method = "ward.D2") {
tidy_omic,
feature_var,
sample_var,
value_var,
cluster_dim,
distance_measure = "dist",
hclust_method = "ward.D2"
) {

check_tidy_omic(tidy_omic)

checkmate::assertChoice(feature_var, tidy_omic$design$features$variable)
Expand Down Expand Up @@ -37,7 +39,7 @@ hclust_tidy_omic <- function(
cluster_orders$columns <-
coerce_to_classes(
cluster_orders$columns,
tidy_omic$data[[tidy_omic$design$feature_pk]]
tidy_omic$data[[tidy_omic$design$sample_pk]]
)

# order rows and columns
Expand All @@ -49,7 +51,7 @@ hclust_tidy_omic <- function(
)

if (cluster_dim == "columns") {
# order by factor or alpha-numerically
# order features by factor or alpha-numerically

if (
any(class(distinct_features[[feature_var]]) %in% c("factor", "ordered"))
Expand Down Expand Up @@ -93,7 +95,7 @@ hclust_tidy_omic <- function(
)

if (cluster_dim == "rows") {
# order by factor or alpha-numerically
# order samples by factor or alpha-numerically

if (any(class(distinct_samples[[sample_var]]) %in% c("factor", "ordered"))) {
# retain previous ordering
Expand Down Expand Up @@ -208,13 +210,15 @@ hclust_tidy_omic <- function(
#' hclust_order(df, "letters", "numbers", "noise", "rows")
#' @export
hclust_order <- function(
df,
feature_pk,
sample_pk,
value_var,
cluster_dim,
distance_measure = "dist",
hclust_method = "ward.D2") {
df,
feature_pk,
sample_pk,
value_var,
cluster_dim,
distance_measure = "dist",
hclust_method = "ward.D2"
) {

checkmate::assertDataFrame(df)
checkmate::assertChoice(feature_pk, colnames(df))
checkmate::assertChoice(sample_pk, colnames(df))
Expand Down
15 changes: 7 additions & 8 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,18 @@ format_names_for_plotting <- function(chars, width = 40, truncate_at = 80) {
}

coerce_to_classes <- function(obj, reference_obj) {
reference_obj_class <- class(reference_obj)
reference_obj_class <- class(reference_obj)[1]

if (any(reference_obj_class %in% "glue")) {
out <- glue::as_glue(obj)
} else if (any(reference_obj_class %in% c("factor", "ordered"))) {
out <-
do.call(
reference_obj_class,
list(
x = obj,
levels = levels(reference_obj)
)
out <- do.call(
reference_obj_class,
list(
x = obj,
levels = levels(reference_obj)
)
)
} else if (reference_obj_class == "character") {
out <- as.character(obj)
} else if (reference_obj_class == "numeric") {
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ three_col_df <- tidyr:::expand_grid(
samples = 1:10
) %>%
dplyr::mutate(
measurement = 1
measurement = 1:100
)

simple_tidy <- create_tidy_omic(
Expand Down
3 changes: 1 addition & 2 deletions tests/testthat/test-data_classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,9 @@ test_that("Test check_triple_omic edge cases", {

expect_error(
check_triple_omic(simple_triple_class_inconsistency_samples),
"sample_id classes differ between the features"
"sample_id classes differ between the samples"
)


# degenerate entries
nonunique_feature_ids <- simple_triple
nonunique_feature_ids$features <- dplyr::bind_rows(
Expand Down
177 changes: 177 additions & 0 deletions tests/testthat/test-hclust.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,180 @@ test_that("downsampling features (for creating a heatmap works)", {
expect_equal(nrow(downsampled_df), 3600)
expect_equal(length(unique(downsampled_df$name)), 100)
})

test_that("hclust_tidy_omic() tests all logic branches", {

simple_tidy_w_factors <- simple_tidy
simple_tidy_w_factors$data <- simple_tidy_w_factors$data %>%
dplyr::mutate(
features = factor(features),
samples = ordered(samples)
) %>%
# shuffle so we can test ordering
dplyr::sample_frac(1)

hclust_w_fct_coercion <- hclust_tidy_omic(
simple_tidy_w_factors,
feature_var = simple_tidy_w_factors$design$feature_pk,
sample_var = simple_tidy_w_factors$design$sample_pk,
value_var = "measurement",
cluster_dim = "both"
)

expect_s3_class(
hclust_w_fct_coercion$data$features,
"factor"
)

expect_s3_class(
hclust_w_fct_coercion$data$samples,
"ordered"
)

# catch corner cases

expect_error(
hclust_tidy_omic(
simple_tidy_w_factors,
feature_var = "features",
sample_var = "samples",
value_var = "features",
cluster_dim = "both"
),
"feature_pk, sample_pk, and value_var must all be different"
)

# preserve default feature ordering
hclust_w_default_feature_orders <- hclust_tidy_omic(
simple_tidy_w_factors,
feature_var = simple_tidy_w_factors$design$feature_pk,
sample_var = simple_tidy_w_factors$design$sample_pk,
value_var = "measurement",
cluster_dim = "columns"
)

expect_equal(
class(hclust_w_default_feature_orders$data$features),
class(hclust_w_default_feature_orders$data$ordered_featureId)
)

# factor ordering defined by original orders
expect_equal(
as.character(levels(hclust_w_default_feature_orders$data$features)),
levels(hclust_w_default_feature_orders$data$ordered_featureId)
)

# preserve default sample orders
hclust_w_default_sample_orders <- hclust_tidy_omic(
simple_tidy_w_factors,
feature_var = simple_tidy_w_factors$design$feature_pk,
sample_var = simple_tidy_w_factors$design$sample_pk,
value_var = "measurement",
cluster_dim = "rows"
)

expect_equal(
class(hclust_w_default_sample_orders$data$samples),
class(hclust_w_default_sample_orders$data$ordered_sampleId)
)

# factor ordering defined by original orders
expect_equal(
as.character(levels(hclust_w_default_sample_orders$data$samples)),
levels(hclust_w_default_sample_orders$data$ordered_sampleId)
)

# sort features by non-factor feature variable when clustering just columns

simple_tidy_shuffle <- simple_tidy
simple_tidy_shuffle$data <- dplyr::sample_frac(simple_tidy_shuffle$data)

sorted_tidy_omic <- hclust_tidy_omic(
simple_tidy_shuffle,
feature_var = "features",
sample_var = "samples",
value_var = "measurement",
cluster_dim = "columns",
)

expect_s3_class(sorted_tidy_omic$data$ordered_featureId, "factor")
expect_equal(levels(sorted_tidy_omic$data$ordered_featureId), as.character(1:10))
})


test_that("hclust_tidy_omic() runs even if clustering initially fails due to missing values", {

# create a dataset where distances between features and samples will create
# some NAs
disjoint_tomic <- simple_tidy
disjoint_tomic$data <- dplyr::bind_rows(
simple_tidy$data %>%
dplyr::filter(
features <= 5,
samples <= 5
),
simple_tidy$data %>%
dplyr::filter(
features > 5,
samples > 5
)
)

clustered_disjoint_tomic <- hclust_tidy_omic(
disjoint_tomic,
feature_var = "features",
sample_var = "samples",
value_var = "measurement",
cluster_dim = "both"
)

expect_s3_class(clustered_disjoint_tomic, "tomic")
})

test_that("Catch apply_hclust() corner cases", {

invalid_matrix <- matrix(1:4, nrow = 2)
invalid_matrix <- invalid_matrix[-c(1:2),]

expect_error(
apply_hclust(invalid_matrix, "foo", "bar"),
"contained zero rows"
)

# create data which will generate NAs in distance matrix
disjoint_data_matrix <- dplyr::bind_rows(
simple_tidy$data %>%
dplyr::filter(
features <= 5,
samples <= 5
),
simple_tidy$data %>%
dplyr::filter(
features > 5,
samples > 5
)
) %>%
reshape2::acast(features ~ samples, value.var = "measurement")

expect_error(
apply_hclust(disjoint_data_matrix, "dist", "ward.D2"),
"NA/NaN/Inf in foreign function call"
)

expect_error(
apply_hclust(disjoint_data_matrix, "corr", "ward.D2"),
"NA distances are not allowed with hierarchical clustering"
)

expect_error(
apply_hclust(disjoint_data_matrix, "baz", "ward.D2"),
"baz is not a defined distance_measure"
)

})

test_that("collapse_feature_vars() is well behaved", {
expect_equal(collapse_feature_vars("A"), "A")
expect_equal(collapse_feature_vars(c("B", "A")), "B & A")
expect_equal(collapse_feature_vars(1:5), 1)
})
6 changes: 5 additions & 1 deletion tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ library(dplyr)

test_that("Coercing to original classes works", {
simple_factor <- factor(c("B", "A"), levels = c("A", "B"))

# -> factor
expect_equal(coerce_to_classes(c("B", "A"), simple_factor), simple_factor)

simple_ordered <- ordered(c("B", "A"), levels = c("A", "B"))
# -> ordered
expect_equal(coerce_to_classes(c("B", "A"), simple_ordered), simple_ordered)

# should throw an error when NAs are introduced for non-NAs
expect_error(coerce_to_classes(c("B", "A", "C"), simple_factor), "reference object")
# -> character
Expand Down

0 comments on commit 205ab5a

Please sign in to comment.