From 0df1502817ed36cab7e856c793adb314306225ce Mon Sep 17 00:00:00 2001 From: Sean Hackett Date: Thu, 19 Sep 2024 09:37:53 -0700 Subject: [PATCH 1/3] cleanup syntax --- R/data_classes.R | 30 ++++++++++++++++-------------- R/filters.R | 19 +++++++++++++------ 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/R/data_classes.R b/R/data_classes.R index f17ce37..8bf2ca6 100644 --- a/R/data_classes.R +++ b/R/data_classes.R @@ -59,14 +59,14 @@ #' ) #' @export create_tidy_omic <- function( - df, - feature_pk, - feature_vars = NULL, - sample_pk, - sample_vars = NULL, - omic_type_tag = "general", - verbose = TRUE - ) { + df, + feature_pk, + feature_vars = NULL, + sample_pk, + sample_vars = NULL, + omic_type_tag = "general", + verbose = TRUE + ) { checkmate::assertDataFrame(df) checkmate::assertString(omic_type_tag) @@ -367,12 +367,14 @@ check_tidy_omic <- function(tidy_omic, fast_check = TRUE) { #' "feature_id", "sample_id" #' ) #' @export -create_triple_omic <- function(measurement_df, - feature_df = NULL, - sample_df = NULL, - feature_pk, - sample_pk, - omic_type_tag = "general") { +create_triple_omic <- function( + measurement_df, + feature_df = NULL, + sample_df = NULL, + feature_pk, + sample_pk, + omic_type_tag = "general" + ) { # testing checkmate::assertClass(measurement_df, "data.frame") diff --git a/R/filters.R b/R/filters.R index e0c35dd..b1a1d7e 100644 --- a/R/filters.R +++ b/R/filters.R @@ -9,11 +9,13 @@ #' filter_value} #' \item{range}{filter filter_variable to using the range (i.e., lower and #' upper limit) provided in filter_value} -#' \item{apply}{a quosure as a \code{filter_value} to a table of interest} +#' \item{quo}{a quosure as a \code{filter_value} to a table of interest} #' } #' @param filter_table table where the filter should be applied #' @param filter_variable variable to apply the filter to #' @param filter_value values to filter based on +#' @param invert If FALSE (default) entities will be retained; if TRUE, they +#' will be removed. #' #' @returns A \code{tomic} object where a subset of features, samples or #' measurmenets have been filtered. @@ -51,17 +53,22 @@ #' filter_value = rlang::quo(BP == "biological process unknown") #' ) #' @export -filter_tomic <- function(tomic, - filter_type, - filter_table, - filter_value, - filter_variable = NULL) { +filter_tomic <- function( + tomic, + filter_type, + filter_table, + filter_value, + filter_variable = NULL, + invert = FALSE + ) { + checkmate::assertClass(tomic, "tomic") checkmate::assertChoice(filter_type, c("category", "range", "quo")) checkmate::assertChoice( filter_table, c("features", "samples", "measurements") ) + checkmate::assertLogical(invert, len = 1) # convert to triple_omic triple_omic <- tomic_to(tomic, "triple_omic") From 7500bdcb9d5f15c652b2b15b77e235ee0b261d0e Mon Sep 17 00:00:00 2001 From: Sean Hackett Date: Thu, 19 Sep 2024 09:52:09 -0700 Subject: [PATCH 2/3] added support for inverting filters --- R/filters.R | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/R/filters.R b/R/filters.R index b1a1d7e..334182e 100644 --- a/R/filters.R +++ b/R/filters.R @@ -63,7 +63,7 @@ filter_tomic <- function( ) { checkmate::assertClass(tomic, "tomic") - checkmate::assertChoice(filter_type, c("category", "range", "quo")) + checkmate::assertString(filter_type) checkmate::assertChoice( filter_table, c("features", "samples", "measurements") @@ -73,18 +73,16 @@ filter_tomic <- function( # convert to triple_omic triple_omic <- tomic_to(tomic, "triple_omic") + VALID_FILTER_TYPES <- c("category", "range", "quo") if (filter_type %in% c("category", "range")) { checkmate::assertString(filter_variable) valid_variables <- colnames(triple_omic[[filter_table]]) if (!(filter_variable %in% valid_variables)) { - stop( - filter_variable, - " is not a valid value for \"filter_type\", - valid values are all variables within the \"", - filter_table, - "\" table: ", - paste(valid_variables, collapse = ", ") + cli::cli_abort( + "{.field {filter_variable}} is is not a valid value for {.arg filter_type}, + valid values are all variables within the {filter_table} table: + {.field {valid_variables}}" ) } @@ -94,19 +92,19 @@ filter_tomic <- function( filter_var_type <- filter_var_type$type[1] } else if (filter_type == "quo") { if (!("NULL" %in% class(filter_variable))) { - warning( - "filter_variable was provided when filter_type is quo + cli::cli_alert_warning( + "{.arg filter_variable} was provided when {.arg filter_type} is {.field quo} only a filter_value should be passed. filter_variable will be ignored" ) } } else { - stop("invalid filter type") + cli::cli_abort("{filter_type} is not a valid {.arg filter_type}. Valid types are {.field {VALID_FILTER_TYPES}}") } if (filter_type == "category") { checkmate::assertVector(filter_value) - triple_omic[[filter_table]] <- triple_omic[[filter_table]] %>% + updated_filtered_table <- triple_omic[[filter_table]] %>% dplyr::filter( !!rlang::sym(filter_variable) %in% !!rlang::quo(filter_value) ) @@ -123,7 +121,7 @@ filter_tomic <- function( ) } - triple_omic[[filter_table]] <- triple_omic[[filter_table]] %>% + updated_filtered_table <- triple_omic[[filter_table]] %>% dplyr::filter( !!rlang::sym(filter_variable) >= !!rlang::quo(filter_value[1]), !!rlang::sym(filter_variable) <= !!rlang::quo(filter_value[2]) @@ -131,12 +129,27 @@ filter_tomic <- function( } else if (filter_type == "quo") { checkmate::assertClass(filter_value, "quosure") - triple_omic[[filter_table]] <- triple_omic[[filter_table]] %>% + updated_filtered_table <- triple_omic[[filter_table]] %>% dplyr::filter(!!filter_value) } else { - stop("invalid filter_type") + stop("Unexpected behavior") + } + + # invert filter if invert is TRUE + if (invert) { + join_keys <- triple_omic$design[[filter_table]] %>% + dplyr::filter(type %in% c("feature_primary_key", "sample_primary_key")) %>% + dplyr::pull(variable) + + updated_filtered_table <- anti_join( + triple_omic[[filter_table]], + updated_filtered_table, + by = join_keys + ) } + triple_omic[[filter_table]] <- updated_filtered_table + # clear out data impacted by filters triple_omic <- reconcile_triple_omic(triple_omic) From d24d732a66f12a0c130d8332e0dd0e5a6e7c450d Mon Sep 17 00:00:00 2001 From: Sean Hackett Date: Thu, 19 Sep 2024 10:06:53 -0700 Subject: [PATCH 3/3] invert tested with new tests --- DESCRIPTION | 2 +- R/filters.R | 2 +- man/filter_tomic.Rd | 8 +- tests/testthat/_snaps/filters.md | 438 ++++++++++++++++++++++++++++++- tests/testthat/test-filters.R | 22 +- 5 files changed, 461 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4fbc607..90df3bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: romic Type: Package Title: R for High-Dimensional Omic Data -Version: 1.2.3 +Version: 1.2.4 Authors@R: c( person( given = "Sean", diff --git a/R/filters.R b/R/filters.R index 334182e..bc52963 100644 --- a/R/filters.R +++ b/R/filters.R @@ -141,7 +141,7 @@ filter_tomic <- function( dplyr::filter(type %in% c("feature_primary_key", "sample_primary_key")) %>% dplyr::pull(variable) - updated_filtered_table <- anti_join( + updated_filtered_table <- dplyr::anti_join( triple_omic[[filter_table]], updated_filtered_table, by = join_keys diff --git a/man/filter_tomic.Rd b/man/filter_tomic.Rd index 38ddc63..3ab3468 100644 --- a/man/filter_tomic.Rd +++ b/man/filter_tomic.Rd @@ -9,7 +9,8 @@ filter_tomic( filter_type, filter_table, filter_value, - filter_variable = NULL + filter_variable = NULL, + invert = FALSE ) } \arguments{ @@ -20,7 +21,7 @@ filter_tomic( filter_value} \item{range}{filter filter_variable to using the range (i.e., lower and upper limit) provided in filter_value} - \item{apply}{a quosure as a \code{filter_value} to a table of interest} + \item{quo}{a quosure as a \code{filter_value} to a table of interest} }} \item{filter_table}{table where the filter should be applied} @@ -28,6 +29,9 @@ filter_tomic( \item{filter_value}{values to filter based on} \item{filter_variable}{variable to apply the filter to} + +\item{invert}{If FALSE (default) entities will be retained; if TRUE, they +will be removed.} } \value{ A \code{tomic} object where a subset of features, samples or diff --git a/tests/testthat/_snaps/filters.md b/tests/testthat/_snaps/filters.md index ba9f2bb..f8d9d6f 100644 --- a/tests/testthat/_snaps/filters.md +++ b/tests/testthat/_snaps/filters.md @@ -1,4 +1,4 @@ -# Try all of the filters +# Try all of the filters [plain] Code . @@ -15,6 +15,438 @@ filter_variable = "bar", filter_value = "biological process unknown") Condition Error in `filter_tomic()`: - ! bar is not a valid value for "filter_type", - valid values are all variables within the "features" table: name, BP, MF, systematic_name + ! bar is is not a valid value for `filter_type`, valid values are all variables within the features table: name, BP, MF, and systematic_name + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "quo", filter_table = "features", + filter_variable = "bar", filter_value = rlang::quo(BP == + "biological process unknown")) + Message + ! `filter_variable` was provided when `filter_type` is quo + only a filter_value should be passed. filter_variable will be ignored + Output + $features + # A tibble: 110 x 4 + name BP MF systematic_name + + 1 YOL029C biological process unknown molecular function unknown YOL029C + 2 YHR036W biological process unknown molecular function unknown YHR036W + 3 YAL046C biological process unknown molecular function unknown YAL046C + 4 YHR151C biological process unknown molecular function unknown YHR151C + 5 YKL027W biological process unknown molecular function unknown YKL027W + 6 YBR220C biological process unknown molecular function unknown YBR220C + 7 YLR057W biological process unknown molecular function unknown YLR057W + 8 YDR239C biological process unknown molecular function unknown YDR239C + 9 KKQ8 biological process unknown protein kinase activity YKL168C + 10 UIP5 biological process unknown molecular function unknown YKR044W + # i 100 more rows + + $samples + # A tibble: 36 x 3 + sample nutrient DR + + 1 G0.05 G 0.05 + 2 G0.1 G 0.1 + 3 G0.15 G 0.15 + 4 G0.2 G 0.2 + 5 G0.25 G 0.25 + 6 G0.3 G 0.3 + 7 N0.05 N 0.05 + 8 N0.1 N 0.1 + 9 N0.15 N 0.15 + 10 N0.2 N 0.2 + # i 26 more rows + + $measurements + # A tibble: 3,960 x 3 + name sample expression + + 1 YOL029C G0.05 -0.22 + 2 YHR036W G0.05 -0.91 + 3 YAL046C G0.05 0.05 + 4 YHR151C G0.05 -0.53 + 5 YKL027W G0.05 -0.52 + 6 YBR220C G0.05 -1.06 + 7 YLR057W G0.05 -0.42 + 8 YDR239C G0.05 -0.55 + 9 KKQ8 G0.05 -0.6 + 10 UIP5 G0.05 -0.56 + # i 3,950 more rows + + $design + $design$features + # A tibble: 4 x 2 + variable type + + 1 name feature_primary_key + 2 systematic_name character + 3 BP character + 4 MF character + + $design$samples + # A tibble: 3 x 2 + variable type + + 1 sample sample_primary_key + 2 nutrient character + 3 DR numeric + + $design$measurements + # A tibble: 3 x 2 + variable type + + 1 name feature_primary_key + 2 sample sample_primary_key + 3 expression numeric + + $design$feature_pk + [1] "name" + + $design$sample_pk + [1] "sample" + + + attr(,"class") + [1] "triple_omic" "tomic" "general" + +# Try all of the filters [ansi] + + Code + . + Output + # A tibble: 1 x 1 + BP +  + 1 biological process unknown + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "category", filter_table = "features", + filter_variable = "bar", filter_value = "biological process unknown") + Condition + Error in `filter_tomic()`: + ! bar is is not a valid value for `filter_type`, valid values are all variables within the features table: name, BP, MF, and systematic_name + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "quo", filter_table = "features", + filter_variable = "bar", filter_value = rlang::quo(BP == + "biological process unknown")) + Message + ! `filter_variable` was provided when `filter_type` is quo + only a filter_value should be passed. filter_variable will be ignored + Output + $features + # A tibble: 110 x 4 + name BP MF systematic_name +     +  1 YOL029C biological process unknown molecular function unknown YOL029C +  2 YHR036W biological process unknown molecular function unknown YHR036W +  3 YAL046C biological process unknown molecular function unknown YAL046C +  4 YHR151C biological process unknown molecular function unknown YHR151C +  5 YKL027W biological process unknown molecular function unknown YKL027W +  6 YBR220C biological process unknown molecular function unknown YBR220C +  7 YLR057W biological process unknown molecular function unknown YLR057W +  8 YDR239C biological process unknown molecular function unknown YDR239C +  9 KKQ8 biological process unknown protein kinase activity YKL168C + 10 UIP5 biological process unknown molecular function unknown YKR044W + # i 100 more rows + + $samples + # A tibble: 36 x 3 + sample nutrient DR +    +  1 G0.05 G 0.05 +  2 G0.1 G 0.1 +  3 G0.15 G 0.15 +  4 G0.2 G 0.2 +  5 G0.25 G 0.25 +  6 G0.3 G 0.3 +  7 N0.05 N 0.05 +  8 N0.1 N 0.1 +  9 N0.15 N 0.15 + 10 N0.2 N 0.2 + # i 26 more rows + + $measurements + # A tibble: 3,960 x 3 + name sample expression +    +  1 YOL029C G0.05 -0.22 +  2 YHR036W G0.05 -0.91 +  3 YAL046C G0.05 0.05 +  4 YHR151C G0.05 -0.53 +  5 YKL027W G0.05 -0.52 +  6 YBR220C G0.05 -1.06 +  7 YLR057W G0.05 -0.42 +  8 YDR239C G0.05 -0.55 +  9 KKQ8 G0.05 -0.6 + 10 UIP5 G0.05 -0.56 + # i 3,950 more rows + + $design + $design$features + # A tibble: 4 x 2 + variable type +   + 1 name feature_primary_key + 2 systematic_name character + 3 BP character + 4 MF character + + $design$samples + # A tibble: 3 x 2 + variable type +   + 1 sample sample_primary_key + 2 nutrient character + 3 DR numeric + + $design$measurements + # A tibble: 3 x 2 + variable type +   + 1 name feature_primary_key + 2 sample sample_primary_key + 3 expression numeric + + $design$feature_pk + [1] "name" + + $design$sample_pk + [1] "sample" + + + attr(,"class") + [1] "triple_omic" "tomic" "general" + +# Try all of the filters [unicode] + + Code + . + Output + # A tibble: 1 × 1 + BP + + 1 biological process unknown + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "category", filter_table = "features", + filter_variable = "bar", filter_value = "biological process unknown") + Condition + Error in `filter_tomic()`: + ! bar is is not a valid value for `filter_type`, valid values are all variables within the features table: name, BP, MF, and systematic_name + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "quo", filter_table = "features", + filter_variable = "bar", filter_value = rlang::quo(BP == + "biological process unknown")) + Message + ! `filter_variable` was provided when `filter_type` is quo + only a filter_value should be passed. filter_variable will be ignored + Output + $features + # A tibble: 110 × 4 + name BP MF systematic_name + + 1 YOL029C biological process unknown molecular function unknown YOL029C + 2 YHR036W biological process unknown molecular function unknown YHR036W + 3 YAL046C biological process unknown molecular function unknown YAL046C + 4 YHR151C biological process unknown molecular function unknown YHR151C + 5 YKL027W biological process unknown molecular function unknown YKL027W + 6 YBR220C biological process unknown molecular function unknown YBR220C + 7 YLR057W biological process unknown molecular function unknown YLR057W + 8 YDR239C biological process unknown molecular function unknown YDR239C + 9 KKQ8 biological process unknown protein kinase activity YKL168C + 10 UIP5 biological process unknown molecular function unknown YKR044W + # ℹ 100 more rows + + $samples + # A tibble: 36 × 3 + sample nutrient DR + + 1 G0.05 G 0.05 + 2 G0.1 G 0.1 + 3 G0.15 G 0.15 + 4 G0.2 G 0.2 + 5 G0.25 G 0.25 + 6 G0.3 G 0.3 + 7 N0.05 N 0.05 + 8 N0.1 N 0.1 + 9 N0.15 N 0.15 + 10 N0.2 N 0.2 + # ℹ 26 more rows + + $measurements + # A tibble: 3,960 × 3 + name sample expression + + 1 YOL029C G0.05 -0.22 + 2 YHR036W G0.05 -0.91 + 3 YAL046C G0.05 0.05 + 4 YHR151C G0.05 -0.53 + 5 YKL027W G0.05 -0.52 + 6 YBR220C G0.05 -1.06 + 7 YLR057W G0.05 -0.42 + 8 YDR239C G0.05 -0.55 + 9 KKQ8 G0.05 -0.6 + 10 UIP5 G0.05 -0.56 + # ℹ 3,950 more rows + + $design + $design$features + # A tibble: 4 × 2 + variable type + + 1 name feature_primary_key + 2 systematic_name character + 3 BP character + 4 MF character + + $design$samples + # A tibble: 3 × 2 + variable type + + 1 sample sample_primary_key + 2 nutrient character + 3 DR numeric + + $design$measurements + # A tibble: 3 × 2 + variable type + + 1 name feature_primary_key + 2 sample sample_primary_key + 3 expression numeric + + $design$feature_pk + [1] "name" + + $design$sample_pk + [1] "sample" + + + attr(,"class") + [1] "triple_omic" "tomic" "general" + +# Try all of the filters [fancy] + + Code + . + Output + # A tibble: 1 × 1 + BP +  + 1 biological process unknown + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "category", filter_table = "features", + filter_variable = "bar", filter_value = "biological process unknown") + Condition + Error in `filter_tomic()`: + ! bar is is not a valid value for `filter_type`, valid values are all variables within the features table: name, BP, MF, and systematic_name + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "quo", filter_table = "features", + filter_variable = "bar", filter_value = rlang::quo(BP == + "biological process unknown")) + Message + ! `filter_variable` was provided when `filter_type` is quo + only a filter_value should be passed. filter_variable will be ignored + Output + $features + # A tibble: 110 × 4 + name BP MF systematic_name +     +  1 YOL029C biological process unknown molecular function unknown YOL029C +  2 YHR036W biological process unknown molecular function unknown YHR036W +  3 YAL046C biological process unknown molecular function unknown YAL046C +  4 YHR151C biological process unknown molecular function unknown YHR151C +  5 YKL027W biological process unknown molecular function unknown YKL027W +  6 YBR220C biological process unknown molecular function unknown YBR220C +  7 YLR057W biological process unknown molecular function unknown YLR057W +  8 YDR239C biological process unknown molecular function unknown YDR239C +  9 KKQ8 biological process unknown protein kinase activity YKL168C + 10 UIP5 biological process unknown molecular function unknown YKR044W + # ℹ 100 more rows + + $samples + # A tibble: 36 × 3 + sample nutrient DR +    +  1 G0.05 G 0.05 +  2 G0.1 G 0.1 +  3 G0.15 G 0.15 +  4 G0.2 G 0.2 +  5 G0.25 G 0.25 +  6 G0.3 G 0.3 +  7 N0.05 N 0.05 +  8 N0.1 N 0.1 +  9 N0.15 N 0.15 + 10 N0.2 N 0.2 + # ℹ 26 more rows + + $measurements + # A tibble: 3,960 × 3 + name sample expression +    +  1 YOL029C G0.05 -0.22 +  2 YHR036W G0.05 -0.91 +  3 YAL046C G0.05 0.05 +  4 YHR151C G0.05 -0.53 +  5 YKL027W G0.05 -0.52 +  6 YBR220C G0.05 -1.06 +  7 YLR057W G0.05 -0.42 +  8 YDR239C G0.05 -0.55 +  9 KKQ8 G0.05 -0.6 + 10 UIP5 G0.05 -0.56 + # ℹ 3,950 more rows + + $design + $design$features + # A tibble: 4 × 2 + variable type +   + 1 name feature_primary_key + 2 systematic_name character + 3 BP character + 4 MF character + + $design$samples + # A tibble: 3 × 2 + variable type +   + 1 sample sample_primary_key + 2 nutrient character + 3 DR numeric + + $design$measurements + # A tibble: 3 × 2 + variable type +   + 1 name feature_primary_key + 2 sample sample_primary_key + 3 expression numeric + + $design$feature_pk + [1] "name" + + $design$sample_pk + [1] "sample" + + + attr(,"class") + [1] "triple_omic" "tomic" "general" diff --git a/tests/testthat/test-filters.R b/tests/testthat/test-filters.R index adc1332..b619911 100644 --- a/tests/testthat/test-filters.R +++ b/tests/testthat/test-filters.R @@ -1,4 +1,4 @@ -test_that("Try all of the filters", { +cli::test_that_cli("Try all of the filters", { filter_tomic( brauer_2008_triple, @@ -53,15 +53,29 @@ test_that("Try all of the filters", { error = TRUE ) - expect_warning( + expect_snapshot( filter_tomic( brauer_2008_triple, filter_type = "quo", filter_table = "features", filter_variable = "bar", filter_value = rlang::quo(BP == "biological process unknown") - ), - regexp = "filter_type is quo" + ) ) }) + +test_that("Validate that filtering works with invert = TRUE", { + + retained_samples <- filter_tomic( + brauer_2008_triple, + filter_type = "category", + filter_table = "samples", + filter_variable = "nutrient", + filter_value = c("U", "L"), + invert = TRUE + )$samples + + expect_equal(nrow(retained_samples), 24) + expect_equal(unique(retained_samples$nutrient), c("G", "N", "P", "S")) +})