diff --git a/DESCRIPTION b/DESCRIPTION index cfb6b655..e6745576 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: effectsize Title: Indices of Effect Size -Version: 0.8.5 +Version: 0.8.6 Authors@R: c(person(given = "Mattan S.", family = "Ben-Shachar", @@ -66,16 +66,16 @@ BugReports: https://github.com/easystats/effectsize/issues/ Depends: R (>= 3.6) Imports: - bayestestR (>= 0.13.0), - insight (>= 0.19.3.2), - parameters (>= 0.20.2), - performance (>= 0.10.2), - datawizard (>= 0.6.5), + bayestestR (>= 0.13.1), + insight (>= 0.19.5), + parameters (>= 0.21.1), + performance (>= 0.10.5), + datawizard (>= 0.8.0), stats, utils Suggests: - correlation (>= 0.8.3), - see (>= 0.7.4), + correlation (>= 0.8.4), + see (>= 0.8.0), afex, BayesFactor, boot, @@ -107,4 +107,3 @@ Config/Needs/website: rstudio/bslib, r-lib/pkgdown, easystats/easystatstemplate -Remotes: easystats/insight diff --git a/NEWS.md b/NEWS.md index 86f3331d..ebb789db 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,16 @@ +# effectsize 0.8.6 + +This is a minor update to bring `effectsize` in-line with the formula methods +in `t.test()` and `wilcox.test()` in `R>=4.4.0`. + +## Breaking Changes + +- `cohens_d()`, `hedges_g()`, `p_superiority()`, `wmw_odds()`, `means_ratio()` and `rank_biserial()` no longer support setting `paired = TRUE` when using the formula method. + +## Bug fixes + +- `eta_squared()` returns (approximate) effect sizes for smooths. + # effectsize 0.8.5 ## New features diff --git a/R/cohens_d.R b/R/cohens_d.R index 1c35ed51..5069f144 100644 --- a/R/cohens_d.R +++ b/R/cohens_d.R @@ -206,6 +206,7 @@ glass_delta <- function(x, y = NULL, data = NULL, out <- .get_data_2_samples(x, y, data, paired = paired, verbose = verbose, ...) 
x <- out[["x"]] y <- out[["y"]] + paired <- out[["paired"]] if (is.null(y)) { if (type == "delta") { @@ -217,6 +218,10 @@ glass_delta <- function(x, y = NULL, data = NULL, # Compute index if (paired) { + if (type == "delta") { + insight::format_error("This effect size is only applicable for two independent samples.") + } + d <- mean(x - y) n <- length(x) s <- stats::sd(x - y) diff --git a/R/common_language.R b/R/common_language.R index d6b92e57..9b8ef22f 100644 --- a/R/common_language.R +++ b/R/common_language.R @@ -123,6 +123,7 @@ p_superiority <- function(x, y = NULL, data = NULL, ) x <- data[["x"]] y <- data[["y"]] + paired <- data[["paired"]] if (parametric) { d <- cohens_d( @@ -162,14 +163,15 @@ cohens_u1 <- function(x, y = NULL, data = NULL, return(effectsize(x, type = "u1", ci = ci, verbose = verbose, ...)) } - data <- .get_data_2_samples(x, y, data, allow_ordered = !parametric, verbose = verbose, ... ) x <- data[["x"]] y <- data[["y"]] - if (is.null(y)) insight::format_error("cohens_u3 only applicable to two sample case.") + if (is.null(y) || isTRUE(match.call()$paired) || isTRUE(data[["paired"]])) { + insight::format_error("This effect size is only applicable for two independent samples.") + } if (!parametric) { insight::format_error("Cohen's U1 only available for parametric estimation.") @@ -202,14 +204,15 @@ cohens_u2 <- function(x, y = NULL, data = NULL, return(effectsize(x, type = "u2", ci = ci, verbose = verbose, ...)) } - data <- .get_data_2_samples(x, y, data, allow_ordered = !parametric, verbose = verbose, ... 
) x <- data[["x"]] y <- data[["y"]] - if (is.null(y)) insight::format_error("cohens_u3 only applicable to two sample case.") + if (is.null(y) || isTRUE(match.call()$paired) || isTRUE(data[["paired"]])) { + insight::format_error("This effect size is only applicable for two independent samples.") + } if (parametric) { d <- cohens_d( @@ -253,7 +256,9 @@ cohens_u3 <- function(x, y = NULL, data = NULL, ) x <- data[["x"]] y <- data[["y"]] - if (is.null(y)) insight::format_error("cohens_u3 only applicable to two sample case.") + if (is.null(y) || isTRUE(match.call()$paired) || isTRUE(data[["paired"]])) { + insight::format_error("This effect size is only applicable for two independent samples.") + } if (parametric) { d <- cohens_d( @@ -289,14 +294,15 @@ p_overlap <- function(x, y = NULL, data = NULL, return(effectsize(x, type = "overlap", ci = ci, verbose = verbose, ...)) } - data <- .get_data_2_samples(x, y, data, allow_ordered = !parametric, verbose = verbose, ... ) x <- data[["x"]] y <- data[["y"]] - if (is.null(y)) insight::format_error("Overlap only applicable to two sample case.") + if (is.null(y) || isTRUE(match.call()$paired) || isTRUE(data[["paired"]])) { + insight::format_error("This effect size is only applicable for two independent samples.") + } if (parametric) { d <- cohens_d( diff --git a/R/eta_squared-methods.R b/R/eta_squared-methods.R index 2e1cf856..bee08cac 100644 --- a/R/eta_squared-methods.R +++ b/R/eta_squared-methods.R @@ -354,13 +354,16 @@ model <- stats::anova(model) p.table <- as.data.frame(model$pTerms.table) + p.table$Component <- "conditional" s.table <- as.data.frame(model$s.table) + s.table$Component <- "smooth_terms" + colnames(s.table)[colnames(s.table) == "Ref.df"] <- "df" s.table[setdiff(colnames(p.table), colnames(s.table))] <- NA p.table[setdiff(colnames(s.table), colnames(p.table))] <- NA tab <- rbind(p.table, s.table) - colnames(tab)[colnames(tab) == "F"] <- "F-value" colnames(tab)[colnames(tab) == "df"] <- "npar" tab$df_error <- 
model$residual.df + # tab$df_error <- Inf out <- .anova_es.anova( @@ -371,6 +374,8 @@ ci = ci, alternative = alternative, verbose = verbose ) + out$Component <- tab$Component + out <- datawizard::data_relocate(out, select = "Component", before = 1) attr(out, "anova_type") <- 3 attr(out, "approximate") <- TRUE diff --git a/R/means_ratio.R b/R/means_ratio.R index 639c490e..fb883f23 100644 --- a/R/means_ratio.R +++ b/R/means_ratio.R @@ -76,6 +76,7 @@ means_ratio <- function(x, y = NULL, data = NULL, ) x <- out[["x"]] y <- out[["y"]] + paired <- out[["paired"]] if (is.null(y)) { insight::format_error("Only one sample provided. y or data must be provided.") diff --git a/R/pooled.R b/R/pooled.R index 391e1bfa..44776477 100644 --- a/R/pooled.R +++ b/R/pooled.R @@ -28,6 +28,9 @@ sd_pooled <- function(x, y = NULL, data = NULL, verbose = TRUE, ...) { data <- .get_data_2_samples(x, y, data, verbose = verbose, ...) x <- data[["x"]] y <- data[["y"]] + if (is.null(y) || isTRUE(match.call()$paired) || isTRUE(data[["paired"]])) { + insight::format_error("This effect size is only applicable for two independent samples.") + } V <- cov_pooled( data.frame(x = x), @@ -46,6 +49,9 @@ mad_pooled <- function(x, y = NULL, data = NULL, data <- .get_data_2_samples(x, y, data, verbose = verbose, ...) 
x <- data[["x"]] y <- data[["y"]] + if (is.null(y) || isTRUE(match.call()$paired) || isTRUE(data[["paired"]])) { + insight::format_error("This effect size is only applicable for two independent samples.") + } n1 <- length(x) n2 <- length(y) diff --git a/R/rank_diff.R b/R/rank_diff.R index a6ea009c..e252656b 100644 --- a/R/rank_diff.R +++ b/R/rank_diff.R @@ -62,6 +62,7 @@ #' # Same as: #' # rank_biserial("mpg", "am", data = mtcars) #' # rank_biserial(mtcars$mpg[mtcars$am=="0"], mtcars$mpg[mtcars$am=="1"]) +#' # cliffs_delta(mpg ~ am, data = mtcars) #' #' # More options: #' rank_biserial(mpg ~ am, data = mtcars, mu = -5) @@ -69,21 +70,26 @@ #' #' #' # One Sample ---------- -#' rank_biserial(wt ~ 1, data = mtcars, mu = 3) +#' # from help("wilcox.test") +#' x <- c(1.83, 0.50, 1.62, 2.48, 1.68, 1.88, 1.55, 3.06, 1.30) +#' y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29) +#' depression <- data.frame(first = x, second = y, change = y - x) +#' +#' rank_biserial(change ~ 1, data = depression) +#' #' # same as: -#' # rank_biserial("wt", data = mtcars, mu = 3) -#' # rank_biserial(mtcars$wt, mu = 3) +#' # rank_biserial("change", data = depression) +#' # rank_biserial(depression$change) +#' +#' # More options: +#' rank_biserial(change ~ 1, data = depression, mu = -0.5) #' #' #' # Paired Samples ---------- -#' dat <- data.frame( -#' Cond1 = c(1.83, 0.5, 1.62, 2.48, 1.68, 1.88, 1.55, 3.06, 1.3), -#' Cond2 = c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29) -#' ) -#' (rb <- rank_biserial(Pair(Cond1, Cond2) ~ 1, data = dat, paired = TRUE)) +#' (rb <- rank_biserial(Pair(first, second) ~ 1, data = depression)) #' #' # same as: -#' # rank_biserial(dat$Cond1, dat$Cond2, paired = TRUE) +#' # rank_biserial(depression$first, depression$second, paired = TRUE) #' #' interpret_rank_biserial(0.78) #' interpret(rb, rules = "funder2019") @@ -127,8 +133,9 @@ rank_biserial <- function(x, y = NULL, data = NULL, allow_ordered = TRUE, verbose = verbose, ... 
) - x <- out$x - y <- out$y + x <- out[["x"]] + y <- out[["y"]] + paired <- out[["paired"]] if (is.null(y)) { y <- 0 @@ -208,7 +215,7 @@ cliffs_delta <- function(x, y = NULL, data = NULL, ) x <- data$x y <- data$y - if (is.null(y) || isTRUE(eval.parent(cl$paired))) { + if (is.null(y) || isTRUE(match.call()$paired) || isTRUE(data[["paired"]])) { insight::format_error("This effect size is only applicable for two independent samples.") } diff --git a/R/utils_validate_input_data.R b/R/utils_validate_input_data.R index 786c3edc..30c488b0 100644 --- a/R/utils_validate_input_data.R +++ b/R/utils_validate_input_data.R @@ -3,11 +3,18 @@ paired = FALSE, allow_ordered = FALSE, verbose = TRUE, ...) { if (inherits(x, "formula")) { + if (isTRUE(paired)) { + # This is to be consistent with R>=4.4.0 + insight::format_error("cannot use 'paired = TRUE' in formula method.") + } + # Validate: if (length(x) != 3L) { insight::format_error( "Formula must have one of the following forms:", - "\n\ty ~ group,\n\ty ~ 1,\n\tPair(x,y) ~ 1" + " y ~ group (independant samples)", + " y ~ 1 (one sample)", + " Pair(x,y) ~ 1 (paired samples)" ) } @@ -49,6 +56,7 @@ } else if (inherits(x, "Pair")) { x <- x[, 1] - x[, 2] y <- NULL + paired <- TRUE } @@ -90,7 +98,7 @@ } - list(x = x, y = y) + list(x = x, y = y, paired = paired) } diff --git a/inst/WORDLIST b/inst/WORDLIST index a7ad5f31..56e5a589 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -83,6 +83,7 @@ Koo Kruschke LMM Labelled +labelled Lajeunesse Lakens Landis diff --git a/man/rank_biserial.Rd b/man/rank_biserial.Rd index bb84a12d..68252c34 100644 --- a/man/rank_biserial.Rd +++ b/man/rank_biserial.Rd @@ -131,6 +131,7 @@ mtcars$cyl <- factor(mtcars$cyl) # Same as: # rank_biserial("mpg", "am", data = mtcars) # rank_biserial(mtcars$mpg[mtcars$am=="0"], mtcars$mpg[mtcars$am=="1"]) +# cliffs_delta(mpg ~ am, data = mtcars) # More options: rank_biserial(mpg ~ am, data = mtcars, mu = -5) @@ -138,21 +139,26 @@ print(rb, append_CLES = TRUE) # One Sample 
---------- -rank_biserial(wt ~ 1, data = mtcars, mu = 3) +# from help("wilcox.test") +x <- c(1.83, 0.50, 1.62, 2.48, 1.68, 1.88, 1.55, 3.06, 1.30) +y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29) +depression <- data.frame(first = x, second = y, change = y - x) + +rank_biserial(change ~ 1, data = depression) + # same as: -# rank_biserial("wt", data = mtcars, mu = 3) -# rank_biserial(mtcars$wt, mu = 3) +# rank_biserial("change", data = depression) +# rank_biserial(depression$change) + +# More options: +rank_biserial(change ~ 1, data = depression, mu = -0.5) # Paired Samples ---------- -dat <- data.frame( - Cond1 = c(1.83, 0.5, 1.62, 2.48, 1.68, 1.88, 1.55, 3.06, 1.3), - Cond2 = c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29) -) -(rb <- rank_biserial(Pair(Cond1, Cond2) ~ 1, data = dat, paired = TRUE)) +(rb <- rank_biserial(Pair(first, second) ~ 1, data = depression)) # same as: -# rank_biserial(dat$Cond1, dat$Cond2, paired = TRUE) +# rank_biserial(depression$first, depression$second, paired = TRUE) interpret_rank_biserial(0.78) interpret(rb, rules = "funder2019") diff --git a/tests/testthat/test-eta_squared.R b/tests/testthat/test-eta_squared.R index c48bc56e..56a9f07f 100644 --- a/tests/testthat/test-eta_squared.R +++ b/tests/testthat/test-eta_squared.R @@ -660,6 +660,7 @@ test_that("ets_squared | gam", { b <- mgcv::gam(y ~ x0 + s(x1) + s(x2) + t2(x1, x2) + s(x3), data = dat) expect_error(out <- eta_squared(b), regexp = NA) + expect_warning(eta_squared(b), regexp = NA) expect_output(print(out), "Type III") }) diff --git a/tests/testthat/test-rom.R b/tests/testthat/test-rom.R index b9741b39..1e3a5290 100644 --- a/tests/testthat/test-rom.R +++ b/tests/testthat/test-rom.R @@ -73,7 +73,14 @@ test_that("means_ratio paired - adjusted", { expect_error(means_ratio(extra ~ group, data = sleep), "negative") sleep$y <- sleep$extra + 4 - x <- means_ratio(y ~ group, + sleep_wide <- datawizard::data_to_wide(sleep, + id_cols = "ID", + values_from = "y", + 
names_from = "group", + names_prefix = "extra_" + ) + + x <- means_ratio(sleep_wide[["extra_1"]], sleep_wide[["extra_2"]], data = sleep, adjust = TRUE, paired = TRUE ) @@ -86,7 +93,13 @@ test_that("means_ratio paired - adjusted", { test_that("means_ratio paired - not adjusted", { data(sleep) sleep$y <- sleep$extra + 4 - x <- means_ratio(y ~ group, + sleep_wide <- datawizard::data_to_wide(sleep, + id_cols = "ID", + values_from = "y", + names_from = "group", + names_prefix = "extra_" + ) + x <- means_ratio(sleep_wide[["extra_1"]], sleep_wide[["extra_2"]], data = sleep, adjust = FALSE, paired = TRUE ) diff --git a/vignettes/standardized_differences.Rmd b/vignettes/standardized_differences.Rmd index c5c7eca5..a1aa33e1 100644 --- a/vignettes/standardized_differences.Rmd +++ b/vignettes/standardized_differences.Rmd @@ -108,11 +108,18 @@ hedges_g(mtcars$wt, mu = 2.7) For paired-samples, the difference is standardized by the variation in the differences. This effect size, known as Cohen's $d_z$, represents the difference in terms of its homogeneity (a small but stable difference will have a large $d_z$). ```{r} -t.test(extra ~ group, data = sleep, paired = TRUE) +sleep_wide <- datawizard::data_to_wide(sleep, + id_cols = "ID", + values_from = "extra", + names_from = "group", + names_prefix = "extra_" +) + +t.test(sleep_wide[["extra_1"]], sleep_wide[["extra_2"]], paired = TRUE) -cohens_d(extra ~ group, data = sleep, paired = TRUE) +cohens_d(sleep_wide[["extra_1"]], sleep_wide[["extra_2"]], paired = TRUE) -hedges_g(extra ~ group, data = sleep, paired = TRUE) +hedges_g(sleep_wide[["extra_1"]], sleep_wide[["extra_2"]], paired = TRUE) ``` ## For a Bayesian *t*-test @@ -297,13 +304,11 @@ p_superiority(mtcars$wt, mu = 2.75, parametric = FALSE) For paired samples, *probability of superiority* is the probability that, when sampling an observation at random, its *difference* will be larger than $\mu$. 
```{r} -p_superiority(extra ~ group, - data = sleep, +p_superiority(sleep_wide[["extra_1"]], sleep_wide[["extra_2"]], paired = TRUE, mu = -1 ) -p_superiority(extra ~ group, - data = sleep, +p_superiority(sleep_wide[["extra_1"]], sleep_wide[["extra_2"]], paired = TRUE, mu = -1, parametric = FALSE )