diff --git a/DESCRIPTION b/DESCRIPTION index 75443d45..9d80fe02 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: bayesnec Title: A Bayesian No-Effect- Concentration (NEC) Algorithm Version: 2.1.0.3 Authors@R: c(person("Rebecca", "Fisher", email = "r.fisher@aims.gov.au", role = c("aut", "cre")), person("Diego","Barneche",role="aut"), person("Gerard","Ricardo",role="aut"), person("David","Fox",role="aut")) -Description: Implementation of No-Effect-Concentration estimation that uses 'brms' (see Burkner (2017); Burkner (2018); Carpenter 'et al.' (2017) to fit concentration(dose)-response data using Bayesian methods for the purpose of estimating 'ECX' values, but more particularly 'NEC' (see Fox (2010). This package expands and supersedes an original version implemented in R2jags, see Fisher, Ricardo and Fox (2020). +Description: Implementation of No-Effect-Concentration estimation that uses 'brms' (see Burkner (2017); Burkner (2018); Carpenter 'et al.' (2017) to fit concentration(dose)-response data using Bayesian methods for the purpose of estimating 'ECx' values, but more particularly 'NEC' (see Fox (2010)), 'NSEC' (see Fisher and Fox (2023)<10.1002/etc.5610>), and 'N(S)EC (see Fisher et al. 2023<10.1002/ieam.4809>). This package expands and supersedes an original version implemented in R2jags, see Fisher, Ricardo and Fox (2020). Depends: R (>= 4.1), brms, @@ -18,6 +18,7 @@ Imports: dplyr, tidyr, purrr, + tibble, tidyselect, evaluate, rlang, diff --git a/NAMESPACE b/NAMESPACE index 5f6951ae..9621a6b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -103,6 +103,7 @@ importFrom(dplyr,arrange) importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) importFrom(dplyr,filter) +importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(evaluate,evaluate) @@ -146,6 +147,7 @@ importFrom(loo,loo_model_weights) importFrom(purrr,map) importFrom(purrr,map_dfr) importFrom(rlang,.data) +importFrom(rlang,.env) importFrom(stats,acf) importFrom(stats,as.formula) importFrom(stats,binomial) @@ -163,6 +165,7 @@ importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,terms) importFrom(stats,update) +importFrom(tibble,rownames_to_column) importFrom(tidyr,pivot_longer) importFrom(tidyselect,everything) importFrom(tidyselect,starts_with) diff --git a/R/autoplot.R b/R/autoplot.R index 205679a8..f2a181a8 100644 --- a/R/autoplot.R +++ b/R/autoplot.R @@ -45,24 +45,23 @@ NULL #' #' @importFrom dplyr mutate #' @importFrom chk chk_lgl +#' @importFrom rlang .env #' #' @export autoplot.bayesnecfit <- function(object, ..., nec = TRUE, ecx = FALSE, xform = identity) { - x <- object - if(!inherits(x, "bnecfit")){ - stop("x is not of class bnecfit. x should be an object returned from a call to the function bnec.") - } chk_lgl(nec) chk_lgl(ecx) - if(!inherits(xform, "function")){ + if (!inherits(xform, "function")) { stop("xform must be a function.") - } - + } + summ <- summary(x, ecx = FALSE) |> + suppressWarnings() |> + suppressMessages() ggbnec_data(x, add_nec = nec, add_ecx = ecx, xform = xform, ...) |> - mutate(model = x$model) |> + mutate(model = x$model, tag = rownames(.env$summ$nec_vals)) |> ggbnec(nec = nec, ecx = ecx) } @@ -85,41 +84,46 @@ autoplot.bayesnecfit <- function(object, ..., nec = TRUE, ecx = FALSE, #' #' @inherit autoplot description return examples #' -#' @importFrom dplyr mutate +#' @importFrom dplyr mutate left_join #' @importFrom purrr map_dfr +#' @importFrom tibble rownames_to_column #' @importFrom grDevices devAskNewPage #' @importFrom chk chk_lgl +#' @importFrom rlang .env #' #' @export autoplot.bayesmanecfit <- function(object, ..., nec = TRUE, ecx = FALSE, xform = identity, all_models = FALSE, plot = TRUE, ask = TRUE, newpage = TRUE, multi_facet = TRUE) { - x <- object - - if(!inherits(x, "bnecfit")){ - stop("x is not of class bnecfit. x should be an object returned from a call to the function bnec.") - } chk_lgl(nec) chk_lgl(ecx) - if(!inherits(xform, "function")){ + if (!inherits(xform, "function")) { stop("xform must be a function.") - } - chk_lgl(all_models) - chk_lgl(plot) - chk_lgl(ask) - chk_lgl(newpage) - chk_lgl(multi_facet) - + } + chk_lgl(all_models) + chk_lgl(plot) + chk_lgl(ask) + chk_lgl(newpage) + chk_lgl(multi_facet) if (all_models) { all_fits <- lapply(x$success_models, pull_out, manec = x) |> suppressMessages() |> suppressWarnings() if (multi_facet) { names(all_fits) <- x$success_models + nec_labs <- map_dfr(all_fits, function(x) { + summ <- summary(x, ecx = FALSE) |> + suppressWarnings() |> + suppressMessages() + summ$nec_vals |> + data.frame() |> + rownames_to_column(var = "tag") + }, .id = "model") map_dfr(all_fits, ggbnec_data, add_nec = nec, add_ecx = ecx, xform = xform, ..., .id = "model") |> + left_join(y = nec_labs, by = "model") |> ggbnec(nec = nec, ecx = ecx) } else { if (plot) { @@ -129,9 +133,13 @@ autoplot.bayesmanecfit <- function(object, ..., nec = TRUE, ecx = FALSE, } plots <- vector(mode = "list", length = length(all_fits)) for (i in seq_along(all_fits)) { + summ_i <- summary(all_fits[[i]], ecx = FALSE) |> + suppressWarnings() |> + suppressMessages() plots[[i]] <- ggbnec_data(all_fits[[i]], add_nec = nec, add_ecx = ecx, xform = xform, ...) |> - mutate(model = x$success_models[i]) |> + mutate(model = x$success_models[i], + tag = rownames(.env$summ_i$nec_vals)) |> ggbnec(nec = nec, ecx = ecx) plot(plots[[i]], newpage = newpage || i > 1) if (i == 1) { @@ -141,8 +149,12 @@ autoplot.bayesmanecfit <- function(object, ..., nec = TRUE, ecx = FALSE, invisible(plots) } } else { + summ <- summary(x, ecx = FALSE) |> + suppressWarnings() |> + suppressMessages() ggbnec_data(x, add_nec = nec, add_ecx = ecx, xform = xform, ...) |> - mutate(model = "Model averaged predictions") |> + mutate(model = "Model averaged predictions", + tag = rownames(.env$summ$nec_vals)) |> ggbnec(nec = nec, ecx = ecx) } } @@ -218,7 +230,7 @@ bind_ecx <- function(data, ecx_vals) { df <- data[1:3, ] df[ ] <- NA df$ecx_vals <- ecx_vals - df$ecx_int[1] <- strsplit(names(ecx_vals)[1], "_", fixed = TRUE)[[1]][2] + df$ecx_int[1] <- attr(ecx_vals, "ecx_val") df$ecx_labs[1] <- rounded(ecx_vals[[1]], 2) df$ecx_labs_l[1] <- rounded(ecx_vals[[2]], 2) df$ecx_labs_u[1] <- rounded(ecx_vals[[3]], 2) @@ -386,10 +398,12 @@ ggbnec <- function(x, nec = TRUE, ecx = FALSE) { linetype = ltys, colour = "grey50", lwd = lwds) + geom_text(data = x |> filter(!is.na(.data$nec_labs)), - mapping = aes(label = paste0("N(S)EC: ", .data$nec_labs, " (", - .data$nec_labs_l, "-", - .data$nec_labs_u, ")")), - x = Inf, y = Inf, hjust = 1.1, vjust = 1.5, size = 3, + mapping = aes( + label = paste0( + .data$tag, ": ", .data$nec_labs, " (", .data$nec_labs_l, + "-", .data$nec_labs_u, ")" + ) + ), x = Inf, y = Inf, hjust = 1.1, vjust = 1.5, size = 3, colour = "grey50") } if (ecx) { diff --git a/R/bayesnec-package.R b/R/bayesnec-package.R index 38ef76e2..3ad77989 100644 --- a/R/bayesnec-package.R +++ b/R/bayesnec-package.R @@ -1,9 +1,11 @@ #' The 'bayesnec' package. #' -#' @description A No-Effect-Concentration (NEC) estimation package that uses brms -#' (https://github.com/paul-buerkner/brms) to fit concentration +#' @description A No-Effect toxicity estimation package that uses brms (Bürkner +#' (2018), https://github.com/paul-buerkner/brms) to fit concentration #' (dose)-response data using Bayesian methods for the purpose of estimating -#' both Effect Concentration (ECx) values, but more particularly NEC. Please see ?bnec +#' both Effect Concentration (ECx) values, but more particularly NEC, but more +#' particularly 'NEC' (Fox 2010), 'NSEC' (Fisher and Fox 2023), and 'N(S)EC +#' (Fisher et al. 2023). Please see ?bnec #' for more details. #' #' @docType package @@ -15,5 +17,16 @@ #' @references #' Bürkner P-C (2018) Advanced Bayesian Multilevel Modeling with the R Package #' brms. The R Journal, 10: 395-411. doi:10.32614/RJ-2018-017. +#' +#' Fisher R, Fox DR (2023). Introducing the no significant effect concentration +#' (NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +#' doi: 10.1002/etc.5610. #' +#' Fisher R, Fox DR, Negri AP, van Dam J, Flores F, Koppel D (2023). Methods for +#' estimating no-effect toxicity concentrations in ecotoxicology. Integrated +#' Environmental Assessment and Management. doi:10.1002/ieam.4809. +#' +#' Fox DR (2010). A Bayesian Approach for Determining the No Effect +#' Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +#' and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. NULL diff --git a/R/bnec.R b/R/bnec.R index 231c1ac4..82780671 100644 --- a/R/bnec.R +++ b/R/bnec.R @@ -103,19 +103,20 @@ #' use the function \code{\link{models}} or to check the parameters of a #' specific model use the function \code{\link{show_params}}. #' -#' All models provide an estimate for NEC. For model types with "nec" as a -#' prefix, NEC is directly estimated as parameter "nec" -#' in the model. Models with "ecx" as a prefix are continuous curve models, -#' typically used for extracting ECx values -#' from concentration response data. In this instance the NEC value is defined -#' as the concentration at which there is a user supplied -#' (see argument \code{sig_val}) percentage certainty -#' (based on the Bayesian posterior estimate) that the response -#' falls below the estimated value of the upper asymptote (top) of the -#' response (i.e. the response value is significantly -#' lower than that expected in the case of no exposure). -#' The default value for \code{sig_val} is 0.01, which corresponds to an alpha -#' value of 0.01 for a one-sided test of significance. +#' \bold{No-effect toxicity estimates} +#' +#' Regardless of the model(s) fitted, the resulting object will contain a +#' no-effect toxicity estimate. Where the fitted model(s) are NEC models (threshold +#' models, containing a step function - all models with "nec" as a +#' prefix) the no-effect estimate is a true +#' no-effect-concentration (NEC, see Fox 2010). Where the fitted model(s) are +#' smooth ECx models with no step function (all models with "ecx" as a +#' prefix), the no-effect estimate is a no-significant-effect-concentration +#' (NSEC, see Fisher and Fox 2023). +#' In the case of a \code{\link{bayesmanecfit}} that contains a mixture of both +#' NEC and ECx models, the no-effect estimate is a model averaged combination of +#' the NEC and NSEC estimates, and is reported as the N(S)EC +#' (see Fisher et al. 2023). #' #' \bold{Further argument to \code{\link[brms]{brm}}} #' @@ -174,6 +175,19 @@ #' \code{\link{check_formula}}, #' \code{\link{models}}, #' \code{\link{show_params}} +#' +#' @references +#' Fisher R, Fox DR (2023). Introducing the no significant effect concentration +#' (NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +#' doi: 10.1002/etc.5610. +#' +#' Fisher R, Fox DR, Negri AP, van Dam J, Flores F, Koppel D (2023). Methods for +#' estimating no-effect toxicity concentrations in ecotoxicology. Integrated +#' Environmental Assessment and Management. doi:10.1002/ieam.4809. +#' +#' Fox DR (2010). A Bayesian Approach for Determining the No Effect +#' Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +#' and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. #' #' @examples #' \dontrun{ diff --git a/R/check_priors.R b/R/check_priors.R index dbd6371d..74251ffc 100644 --- a/R/check_priors.R +++ b/R/check_priors.R @@ -34,7 +34,7 @@ check_priors <- function(object, filename = NA) { #' #' @inherit check_priors examples return #' -#' @importFrom ggplot2 ggplot geom_density facet_wrap scale_fill_manual theme_bw +#' @importFrom ggplot2 ggplot geom_density facet_wrap scale_fill_manual theme_bw labs #' @importFrom brms hypothesis #' @importFrom rlang .data #' @@ -60,6 +60,7 @@ check_priors.bayesnecfit <- function(object, filename = NA) { alpha = 0.5) + facet_wrap(~.data$ind, scales = "free") + scale_fill_manual(values = c(Prior = "grey90", Posterior = "grey30")) + + labs(x = "Value", y = "Density") + theme_bw() } diff --git a/R/helpers.R b/R/helpers.R index 334f6a82..21f786eb 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -271,9 +271,21 @@ clean_mod_weights <- function(x) { } #' @noRd -clean_nec_vals <- function(x) { - mat <- t(as.matrix(x$w_nec)) - rownames(mat) <- "NEC" +clean_nec_vals <- function(x, all_models, ecx_models) { + if (is_bayesnecfit(x)) { + mat <- t(as.matrix(x$nec)) + } else if (is_bayesmanecfit(x)) { + mat <- t(as.matrix(x$w_nec)) + } else { + stop("Wrong input class.") + } + neclab <- "NEC" + if (all(all_models %in% ecx_models)) { + neclab <- "NSEC" + } else if (!is.null(ecx_models)) { + neclab <- "N(S)EC" + } + rownames(mat) <- neclab mat } diff --git a/R/manecsummary-class.R b/R/manecsummary-class.R index daa5fe25..dee3088c 100644 --- a/R/manecsummary-class.R +++ b/R/manecsummary-class.R @@ -33,6 +33,8 @@ #' \code{bayesnec:::summary.bayesnecfit} help file for details). Different #' from the single-model case of class \code{\link{bayesnecfit}}, these ECx #' estimates will be based on the model weights. +#' @slot bayesr2 A table containing the Bayesian R2 for all models, as +#' calculated by \code{\link[brms]{bayes_R2}}. #' @slot rhat_issues A \code{\link[base]{list}} detailing whether each fitted #' model exhibited convergence issues based on the Rhat evaluation. #' @@ -43,6 +45,19 @@ #' \code{\link{bayesmanecfit}}, #' \code{\link{necsummary}} #' +#' @references +#' Fisher R, Fox DR (2023). Introducing the no significant effect concentration +#' (NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +#' doi: 10.1002/etc.5610. +#' +#' Fisher R, Fox DR, Negri AP, van Dam J, Flores F, Koppel D (2023). Methods for +#' estimating no-effect toxicity concentrations in ecotoxicology. Integrated +#' Environmental Assessment and Management. doi:10.1002/ieam.4809. +#' +#' Fox DR (2010). A Bayesian Approach for Determining the No Effect +#' Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +#' and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. +#' NULL #' Checks if argument is a \code{manecsummary} object diff --git a/R/nec.R b/R/nec.R index 019c3d0f..321d7e86 100644 --- a/R/nec.R +++ b/R/nec.R @@ -13,10 +13,20 @@ #' 0.975 (95 percent credible intervals). #' #' @seealso \code{\link{bnec}} +#' +#' @details The NEC is a parameter in a threshold model (for example, +#' see Fox 2010), and is a true measure +#' of No-effect-concentration (the minimum concentration above which an effect +#' is predicted to occur. #' #' @return A vector containing the estimated NEC value, including upper and -#' lower 95% credible interval bounds -#' (or other interval as specified by prob_vals). +#' lower 95% credible interval bounds (or other interval as specified by +#' prob_vals). +#' +#' @references +#' Fox DR (2010). A Bayesian Approach for Determining the No Effect +#' Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +#' and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. #' #' @examples #' library(bayesnec) diff --git a/R/necsummary-class.R b/R/necsummary-class.R index 021fd9ff..937e9f48 100644 --- a/R/necsummary-class.R +++ b/R/necsummary-class.R @@ -22,9 +22,13 @@ #' the fitted non-linear model. #' @slot is_ecx A \code{\link[base]{logical}} indicating whether \code{model} #' is an ECx-type model. +#' @slot nec_vals The NEC values. Note that if model is an ECx-type model, +#' this estimate will be a NSEC proxy. #' @slot ecs A \code{\link[base]{list}} containing the ECx values #' should the user decide to calculate them (see the non-exported #' \code{bayesnec:::summary.bayesnecfit} help file for details). +#' @slot bayesr2 The model Bayesian R2 as calculated by +#' \code{\link[brms]{bayes_R2}}. #' #' @seealso #' \code{\link{bayesnec}}, @@ -33,6 +37,19 @@ #' \code{\link{bayesmanecfit}}, #' \code{\link{manecsummary}} #' +#' @references +#' Fisher R, Fox DR (2023). Introducing the no significant effect concentration +#' (NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +#' doi: 10.1002/etc.5610. +#' +#' Fisher R, Fox DR, Negri AP, van Dam J, Flores F, Koppel D (2023). Methods for +#' estimating no-effect toxicity concentrations in ecotoxicology. Integrated +#' Environmental Assessment and Management. doi:10.1002/ieam.4809. +#' +#' Fox DR (2010). A Bayesian Approach for Determining the No Effect +#' Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +#' and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. +#' NULL #' Checks if argument is a \code{necsummary} object diff --git a/R/nsec.R b/R/nsec.R index b52830b6..cb9cbe98 100644 --- a/R/nsec.R +++ b/R/nsec.R @@ -1,12 +1,10 @@ -#' Extracts the predicted NSEC value as desired from an object of class -#' \code{\link{bayesnecfit}} or \code{\link{bayesmanecfit}}. +#' Extracts the predicted NSEC value as desired from an +#' object of class \code{\link{bayesnecfit}} or \code{\link{bayesmanecfit}}. #' #' @param object An object of class \code{\link{bayesnecfit}} or #' \code{\link{bayesmanecfit}} returned by \code{\link{bnec}}. #' @param sig_val Probability value to use as the lower quantile to test #' significance of the predicted posterior values. -#' against the lowest observed concentration (assumed to be the control), to -#' estimate NEC as an interpolated NOEC value from smooth ECx curves. #' @param resolution The number of unique x values over which to find NSEC - #' large values will make the NSEC estimate more precise. #' @param hormesis_def A \code{\link[base]{character}} vector, taking values @@ -19,7 +17,12 @@ #' 0.975 (95 percent credible intervals). #' @param ... Further arguments to pass to class specific methods. #' -#' @details For \code{hormesis_def}, if "max", then NSEC values are calculated +#' @details NSEC is no-effect toxicity metric that estimates the concentration +#' at which the modeled mean response is statistically indistinguishable from +#' the mean control response. See the detailed derivation in +#' Fisher and Fox (2023). +#' +#' For \code{hormesis_def}, if "max", then NSEC values are calculated #' as a decline from the maximum estimates (i.e. the peak at NEC); #' if "control", then NSEC values are calculated relative to the control, which #' is assumed to be the lowest observed concentration. @@ -42,6 +45,11 @@ #' #' @return A vector containing the estimated NSEC value, including upper and #' lower 95% credible interval bounds. +#' +#' @references +#' Fisher R, Fox DR (2023). Introducing the no significant effect concentration +#' (NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +#' doi: 10.1002/etc.5610. #' #' @examples #' \donttest{ @@ -96,11 +104,6 @@ nsec.bayesnecfit <- function(object, sig_val = 0.01, resolution = 1000, stop("prob_vals must include central, lower and upper quantiles,", " in that order.") } - if (length(grep("ecx", object$model)) > 0) { - mod_class <- "ecx" - } else { - mod_class <- "nec" - } newdata_list <- newdata_eval( object, resolution = resolution, x_range = x_range ) diff --git a/R/plot.R b/R/plot.R index 7c46b021..9fb5ff39 100644 --- a/R/plot.R +++ b/R/plot.R @@ -137,13 +137,18 @@ plot.bayesnecfit <- function(x, ..., CI = TRUE, add_nec = TRUE, plot(x_dat, y_dat, ylab = ylab, xlab = xlab, pch = 16, xaxt = "n", cex = 1.5, col = adjustcolor(1, alpha.f = 0.25), ...) + nec_tag <- summary(x, ecx = FALSE) |> + (`[[`)("nec_vals") |> + rownames() |> + suppressWarnings() |> + suppressMessages() if (!inherits(lxform, "function")) { if (length(xticks) == 1) { axis(side = 1) } else { axis(side = 1, at = signif(xticks, 2)) } - legend_nec <- paste("NEC: ", signif(nec["Estimate"], 2), + legend_nec <- paste(nec_tag, ": ", signif(nec["Estimate"], 2), " (", signif(nec["Q2.5"], 2), "-", signif(nec["Q97.5"], 2), ")", sep = "") legend_ec10 <- paste("EC10: ", signif(ec10[1], 2), @@ -152,7 +157,7 @@ plot.bayesnecfit <- function(x, ..., CI = TRUE, add_nec = TRUE, } else { x_labs <- signif(lxform(x_ticks), 2) axis(side = 1, at = x_ticks, labels = x_labs) - legend_nec <- paste("NEC: ", signif(lxform(nec["Estimate"]), 2), + legend_nec <- paste(nec_tag, ": ", signif(lxform(nec["Estimate"]), 2), " (", signif(lxform(nec["Q2.5"]), 2), "-", signif(lxform(nec["Q97.5"]), 2), ")", sep = "") legend_ec10 <- paste("EC10: ", signif(lxform(ec10[1]), 2), @@ -228,10 +233,9 @@ plot.bayesmanecfit <- function(x, ..., CI = TRUE, add_nec = TRUE, par(mfrow = c(ceiling(length(mod_fits) / 2), 2), mar = c(1.5, 1.5, 1.5, 1.5), oma = c(3, 3, 0, 0)) for (m in seq_along(mod_fits)) { - mod_fits[[m]] <- suppressMessages(suppressWarnings(expand_and_assign_nec( - x = mod_fits[[m]], formula = mod_fits[[m]]$bayesnecformula, - model = names(mod_fits)[m] - ))) + mod_fits[[m]] <- pull_out(x, model = names(mod_fits)[m]) |> + suppressWarnings() |> + suppressMessages() plot(x = mod_fits[[m]], CI = CI, add_nec = add_nec, position_legend = position_legend, add_ec10 = add_ec10, xform = xform, lxform = lxform, @@ -285,13 +289,18 @@ plot.bayesmanecfit <- function(x, ..., CI = TRUE, add_nec = TRUE, plot(x_dat, y_dat, ylab = ylab, xlab = xlab, pch = 16, xaxt = "n", cex = 1.5, col = adjustcolor(1, alpha.f = 0.25), ...) + nec_tag <- summary(x, ecx = FALSE) |> + (`[[`)("nec_vals") |> + rownames() |> + suppressWarnings() |> + suppressMessages() if (!inherits(lxform, "function")) { if (length(xticks) == 1) { axis(side = 1) } else { axis(side = 1, at = signif(xticks, 2)) } - legend_nec <- paste("NEC: ", signif(nec["Estimate"], 2), + legend_nec <- paste(nec_tag, ": ", signif(nec["Estimate"], 2), " (", signif(nec["Q2.5"], 2), "-", signif(nec["Q97.5"], 2), ")", sep = "") legend_ec10 <- paste("EC10: ", signif(ec10[1], 2), @@ -300,7 +309,7 @@ plot.bayesmanecfit <- function(x, ..., CI = TRUE, add_nec = TRUE, } else { x_labs <- signif(lxform(x_ticks), 2) axis(side = 1, at = x_ticks, labels = x_labs) - legend_nec <- paste("NEC: ", signif(lxform(nec["Estimate"]), 2), + legend_nec <- paste(nec_tag, ": ", signif(lxform(nec["Estimate"]), 2), " (", signif(lxform(nec["Q2.5"]), 2), "-", signif(lxform(nec["Q97.5"]), 2), ")", sep = "") legend_ec10 <- paste("EC10: ", signif(lxform(ec10[1]), 2), diff --git a/R/print.R b/R/print.R index 75bf3ba0..e0faac68 100644 --- a/R/print.R +++ b/R/print.R @@ -59,13 +59,15 @@ print.bayesmanecfit <- function(x, ...) { #' @export #' @noRd print.necsummary <- function(x, ...) { - cat("Object of class bayesnecfit containing the following", - " non-linear model: ", x$model, "\n\n", sep = "") + cat("Object of class bayesnecfit containing the", x$model, + "model\n\n", sep = " ") print(x$brmssummary) + cat("\n\n") if (x$is_ecx) { - cat("\nNB: Model ", x$model, " is an ECX model and so ", - "the NEC estimate is an NSEC surrogate.\n", sep = "") + cat("NB: Model", x$model, "is an ECx model, thus", + "the NEC estimate is an\n", " NSEC surrogate.\n", sep = " ") } + print_mat(x$nec_vals) if (!is.null(x$ecs)) { cat("\n\n") for (i in seq_along(x$ecs)) { @@ -102,19 +104,21 @@ print.manecsummary <- function(x, ...) { cat("Model weights (Method: ", x$mod_weights_method, "):\n", sep = "") print_mat(x$mod_weights) cat("\n\n") - cat("Summary of weighted NEC posterior estimates:\n") - if (!is.null(x$ecx_mods)) { - cat("NB: Model set contains the ECX models: ", - paste0(x$ecx_mods, collapse = ";"), - "; weighted NEC estimates include NSEC surrogates for NEC\n", sep = "") + neclab <- rownames(x$nec_vals) + cat("Summary of weighted", neclab, "posterior estimates:\n", sep = " ") + if (neclab == "N(S)EC") { + cat("NB: Model set contains a combination of ECx and NEC\n", + " models, and is therefore a model averaged\n", + " combination of NEC and NSEC estimates.\n", sep = "") } print_mat(x$nec_vals) cat("\n\n") if (!is.null(x$ecs)) { for (i in seq_along(x$ecs)) { nice_ecx_out(x$ecs[[i]], names(x$ecs)[i]) - "\n\n" + cat("\n") } + cat("\n") } cat("Bayesian R2 estimates:\n") print_mat(x$bayesr2) diff --git a/R/sample_priors.R b/R/sample_priors.R index 600644d3..30b083d0 100644 --- a/R/sample_priors.R +++ b/R/sample_priors.R @@ -11,7 +11,7 @@ #' #' @importFrom stats rgamma rnorm rbeta runif #' @importFrom graphics hist -#' @importFrom ggplot2 ggplot aes geom_histogram facet_wrap +#' @importFrom ggplot2 ggplot aes geom_histogram facet_wrap theme_bw labs #' @importFrom tidyr pivot_longer #' @importFrom tidyselect starts_with #' @importFrom dplyr filter mutate @@ -83,6 +83,8 @@ sample_priors <- function(priors, n_samples = 10000, plot = "ggplot") { mutate(param = gsub("^b\\_", "", .data$param)) |> ggplot(mapping = aes(x = .data$value)) + geom_histogram() + - facet_wrap(~.data$param, scales = "free_x") + labs(x = "Value", y = "Count") + + facet_wrap(~.data$param, scales = "free_x") + + theme_bw() } } diff --git a/R/summary.R b/R/summary.R index 05574a9f..b5cbb656 100644 --- a/R/summary.R +++ b/R/summary.R @@ -16,9 +16,33 @@ #' contents of a \code{\link[brms]{brmsfit}} object with the addition of #' an R2. In the case of a \code{\link{bayesmanecfit}} object, summary #' displays the family distribution information, model weights and averaging -#' method, the estimated model-averaged NEC, and R2 estimates for each -#' individual model. Warning messages are also printed to screen in case +#' method, and Bayesian R2 estimates for each individual model. +#' Warning messages are also printed to screen in case #' model fits are not satisfactory with regards to their Rhats. +#' +#' @details The summary method for both \code{\link{bayesnecfit}} and +#' \code{\link{bayesmanecfit}} also returns a no-effect toxicity +#' estimate. Where the fitted model(s) are NEC models (threshold models, +#' containing a step function) the no-effect estimate is a true +#' no-effect-concentration (NEC, see Fox 2010). Where the fitted model(s) are +#' smooth ECx models with no step function, the no-effect estimate is a +#' no-significant-effect-concentration (NSEC, see Fisher and Fox 2023). In the +#' case of a \code{\link{bayesmanecfit}} that contains a mixture of both NEC and +#' ECx models, the no-effect estimate is a model averaged combination of the NEC +#' and NSEC estimates, and is reported as the N(S)EC (see Fisher et al. 2023). +#' +#' @references +#' Fisher R, Fox DR (2023). Introducing the no significant effect concentration +#' (NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +#' doi: 10.1002/etc.5610. +#' +#' Fisher R, Fox DR, Negri AP, van Dam J, Flores F, Koppel D (2023). Methods for +#' estimating no-effect toxicity concentrations in ecotoxicology. Integrated +#' Environmental Assessment and Management. doi:10.1002/ieam.4809. +#' +#' Fox DR (2010). A Bayesian Approach for Determining the No Effect +#' Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +#' and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. #' #' @examples #' \donttest{ @@ -39,7 +63,7 @@ NULL #' #' @method summary bayesnecfit #' -#' @inherit summary description return examples +#' @inherit summary description return details examples #' #' @importFrom brms bayes_R2 #' @importFrom chk chk_numeric chk_lgl @@ -48,21 +72,27 @@ NULL summary.bayesnecfit <- function(object, ..., ecx = FALSE, ecx_vals = c(10, 50, 90)) { chk_lgl(ecx) - chk_numeric(ecx_vals) + chk_numeric(ecx_vals) x <- object ecs <- NULL if (ecx) { - message("ECX calculation takes a few seconds per model, calculating...\n") + message("ECx calculation takes a few seconds per model, calculating...\n") ecs <- list() for (i in seq_along(ecx_vals)) { ecs[[i]] <- ecx(x, ecx_val = ecx_vals[i]) } names(ecs) <- paste0("ECx (", ecx_vals, "%) estimate:") } + is_ecx <- x$model %in% mod_groups$ecx + ecx_mod <- NULL + if (is_ecx) { + ecx_mod <- x$model + } out <- list( brmssummary = cleaned_brms_summary(x$fit), model = x$model, - is_ecx = x$model %in% mod_groups$ecx, + is_ecx = is_ecx, + nec_vals = clean_nec_vals(x, x$model, ecx_mod), ecs = ecs, bayesr2 = bayes_R2(x$fit) ) @@ -74,7 +104,7 @@ summary.bayesnecfit <- function(object, ..., ecx = FALSE, #' #' @method summary bayesmanecfit #' -#' @inherit summary description return examples +#' @inherit summary description return details examples #' #' @importFrom purrr map #' @importFrom brms bayes_R2 @@ -88,7 +118,7 @@ summary.bayesmanecfit <- function(object, ..., ecx = FALSE, x <- object ecs <- NULL if (ecx) { - message("ECX calculation takes a few seconds per model, calculating...\n") + message("ECx calculation takes a few seconds per model, calculating...\n") ecs <- list() for (i in seq_along(ecx_vals)) { ecs[[i]] <- ecx(x, ecx_val = ecx_vals[i]) @@ -106,7 +136,7 @@ summary.bayesmanecfit <- function(object, ..., ecx = FALSE, mod_weights = clean_mod_weights(x), mod_weights_method = class(x$mod_stats$wi), ecx_mods = ecx_mods, - nec_vals = clean_nec_vals(x), + nec_vals = clean_nec_vals(x, x$success_models, ecx_mods), ecs = ecs, bayesr2 = x$mod_fits |> lapply(function(y)bayes_R2(y$fit)) |> diff --git a/README.Rmd b/README.Rmd index da01efc0..296c3ad3 100644 --- a/README.Rmd +++ b/README.Rmd @@ -38,10 +38,11 @@ Love](https://badges.frapsoft.com/os/v2/open-source.svg?v=103) ## Overview -`bayesnec` is a toxicity estimate and No-Effect-Concentration estimation package that uses +`bayesnec` is a toxicity estimate and No-Effect toxicity estimation package that uses [`brms`](https://github.com/paul-buerkner/brms) to fit concentration(dose)-response data using Bayesian methods for the purpose of -estimating both ECX values, but more particularly NEC. Please see `?bnec` for a +estimating both ECX values, but more particularly 'NEC' (see [Fox (2010)](doi:10.1016/j.ecoenv.2009.09.012), +'NSEC' (see [Fisher and Fox (2023)](10.1002/etc.5610)), and 'N(S)EC (see [Fisher et al. (2023)](10.1002/ieam.4809)). Please see `?bnec` for a more detailed help file. ## Installation diff --git a/article/article.Rmd b/article/article.Rmd index fd12fe73..c3662a6c 100644 --- a/article/article.Rmd +++ b/article/article.Rmd @@ -10,49 +10,30 @@ title: author: - name: Rebecca Fisher address: - - Australian Institute of Marine Science, - - Crawley, WA, Australia - address2: - - The Indian Ocean Marine Research Centre, - - University of Western Australia, - - Crawley, WA, Australia - affiliation: - 'Australian Institute of Marine Science' + - "\\textit{and}\\linebreak Oceans Institute, The University of Western Australia, Crawley, WA, Australia" + affiliation: 'Australian Institute of Marine Science' affiliation2: - 'University of Western Australia' + - Australian Institute of Marine Science, Crawley, WA, Australia email: \email{r.fisher@aims.gov.au} orcid: 0000-0001-5148-6731 - name: Diego R. Barneche address: - - Australian Institute of Marine Science, - - Crawley, WA, Australia - address2: - - The Indian Ocean Marine Research Centre University of Western Australia, - - Crawley, WA, Australia + - "\\textit{and}\\linebreak Oceans Institute, The University of Western Australia, Crawley, WA, Australia" affiliation: 'Australian Institute of Marine Science' - affiliation2: 'University of Western Australia' + affiliation2: + - Australian Institute of Marine Science, Crawley, WA, Australia orcid: 0000-0002-4568-2362 - name: Gerard Ricardo address: - - The University of Queensland, - - St Lucia, Qld, Australia - address2: - - Australian Institute of Marine Science, - - Townsville, Qld, Australia - affiliation: 'University of Queensland' - affiliation2: - 'Australian Institute of Marine Science' + - "\\textit{and}\\linebreak Australian Institute of Marine Science, Townsville, Qld, Australia" + affiliation: 'The University of Queensland' + affiliation2: 'The University of Queensland, St Lucia, Qld, Australia' orcid: 0000-0002-7761-0806 - name: David Fox address: - - University of Melbourne, - - Parkville, Victoria, Australia - address2: - - Environmetrics Australia, - - Beaumaris, Victoria, Australia - affiliation: 'University of Melbourne' - affiliation2: - 'Environmetrics Australia' + - "\\textit{and}\\linebreak Environmetrics Australia, Beaumaris, Victoria, Australia" + affiliation: 'The University of Melbourne' + affiliation2: 'The University of Melbourne, Parkville, Victoria, Australia' orcid: 0000-0002-3178-7243 abstract: > The \pkg{bayesnec} package has been developed for \proglang{R} to fit concentration(dose) - response curves (CR) to toxicity data for the purpose of deriving no-effect-concentration (NEC), no-significant-effect-concentration (NSEC), and effect-concentration (of specified percentage 'x', ECx) thresholds from non-linear models fitted using Bayesian Hamiltonian Monte Carlo (HMC) via \pkg{brms} [@Burkner2017; @Burkner2018] and \pkg{rstan} [@rstan2021] or \pkg{cmdstanr} [@cmdstanr2022]. In \pkg{bayesnec} it is possible to fit a single model, custom models-set, specific model-set or all of the available models. When multiple models are specified, the \code{bnec} function returns a model weighted average estimate of predicted posterior values. A range of support functions and methods is also included to work with the returned single, or multi-model objects that allow extraction of raw, or model averaged predicted, NEC, NSEC and ECx values and to interrogate the fitted model or model-set. By combining Bayesian methods with model averaging, \pkg{bayesnec} provides a single estimate of toxicity and associated uncertainty that can be directly integrated into risk assessment frameworks. @@ -75,7 +56,7 @@ df_print: kable ```{r setup, include = FALSE} library("bayesnec") options(prompt = 'R> ', continue = '+ ') -knitr::opts_chunk$set(purl = TRUE, warning = FALSE, message = FALSE, echo = TRUE, cache = TRUE, include = TRUE, eval = TRUE, fig.align = "centre", fig.pos = "!ht") +knitr::opts_chunk$set(purl = TRUE, warning = FALSE, message = FALSE, echo = TRUE, cache = TRUE, include = TRUE, eval = TRUE, fig.align = "center", fig.pos = "!ht") fix_form <- function(x) { gsub("~", "\\textasciitilde{}", x, fixed = TRUE) |> gsub("^", "\\^{}", x = _, fixed = TRUE) @@ -84,9 +65,9 @@ fix_form <- function(x) { # Introduction -Concentration-response (CR) modelling (also known as dose-response modelling or dose-response analysis) is a key tool for assessing toxicity and deriving the toxicity thresholds used in the risk assessments that underpin protection of human health and the environment. It is widely used in the disciplines of pharmacology, toxicology and ecotoxicology, where parametric non-linear regression methods are used to model response data of interest, with the resulting fitted models used to estimate critical thresholds of concern. These thresholds may be used directly to assess risk [e.g. see @fisher2018c], or are subsequently incorporated into a broader population-level risk assessment framework [e.g. @Warne2015]. Typical thresholds derived from CR modelling include the effect-concentration of defined percentage 'x' (ECx) and the no-effect-concentration (NEC), the latter being the generally preferred option [@Fox2008; @Warne2015; @Warne2018c]. In addition, the no-significant-effect-concentration (NSEC) has also been recently defined [@Fisher2023], and may represent a good alternative estimate of "no-effect" concentrations when the threshold models required to derive true NEC estimates are not appropriate [@Fisher2023; @fisher2023ieam]. +Concentration-response (CR) modelling (also known as dose-response modelling or dose-response analysis) is a key tool for assessing toxicity and deriving the toxicity thresholds used in the risk assessments that underpin protection of human health and the environment. It is widely used in the disciplines of pharmacology, toxicology and ecotoxicology, where parametric non-linear regression methods are used to model response data of interest, with the resulting fitted models used to estimate critical thresholds of concern. These thresholds may be used directly to assess risk [e.g., see @fisher2018c], or are subsequently incorporated into a broader population-level risk assessment framework [e.g., @Warne2015]. Typical thresholds derived from CR modelling include the effect-concentration of defined percentage 'x' (ECx) and the no-effect-concentration (NEC), the latter being the generally preferred option [@Fox2008; @Warne2015; @Warne2018c]. In addition, the no-significant-effect-concentration (NSEC) has also been recently defined [@Fisher2023], and may represent a good alternative estimate of "no-effect" concentrations when the threshold models required to derive true NEC estimates are not appropriate [@Fisher2023; @fisher2023ieam]. -In qualitative terms, CR models are typically a decreasing function of concentration, whereby the response may remain relatively stable for the initial portion of the curve, and then decays at some rate to zero (or some other, lower bound at high concentration). An example is death of an organism resulting from ever increasing concentrations of a toxic pollutant. However, often the underlying mechanisms describing CR relationships are not known, and therefore numerous alternative non-linear CR equations have been proposed (e.g. models in \pkg{drc}, @Ritz2016). These can be broadly grouped into two main "model categories" (see below Section \ref{mdbnc}) for the mathematical definition of each model): NEC models---threshold models which contain a step function comprising the "break-point" after which the predictor systematically alters the response [@Fox2010]; and Smooth transition models that are typically used for estimating effect concentrations of a specified effect (e.g. ECx models). They may or may not encompass the EC~50~ as a parameter [@Ritz2016]. Each of these two groups can be further split into two categories depending on whether the initial portion of the curve is flat, or increasing---the latter being known as hormesis models [@Ritz2016]. +In qualitative terms, CR models are typically a decreasing function of concentration, whereby the response may remain relatively stable for the initial portion of the curve, and then decays at some rate to zero (or some other, lower bound at high concentration). An example is death of an organism resulting from ever increasing concentrations of a toxic pollutant. However, often the underlying mechanisms describing CR relationships are not known, and therefore numerous alternative non-linear CR equations have been proposed [e.g., models in \pkg{drc}, @Ritz2016]. These can be broadly grouped into two main "model categories" (see below Section \ref{mdbnc}) for the mathematical definition of each model): NEC models---threshold models which contain a step function comprising the "break-point" after which the predictor systematically alters the response [@Fox2010]; and Smooth transition models that are typically used for estimating effect concentrations of a specified effect (e.g., ECx models). They may or may not encompass the EC~50~ as a parameter [@Ritz2016]. Each of these two groups can be further split into two categories depending on whether the initial portion of the curve is flat, or increasing---the latter being known as hormesis models [@Ritz2016]. The above model categories mostly comprise non-linear equations, thereby increasing the technical complexity of model fitting. CR experimental designs are often also complex, and may require the addition of multi-level, hierarchical effects. Examples might include a random offset to model non-independence of replicates housed together in tanks, or where there are repeated measurements on individual genotypes of a test species. Additionally, CR data are of varied nature, with the measured response data taking a wide range of natural distributions. For instance, response data may be unbounded continuous (e.g., growth when shrinkage is possible), or positive continuous (e.g., growth in the absence of shrinkage), proportional (e.g., fertilization success), or binary (e.g., survival). To the best of our knowledge, there is no open-source statistical software dedicated to CR modelling which allows for appropriate model and error distribution specification depending on the input data. However, there is a wide array of multi-purpose packages for fitting non-linear generalised hierarchical models in \proglang{R}. For example, to list a few, \pkg{nlme} [@pinheiro2021], \pkg{lme4} [@pinheiro2021], \pkg{rjags} [@Su2015] and \pkg{rstan} [@rstan2021]. @@ -94,7 +75,7 @@ While CR modelling can be carried out using generic non-linear modelling softwar Estimates of uncertainty in parameters and derived thresholds are critical for effective integration of threshold estimates into risk assessment and formal decision frameworks [@fisher2018c]. Bayesian methods allow robust quantification of uncertainty with intuitive and direct probabilistic meaning [@Ellison1996], and are therefore a useful platform for CR modelling in most settings. Furthermore, the posterior draws generated through Bayesian model fitting methods provide a rich resource that can be used to explore synergistic and antagonistic impacts [@Fisher2019d; @flores2021], propagate toxicity estimate uncertainty [@Charles2020a; @Gottschalk2013], and test hypotheses of no-effect [@Thomas2006]. -There is a wide array of packages available for Bayesian model fitting via Markov chain Monte Carlo methods, including \proglang{WinBUGS} [@Lunn2000], \proglang{JAGS} [@Plummer2003] and \proglang{Stan} [@Carpenter2017], that seamlessly interface with R through packages such as \pkg{R2WinBUGS}, \pkg{R2jags} [@Su2015] and \pkg{rstan} [@rstan2021]. These packages require coding in \proglang{R} and additional model specification in custom languages---which might involve non-trivial coding, extensive debugging and optimisation that is often time consuming and requires specialist expertise, all of which might add a learning/application barrier to many potential users. Several extension packages which aim to reduce that barrier have recently become available, principally \pkg{brms} [@Burkner2017], that allows a broad range of models to be easily fitted using \pkg{rstan} [@rstan2021] or \pkg{cmdstanr} [@cmdstanr2022] through simpler \pkg{lme4}-like formula syntax. However, even with packages like \pkg{brms}, Bayesian model fitting can be difficult to automate across all potential use cases, particularly with respect to specifying valid initial parameter values and appropriate priors. In addition, as was the motivation for the development of the \pkg{drc} package in the frequentist setting, the \proglang{R} code required for fitting non-linear models and extracting the relevant outputs (e.g. NEC, ECx) and their standard errors can be cumbersome in practice, and even more so in the Bayesian setting where model fits contain multiple posterior draws. +There is a wide array of packages available for Bayesian model fitting via Markov chain Monte Carlo methods, including \proglang{WinBUGS} [@Lunn2000], \proglang{JAGS} [@Plummer2003] and \proglang{Stan} [@Carpenter2017], that seamlessly interface with R through packages such as \pkg{R2WinBUGS}, \pkg{R2jags} [@Su2015] and \pkg{rstan} [@rstan2021]. These packages require coding in \proglang{R} and additional model specification in custom languages---which might involve non-trivial coding, extensive debugging and optimisation that is often time consuming and requires specialist expertise, all of which might add a learning/application barrier to many potential users. Several extension packages which aim to reduce that barrier have recently become available, principally \pkg{brms} [@Burkner2017], that allows a broad range of models to be easily fitted using \pkg{rstan} [@rstan2021] or \pkg{cmdstanr} [@cmdstanr2022] through simpler \pkg{lme4}-like formula syntax. However, even with packages like \pkg{brms}, Bayesian model fitting can be difficult to automate across all potential use cases, particularly with respect to specifying valid initial parameter values and appropriate priors. In addition, as was the motivation for the development of the \pkg{drc} package in the frequentist setting, the \proglang{R} code required for fitting non-linear models and extracting the relevant outputs (e.g., NEC, ECx) and their standard errors can be cumbersome in practice, and even more so in the Bayesian setting where model fits contain multiple posterior draws. The greater complexity associated with Bayesian model fitting has likely hindered the uptake of Bayesian statistics for CR threshold derivation across the broader ecotoxicology and toxicology communities, who may not have access to specialist statistical expertise [@Fisher2019]. \pkg{bayesnec} version `r packageVersion("bayesnec")` (available on CRAN) builds upon an implementation of the NEC model described in @Fox2010 and @Pires2002. The \pkg{bayesnec} package provides an accessible interface specifically for fitting NEC and ECx and other CR models using Bayesian methods. A variety of models can be specified based on the known distribution of the "concentration" or "dose" variable (the predictor, x) as well as the "response" (y) variable. The model formula, including priors and initial values required to call \pkg{brms} are automatically generated based on information contained in the supplied data. A range of tools is supplied to aid the user in interrogating model fits, plotting and generating predicted values, as well as extracting the standard outputs, such as NEC and ECx, either as a full posterior draw or in summary form. @@ -195,7 +176,7 @@ $y_i = 0 + (\tau - 0 + e^{\alpha} x)/ (1 + e^{e^{\beta} (x_i - \omega)})$ with the following \code{brmsformula}: \code{`r fix_form(as.character(show_params("ecxhormebc4")[[1]][[1]]))`}. The model is 0-bounded, thus not suitable for Gaussian response data or the use of a \code{"logit"} or \code{"log"} link function. \begin{figure}[ht] - \centreing + \centering \includegraphics[width=1\textwidth]{../vignettes/vignette-fig-exmp2b-theoretical_ecx_curves.png} \caption{Representative shapes of currently implemented \pkg{bayesnec} \textit{EC\textsubscript{x}} models.} \label{fig1} @@ -246,7 +227,7 @@ $y_i = \tau e^{-e^{\beta} ((x_i - \eta) f(x_i, \eta))^{e^\epsilon} f(x_i, \eta)} with the following \code{brmsformula}: \code{`r fix_form(as.character(show_params("necsigm")[[1]][[1]]))`}. The model is 0-bounded, thus not suitable for Gaussian response data or the use of a \code{"logit"} or \code{"log"} link function. Estimation of No-Effect-Concentrations using this model are not currently recommended without further testing, as the behaviour is currently unstable, see supplementary material in @fisher2023ieam. \begin{figure}[ht] - \centreing + \centering \includegraphics[width=1\textwidth]{../vignettes/vignette-fig-exmp2b-theoretical_nec_curves.png} \caption{Representative shapes of currently implemented \pkg{bayesnec} \textit{NEC} models.} \label{fig2} @@ -346,7 +327,7 @@ timesimplefit <- system.time({ }) ``` -If a recognized model name is provided, a single model of the specified type is fit, and \code{bnec} returns an object of class \code{bayesnecfit}. If a vector of two or more of the available models are supplied, or if one of the model-sets is specified, \code{bnec} returns a model object of class \code{bayesmanecfit} containing Bayesian model averaged predictions for the supplied models, providing they were successfully fitted (see \ref{modavg} above, and the help file of \code{bnec} for further details). By default, \code{bnec} sets the number of chains to 4, the number of iterations per chain to 10,000, and the size of the warm-up period to 4/5 of the number of iterations (i.e 8,000 by default). +If a recognized model name is provided, a single model of the specified type is fit, and \code{bnec} returns an object of class \code{bayesnecfit}. If a vector of two or more of the available models are supplied, or if one of the model-sets is specified, \code{bnec} returns a model object of class \code{bayesmanecfit} containing Bayesian model averaged predictions for the supplied models, providing they were successfully fitted (see \ref{modavg} above, and the help file of \code{bnec} for further details). By default, \code{bnec} sets the number of chains to 4, the number of iterations per chain to 10,000, and the size of the warm-up period to 4/5 of the number of iterations (i.e., 8,000 by default). \code{bnec} will guess the most likely distribution for the response variable. This "guess" is achieved through the internal function \code{set_distribution}. This algorithm will assume a Binomial distribution for the response if data are integers and \code{trials(...)} is passed in the formula call; Poisson if data are integers and there are no \code{trials(...)}; Gamma if the data are continuous, zero bounded and contain values greater than one; Beta if data are continuous and bounded to zero and one; and Gaussian if data are continuous and contain negative values. The \code{family} can be set manually via the usual \proglang{R} syntax of calling the argument \code{family} and specifying the desired distribution. \pkg{bayesnec} currently supports the use of the above listed families, as well as the Negative-Binomial and Beta-binomial families that can be used in the case of over-dispersed Binomial and Poisson families respectively. @@ -356,13 +337,13 @@ In the example here, the model was fitted assuming a Beta distribution on an ide Models fitted by \code{bnec} will invariably inherit a class \code{bnecfit} which carries three exclusive methods: \code{+}, \code{c} and \code{update}. The first two are used to append one or multiple models to an existing fit, whereas the latter is used to update the fitting characteristics of an existing model (e.g., change the number of iterations or warm-up period, or simply change the HMC fitting parameters). -When \code{bnec} fits a single CR model type, the output object also inherits the \code{bayesnecfit} class. This class contains the \code{brmsfit} object in addition to the mean predicted values and summary estimates of each model parameter. Because the original motivation in the development of \pkg{bayesnec} was the estimation of no-effect toxicity values, by default \code{bnec} also returns a full posterior distribution of the either the NEC (for **nec** models) or the NSEC (for **ecx** models, see @Fisher2023) estimate. If \code{bnec} fits a custom set of models, or a package-pre-defined model-set (e.g. \code{model = "decline"} in the input formula), the output object is going to inherit the \code{bayesmanecfit} class. Differently from a \code{bayesnecfit} object, \code{bayesmanecfit} comprises a model weighted estimate of predicted posterior values of N(S)EC. This is a weighted combined average of the NEC or the NSEC values, for all **nec** and **ecx** models respectively, as described in @fisher2023ieam. +When \code{bnec} fits a single CR model type, the output object also inherits the \code{bayesnecfit} class. This class contains the \code{brmsfit} object in addition to the mean predicted values and summary estimates of each model parameter. Because the original motivation in the development of \pkg{bayesnec} was the estimation of no-effect toxicity values, by default \code{bnec} also returns a full posterior distribution of the either the NEC (for **nec** models) or the NSEC [for **ecx** models, see @Fisher2023] estimate. If \code{bnec} fits a custom set of models, or a package-pre-defined model-set (e.g., \code{model = "decline"} in the input formula), the output object is going to inherit the \code{bayesmanecfit} class. Differently from a \code{bayesnecfit} object, \code{bayesmanecfit} comprises a model weighted estimate of predicted posterior values of N(S)EC. This is a weighted combined average of the NEC or the NSEC values, for all **nec** and **ecx** models respectively, as described in @fisher2023ieam. Regardless of whether \code{bnec} generates a \code{bayesnecfit} or \code{bayesmanecfit} class, the underlying \code{brmsfit} object can be extracted using the function \code{pull_brmsfit}. The \code{brmsfit} contains all of the information usually returned from a call to \code{brm}, including the posterior samples of all parameters in the model, from which predictions can be made and custom probabilities calculated. Both \code{bayesnecfit} and \code{bayesmanecfit} classes contain methods for \code{summary}, \code{print}, \code{predict}, \code{model.frame}, \code{fitted}, \code{posterior_predict}, \code{posterior_edpred} and plotting (\code{plot} and \newline \code{autoplot}). Wherever possible, these methods have been implemented such they are consistent with other model fitting packages in \proglang{R}, and in particular \pkg{brms}. We have also implemented a range of custom functions for extracting effect concentrations and related threshold values (\code{nec}, \code{ecx} and \code{nsec}) that, in the case of a \code{bayesmanecfit}, return model weighted estimates. -The \code{summary} method provides the usual summary of model parameters and any relevant model fit statistics as returned in the underlying \code{brm} model fit(s). In the specific case of a \code{bayesmanecfit} object, the summary includes a list of fitted models, their respective model weights, and a model-averaged NEC---which is reported with a warning when it contains NSEC values (see below Section \ref{modsuit}). +The \code{summary} method provides the usual summary of model parameters and any relevant model fit statistics as returned in the underlying \code{brm} model fit(s). In the specific case of a \code{bayesmanecfit} object, the summary includes a list of fitted models, their respective model weights, and a model-averaged no-effect toxicity estimate. Where the fitted model(s) are NEC models (threshold models, containing a step function) the no-effect estimate is a true no-effect-concentration (NEC, see @Fox2010). Where the fitted model(s) are smooth *ECx* models with no step function, the no-effect estimate is a no-significant-effect-concentration (NSEC, see @Fisher2023). In the case of a \code{\link{bayesmanecfit}} that contains a mixture of both NEC and ECx models, the no-effect estimate is a model averaged combination of the NEC and NSEC estimates, and is reported as the N(S)EC (see @fisher2023ieam). ```{r summary, warning = FALSE, message = FALSE, dependson="example"} summary(fit) @@ -370,17 +351,17 @@ summary(fit) As mentioned above, the visualization of a particular model fit can be done via either \pkg{base} \proglang{R} (\code{plot}) and \pkg{ggplot2} [@ggplot] (\code{autoplot}). Here we pass the additional argument \code{xform} which ensures that the NEC estimate is transformed to the proper scale (because the original formula contained \code{log(concentration)}): -```{r base-plot, dependson="example", fig.height = 3.5, fig.width = 4, fig.cap = "\\pkg{ggplot2} \\code{autoplot} of the example fit. The solid black line is the fitted median of the posterior prediction, dashed black lines are the 95\\% credible intervals, and the vertical lines show the estimated \\textit{NEC} value."} +```{r base-plot, dependson="example", fig.height = 3.5, fig.width = 4, fig.cap = "\\pkg{ggplot2} \\code{autoplot} of the example fit. The solid black line is the fitted median of the posterior prediction, dashed black lines are the 95\\% credible intervals, and the vertical lines show the estimated \\textit{NEC} value.", out.width = "60%"} round_digits <- function(x) sprintf("%.2f", x) autoplot(fit, xform = exp) + scale_x_continuous(trans = "log", labels = round_digits) ``` -By default the plot shows the fitted posterior curve with 95% credible intervals, along with an estimate of the $\eta = \text{NEC}$ value. For more examples using \pkg{bayesnec} models for inference please see the on-line the vignettes at https://open-aims.github.io/bayesnec/articles/, as well as Section \ref{modsuit} below. +By default the plot shows the fitted posterior curve with 95\% credible intervals, along with an estimate of the $\eta = \text{NEC}$ value. For more examples using \pkg{bayesnec} models for inference please see the on-line the vignettes at https://open-aims.github.io/bayesnec/articles/, as well as Section \ref{modsuit} below. ## Model diagnostics {short-title="Model diagnostics" #moddiag} -\pkg{bayesnec} will return warning messages as part of the summary method where parts of the model have not converged (rhat, $\widehat{R}$ > 1.05, see @vehtari2021rank) and indicate the number of any divergent transitions (if any). These messages include guidance on running more iterations, adjusting priors and or adjusting other fitting criteria, such as adapt_delta. The summary method for a \code{bayesnecfit} object also indicates the effect sample size for estimates of each of the parameters, and for a \code{bayesmanecfit} a warning is returned if any of the models have parameters with an effective sample size of <100. +\pkg{bayesnec} will return warning messages as part of the summary method where parts of the model have not converged [rhat, $\widehat{R}$ > 1.05; see @vehtari2021rank] and indicate the number of any divergent transitions (if any). These messages include guidance on running more iterations, adjusting priors and or adjusting other fitting criteria, such as adapt_delta. The summary method for a \code{bayesnecfit} object also indicates the effect sample size for estimates of each of the parameters, and for a \code{bayesmanecfit} a warning is returned if any of the models have parameters with an effective sample size of <100. In addition to the diagnostic information reported by the summary method, a range of tools is available to assess model fit, including an estimate of overdispersion (for relevant families), an extension of the \pkg{brms} \code{rhat} function that can be applied to both \code{bayesnecfit} and \code{bayesmanecfit} model objects, and a function \code{check_chains} that can be used to visually assess chain mixing and stability. @@ -398,18 +379,19 @@ Several helper functions have been included that allow the user to add or drop m The priors used in the default model fit can be extracted using \code{pull_prior}, and a sample or plot of prior values can be obtained from the individual \pkg{brms} model fits through the function \code{sample_priors} which samples directly from the \code{prior} element in the \code{brmsfit} object (\code{pull_brmsfit(fit) |> prior_summary() |> sample_priors()}, see \autoref{fig:sampleprior}). -```{r sampleprior, dependson="example", echo=FALSE, fig.height = 3.5, fig.width = 4.5, fig.cap = "Frequency histograms of samples of the default priors used by \\pkg{bayesnec} for fitting the \\code{nec3param} model to the example data."} +```{r sampleprior, dependson="example", echo=FALSE, fig.height = 2.5, fig.width = 4.5, fig.cap = "Frequency histograms of samples of the default priors used by \\pkg{bayesnec} for fitting the \\code{nec3param} model to the example data.", out.width = "80%"} pull_brmsfit(fit) |> prior_summary() |> sample_priors() ``` + We can also use the function \code{check_priors} (based on the \code{hypothesis} function of \pkg{brms}) to assess how the posterior probability density for each parameter differs from that of the prior. Here we show the prior and posterior probability densities for the parameters in the `nec4param` model fit (`check_priors(fit)`, see \autoref{fig:checkpriorsingle}). There is also a \code{bayesmanecfit}-specific method that can be used to sequentially view all plots in a \code{bnec} call with multiple models, or write to a pdf as in \code{check_chains}. -```{r checkpriorsingle, dependson="example", echo=FALSE,fig.height = 3.5, fig.width = 6, fig.cap = "A comparison of the prior and posterior parameter probability densities for the \\code{nec3param} model fit to the example data."} +```{r checkpriorsingle, dependson="example", echo=FALSE, fig.height = 2.5, fig.width = 6, fig.cap = "A comparison of the prior and posterior parameter probability densities for the \\code{nec3param} model fit to the example data.", out.width = "100%"} check_priors(fit) ``` ## Model comparison -With \pkg{bayesnec} we have included a function (`compare_posterior`) that allows bootstrapped comparisons of posterior predictions. This function allows the user to fit several different \code{bnec} model fits and compare differences in their posterior predictions. Comparisons can be made across the model fits for individual threshold estimates (e.g. NEC, NSEC or ECx) or across a range of predictor values. Usage is demonstrated in the relevant vignette at https://open-aims.github.io/bayesnec/articles/example4.html by comparing different types of models and model-sets using a single data set. However, the intent of this function is to allow comparison across different data sets that might represent, for example, different levels of a fixed factor covariate. For example, this function has been used to compare toxicity of herbicides across three different climate scenarios, to examine the cumulative impacts of pesticides and global warming on corals [@flores2021]. +With \pkg{bayesnec} we have included a function (`compare_posterior`) that allows bootstrapped comparisons of posterior predictions. This function allows the user to fit several different \code{bnec} model fits and compare differences in their posterior predictions. Comparisons can be made across the model fits for individual threshold estimates (e.g., NEC, NSEC or ECx) or across a range of predictor values. Usage is demonstrated in the relevant vignette at https://open-aims.github.io/bayesnec/articles/example4.html by comparing different types of models and model-sets using a single data set. However, the intent of this function is to allow comparison across different data sets that might represent, for example, different levels of a fixed factor covariate. For example, this function has been used to compare toxicity of herbicides across three different climate scenarios, to examine the cumulative impacts of pesticides and global warming on corals [@flores2021]. At this time \code{bnec} does not allow for an inclusion of an interaction with a fixed factor. Including an interaction term within each of the non-linear models implemented in \pkg{bayesnec} is relatively straightforward, and may be introduced in future releases. However, in many cases the functional form of the response may change with different levels of a given factor. The substantial complexity of defining all possible non-linear model combinations at each factor level means it unlikely this could be feasibly implemented in \pkg{bayesnec} in the short term. In the meantime the greatest flexibility in the functional form of individual model fits can be readily obtained using models fitted independently to data within each factor level. @@ -526,7 +508,7 @@ plot_data <- rbind(bnec_plot_data, drc_plot_data) |> ) ``` -```{r drccomparisonplot, echo = FALSE, fig.width = 6, fig.height = 3, fig.cap = "A comparison of the \\pkg{bayesnec} and \\pkg{drc} model fits and estimated NEC values for the \\code{nec3param} model, fit to data on maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \\textit{Seriatopora hystrix} exposed to elevated hexazinone and tebuthiuron (range 0.3 to 1000 ug per L) for 10 h."} +```{r drccomparisonplot, echo = FALSE, fig.width = 6, fig.height = 3, fig.cap = "A comparison of the \\pkg{bayesnec} and \\pkg{drc} model fits and estimated NEC values for the \\code{nec3param} model, fit to data on maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \\textit{Seriatopora hystrix} exposed to elevated hexazinone and tebuthiuron (range 0.3 to 1000 $\\mu$g per L) for 10 h."} library("ggplot2") combine_fits <- function(x) { ltys <- rep(rep(c(1, 2, 2), length(unique(x$model))), 2)[-c(36:35)] @@ -576,7 +558,7 @@ combine_fits <- function(x) { combine_fits(plot_data) ``` -While \pkg{drc} is an excellent tool for fitting CR models using frequentist methods and is widely used (@Ritz2016 is cited nearly 2,000 times), \pkg{bayesnec} provides a Bayesian alternative using similarly simple syntax. The advantages of Bayesian methods in this setting are numerous, and include direct characterisation of parameter uncertainty and posterior predicted samples that provide a valuable resource for model inference (such as comparisons of relative toxicity, see Section \ref{iexample} and \autoref{fig:necplots}). In addition, we have observed that even the use of only weakly informative priors tends to improve the reliability of model fits compared to \pkg{drc}, and this may be true of MLE estimation more generally [@krull2020comparing]. +While \pkg{drc} is an excellent tool for fitting CR models using frequentist methods and is widely used [@Ritz2016 is cited nearly 2,000 times], \pkg{bayesnec} provides a Bayesian alternative using similarly simple syntax. The advantages of Bayesian methods in this setting are numerous, and include direct characterisation of parameter uncertainty and posterior predicted samples that provide a valuable resource for model inference (such as comparisons of relative toxicity, see Section \ref{iexample} and \autoref{fig:necplots}). In addition, we have observed that even the use of only weakly informative priors tends to improve the reliability of model fits compared to \pkg{drc}, and this may be true of MLE estimation more generally [@krull2020comparing]. # Illustrative example {short-title="Illustrative example" #iexample} @@ -584,7 +566,7 @@ So far we have demonstrated the basic usage of \pkg{bayesnec} and compared the r ## Example case study -In our case study, the no-effect-toxicity values of a range of herbicides are first estimated and then their relative toxicity is compared. The response data are the maximum effective quantum yield (${\Delta F / Fm'}$) of symbiotic dinoflagellates still in the host tissue of the coral *Seriatopora hystrix* (in hospite or in vivo). ${\Delta F / Fm'}$ was calculated from Chlorophyll fluorescence parameters measured using a DIVING-PAM chlorophyll fluorometer (Walz) as described in more detail in @jones2003meps and @jones2003effects. The corals were exposed to elevated levels of eight different herbicides (Irgarol 1051, ametryn, diuron, hexazinone, atrazine, simazine, tebuthiuron, ioxynil) at concentrations ranging from 0.3 to 1000 ${\mu g/L}$) for 10 h. Data for ioxynil were excluded from analysis here as this herbicide did not show sufficient response at the maximum concentration. +In our case study, the no-effect-toxicity values of a range of herbicides are first estimated and then their relative toxicity is compared. The response data are the maximum effective quantum yield (${\Delta F / Fm'}$) of symbiotic dinoflagellates still in the host tissue of the coral *Seriatopora hystrix* (in hospite or in vivo). ${\Delta F / Fm'}$ was calculated from Chlorophyll fluorescence parameters measured using a DIVING-PAM chlorophyll fluorometer (Walz) as described in more detail in @jones2003meps and @jones2003effects. The corals were exposed to elevated levels of eight different herbicides (Irgarol 1051, ametryn, diuron, hexazinone, atrazine, simazine, tebuthiuron, ioxynil) at concentrations ranging from 0.3 to 1000 $\mu$g / L) for 10 h. Data for ioxynil were excluded from analysis here as this herbicide did not show sufficient response at the maximum concentration. ## Single herbicide analysis {short-title="Single herbicide" #isingle} @@ -630,9 +612,9 @@ Once we are satisfied with the model fits, we can examine the model statistics u summary(manecfit_ametryn) ``` -For a \code{bayesmanecfit} object with multiple model fits, \code{summary} first displays the class, the \code{family} and links that have been used for the model fits, the number of posterior draws contained within each model fit, and a table of the model weights for each model, showing the Widely Applicable Information Criterion (waic, @watanabe2010asymptotic) from \pkg{loo} and weights (wi) which are based by default on the \code{"pseudobma"} method (\code{method = "pseudobma"}) with Bayesian bootstrap (\code{BB = TRUE}) (see above), but this can be easily modified via argument \code{loo_controls} using \code{amend}. For the ametryn data set, weights are highest for the **ecx4param** model, followed closely by the **ecxll4** model, with some lesser support for the \code{ecxwb2} model, and a very small amount of support for the two *NEC* models (*nec3param* and *nec4parm*). +For a \code{bayesmanecfit} object with multiple model fits, \code{summary} first displays the class, the \code{family} and links that have been used for the model fits, the number of posterior draws contained within each model fit, and a table of the model weights for each model, showing the Widely Applicable Information Criterion [waic, @watanabe2010asymptotic] from \pkg{loo} and weights (wi) which are based by default on the \code{"pseudobma"} method (\code{method = "pseudobma"}) with Bayesian bootstrap (\code{BB = TRUE}) (see above), but this can be easily modified via argument \code{loo_controls} using \code{amend}. For the ametryn data set, weights are highest for the **ecx4param** model, followed closely by the **ecxll4** model, with some lesser support for the \code{ecxwb2} model, and a very small amount of support for the two *NEC* models (*nec3param* and *nec4parm*). -Because \pkg{bayesnec} was primarily developed for estimating no-effect-concentrations, an estimate of model averaged NEC is also provided, and in this case with a note that the weighted model averaged estimate contains NSEC [@Fisher2023] values in the case of the fitted *ECx* models. In units of log concentration, the estimated no-effect toxicity value for ametryn is `r signif(manecfit_ametryn$w_nec["Estimate"], 3)`, which is equivalent to `r signif(exp(manecfit_ametryn$w_nec), 3)["Estimate"]` ${\mu}g/L$. The 95% credible intervals are also provided, based on the 0.025 and 97.5 quantiles of the weighted pooled posterior sample. +Because \pkg{bayesnec} was primarily developed for estimating no-effect-concentrations, an estimate of the model averaged no-effect toxicity estimate is also provided. In this case the no-effect toxicity estimate is reported as an N(S)EC value as the model set contains a combination of \code{nec} and \code{ecx} models. In units of log concentration, the N(S)EC value for ametryn is `r signif(manecfit_ametryn$w_nec["Estimate"], 3)`, which is equivalent to `r signif(exp(manecfit_ametryn$w_nec), 3)["Estimate"]` $\mu$g / L. The 95\% credible intervals are also provided, based on the 0.025 and 97.5 quantiles of the weighted pooled posterior sample. Finally, Bayesian $R^2$ estimates are also provided [@gelman2019], as an indicator of overall model fit. This is useful because model weights are always relative to the models actually included in the model set for the given data. The $R^2$ provides an indicator of model fit that can be compared objectively across data sets as an indication of the quality of the fit of any of the supplied models to the data. @@ -648,7 +630,7 @@ We can also plot the model averaged fit that is used to derive the model average ametryn_plot <- autoplot(manecfit_ametryn) ``` -```{r fullbayesmanecplotametrynALLplot, echo = FALSE, fig.width = 6, fig.height = 8, fig.cap = "Individual model fits to the ametryn data set, showing the estimated no effect concentration for each. Data are the maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \\textit{Seriatopora hystrix} exposed to elevated ametryn (range 0.3 to 1000 ug per L) for 10 h. N(S)EC values presented are the median and 95\\% credible intervals of the posterior estimates of the NEC parameter obtained for all \\code{NEC} models, and the posterior predicted NSEC values estimated from all smooth \\code{ECx} models."} +```{r fullbayesmanecplotametrynALLplot, echo = FALSE, fig.width = 6, fig.height = 11, fig.cap = "Individual model fits to the ametryn data set, showing the estimated no effect concentration for each. Data are the maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \\textit{Seriatopora hystrix} exposed to elevated ametryn (range 0.3 to 1000 $\\mu$g per L) for 10 h. N(S)EC values presented are the median and 95\\% credible intervals of the posterior estimates of the NEC parameter obtained for all \\code{NEC} models, and the posterior predicted NSEC values estimated from all smooth \\code{ECx} models.", out.width = "65%"} ametryn_plot_all <- autoplot(manecfit_ametryn, all_models = TRUE, xform = exp) ametryn_plot_all + ylab(expression(""*Delta*"F"/"F m'")) + @@ -656,7 +638,7 @@ ametryn_plot_all + scale_x_continuous(trans = "log", labels = round_digits) ``` -```{r fullbayesmanecplotametrynplot, echo = FALSE, fig.width = 6, fig.height = 3, fig.cap = "Full model averaged bayesmanecfit to the ametryn data set, showing the estimated model-averaged no effect concentration. Data are the maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \\textit{Seriatopora hystrix} exposed to elevated ametryn (range 0.3 to 1000 ug per L) for 10 h. N(S)EC values presented are model averaged posterior densities of the NEC parameter obtained from all fitted \\code{NEC} models, and the NSEC values estimated from all smooth \\code{ECx} models, summarised as a median and 95\\% credible intervals. Only the \\code{decline} model set was used (i.e. hormesis models were excluded)."} +```{r fullbayesmanecplotametrynplot, echo = FALSE, fig.height = 3.5, fig.width = 4, fig.cap = "Full model averaged bayesmanecfit to the ametryn data set, showing the estimated model-averaged no effect concentration. Data are the maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \\textit{Seriatopora hystrix} exposed to elevated ametryn (range 0.3 to 1000 $\\mu$g per L) for 10 h. N(S)EC values presented are model averaged posterior densities of the NEC parameter obtained from all fitted \\code{NEC} models, and the NSEC values estimated from all smooth \\code{ECx} models, summarised as a median and 95\\% credible intervals. Only the \\code{decline} model set was used (i.e., hormesis models were excluded).", out.width = "60%"} ametryn_plot <- autoplot(manecfit_ametryn, xform = exp) ametryn_plot + ylab(expression(""*Delta*"F"/"F m'")) + @@ -727,7 +709,7 @@ This collated table of model weights shows that the best fitting models vary sub ```{r weightsTab, echo=FALSE} -knitr::kable(modtab, caption="Fitted valid models and their relative pseudo-BMA weights for CR curves for the effects of seven herbicides on maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates of the coral \\textit{Seriatopora hystrix}.") +knitr::kable(modtab, caption="Fitted valid models and their relative pseudo-BMA weights for CR curves for the effects of seven herbicides on maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates of the coral \\textit{Seriatopora hystrix}. Ame. = Ametryn; Atr. = Atrazine; Diu. = Diuron; Hex. = Hexazinone; Irg. = Irgarol; Sim. = Simazine; Teb. = Tebuthiuron.") ``` We use the \pkg{bayesnec} \code{autoplot}, together with \pkg{ggpubr} [@ggpubr] to make a panel plot of the weighted model averaged predicted curves for all seven herbicides (\autoref{fig:fullbayesmanecplot}). @@ -749,7 +731,7 @@ figure <- ggpubr::ggarrange( ) ``` -```{r fullbayesmanecplot, echo = FALSE, fig.width = 6, fig.height = 8, fig.cap = "Full model averaged bayesmanecfits to seven phototoxicity data sets, showing estimated no effect concentrations. Data are the maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \\textit{Seriatopora hystrix} exposed to elevated Irgarol 1051, ametryn, diuron, hexazinone, atrazine, simazine, or tebuthiuron (range 0.3 to 1000 ug per L) for 10 h. N(S)EC values presented are model averaged posterior densities of the NEC parameter obtained from all fitted \\code{NEC} models, and the NSEC values estimated from all smooth \\code{ECx} models, summarised as a median and 95\\% credible intervals. Only the \\code{decline} model set was used (i.e. hormesis models were excluded)."} +```{r fullbayesmanecplot, echo = FALSE, fig.width = 6, fig.height = 8, fig.cap = "Full model averaged bayesmanecfits to seven phototoxicity data sets, showing estimated no effect concentrations. Data are the maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \\textit{Seriatopora hystrix} exposed to elevated Irgarol 1051, ametryn, diuron, hexazinone, atrazine, simazine, or tebuthiuron (range 0.3 to 1000 $\\mu$g per L) for 10 h. N(S)EC values presented are model averaged posterior densities of the NEC parameter obtained from all fitted \\code{NEC} models, and the NSEC values estimated from all smooth \\code{ECx} models, summarised as a median and 95\\% credible intervals. Only the \\code{decline} model set was used (i.e., hormesis models were excluded)."} ggpubr::annotate_figure( figure, left = ggpubr::text_grob(expression(""*Delta*"F"/"F m'"), rot = 90), bottom = ggpubr::text_grob(expression("Concentration "~"("*mu*"g/L)")) @@ -758,7 +740,7 @@ ggpubr::annotate_figure( Across the seven herbicides, the \code{bayesmanecfit} model averaged fits model the input data very well, with predictions generally very confident (\autoref{fig:fullbayesmanecplot}). The slight uncertainty in the appropriate model form for the ametryn data set is evident in the weighted average predicted values as a broader confidence band at the estimated position of the NEC threshold point (\autoref{fig:fullbayesmanecplot}). -The values presented on the plot in \autoref{fig:fullbayesmanecplot} as N(S)EC are model averaged posterior densities of the NEC parameter obtained from all fitted *NEC* models, and the NSEC values estimated from all smooth *ECx* models. These values are the \pkg{bayesnec} estimates for the no-(significant)-effect concentration required for the integration of this toxicity data into the relevant regulatory framework in Australia, the Australian and New Zealand Water Quality Guidelines [@anzg]. While the recommendation that NEC is the preferred toxicity estimate in this framework is well established [@Warne2015, @Warne2018c], use of the NSEC is relatively new [@Fisher2023] and while yet to gain formal approval for use in the Australian setting presents a potential alternative no-effect estimate for smooth curves. +The values presented on the plot in \autoref{fig:fullbayesmanecplot} as N(S)EC are model averaged posterior densities of the NEC parameter obtained from all fitted *NEC* models, and the NSEC values estimated from all smooth *ECx* models. These values are the \pkg{bayesnec} estimates for the no-(significant)-effect concentration required for the integration of this toxicity data into the relevant regulatory framework in Australia, the Australian and New Zealand Water Quality Guidelines [@anzg]. While the recommendation that NEC is the preferred toxicity estimate in this framework is well established [@Warne2015; @Warne2018c], use of the NSEC is relatively new [@Fisher2023] and while yet to gain formal approval for use in the Australian setting presents a potential alternative no-effect estimate for smooth curves. Finally, we also use the `compare_posterior` function to extract and plot the weighted averaged posterior samples for the N(S)EC toxicity values for all herbicides (\autoref{fig:necplots}). This shows clearly that irgarol, diuron and ametryn are the most toxic, and exhibit relatively similar toxicity, with their posterior densities substantially overlapping (\autoref{fig:necplots}). The herbicide tebuthiuron is the least toxic of these seven, followed by simazine, atrazine and finally hexazinone, which exhibits intermediate toxicity (\autoref{fig:necplots}). `compare_posterior` also calculates the probability of difference in toxicity across the herbicides, which confirm the visual results and can be used to infer significant differences in toxicity response (\autoref{tab:probdiffs}). @@ -771,20 +753,26 @@ colnames(prob_diff) <- stringr::str_to_sentence(colnames(prob_diff)) prob_diff$Herbicide <- stringr::str_to_sentence(prob_diff$Herbicide) ``` -```{r necplots, echo = FALSE, fig.width = 6, fig.height = 4, fig.cap = "Posterior distributions for N(S)EC toxicity estimates for the effect of seven herbicides on maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates of the coral \\textit{Seriatopora hystrix}. Shown are medians with 80\\% uncertainty intervals."} +```{r necplots, echo = FALSE, fig.width = 6, fig.height = 4, fig.cap = "Posterior distributions for N(S)EC toxicity estimates for the effect of seven herbicides on maximum effective quantum yield ($\\Delta F / Fm'$) of symbiotic dinoflagellates of the coral \\textit{Seriatopora hystrix}. Shown are medians with 80\\% uncertainty intervals."} +cb_pal <- c( + "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7" +) post_comp$posterior_data |> dplyr::mutate(conc = exp(value)) |> ggplot(data = _, mapping = aes(x = conc)) + geom_density(mapping = aes(group = model, colour = model, fill = model), alpha = 0.3) + - xlab(expression("Concentration "~"("*mu*"g/L)")) + + labs(x = expression("Concentration "~"("*mu*"g/L)"), y = "Density", + colour = "Model", fill = "Model") + + scale_fill_manual(values = cb_pal) + + scale_colour_manual(values = cb_pal) + scale_x_continuous(trans = "log", breaks = c(0.3, 1, 3, 10, 30), labels = c(0.3, 1, 3, 10, 30)) + theme_classic() ``` ```{r probdiffs, echo=FALSE} -knitr::kable(prob_diff, caption="Probability of no-difference in no-effect toxicity for seven herbicides. Values are based on the proportional overlap in predicted posterior probability density of the N(S)EC estimates.") +knitr::kable(prob_diff, caption="Probability of no-difference in no-effect toxicity for seven herbicides. Values are based on the proportional overlap in predicted posterior probability density of the N(S)EC estimates. Ame. = Ametryn; Atr. = Atrazine; Diu. = Diuron; Hex. = Hexazinone; Irg. = Irgarol; Sim. = Simazine; Teb. = Tebuthiuron.") ``` @@ -799,9 +787,9 @@ The model averaging approach implemented in \pkg{bayesnec} is widely used in a r ## Model suitability for \textit{NEC} and \textit{EC\textsubscript{x}} estimation {short-title="Model suitability for NEC and ECx estimation" #modsuit} -In principle all models provide an estimate for "no-effect" toxicity concentration. As seen above, for model strings with **nec** as a prefix, the NEC is directly estimated as parameter $\eta = \text{NEC}$ in the model, as per @Fox2010. On the other hand, model strings with **ecx** as a prefix are continuous curve models with no threshold, typically used for extracting ECx values from concentration-response data. In this instance, the NEC reported is actually the No-Significant-Effect-Concentration (NSEC, see details in @Fisher2023), defined as the concentration at which there is a user supplied certainty (based on the Bayesian posterior estimate) that the response falls below the estimated value of the upper asymptote ($\tau = \text{top}$) of the response (i.e., the response value is significantly lower than that expected in the case of no exposure). The default value for this NSEC proportion is 0.01, which corresponds to an alpha value (Type-I error rate) of 0.01 for a one-sided test of significance. The NSEC concept has been recently explored using simulation studies and case study examples, and when combined with the NEC estimates of threshold models within a model‐ +In principle all models provide an estimate for a "no-effect" toxicity concentration. As seen above, for model strings with **nec** as a prefix, the NEC is directly estimated as parameter $\eta = \text{NEC}$ in the model, as per @Fox2010. On the other hand, model strings with **ecx** as a prefix are continuous curve models with no threshold, typically used for extracting ECx values from concentration-response data. In this instance, the no-effect toxicity value reported is actually the No-Significant-Effect-Concentration [NSEC, see details in @Fisher2023], defined as the concentration at which there is a user supplied certainty (based on the Bayesian posterior estimate) that the response falls below the estimated value of the upper asymptote ($\tau = \text{top}$) of the response (i.e., the response value is significantly lower than that expected in the case of no exposure). The default value for this NSEC proportion is 0.01, which corresponds to an alpha value (Type-I error rate) of 0.01 for a one-sided test of significance. The NSEC concept has been recently explored using simulation studies and case study examples, and when combined with the NEC estimates of threshold models within a model‐ averaging approach, can yield robust estimates of N(S)EC and of their uncertainty within a single -analysis framework [@fisher2023ieam]. Both NEC and NSEC can be calculated from fitted models using the functions \code{nec} and \code{nsec}. +analysis framework [@fisher2023ieam]. Both NEC and NSEC can be calculated from fitted models using the functions \code{nec} and \code{nsec}. The model averaged N(S)EC is automatically returned as part of the fitted model for any \code{bayesmanecfit} that contains a combination of both \code{"nec"} and \code{"ecx"} models. The significance level used can be adjusted from the default value of 0.01 using \code{amend}. ECx estimates can be equally obtained from both \code{"nec"} and \code{"ecx"} models. ECx estimates will usually be lower (more conservative) for \code{"ecx"} models fitted to the same data as \code{"nec"} models. There is ambiguity in the definition of ECx estimates from hormesis models---these allow an initial increase in the response [see @Mattson2008] and include models with the string **horme** in their name---as well as those that have no natural lower bound on the scale of the response (models with the string **lin** in their name, in the case of Gaussian response data). For this reason, the \code{ecx} function has arguments \code{hormesis_def} and \code{type}, both character vectors indicating the desired behaviour. For \code{hormesis_def = "max"}, ECx values are calculated as a decline from the maximum estimates (i.e., the peak at $\eta = \text{NEC}$); and \code{hormesis_def = "control"} (the default) indicates that ECx values should be calculated relative to the control, which is assumed to be the lowest observed concentration. For \code{type = "relative"} ECx is calculated as the percentage decrease from the maximum predicted value of the response ($\tau = \text{top}$) to the minimum predicted value of the response (i.e., *relative* to the observed data). For \code{type = "absolute"} (the default) ECx is calculated as the percentage decrease from the maximum value of the response ($\tau = \text{top}$) to 0. For \code{type = "direct"}, a direct interpolation of the response on the predictor is obtained. @@ -877,15 +865,13 @@ working, \pkg{bayesnec} will not work. Instructions for installing these two pac The \pkg{bayesnec} package is a work in progress, and we welcome suggestions and feedback that will improve the package performance and function. Our goal is to make \pkg{bayesnec} as user friendly as possible, and capable of dealing with most real world CR modelling applications in the hope that Bayesian statistics will become more widely used in applied risk assessment. Please submit requests through the package [Issues](https://github.com/open-AIMS/bayesnec/issues) on GitHub. Some suggested future enhancements include: - - The addition of other custom families, such as the Tweedie distribution and ordered beta model. Currently \pkg{bayesnec} implements adjustments away from 0 (Gamma, Beta) or 1 (Beta) as a strategy for allowing modelling with these types of data using the closest most convenient statistical distribution. - There are no readily available distributions able to model data that includes 0 and 1 on the continuous scale in \pkg{brms} and \pkg{bayesnec} currently does 0 and 1 adjustments followed by modelling using a Beta distribution. The ordered beta model has been suggested as a better method for modelling continuous data with lower an upper bounds (see @Kubinec) that could be readily implemented in the \pkg{brms} customs families framework. - For data that are 0 to $\infty$ on the continuous scale the Tweedie distribution may prove a much better option than the current zero-bounded Gamma, and has been used extensively in fisheries research for biomass data [@Shono2008]. As this \code{family} is not currently available in \pkg{brms} this would also need to be implemented as a custom \code{family}, which for the Tweedie is not trivial. + - The addition of other families to accommodate a broader range of response variables. This could include a range of distributions already available in \pkg{brms}, such as the zero-inflated Poisson or zero-truncated Gaussian. In addition, there are other useful distributions currently unavailable in \pkg{brms}, such as the Tweedie distribution and ordered beta model. Currently \pkg{bayesnec} implements adjustments away from 0 (Gamma, Beta) or 1 (Beta) as a strategy for allowing modelling with these types of data using the closest most convenient statistical distribution. There are no readily available distributions able to model data that includes 0 and 1 on the continuous scale in \pkg{brms} and \pkg{bayesnec} currently does 0 and 1 adjustments followed by modelling using a Beta distribution. The ordered beta model has been suggested as a better method for modelling continuous data with lower an upper bounds [see @Kubinec] that could be readily implemented in the \pkg{brms} customs families framework. For data that are 0 to $\infty$ on the continuous scale the Tweedie distribution may prove a much better option than the current zero-bounded Gamma, and has been used extensively in fisheries research for biomass data [@Shono2008]. As this \code{family} is not currently available in \pkg{brms} this would also need to be implemented as a custom \code{family}, which for the Tweedie is not trivial. - A hypothesis method for testing against toxicity thresholds. The \pkg{brms} package includes a \code{hypothesis} function that allows for testing parameter estimates against specified criteria. This is used in \pkg{bayesnec} in the \code{check_prior} function, which is a wrapper that examines the deviation of each parameter in the given model relative to 0 as a means of generating posterior and prior probability density plots for comparison. However, an additional wrapper function could be developed that allows toxicity to be assessed, as measured through NEC, or ECx for example, against a required pre-defined threshold. Such a feature may be useful where toxicity testing is used as a trigger in risk management [for example, using whole-effluent-toxicity (WET) testing, @Karman2019]. # Acknowledgements -The development of \pkg{bayesnec} was supported by an AIMS internal grant. Usage, testing and functionality of both the \pkg{jagsNEC} and \pkg{bayesnec} packages were substantially aided through input from Joost van Dam, Andrew Negri, Florita Flores, Heidi Luter, Marie Thomas and Mikaela Nordborg. Florita Flores and Murray Logan provided valuable comments on the manuscript text. Ron Jones provided technical computing support. +The development of \pkg{bayesnec} was supported by an AIMS internal grant. Usage, testing and functionality of both the \pkg{jagsNEC} and \pkg{bayesnec} packages were substantially aided through input from Joost van Dam, Andrew Negri, Florita Flores, Heidi Luter, Marie Thomas and Mikaela Nordborg. Florita Flores and Murray Logan provided valuable comments on the manuscript text. Ron Jones provided technical computing support. # References diff --git a/article/article.bib b/article/article.bib index 7994a3bb..5fccedf4 100644 --- a/article/article.bib +++ b/article/article.bib @@ -166,6 +166,7 @@ @article{Fox2010 number = {2}, pages = {123--131}, title = {A {Bayesian} Approach for Determining the No Effect Concentration and Hazardous Concentration in Ecotoxicology}, + doi = {10.1016/j.ecoenv.2009.09.012}, volume = {73}, year = {2010} } @@ -546,14 +547,12 @@ @article{fisher2023ieam number = {n/a}, pages = {}, keywords = {Concentration–response modeling, Ecosystem protection, No-effect-concentration, Statistical ecotoxicology, Toxicity estimate}, - doi = {https://doi.org/10.1002/ieam.4809}, - url = {https://setac.onlinelibrary.wiley.com/doi/abs/10.1002/ieam.4809}, - eprint = {https://setac.onlinelibrary.wiley.com/doi/pdf/10.1002/ieam.4809} + doi = {https://doi.org/10.1002/ieam.4809} } @Manual{cmdstanr2022, title = {\pkg{cmdstanr}: \proglang{R} Interface to \proglang{CmdStan}}, - author = {Jonah Gabry and Rok Češnovar}, + author = {Jonah Gabry and Rok \v{C}e\v{s}novar}, year = {2022}, note = {https://mc-stan.org/cmdstanr/, https://discourse.mc-stan.org}, } @@ -682,14 +681,13 @@ @article{chipman2010 number = {1}, publisher = {Institute of Mathematical Statistics}, pages = {266 -- 298}, - keywords = {Bayesian backfitting, boosting, CART, ‎classification‎, ensemble, MCMC, Nonparametric regression, probit model, random basis, regularizatio, sum-of-trees model, Variable selection, weak learner}, year = {2010}, doi = {10.1214/09-AOAS285}, URL = {https://doi.org/10.1214/09-AOAS285} } @article{vehtari2021rank, - title={Rank-normalization, folding, and localization: An improved R for assessing convergence of MCMC (with discussion)}, + title={Rank-normalization, folding, and localization: An improved $\widehat{\textrm{R}}$ for assessing convergence of MCMC {(with discussion)}}, author={Vehtari, Aki and Gelman, Andrew and Simpson, Daniel and Carpenter, Bob and Burkner, Paul-Christian}, journal={Bayesian analysis}, volume={16}, @@ -707,4 +705,3 @@ @article{watanabe2010asymptotic number={12}, year={2010} } - diff --git a/article/article.tex b/article/article.tex index afa3416e..a4eeada4 100644 --- a/article/article.tex +++ b/article/article.tex @@ -7,7 +7,7 @@ \usepackage[utf8]{inputenc} \author{ -Rebecca Fisher~\orcidlink{0000-0001-5148-6731}\\Australian Institute of Marine Science \And Diego R. Barneche~\orcidlink{0000-0002-4568-2362}\\Australian Institute of Marine Science \And Gerard Ricardo~\orcidlink{0000-0002-7761-0806}\\University of Queensland \And David Fox~\orcidlink{0000-0002-3178-7243}\\University of Melbourne +Rebecca Fisher~\orcidlink{0000-0001-5148-6731}\\Australian Institute of Marine Science \And Diego R. Barneche~\orcidlink{0000-0002-4568-2362}\\Australian Institute of Marine Science \And Gerard Ricardo~\orcidlink{0000-0002-7761-0806}\\The University of Queensland \And David Fox~\orcidlink{0000-0002-3178-7243}\\The University of Melbourne } \title{\pkg{bayesnec}: An \proglang{R} Package for CR Modelling and Estimation of toxicity metrics} @@ -17,7 +17,7 @@ \Abstract{ -The \pkg{bayesnec} package has been developed for \proglang{R} to fit concentration(dose) - response curves (CR) to toxicity data for the purpose of deriving no-effect-concentration (NEC), no-significant-effect-concentration (NSEC), and effect-concentration (of specified percentage `x', ECx) thresholds from non-linear models fitted using Bayesian Hamiltonian Monte Carlo (HMC) via \pkg{brms} \citep{Burkner2017, Burkner2018} and \pkg{rstan} \citep{rstan2021} or \pkg{cmdstanr} \citep{cmdstanr2022}. In \pkg{bayesnec} it is possible to fit a single model, custom models-set, specific model-set or all of the available models. When multiple models are specified the \code{bnec} function returns a model weighted average estimate of predicted posterior values. A range of support functions and methods is also included to work with the returned single, or multi-model objects that allow extraction of raw, or model averaged predicted, NEC, NSEC and ECx values and to interrogate the fitted model or model-set. By combing Bayesian methods with model averaging, \pkg{bayesnec} provides a single estimate of toxicity and assocaited uncertainty that can be directly integrated into risk assessment frameworks. +The \pkg{bayesnec} package has been developed for \proglang{R} to fit concentration(dose) - response curves (CR) to toxicity data for the purpose of deriving no-effect-concentration (NEC), no-significant-effect-concentration (NSEC), and effect-concentration (of specified percentage `x', ECx) thresholds from non-linear models fitted using Bayesian Hamiltonian Monte Carlo (HMC) via \pkg{brms} \citep{Burkner2017, Burkner2018} and \pkg{rstan} \citep{rstan2021} or \pkg{cmdstanr} \citep{cmdstanr2022}. In \pkg{bayesnec} it is possible to fit a single model, custom models-set, specific model-set or all of the available models. When multiple models are specified, the \code{bnec} function returns a model weighted average estimate of predicted posterior values. A range of support functions and methods is also included to work with the returned single, or multi-model objects that allow extraction of raw, or model averaged predicted, NEC, NSEC and ECx values and to interrogate the fitted model or model-set. By combining Bayesian methods with model averaging, \pkg{bayesnec} provides a single estimate of toxicity and associated uncertainty that can be directly integrated into risk assessment frameworks. } \Keywords{concentration-response, toxicity, no-effect-concentration, \proglang{R}, Bayesian, non-linear modelling} @@ -33,23 +33,23 @@ \Address{ Rebecca Fisher\\ - University of Western Australia\\ - Australian Institute of Marine Science,Crawley, WA, Australia\\ + Australian Institute of Marine Science, Crawley, WA, Australia\\ + \textit{and}\linebreak Oceans Institute, The University of Western Australia, Crawley, WA, Australia\\ E-mail: \email{r.fisher@aims.gov.au}\\ Diego R. Barneche\\ - University of Western Australia\\ - Australian Institute of Marine Science,Crawley, WA, Australia\\ + Australian Institute of Marine Science, Crawley, WA, Australia\\ + \textit{and}\linebreak Oceans Institute, The University of Western Australia, Crawley, WA, Australia\\ Gerard Ricardo\\ - Australian Institute of Marine Science\\ - The University of Queensland,St Lucia, Qld, Australia\\ + The University of Queensland, St Lucia, Qld, Australia\\ + \textit{and}\linebreak Australian Institute of Marine Science, Townsville, Qld, Australia\\ David Fox\\ - Environmetrics Australia\\ - University of Melbourne,Parkville, Victoria, Australia\\ + The University of Melbourne, Parkville, Victoria, Australia\\ + \textit{and}\linebreak Environmetrics Australia, Beaumaris, Victoria, Australia\\ } @@ -84,24 +84,24 @@ \hypertarget{introduction}{% \section{Introduction}\label{introduction}} -Concentration-response (CR) modelling (also known as dose-response modelling or dose-response analysis) is a key tool for assessing toxicity and deriving the toxicity thresholds used in the risk assessments that underpin protection of human health and the environment. It is widely used in the disciplines of pharmacology, toxicology and ecotoxicology, where parametric non-linear regression methods are used to model response data of interest, with the resulting fitted models used to estimate critical thresholds of concern. These thresholds may be used directly to assess risk \citep[e.g see][]{fisher2018c}, or are subsequently incorporated into a broader population-level risk assessment framework \citep[e.g.][]{Warne2015}. Typical thresholds derived from CR modelling include the effect-concentration of defined percentage `x' (ECx) and the no-effect-concentration (NEC), the latter being the generally preferred option \citep{Fox2008, Warne2015, Warne2018c}. In addition, the no-significant-effect-concentration (NSEC) has also been recently defined \citep{Fisher2023}, and may represent a good alternative estimate of ``no-effect'' concentrations when the threshold models required to derive true NEC estimates are not appropriate \citep{Fisher2023, fisher2023ieam}. +Concentration-response (CR) modelling (also known as dose-response modelling or dose-response analysis) is a key tool for assessing toxicity and deriving the toxicity thresholds used in the risk assessments that underpin protection of human health and the environment. It is widely used in the disciplines of pharmacology, toxicology and ecotoxicology, where parametric non-linear regression methods are used to model response data of interest, with the resulting fitted models used to estimate critical thresholds of concern. These thresholds may be used directly to assess risk \citep[e.g., see][]{fisher2018c}, or are subsequently incorporated into a broader population-level risk assessment framework \citep[e.g.,][]{Warne2015}. Typical thresholds derived from CR modelling include the effect-concentration of defined percentage `x' (ECx) and the no-effect-concentration (NEC), the latter being the generally preferred option \citep{Fox2008, Warne2015, Warne2018c}. In addition, the no-significant-effect-concentration (NSEC) has also been recently defined \citep{Fisher2023}, and may represent a good alternative estimate of ``no-effect'' concentrations when the threshold models required to derive true NEC estimates are not appropriate \citep{Fisher2023, fisher2023ieam}. -In qualitative terms, CR models are typically a decreasing function of concentration, whereby the response may remain relatively stable for the initial portion of the curve, and then decays at some rate to zero (or some other lower bound at high concentration). An example is death of an organism resulting from ever increasing concentrations of a toxic pollutant. However, often the underlying mechanisms describing CR relationships are not known, and therefore numerous alternative non-linear CR equations have been proposed (e.g.~models in \pkg{drc}, \citet{Ritz2016}). These can be broadly grouped into two main ``model categories'' (see below Section \ref{mdbnc}) for the mathematical definition of each model): NEC models---threshold models which contain a step function comprising the ``break-point'' after which the predictor systematically alters the response \citep{Fox2010}; and Smooth transition models that are typically used for estimating effect concentrations of a specified effect (e.g.~ECx- models). They may or may not encompass the EC\textsubscript{50} as a parameter \citep{Ritz2016}. Each of these two groups can be further split into two categories depending on whether the initial portion of the curve is flat, or increasing---the latter being known as hormesis models \citep{Ritz2016}. +In qualitative terms, CR models are typically a decreasing function of concentration, whereby the response may remain relatively stable for the initial portion of the curve, and then decays at some rate to zero (or some other, lower bound at high concentration). An example is death of an organism resulting from ever increasing concentrations of a toxic pollutant. However, often the underlying mechanisms describing CR relationships are not known, and therefore numerous alternative non-linear CR equations have been proposed \citep[e.g., models in \pkg{drc},][]{Ritz2016}. These can be broadly grouped into two main ``model categories'' (see below Section \ref{mdbnc}) for the mathematical definition of each model): NEC models---threshold models which contain a step function comprising the ``break-point'' after which the predictor systematically alters the response \citep{Fox2010}; and Smooth transition models that are typically used for estimating effect concentrations of a specified effect (e.g., ECx models). They may or may not encompass the EC\textsubscript{50} as a parameter \citep{Ritz2016}. Each of these two groups can be further split into two categories depending on whether the initial portion of the curve is flat, or increasing---the latter being known as hormesis models \citep{Ritz2016}. -The above model categories mostly comprise non-linear equations, thereby increasing the technical complexity of model fitting. CR experimental designs are often also complex, and may require the addition of multi-level, hierarchical effects. Examples might include a random offset to model non-independence of replicates housed together in tanks, or where there are repeated measurements on individual genotypes of a test species. Additionally, CR data are of varied nature, with the measured response data taking a wide range of natural distributions. For instance, response data may be unbounded continuous (e.g., growth when shrinkage is possible), or positive continuous (e.g., growth in the absence of shrinkage), proportional (e.g., fertilization success), or binary (e.g., survival). To the best of our knowledge, there is no open-source statistical software dedicated to CR modelling which allows for appropriate model and error distribution specification depending on the input data. However, there is a wide array of multi-purpose packages for fitting non-linear generalised hierarchical models in \proglang{R}. For example (to list a few), \pkg{nlme} \citep{pinheiro2021}, \pkg{lme4} \citep{pinheiro2021}, \pkg{rjags} \citep{Su2015} and \pkg{rstan} \citep{rstan2021}. +The above model categories mostly comprise non-linear equations, thereby increasing the technical complexity of model fitting. CR experimental designs are often also complex, and may require the addition of multi-level, hierarchical effects. Examples might include a random offset to model non-independence of replicates housed together in tanks, or where there are repeated measurements on individual genotypes of a test species. Additionally, CR data are of varied nature, with the measured response data taking a wide range of natural distributions. For instance, response data may be unbounded continuous (e.g., growth when shrinkage is possible), or positive continuous (e.g., growth in the absence of shrinkage), proportional (e.g., fertilization success), or binary (e.g., survival). To the best of our knowledge, there is no open-source statistical software dedicated to CR modelling which allows for appropriate model and error distribution specification depending on the input data. However, there is a wide array of multi-purpose packages for fitting non-linear generalised hierarchical models in \proglang{R}. For example, to list a few, \pkg{nlme} \citep{pinheiro2021}, \pkg{lme4} \citep{pinheiro2021}, \pkg{rjags} \citep{Su2015} and \pkg{rstan} \citep{rstan2021}. -While CR modelling can be carried out using generic non-linear modelling software packages, this can be cumbersome in practice, requiring extensive manual programming to obtain the necessary, often relatively standard outputs. The \pkg{drc} package \citep{Ritz2016} was developed as a user friendly frequentist based solution to CR modelling in \proglang{R}, and is currently widely used across a range of disciplines. \pkg{drc} implements a broad range of non-linear CR models, provides facilities for ranking model fits based on AIC \citep{Burnham2002}, joint modelling of multiple response curves, and supports a range of estimation procedures \citep{Ritz2016}. Section \ref{bnchmrk} provides a formal comparison between \pkg{drc} and \pkg{bayesnec} standard output using default argument values. +While CR modelling can be carried out using generic non-linear modelling software packages, this can be cumbersome in practice, requiring extensive manual programming to obtain the necessary, often relatively standard outputs. The \pkg{drc} package \citep{Ritz2016} was developed as a user friendly frequentist solution to CR modelling in \proglang{R}, and is currently widely used across a range of disciplines. \pkg{drc} implements a broad range of non-linear CR models, provides facilities for ranking model fits based on AIC \citep{Burnham2002}, joint modelling of multiple response curves, and supports a range of estimation procedures \citep{Ritz2016}. Section \ref{bnchmrk} provides a formal comparison between \pkg{drc} and \pkg{bayesnec} standard output using default argument values. -Estimates of uncertainty in parameters and derived thresholds are critical for effective integration of threshold estimates into risk assessment and formal decision frameworks \citep{fisher2018c}. Bayesian methods allow robust quantification of uncertainty with intuitive and direct probabilistic meaning \citep{Ellison1996}, and are therefore an excellent platform for CR modelling in most settings. Furthermore, the posterior draws generated through Bayesian model fitting methods provide a rich resource that can be used to explore synergistic and antagonistic impacts \citep{Fisher2019d, flores2021}, propagate toxicity estimate uncertainty \citep{Charles2020a, Gottschalk2013}, and test hypotheses of no-effect \citep{Thomas2006}. +Estimates of uncertainty in parameters and derived thresholds are critical for effective integration of threshold estimates into risk assessment and formal decision frameworks \citep{fisher2018c}. Bayesian methods allow robust quantification of uncertainty with intuitive and direct probabilistic meaning \citep{Ellison1996}, and are therefore a useful platform for CR modelling in most settings. Furthermore, the posterior draws generated through Bayesian model fitting methods provide a rich resource that can be used to explore synergistic and antagonistic impacts \citep{Fisher2019d, flores2021}, propagate toxicity estimate uncertainty \citep{Charles2020a, Gottschalk2013}, and test hypotheses of no-effect \citep{Thomas2006}. -There is a wide array of packages available for Bayesian model fitting via Markov chain Monte Carlo methods, including \proglang{WinBUGS} \citep{Lunn2000}, \proglang{JAGS} \citep{Plummer2003} and \proglang{Stan} \citep{Carpenter2017}, that seamlessly interface with R through packages such as \pkg{R2WinBUGS}, \pkg{R2jags} \citep{Su2015} and \pkg{rstan} \citep{rstan2021}. These packages require coding in \proglang{R} and additional model specification in custom languages---which might involve non trivial coding, extensive debugging and optimisation that is often time consuming and requires specialist expertise. All of which might add a learning/application barrier to many potential users. Several extension packages which aim to reduce that barrier have recently become available, principally \pkg{brms} \citep{Burkner2017}, that allows a broad range of models to be easily fitted using \pkg{rstan} \citep{rstan2021} or \pkg{cmdstanr} \citep{cmdstanr2022} through simpler \pkg{lme4}-like formula syntax. However, even with packages like \pkg{brms}, Bayesian model fitting can be difficult to automate across all potential usage cases, particularly with respect to specifying valid initial parameter values and appropriate priors. In addition, as was the motivation for the development of the \pkg{drc} package in the frequentist setting, the \proglang{R} code required for fitting non-linear models and extracting the relevant outputs (e.g.~NEC, ECx) and their standard errors can be cumbersome in practice, and even more so in the Bayesian setting where model fits contain multiple posterior draws. +There is a wide array of packages available for Bayesian model fitting via Markov chain Monte Carlo methods, including \proglang{WinBUGS} \citep{Lunn2000}, \proglang{JAGS} \citep{Plummer2003} and \proglang{Stan} \citep{Carpenter2017}, that seamlessly interface with R through packages such as \pkg{R2WinBUGS}, \pkg{R2jags} \citep{Su2015} and \pkg{rstan} \citep{rstan2021}. These packages require coding in \proglang{R} and additional model specification in custom languages---which might involve non-trivial coding, extensive debugging and optimisation that is often time consuming and requires specialist expertise, all of which might add a learning/application barrier to many potential users. Several extension packages which aim to reduce that barrier have recently become available, principally \pkg{brms} \citep{Burkner2017}, that allows a broad range of models to be easily fitted using \pkg{rstan} \citep{rstan2021} or \pkg{cmdstanr} \citep{cmdstanr2022} through simpler \pkg{lme4}-like formula syntax. However, even with packages like \pkg{brms}, Bayesian model fitting can be difficult to automate across all potential use cases, particularly with respect to specifying valid initial parameter values and appropriate priors. In addition, as was the motivation for the development of the \pkg{drc} package in the frequentist setting, the \proglang{R} code required for fitting non-linear models and extracting the relevant outputs (e.g., NEC, ECx) and their standard errors can be cumbersome in practice, and even more so in the Bayesian setting where model fits contain multiple posterior draws. -The greater complexity associated with Bayesian model fitting has likely hindered the uptake of Bayesian statistics for CR threshold derivation across the broader ecotoxicology and toxicology communities, who may not have access to specialist statistical expertise \citep{Fisher2019}. \pkg{bayesnec} version 2.1.0.3 (available on CRAN) builds upon an implementation of the NEC model described in \citet{Fox2010} and \citet{Pires2002}. The \pkg{bayesnec} package provides an accessible interface specifically for fitting NEC and ECx and other CR models using Bayesian methods. A variety of models can be specified based on the known distribution of the ``concentration'' or ``dose'' variable (the predictor, x) as well as the ``response'' (y) variable. The model formula, including priors and initial values required to call \pkg{brms} are automatically generated based on information contained in the supplied data. A range of tools is supplied to aid the user in interrogating model fits, plotting and generating predicted values, as well as extracting the standard outputs, such as NEC and ECx - either as a full posterior draw or in summary form. +The greater complexity associated with Bayesian model fitting has likely hindered the uptake of Bayesian statistics for CR threshold derivation across the broader ecotoxicology and toxicology communities, who may not have access to specialist statistical expertise \citep{Fisher2019}. \pkg{bayesnec} version 2.1.0.3 (available on CRAN) builds upon an implementation of the NEC model described in \citet{Fox2010} and \citet{Pires2002}. The \pkg{bayesnec} package provides an accessible interface specifically for fitting NEC and ECx and other CR models using Bayesian methods. A variety of models can be specified based on the known distribution of the ``concentration'' or ``dose'' variable (the predictor, x) as well as the ``response'' (y) variable. The model formula, including priors and initial values required to call \pkg{brms} are automatically generated based on information contained in the supplied data. A range of tools is supplied to aid the user in interrogating model fits, plotting and generating predicted values, as well as extracting the standard outputs, such as NEC and ECx, either as a full posterior draw or in summary form. \hypertarget{technical-details}{% \section{Technical details}\label{technical-details}} -In Bayesian inference, model parameters and their inherent uncertainty are estimated as statistical probability distributions. This is achieved by combining an a-priori probability distribution for each parameter (the `priors', \(p(\theta)\)) with the likelihood of the observed data, \(D\), given the model parameters, \(p(D | \theta)\), to yield a so-called posterior probability distribution, \(p(\theta | D)\) +In Bayesian inference, model parameters and their inherent uncertainty are estimated as statistical probability distributions. This is achieved by combining an a-priori probability distribution for each parameter (the `priors', \(p(\theta)\)) with the likelihood of the observed data, \(D\), given the model parameters, \(p(D | \theta)\), to yield a so-called posterior probability distribution, \(p(\theta | D)\): \begin{equation} p(\theta | D) = \frac{p(D | \theta) p(\theta)}{\int_{\theta} p(D | \theta) p(\theta) d \theta} \propto p(D | \theta) p(\theta). @@ -120,7 +120,7 @@ \section{Technical details}\label{technical-details}} \(\beta = \text{beta}\), generally the exponential decay rate of response, either from 0 concentration or from the estimated \(\eta\) value, with the exception of the \code{neclinhorme} model where it represents a linear decay from \(\eta\) because slope (\(\alpha\)) is required for the linear increase (see below); -\(\delta = \text{bottom}\), representing the lower asymptotic response at infinite concentration; +\(\delta = \text{bot}\), representing the lower asymptotic response at infinite concentration; \(\alpha = \text{slope}\), the linear decay rate in the models \code{neclin} and \code{ecxlin}, or the linear increase rate prior to \(\eta\) for all hormesis models; @@ -128,9 +128,9 @@ \section{Technical details}\label{technical-details}} \(\epsilon = \text{d}\), the exponent in the \code{ecxsigm} and \code{necisgm} models, and -\(\phi = \text{f}\) A scaling exponent exclusive to model \code{ecxll5}. +\(\phi = \text{f}\), A scaling exponent exclusive to model \code{ecxll5}. -In addition to the model parameters, all \emph{NEC}- models have a step (indicator) function used to define the breakpoint in the regression, which can be defined as +In addition to the model parameters, all \emph{NEC} models have a step (indicator) function used to define the breakpoint in the regression, which can be defined as \[ f(x_i, \eta) = \begin{cases} @@ -145,7 +145,7 @@ \section{Technical details}\label{technical-details}} \code{ecxlin} is a basic linear decay model, given by the equation: \(y_i = \tau - e^{\alpha} x_i\) -with the following \code{brmsformula}: \code{y \textasciitilde{} top - exp(slope) * x}. Because the model contains linear predictors it is not suitable for 0--1 bounded data (i.e., Binomial and Beta families with an \code{"identity"} link function). As the model includes a linear decline with concentration, it is also not suitable for 0 bounded data (Gamma, Poisson, Negative Binomial with an \code{"identity"} link). +with the following \code{brmsformula}: \code{y \textasciitilde{} top - exp(slope) * x}. Because the model contains linear, predictors it is not suitable for 0--1 bounded data (i.e., Binomial and Beta families with an \code{"identity"} link function). As the model includes a linear decline with concentration, it is also not suitable for 0 bounded data (Gamma, Poisson, Negative Binomial with an \code{"identity"} link). \code{ecxexp} is a basic exponential decay model, given by the equation: \(y_i = \tau e^{-e^{\beta} x_i}\) with the following \code{brmsformula}: \code{y \textasciitilde{} top * exp(-exp(beta) * x)}. The model is 0-bounded, thus not suitable for Gaussian response data or the use of a \code{"logit"} or \code{"log"} link function. @@ -162,7 +162,7 @@ \section{Technical details}\label{technical-details}} \(y_i = \delta + (\tau - \delta) e^{-e^{e^{\beta} (x_i - \omega)}}\) with the following \code{brmsformula}: \code{y \textasciitilde{} bot + (top - bot) * exp(-exp(exp(beta) * (x - ec50)))}. -\code{ecxwb1p3} is a 3-parameter sigmoidal decay model, equivalent to the \code{ecxwb1} model where \(\delta\) (bottom) is defined as 0, given by the equation: +\code{ecxwb1p3} is a 3-parameter sigmoidal decay model, equivalent to the \code{ecxwb1} model where \(\delta\) (bot) is defined as 0, given by the equation: \(y_i = {0} + (\tau - {0}) e^{-e^{e^{\beta} (x_i - \omega)}}\) with the following \code{brmsformula}: \code{y \textasciitilde{} 0 + (top - 0) * exp(-exp(exp(beta) * (x - ec50)))}. The model is 0-bounded, thus not suitable for Gaussian response data or the use of a \code{"logit"} or \code{"log"} link function. @@ -178,11 +178,11 @@ \section{Technical details}\label{technical-details}} \(y_i = \delta + (\tau - \delta) / (1 + e^{-e^{\beta} (x_i - \omega)})^{e^\phi}\) with the following \code{brmsformula}: \code{y \textasciitilde{} bot + (top - bot)/(1 + exp(exp(beta) * (x - ec50)))\^{}exp(f)}. -\code{ecxll4} is a 4-parameter sigmoidal log-logistic decay model, equivalent to the the \code{ecxll5} model, but where the parameter \(\phi\) (f) is defined as 0, given by the equation: +\code{ecxll4} is a 4-parameter sigmoidal log-logistic decay model, equivalent to the \code{ecxll5} model, but where the parameter \(\phi\) (f) is defined as 0, given by the equation: \(y_i = \delta + (\tau - \delta)/ (1 + e^{e^{\beta} (x_i - \omega)})\) with the following \code{brmsformula}: \code{y \textasciitilde{} bot + (top - bot)/(1 + exp(exp(beta) * (x - ec50)))}. -\code{ecxll3} is a 3-parameter sigmoidal log-logistic decay model, equivalent to the the \code{ecxll5} model, but where the parameters \(\phi\) (f) and \(\delta\) (bot) are both defined as 0, given by the equation: +\code{ecxll3} is a 3-parameter sigmoidal log-logistic decay model, equivalent to the \code{ecxll5} model, but where the parameters \(\phi\) (f) and \(\delta\) (bot) are both defined as 0, given by the equation: \(y_i = 0 + (\tau - 0)/ (1 + e^{e^{\beta} (x_i - \omega)})\) with the following \code{brmsformula}: \code{y \textasciitilde{} 0 + (top - 0)/(1 + exp(exp(beta) * (x - ec50)))}. The model is 0-bounded, thus not suitable for Gaussian response data or the use of a \code{"logit"} or \code{"log"} link function. @@ -211,7 +211,7 @@ \section{Technical details}\label{technical-details}} \(y_i = \tau e^{-e^{\beta} \left(x_i - \eta \right) f(x_i, \eta)}\) with the following \code{brmsformula}: \code{y \textasciitilde{} top * exp(-exp(beta) * (x - nec) * step(x - nec))}. For Binomial-distributed response data in the case of \code{"identity"} link this model is equivalent to that in \citet{Fox2010}. The model is 0-bounded, thus not suitable for Gaussian response data or the use of a \code{"logit"} or \code{"log"} link function. -\code{nec4param} is a equivalent to the \code{nec3param} model, but with an additional parameter defining the lower bound (parameter \(\delta\) (bottom)), given by the equation: +\code{nec4param} is a equivalent to the \code{nec3param} model, but with an additional parameter defining the lower bound (parameter \(\delta\) (bot)), given by the equation: \(y_i = \delta + (\tau - \delta) e^{-e^{\beta} \left(x_i - \eta \right) f(x_i, \eta)}\) with the following \code{brmsformula}: \code{y \textasciitilde{} bot + (top - bot) * exp(-exp(beta) * (x - nec) * step(x - nec))}. @@ -227,11 +227,11 @@ \section{Technical details}\label{technical-details}} \(y_i = (\tau + e^{\alpha} x_i) - e^{\beta} \left(x_i - \eta \right) f(x_i, \eta)\) with the following \code{brmsformula}: \code{y \textasciitilde{} (top + exp(slope) * x) - exp(beta) * (x - nec) * step(x - nec)}. The \code{neclinhorme} model is a \emph{hormesis} model \citep{Mattson2008}, allowing an initial increase in the response variable at concentrations below \(\eta\). This model contains linear predictors and is not suitable for 0--1 bounded data (Binomial and Beta distributions with \code{"identity"} link). As the model includes a linear decline with concentration, it is also not suitable for 0--\(\infty\) bounded data (Gamma, Poisson, Negative Binomial with \code{"identity"} link). -\code{nechorme4} is 4 parameter decay model with an NEC step function equivalent to \code{nec4param} with the addition of a linear increase prior to \(\eta\), given by the equation: +\code{nechorme4} is five parameter decay model with an NEC step function equivalent to \code{nec4param} with the addition of a linear increase prior to \(\eta\), given by the equation: \(y_i = \delta + ((\tau + e^{\alpha} x_i) - \delta ) e^{-e^{\beta} \left(x_i - \eta \right) f(x_i, \eta)}\) with the following \code{brmsformula}: \code{y \textasciitilde{} bot + ((top + exp(slope) * x) - bot) * exp(-exp(beta) * (x - nec) * step(x - nec))}. The \code{nechorme4} model is a \emph{hormesis} model \citep{Mattson2008}, allowing an initial increase in the response variable at concentrations below \(\eta\). -\code{nechorme4pwr} is 4 parameter decay model with an NEC step function equivalent to \code{nec4param} with the addition of a power increase prior to \(\eta\), given by the equation: +\code{nechorme4pwr} is five parameter decay model with an NEC step function equivalent to \code{nec4param} with the addition of a power increase prior to \(\eta\), given by the equation: \(y_i = \delta + ((\tau + x_i^{1/(1+e^{\alpha})}) - \delta) e^{-e^{\beta} \left(x_i - \eta \right) f(x_i, \eta)}\) with the following \code{brmsformula}: \code{y \textasciitilde{} bot + ((top + x\^{}(1/(1 + exp(slope)))) - bot) * exp(-exp(beta) * (x - nec) * step(x - nec))}. The \code{nechorme4pwr} model is a \emph{hormesis} model \citep{Mattson2008}, allowing an initial power increase in the response variable at concentrations below \(\eta\). Because the model can generate predictions \textgreater{} 1 it should not be used for Binomial and Beta distributions with \code{"identity"} link. In this case the \code{nechromepwr01} model should be used. @@ -256,23 +256,23 @@ \subsection{Priors on model parameters}\label{priors-on-model-parameters}} To undertake a Bayesian analysis, prior probability densities of the parameters of the model must first be defined. Sometimes there may be substantial prior knowledge, for example when pilot data or data from a previous experiment exist for a given response curve. In this case the prior probability distribution may be quite narrow (highly ``informative'') and will therefore be influential in the characterisation of the posterior, especially when subsequent data are scarce or highly variable. However, in our experience in ecology and related disciplines, such prior knowledge is generally the exception. Where no quantitative prior information exists, it is common in Bayesian statistics to use either ``vague'' or ``weakly'' informative priors. The use of ``vague'', ``diffuse'', ``flat'' or otherwise so-called ``uninformative'' priors is no longer recommended \citep{Banner2020}. Such priors generally form the default for many Bayesian packages, and are often used in practice without critical thought or evaluation, possibly as a result of fear of being too subjective \citep{Banner2020}. However, even vague priors can have a substantial influence on the outcome of an analysis \citep{depaoli2020importance, gelman2017entropy}. Instead, it is better to use weakly informative, ``generative'' priors - that is priors that are based on probability distributions that interact sensibly with the likelihood to produce a meaningful data generating mechanism \citep{gelman2017entropy}. Considerable thought has gone into development of an algorithm to build default ``weakly'' informative priors for fitting models in \pkg{bayesnec}. The default priors are ``weakly'' informative in that in addition to specifying the relevant statistical \code{family} that appropriately captures the parameter's theoretical statistical distribution, when external -subjective information is unavailable we also use information contained within the observed data to center the probability density of the prior within a plausible region of the parameter space, ensure priors are appropriately scaled relative to the range of the response and predictor data, and/or constrain priors to sensible bounds (described in more detail below). These weakly informative priors are used to help constrain the underlying routines so that they are less likely to consider what the researcher would deem highly improbable estimates, that may also cause the routines to become unstable resulting in failed model fits. Weakly informative priors can be particularly helpful in complex non-linear modelling to ensure reliable convergence. These types of priors specify the general shape and bounds of the probability distribution for a given parameter, whilst remaining sufficiently broad so as not to influence the parameter's estimated posterior distribution (given a reasonable amount of observed data). In this sense, appropriately weak priors should yield analytical outcomes that share the same level of \emph{objectivity} as equivalent frequentist approaches, whilst yielding robust parameter estimates with probabilistically interpretable uncertainty bounds. +subjective information is unavailable (no priors are supplied by the user) we also use information contained within the observed data to build priors with appropriate scaling. The procedure is described in more detail below, but the algorithm effectively centres the probability density of the prior within a plausible region of the parameter space, ensures priors are appropriately scaled relative to the range of the response and predictor data, and/or priors are constrained to sensible bounds. These weakly informative priors are used to help constrain the underlying routines so that they are less likely to consider what the researcher would deem highly improbable estimates, that may also cause the routines to become unstable resulting in failed model fits. Weakly informative priors can be particularly helpful in complex non-linear modelling to ensure reliable convergence. These types of priors specify the general shape and bounds of the probability distribution for a given parameter, whilst remaining sufficiently broad so as not to influence the parameter's estimated posterior distribution (given a reasonable amount of observed data). In this sense, appropriately weak priors should yield analytical outcomes that share the same level of \emph{objectivity} as equivalent frequentist approaches, whilst yielding robust parameter estimates with probabilistically interpretable uncertainty bounds. -While we sacrifice Bayesian coherence by using features of the data to calibrate our default priors (see \citet{chipman2010} for another example of such an approach), our primary motivation is to facilitate easy implementation of \pkg{bayesnec} in practice, and to ensure model stability and reliable model fits. Note, however that it is critical for users to interrogate these default priors, using for example, sensitivity analysis \citep{depaoli2020importance} and ensure they are appropriate given the data \citep{gelman2017entropy}. Priors are automatically saved as part of the \code{brmsfit} and \code{bayesnecfit} models, and there are functions for extracting the priors used, as well as plotting these in comparison to the resulting posterior distribution (see Section \ref{moddiag}). Care should be taken to ensure that the default priors are sufficiently weak relative to the posterior +While we sacrifice Bayesian coherence by using features of the data to calibrate our default priors (see \citet{chipman2010} for another example of such an approach), our primary motivation is to facilitate easy implementation of \pkg{bayesnec} in practice, and to ensure model stability and reliable model fits. Note, however that it is critical for users to interrogate these default priors, using for example, sensitivity analysis \citep{depaoli2020importance} and ensure they are appropriate given the data \citep{gelman2017entropy}. Priors are automatically saved as part of the \code{brmsfit} and \code{bayesnecfit} models, and there are functions for extracting the priors used, as well as plotting these in comparison to the resulting posterior distribution (see Section \ref{moddiag}). Care should be taken to ensure that the default priors are sufficiently weak such that they have little influence on posterior estimates. \hypertarget{priors-for-response-scaled-parameters}{% \subsubsection{Priors for response-scaled parameters}\label{priors-for-response-scaled-parameters}} -Only the parameters \(\tau = \text{top}\) and \(\delta = \text{bottom}\) relate directly to the response variable's distribution. For Gaussian-distributed responses (or any response variable for which the link ensures valid values of the response can take from -\(\infty\) to \(\infty\), including \code{"log"} and \code{"logit"}) priors are Gaussian with a mean set at the 90\textsuperscript{th} and 10\textsuperscript{th} quantiles of the response for parameters \(\tau = \text{top}\) and \(\delta = \text{bottom}\), respectively, and a standard deviation of 2.5 times the standard deviation of the response (on the appropriate link scale). In this way \pkg{bayesnec} attempts to construct a prior that scales appropriately with the response data, with greatest density near the most likely region of the response for both \(\tau = \text{top}\) and \(\delta = \text{bottom}\). The priors for top and bottom can be set quite narrow and still remain relatively ``weak'' because \pkg{bayesnec} only allows models that represent an overall decrease from the start to the end of the concentration range. Because this directional relationship is pre-defined, it is reasonable to presume that the true value of \(\tau = \text{top}\), for example, should be relatively near the upper quantile of the observed response data, and a somewhat narrow prior on that assumption can be used without being strongly informative. In the context of a standard deviation across the whole response range, a value of 2.5 can still be considered relatively broad and should have little influence on the parameter's posterior density. +Only the parameters \(\tau = \text{top}\) and \(\delta = \text{bot}\) relate directly to the response variable's distribution. For Gaussian-distributed responses (or any response variable for which the link ensures valid values of the response can take from -\(\infty\) to \(\infty\), including \code{"log"} and \code{"logit"}) priors are Gaussian with a mean set at the 90\textsuperscript{th} and 10\textsuperscript{th} quantiles of the response for parameters \(\tau = \text{top}\) and \(\delta = \text{bot}\), respectively, and a standard deviation of 2.5 times the standard deviation of the response (on the appropriate link scale). In this way \pkg{bayesnec} attempts to construct a prior that scales appropriately with the response data, with greatest density near the most likely region of the response for both \(\tau = \text{top}\) and \(\delta = \text{bot}\). The priors for top and bot can be set quite narrow and still remain relatively ``weak'' because \pkg{bayesnec} only allows models that represent an overall decrease from the start to the end of the concentration range. Because this directional relationship is pre-defined, it is reasonable to presume that the true value of \(\tau = \text{top}\), for example, should be relatively near the upper quantile of the observed response data, and a somewhat narrow prior on that assumption can be used without being strongly informative. In the context of a standard deviation across the whole response range, a value of 2.5 can still be considered relatively broad and should have little influence on the parameter's posterior density. -For Poisson-, Negative-binomial- and Gamma-distributed response variables, the response cannot take negative values and therefore Gaussian priors are unsuitable. Instead, we use Gamma priors with a mean scaled to correspond to the 75\textsuperscript{th} and 25\textsuperscript{th} quantiles of the response for \(\tau = \text{top}\) and \(\delta = \text{bottom}\), respectively. The mean (\(\mu\)) is linked mathematically to the shape (s) and rate parameters (r) by the equation \[ \mu = s * (1/r) \] \citep{Becker1988} with the shape parameter being set to 2 by default. The value of 2 was selected based on trial and error through initial testing, as this appeared to produce relatively broad priors that were still centered around feasible values for these parameters. +For Poisson-, Negative-binomial- and Gamma-distributed response variables, the response cannot take negative values and therefore Gaussian priors are unsuitable. Instead, we use Gamma priors with a mean scaled to correspond to the 75\textsuperscript{th} and 25\textsuperscript{th} quantiles of the response for \(\tau = \text{top}\) and \(\delta = \text{bot}\), respectively. The mean (\(\mu\)) is linked mathematically to the shape (s) and rate parameters (r) by the equation \[ \mu = s * (1/r) \] \citep{Becker1988} with the shape parameter being set to 2 by default. The value of 2 was selected based on trial and error through initial testing, as this appeared to produce relatively broad priors that were still centred around feasible values for these parameters. -For the Binomial, Beta, and Beta-binomial families, estimates for \(\tau = \text{top}\) and \(\delta = \text{bottom}\) must necessarily be constrained between 0 and 1 when modelled on the identity link. Because of this constraint, there is no need to adjust scaling based on the response. In this case \pkg{bayesnec} uses \code{beta(5, 2)} and \code{beta(2, 5)} priors to provide a broad density centred across the upper and lower 0 to 1 range for the \(\tau = \text{top}\) and \(\delta = \text{bottom}\) parameters respectively. +For the Binomial, Beta, and Beta-binomial families, estimates for \(\tau = \text{top}\) and \(\delta = \text{bot}\) must necessarily be constrained between 0 and 1 when modelled on the identity link. Because of this constraint, there is no need to adjust scaling based on the response. In this case \pkg{bayesnec} uses \code{beta(5, 2)} and \code{beta(2, 5)} priors to provide a broad density centred across the upper and lower 0 to 1 range for the \(\tau = \text{top}\) and \(\delta = \text{bot}\) parameters respectively. \hypertarget{priors-for-predictor-scaled-parameters}{% \subsubsection{Priors for predictor-scaled parameters}\label{priors-for-predictor-scaled-parameters}} -The parameters \(\eta = \text{NEC}\) and \(\omega\) = \(\text{EC\textsubscript{50}}\) scale according to the predictor variable because both of these are estimated in units of the predictor (usually concentration). To stabilise model fitting, the \(\eta = \text{NEC}\) and \(\omega\) = \(\text{EC\textsubscript{50}}\) parameters are bounded to the upper and lower observed range in the predictor, under the assumption that the range of concentrations in the experiment were sufficient to cover the full range of the response outcomes. Note that this assumption may not always hold if the data are from an experiment that is badly designed, and the outcome of any analysis resulting in either \(\eta\) or \(\omega\) being estimated at the bounds of the predictor data range should be interpreted with caution. The priors used reflect the characteristics of the observed data that are used to predict the appropriate \code{family}. If the predictor variable is strictly positive, a Gamma prior is used, with maximum density (\(\mu\), see above) at the median value of the predictor, and a shape parameter of 5. If the predictor variable is truncated at both 0 and 1, a \code{beta(2, 2)} prior is used. For predictor variables ranging from -\(\infty\) to \(\infty\), a Gaussian prior is used, with a mean set at the median of the predictor values and a standard deviation of 10 times the standard deviation of the predictor variable. A much broader prior is required for the \(\eta = \text{NEC}\) and \(\omega\) = \(\text{EC\textsubscript{50}}\) estimates than for example \(\tau = \text{top}\) and \(\delta = \text{bottom}\), because depending on the curve that is fit estimates for these parameters may fall almost anywhere along the predictor range - especially if the CR experiment was badly designed. We set the maximum density for these parameters as the median of the predictor value (in the hope that the experiment has been well designed and the inflection point is somewhere in the center of the predictor range), but it is important that there is substantial range in the prior, because the true values may be quite low or high across the predictor range, thus a very broad standard deviation of 10 is used. +The parameters \(\eta = \text{NEC}\) and \(\omega\) = \(\text{EC\textsubscript{50}}\) scale according to the predictor variable because both of these are estimated in units of the predictor (usually concentration). To stabilise model fitting, the \(\eta = \text{NEC}\) and \(\omega\) = \(\text{EC\textsubscript{50}}\) parameters are bounded to the upper and lower observed range in the predictor, under the assumption that the range of concentrations in the experiment were sufficient to cover the full range of the response outcomes. Note that this assumption may not always hold if the data are from an experiment that is poorly designed, and the outcome of any analysis resulting in either \(\eta\) or \(\omega\) being estimated at the bounds of the predictor data range should be interpreted with caution. The priors used reflect the characteristics of the observed data that are used to predict the appropriate \code{family}. If the predictor variable is strictly positive, a Gamma prior is used, with maximum density (\(\mu\), see above) at the median value of the predictor, and a shape parameter of 5. If the predictor variable is truncated at both 0 and 1, a \code{beta(2, 2)} prior is used. For predictor variables ranging from -\(\infty\) to \(\infty\), a Gaussian prior is used, with a mean set at the median of the predictor values and a standard deviation of 10 times the standard deviation of the predictor variable. A much broader prior is required for the \(\eta = \text{NEC}\) and \(\omega\) = \(\text{EC\textsubscript{50}}\) estimates than for example \(\tau = \text{top}\) and \(\delta = \text{bot}\), because depending on the curve that is fit estimates for these parameters may fall almost anywhere along the predictor range - especially if the CR experiment was badly designed. We set the maximum density for these parameters as the median of the predictor value (in the hope that the experiment has been well designed and the inflection point is somewhere in the centre of the predictor range), but it is important that there is substantial range in the prior, because the true values may be quite low or high across the predictor range, thus a very broad standard deviation of 10 is used. \hypertarget{priors-for-other-parameters}{% \subsubsection{Priors for other parameters}\label{priors-for-other-parameters}} @@ -281,13 +281,13 @@ \subsubsection{Priors for other parameters}\label{priors-for-other-parameters}} \subsubsection[User-specified priors]{User-specified priors}\label{usp} -In \pkg{bayesnec} we chose to provide default weakly informative priors that are scaled according to the characteristics of the input data (discussed in detail above) in some cases. They were designed to be somewhat informative (relative to each parameter's region) but that would, in data-sufficient cases, return fits without HMC divergent transitions in \proglang{Stan}. Default ``blanket'' priors are not currently provided for non-linear models by the model-building underlying package \pkg{brms}, and we note that defining the extent to which a prior is vague or weakly/strongly informative ultimately depends on the likelihood \citep{gelman2017entropy}. Therefore, there may be situations where the default \pkg{bayesnec} priors do not behave as desired, or the user wants to provide customised priors. For example, the default priors may be too informative, yielding unreasonably tight confidence bands (although this is only likely where there are few data or unique values of the predictor variable). Conversely, priors may be too vague, leading to poor model convergence. Alternatively, the default priors may be of the wrong statistical \code{family} if there was insufficient information in the provided data for \pkg{bayesnec} to correctly predict the appropriate ones to use. The priors used in the default model fit can be extracted using \code{pull_prior}, and a sample or plot of prior values can be obtained from the individual \pkg{brms} model fits through the function \code{sample_priors} which samples directly from the \code{prior} element in the \code{brmsfit} object (\code{pull_brmsfit(fit) |> prior_summary() |> sample_priors()}, see \autoref{fig:sampleprior}). We show example usage of these functions under the Section \ref{moddiag} below. +In \pkg{bayesnec} we chose to provide default weakly informative priors that are scaled according to the characteristics of the input data (discussed in detail above) in some cases. They were designed to be somewhat informative (relative to each parameter's region) but that would, in data-sufficient cases, return fits without HMC divergent transitions in \proglang{Stan}. Default ``blanket'' priors are not currently provided for non-linear models by the model-building underlying package \pkg{brms}, and we note that defining the extent to which a prior is vague or weakly/strongly informative ultimately depends on the likelihood \citep{gelman2017entropy}. Therefore, there may be situations where the default \pkg{bayesnec} priors do not produce an appropriate fit, or the user wants to provide customised priors. For example, the default priors may be too informative, yielding unreasonably tight confidence bands (although this is only likely where there are few data or unique values of the predictor variable). Conversely, priors may be too vague, leading to poor model convergence. Alternatively, the default priors may be of the wrong statistical \code{family} if there was insufficient information in the provided data for \pkg{bayesnec} to correctly predict the appropriate ones to use. The priors used in the default model fit can be extracted using \code{pull_prior}, and a sample or plot of prior values can be obtained from the individual \pkg{brms} model fits through the function \code{sample_priors} which samples directly from the \code{prior} element in the \code{brmsfit} object (\code{pull_brmsfit(fit) |> prior_summary() |> sample_priors()}, see \autoref{fig:sampleprior}). We show example usage of these functions under the Section \ref{moddiag} below. \subsection[Model averaging]{Model averaging}\label{modavg} Multi-model inference can be useful where there is a range of plausible models that could be used \citep{Burnham2002} and has been recently adopted in ecotoxicology for Species Sensitivity Distribution (SSD) model inference \citep{Thorley2018, fox2020, Dalgarno}. The approach may have considerable value in CR modelling because there is often no a priori knowledge of the functional form that the response relationship should take. In this case, model averaging can be a useful way of allowing the data to drive the model selection process, with weights proportional to how well the individual models fit the data. Well-fitting models will have high weights, dominating the model averaged outcome. Conversely, poorly fitting models will have very low model weights and will therefore have little influence on the outcome. Where multiple models fit the data equally well, these can equally influence the outcome, and the resultant posterior predictions reflect that model uncertainty. -\pkg{bayesnec} adopts the weighting methods implemented in the \pkg{loo} \citep{vehtari2020} package in \proglang{R}. \pkg{loo} provides an efficient approximate leave-one-out cross-validation (LOO) algorithm for Bayesian models fit using Markov chain Monte Carlo, as described in \citep{vehtari2017} \url{doi:10.1007/s11222-016-9696-4}. The approximation uses Pareto smoothed importance sampling (PSIS), a new procedure for regularizing importance weights and follows the implementation described in \citep{Vehtari2019}. The \pkg{loo} package offers two weighting methods, the \code{"stacking"} method, aimed to minimise prediction error \citep{Yao2018}, and the \code{"pseudobma"} method, with and without Bayesian bootstrap \citep{vehtari2020, vehtari2017}. The stacking method (\code{method = "stacking"}), combines all models by maximizing the leave-one-out predictive density of the combination distribution, such that it finds the optimal linear combining weights for maximizing the leave-one-out log score \citep{vehtari2020}. The pseudo-BMA method (\code{method = "pseudobma"}) finds the relative weights proportional to the theoretical expected log pointwise predictive density of each model \citep{vehtari2020}. The Bayesian bootstrap (when using \code{method = "pseudobma"}) takes into account the uncertainty of finite data points and regularizes the weights away from the extremes of 0 and 1 \citep{vehtari2020}. \pkg{bayesnec} currently uses by default the \code{"pseudobma"} method (\code{method = "pseudobma"}) with Bayesian bootstrap (\code{BB = TRUE}), but this can be easily modified via argument \code{loo_controls}. +\pkg{bayesnec} adopts the weighting methods implemented via the \pkg{loo} \citep{vehtari2020} package in \proglang{R}. \pkg{loo} provides an efficient approximate leave-one-out cross-validation (LOO) algorithm for Bayesian models fit using Markov chain Monte Carlo, as described in \citep{vehtari2017} \url{doi:10.1007/s11222-016-9696-4}. The approximation uses Pareto smoothed importance sampling (PSIS), a new procedure for regularizing importance weights and follows the implementation described in \citep{Vehtari2019}. The \pkg{loo} package offers two weighting methods, the \code{"stacking"} method, aimed to minimise prediction error \citep{Yao2018}, and the \code{"pseudobma"} method, with and without Bayesian bootstrap \citep{vehtari2020, vehtari2017}. The stacking method (\code{method = "stacking"}), combines all models by maximizing the leave-one-out predictive density of the combination distribution, such that it finds the optimal linear combining weights for maximizing the leave-one-out log score \citep{vehtari2020}. The pseudo-BMA method (\code{method = "pseudobma"}) finds the relative weights proportional to the theoretical expected log pointwise predictive density of each model \citep{vehtari2020}. The Bayesian bootstrap (when using \code{method = "pseudobma"}) takes into account the uncertainty of finite data points and regularizes the weights away from the extremes of 0 and 1 \citep{vehtari2020}. \pkg{bayesnec} currently uses by default the \code{"pseudobma"} method (\code{method = "pseudobma"}) with Bayesian bootstrap (\code{BB = TRUE}), but this can be easily modified via argument \code{loo_controls}. \hypertarget{usage}{% \section{Usage}\label{usage}} @@ -315,7 +315,7 @@ \subsection{The input formula}\label{the-input-formula}} \end{CodeInput} \end{CodeChunk} -The input formula can either be a character string or a validated object of class \code{bayesnecformula}. Details about existing possibilities are detailed in the help files of \code{bayesnecformula} and \code{check_formula}. The argument \code{model} in the formula function \code{crf} is a character string indicating the name(s) of the desired model(s). Alternatively, it may also be one of \code{"all"}, meaning all of the available models will be fit; \code{"ecx"} meaning only models excluding the \(\eta = \text{NEC}\) step parameter will be fit; \code{"nec"} meaning only models with a specific \(\eta = \text{NEC}\) step parameter will be fit; \code{"bot_free"} meaning only models without a \(\delta\) (\code{bottom, "bot"}) parameter (without a lower plateau) will be fit; \code{"zero_bounded"} are models that are bounded to be zero; or \code{"decline"} excludes all hormesis models, i.e., only allows a strict decline in response across the whole predictor range (see above Section \ref{mdbnc}). +The input formula can either be a character string or a validated object of class \code{bayesnecformula}. Details about existing possibilities are detailed in the help files of \code{bayesnecformula} and \code{check_formula}. The argument \code{model} in the formula function \code{crf} is a character string indicating the name(s) of the desired model(s). Alternatively, it may also be one of \code{"all"}, meaning all of the available models will be fit; \code{"ecx"} meaning only models excluding the \(\eta = \text{NEC}\) step parameter will be fit; \code{"nec"} meaning only models with a specific \(\eta = \text{NEC}\) step parameter will be fit; \code{"bot_free"} meaning only models without a \(\delta\) (\code{bot, "bot"}) parameter (without a lower plateau) will be fit; \code{"zero_bounded"} are models that are bounded to be zero; or \code{"decline"} excludes all hormesis models, i.e., only allows a strict decline in response across the whole predictor range (see above Section \ref{mdbnc}). The class \code{bayesnecformula} (generated by the function \code{bayesnecformula} and its alias \code{"bnf"}) contains a \code{model.frame} method which can be employed to manually inspect the \code{data.frame} that will be used to run checks on the data suitability prior to model fitting, e.g. @@ -349,7 +349,7 @@ \subsection{The input formula}\label{the-input-formula}} \end{CodeInput} \end{CodeChunk} -If a recognized model name is provided, a single model of the specified type is fit, and \code{bnec} returns an object of class \code{bayesnecfit}. If a vector of two or more of the available models are supplied, or if one of the model-sets is specified, \code{bnec} returns a model object of class \code{bayesmanecfit} containing Bayesian model averaged predictions for the supplied models, providing they were successfully fitted (see \ref{modavg} above, and the help file of \code{bnec} for further details). By default, \code{bnec} sets the number of chains to 4, the number of iterations per chain to 10,000, and the size of the warm-up period to 4/5 of the number of iterations (i.e 8,000 by default). +If a recognized model name is provided, a single model of the specified type is fit, and \code{bnec} returns an object of class \code{bayesnecfit}. If a vector of two or more of the available models are supplied, or if one of the model-sets is specified, \code{bnec} returns a model object of class \code{bayesmanecfit} containing Bayesian model averaged predictions for the supplied models, providing they were successfully fitted (see \ref{modavg} above, and the help file of \code{bnec} for further details). By default, \code{bnec} sets the number of chains to 4, the number of iterations per chain to 10,000, and the size of the warm-up period to 4/5 of the number of iterations (i.e., 8,000 by default). \code{bnec} will guess the most likely distribution for the response variable. This ``guess'' is achieved through the internal function \code{set_distribution}. This algorithm will assume a Binomial distribution for the response if data are integers and \code{trials(...)} is passed in the formula call; Poisson if data are integers and there are no \code{trials(...)}; Gamma if the data are continuous, zero bounded and contain values greater than one; Beta if data are continuous and bounded to zero and one; and Gaussian if data are continuous and contain negative values. The \code{family} can be set manually via the usual \proglang{R} syntax of calling the argument \code{family} and specifying the desired distribution. \pkg{bayesnec} currently supports the use of the above listed families, as well as the Negative-Binomial and Beta-binomial families that can be used in the case of over-dispersed Binomial and Poisson families respectively. @@ -360,9 +360,9 @@ \subsection{Output classes and methods}\label{output-classes-and-methods}} Models fitted by \code{bnec} will invariably inherit a class \code{bnecfit} which carries three exclusive methods: \code{+}, \code{c} and \code{update}. The first two are used to append one or multiple models to an existing fit, whereas the latter is used to update the fitting characteristics of an existing model (e.g., change the number of iterations or warm-up period, or simply change the HMC fitting parameters). -When \code{bnec} fits a single CR model type, the output object also inherits the \code{bayesnecfit} class. This class contains the \code{brmsfit} object in addition to the mean predicted values and summary estimates of each model parameter. Because the original motivation in the development of \pkg{bayesnec} was the estimation of no-effect toxicity values, by default \code{bnec} also returns a full posterior distribution of the either the NEC (for \textbf{nec} models) or the NSEC (for \textbf{ecx} models, see \citet{Fisher2023}) estimate. If \code{bnec} fits a custom set of models, or a package-pre-defined model-set (e.g.~\code{model = "decline"} in the input formula), the output object is going to inherit the \code{bayesmanecfit} class. Differently from a \code{bayesnecfit} object, \code{bayesmanecfit} comprises a model weighted estimate of predicted posterior values of N(S)EC. This is a weighted combined average of the NEC or the NSEC values, for all \textbf{nec} and \textbf{ecx} models respectively, as described in \citet{fisher2023ieam}. +When \code{bnec} fits a single CR model type, the output object also inherits the \code{bayesnecfit} class. This class contains the \code{brmsfit} object in addition to the mean predicted values and summary estimates of each model parameter. Because the original motivation in the development of \pkg{bayesnec} was the estimation of no-effect toxicity values, by default \code{bnec} also returns a full posterior distribution of the either the NEC (for \textbf{nec} models) or the NSEC \citep[for \textbf{ecx} models, see][]{Fisher2023} estimate. If \code{bnec} fits a custom set of models, or a package-pre-defined model-set (e.g., \code{model = "decline"} in the input formula), the output object is going to inherit the \code{bayesmanecfit} class. Differently from a \code{bayesnecfit} object, \code{bayesmanecfit} comprises a model weighted estimate of predicted posterior values of N(S)EC. This is a weighted combined average of the NEC or the NSEC values, for all \textbf{nec} and \textbf{ecx} models respectively, as described in \citet{fisher2023ieam}. -Regardless of if \code{bnec} generates a \code{bayesnecfit} or \code{bayesmanecfit} class, the underlying \code{brmsfit} object can be extracted using the function \code{pull_brmsfit}. The \code{brmsfit} contains all of the information usually returned from a call to \code{brm}, including the posterior samples of all parameters in the model, from which predictions can be made and custom probabilities calculated. +Regardless of whether \code{bnec} generates a \code{bayesnecfit} or \code{bayesmanecfit} class, the underlying \code{brmsfit} object can be extracted using the function \code{pull_brmsfit}. The \code{brmsfit} contains all of the information usually returned from a call to \code{brm}, including the posterior samples of all parameters in the model, from which predictions can be made and custom probabilities calculated. Both \code{bayesnecfit} and \code{bayesmanecfit} classes contain methods for \code{summary}, \code{print}, \code{predict}, \code{model.frame}, \code{fitted}, \code{posterior_predict}, \code{posterior_edpred} and plotting (\code{plot} and \newline \code{autoplot}). Wherever possible, these methods have been implemented such they are consistent with other model fitting packages in \proglang{R}, and in particular \pkg{brms}. We have also implemented a range of custom functions for extracting effect concentrations and related threshold values (\code{nec}, \code{ecx} and \code{nsec}) that, in the case of a \code{bayesmanecfit}, return model weighted estimates. @@ -387,13 +387,13 @@ \subsection{Output classes and methods}\label{output-classes-and-methods}} Population-Level Effects: Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS -top 0.89 0.01 0.88 0.90 1.00 7246 5463 -beta 0.54 0.06 0.42 0.66 1.00 5328 5067 -nec 1.54 0.02 1.50 1.57 1.00 5170 4733 +top 0.89 0.01 0.88 0.90 1.00 6929 5865 +beta 0.54 0.06 0.43 0.65 1.00 5867 5229 +nec 1.54 0.02 1.50 1.57 1.00 5770 4875 Family Specific Parameters: Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS -phi 51.89 7.51 38.89 68.20 1.00 7018 5731 +phi 51.99 7.32 38.83 67.80 1.00 6461 5368 Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS and Tail_ESS are effective sample size measures, and Rhat is the potential @@ -416,7 +416,7 @@ \subsection{Output classes and methods}\label{output-classes-and-methods}} \end{CodeInput} \begin{figure}[!ht] -{\centering \includegraphics{article_files/figure-latex/base-plot-1} +{\centering \includegraphics[width=0.6\linewidth]{article_files/figure-latex/base-plot-1} } @@ -424,11 +424,11 @@ \subsection{Output classes and methods}\label{output-classes-and-methods}} \end{figure} \end{CodeChunk} -By default the plot shows the fitted posterior curve with 95\% credible intervals, along with an estimate of the \(\eta = \text{NEC}\) value. Please see on-line the vignettes at \url{https://open-aims.github.io/bayesnec/articles/} for more examples using \pkg{bayesnec} models for inference, as well as Section \ref{modsuit} below. +By default the plot shows the fitted posterior curve with 95\% credible intervals, along with an estimate of the \(\eta = \text{NEC}\) value. For more examples using \pkg{bayesnec} models for inference please see the on-line the vignettes at \url{https://open-aims.github.io/bayesnec/articles/}, as well as Section \ref{modsuit} below. \subsection[Model diagnostics]{Model diagnostics}\label{moddiag} -\pkg{bayesnec} will return warning messages as part of the summary method where parts of the model have not converged (rhat, \(\widehat{R}\) \textgreater{} 1.05, see \citet{vehtari2021rank}) and indicate the number of any divergent transitions (if any). These messages include guidance on running more iterations, adjusting priors and or adjusting other fitting criteria, such as adapt\_delta. The summary method for a \code{bayesnecfit} object also indicates the effect sample size for estimates of each of the parameters, and for a \code{bayesmanecfit} a warning is returned if any of the models have parameters with an effective sample size of \textless100. +\pkg{bayesnec} will return warning messages as part of the summary method where parts of the model have not converged {[}rhat, \(\widehat{R}\) \textgreater{} 1.05; see \citet{vehtari2021rank}{]} and indicate the number of any divergent transitions (if any). These messages include guidance on running more iterations, adjusting priors and or adjusting other fitting criteria, such as adapt\_delta. The summary method for a \code{bayesnecfit} object also indicates the effect sample size for estimates of each of the parameters, and for a \code{bayesmanecfit} a warning is returned if any of the models have parameters with an effective sample size of \textless100. In addition to the diagnostic information reported by the summary method, a range of tools is available to assess model fit, including an estimate of overdispersion (for relevant families), an extension of the \pkg{brms} \code{rhat} function that can be applied to both \code{bayesnecfit} and \code{bayesmanecfit} model objects, and a function \code{check_chains} that can be used to visually assess chain mixing and stability. @@ -458,7 +458,7 @@ \subsection{Output classes and methods}\label{output-classes-and-methods}} \begin{CodeChunk} \begin{figure}[!ht] -{\centering \includegraphics{article_files/figure-latex/sampleprior-1} +{\centering \includegraphics[width=0.8\linewidth]{article_files/figure-latex/sampleprior-1} } @@ -471,7 +471,7 @@ \subsection{Output classes and methods}\label{output-classes-and-methods}} \begin{CodeChunk} \begin{figure}[!ht] -{\centering \includegraphics{article_files/figure-latex/checkpriorsingle-1} +{\centering \includegraphics[width=1\linewidth]{article_files/figure-latex/checkpriorsingle-1} } @@ -482,7 +482,7 @@ \subsection{Output classes and methods}\label{output-classes-and-methods}} \hypertarget{model-comparison}{% \subsection{Model comparison}\label{model-comparison}} -With \pkg{bayesnec} we have included a function (\texttt{compare\_posterior}) that allows bootstrapped comparisons of posterior predictions. This function allows the user to fit several different \code{bnec} model fits and compare differences in their posterior predictions. Comparisons can be made across the model fits for individual threshold estimates (e.g.~NEC, NSEC or ECx) or across a range of predictor values. Usage is demonstrated in the relevant vignette at \url{https://open-aims.github.io/bayesnec/articles/example4.html} by comparing different types of models and model-sets using a single data set. However, the intent of this function is to allow comparison across different data sets that might represent, for example, different levels of a fixed factor covariate. For example, this function has been used to compare toxicity of herbicides across three different climate scenarios, to examine the cumulative impacts of pesticides and global warming on corals \citep{flores2021}. +With \pkg{bayesnec} we have included a function (\texttt{compare\_posterior}) that allows bootstrapped comparisons of posterior predictions. This function allows the user to fit several different \code{bnec} model fits and compare differences in their posterior predictions. Comparisons can be made across the model fits for individual threshold estimates (e.g., NEC, NSEC or ECx) or across a range of predictor values. Usage is demonstrated in the relevant vignette at \url{https://open-aims.github.io/bayesnec/articles/example4.html} by comparing different types of models and model-sets using a single data set. However, the intent of this function is to allow comparison across different data sets that might represent, for example, different levels of a fixed factor covariate. For example, this function has been used to compare toxicity of herbicides across three different climate scenarios, to examine the cumulative impacts of pesticides and global warming on corals \citep{flores2021}. At this time \code{bnec} does not allow for an inclusion of an interaction with a fixed factor. Including an interaction term within each of the non-linear models implemented in \pkg{bayesnec} is relatively straightforward, and may be introduced in future releases. However, in many cases the functional form of the response may change with different levels of a given factor. The substantial complexity of defining all possible non-linear model combinations at each factor level means it unlikely this could be feasibly implemented in \pkg{bayesnec} in the short term. In the meantime the greatest flexibility in the functional form of individual model fits can be readily obtained using models fitted independently to data within each factor level. @@ -497,9 +497,9 @@ \subsection{Hierarchical effects}\label{hierarchical-effects}} \pkg{bayesnec} is built upon the precursor \proglang{R} package \pkg{jagsNEC} \citep{fisher2020}, which writes and fits CR models in \proglang{JAGS} \citep{Plummer2003}. \pkg{bayesnec} was then expanded to include several additional CR models and further generalised to allow a large range of response variables to be modelled using their appropriate statistical distribution. In addition, \pkg{bayesnec} allows the addition of hierarchical effects (see above). The simpler syntax of \pkg{brms} allows \pkg{bayesnec} to be more easily expanded to include additional response distributions as well as CR model formula. In addition, \pkg{brms} is well developed and comes with a large range of supporting functions not available to the \proglang{jags} equivalents. -While there are some commercial propriety software packages to support the analysis of toxicity data, such as graphpad (\url{https://www.graphpad.com}) and ToxCalc (\url{https://www.tidepool-scientific.com/ToxCalc/ToxCalc.html}), these provide limited flexibility and most importantly do not support fully reproducible analysis in an open-source environment. Ensuring that the raw data from the experiment are available; and that the statistical code and documentation to reproduce the analysis are also available are two major components to a reproducible study \citep{peng2015}. +While there are some commercial propriety software packages to support the analysis of toxicity data, such as graphpad (\url{https://www.graphpad.com}) and ToxCalc (\url{https://www.tidepool-scientific.com/ToxCalc/ToxCalc.html}), these provide limited flexibility and most importantly do not support fully reproducible analysis in an open-source environment. Ensuring that the raw data from the experiment are available, and that the statistical code and documentation to reproduce the analysis are also available are two major components to a reproducible study \citep{peng2015}. -The open-source flexible computing environment \proglang{R}, provides an ideal platform for reproducible analysis of toxicity data sets. The main existing tool in \proglang{R} that is widely used in ecotoxicology and toxicology is the frequentist-based package \pkg{drc} \citep{Ritz2016}. \pkg{drc} provides a suite of flexible and versatile model fitting and after-fitting functions for the analysis of dose-response data. The package includes a large range of built-in dose-reponse models that are parameterized using a unified structure with a coefficient b denoting the steepness of the dose-response curve (\(\beta = \text{beta}\) in \pkg{bayesnec}); c and d the lower and upper asymptotic limits of the response (\(\tau = \text{top}\) and \(\delta = \text{bottom}\) in \pkg{bayesnec}); and, for some models, e the effective dose ED50 (\(\omega\) = \(\text{EC\textsubscript{50}}\) in \pkg{bayesnec}) \citep{Ritz2016}. Estimation in \pkg{drc} is based on the maximum likelihood principle, which under the assumption of normally distributed response values simplifies to nonlinear least squares. \pkg{bayesnec} provides a Bayesian implementation of many of the non-linear models offered by \pkg{drc}. +The open-source flexible computing environment \proglang{R} provides an ideal platform for reproducible analysis of toxicity data sets. The main existing tool in \proglang{R} that is widely used in ecotoxicology and toxicology is the frequentist-based package \pkg{drc} \citep{Ritz2016}. \pkg{drc} provides a suite of flexible and versatile model fitting and after-fitting functions for the analysis of dose-response data. The package includes a large range of built-in dose-reponse models that are parameterized using a unified structure with a coefficient b denoting the steepness of the dose-response curve (\(\beta = \text{beta}\) in \pkg{bayesnec}); c and d the lower and upper asymptotic limits of the response (\(\tau = \text{top}\) and \(\delta = \text{bot}\) in \pkg{bayesnec}); and, for some models, e the effective dose ED50 (\(\omega\) = \(\text{EC\textsubscript{50}}\) in \pkg{bayesnec}) \citep{Ritz2016}. Estimation in \pkg{drc} is based on the maximum likelihood principle, which under the assumption of normally distributed response values simplifies to nonlinear least squares. \pkg{bayesnec} provides a Bayesian implementation of many of the non-linear models offered by \pkg{drc}. We compared the \pkg{drc} and \pkg{bayesnec} fits for the three parameter no-effect-concentration model implemented in WinBugs by \citet{Fox2010} (the \code{nec3param} model in \pkg{bayesnec}, see Section \ref{nec-models}) for two selected herbicides from the data from \citet{jones2003meps}. The data comprise assays of herbicide phytotoxicity on chlorophyll fluorescence measurements (Fv/Fm) of symbiotic dinoflagellates still in the host tissue of the coral. Full detail on this example data set is provided in Section \ref{iexample}. In \pkg{bayesnec} this model is fit with the call \texttt{bnec(fvfm\ \textasciitilde{}\ crf(log\_x,\ model\ =\ "nec3param"),\ data\ =\ .x)}, and in \pkg{drc} using \texttt{drm(fvfm\ \textasciitilde{}\ log\_x,\ fct\ =\ NEC.3(),\ data\ =\ .x)}. The herbicides hexazinone and tebuthiuron were selected specifically for this comparison as visual inspection indicated they should provide a reasonable fit to the \citet{Fox2010} model as there was some evidence of a threshold effect. For both herbicides, the predicted \pkg{drc} and \pkg{bayesnec} values were nearly identical using the default behaviour of each package (\autoref{fig:drccomparisonplot}). @@ -510,11 +510,11 @@ \subsection{Hierarchical effects}\label{hierarchical-effects}} } -\caption[A comparison of the \pkg{bayesnec} and \pkg{drc} model fits and estimated NEC values for the \code{nec3param} model, fit to data on maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated hexazinone and tebuthiuron (range 0.3 to 1000 ug per L) for 10 h]{A comparison of the \pkg{bayesnec} and \pkg{drc} model fits and estimated NEC values for the \code{nec3param} model, fit to data on maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated hexazinone and tebuthiuron (range 0.3 to 1000 ug per L) for 10 h.}\label{fig:drccomparisonplot} +\caption[A comparison of the \pkg{bayesnec} and \pkg{drc} model fits and estimated NEC values for the \code{nec3param} model, fit to data on maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated hexazinone and tebuthiuron (range 0.3 to 1000 $\mu$g per L) for 10 h]{A comparison of the \pkg{bayesnec} and \pkg{drc} model fits and estimated NEC values for the \code{nec3param} model, fit to data on maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated hexazinone and tebuthiuron (range 0.3 to 1000 $\mu$g per L) for 10 h.}\label{fig:drccomparisonplot} \end{figure} \end{CodeChunk} -While \pkg{drc} is an excellent tool for fitting CR models using frequentist methods and is widely used (\citet{Ritz2016} currently has nearly 2,000 citations), \pkg{bayesnec} provides a Bayesian alternative using similarly simple syntax. The advantages of Bayesian methods in this setting are numerous, and include direct characterisation of parameter uncertainty and posterior predicted samples that provide a valuable resource for model inference (such as comparisons of relative toxicity, see Section \ref{iexample} and \autoref{fig:necplots}). In addition, we have observed that even the use of only weakly informative priors tends to improve the reliability of model fits compared to \pkg{drc}, and this may be true of MLE estimation more generally \citep{krull2020comparing}. +While \pkg{drc} is an excellent tool for fitting CR models using frequentist methods and is widely used \citep[ is cited nearly 2,000 times]{Ritz2016}, \pkg{bayesnec} provides a Bayesian alternative using similarly simple syntax. The advantages of Bayesian methods in this setting are numerous, and include direct characterisation of parameter uncertainty and posterior predicted samples that provide a valuable resource for model inference (such as comparisons of relative toxicity, see Section \ref{iexample} and \autoref{fig:necplots}). In addition, we have observed that even the use of only weakly informative priors tends to improve the reliability of model fits compared to \pkg{drc}, and this may be true of MLE estimation more generally \citep{krull2020comparing}. \section[Illustrative example]{Illustrative example}\label{iexample} @@ -523,11 +523,11 @@ \subsection{Hierarchical effects}\label{hierarchical-effects}} \hypertarget{example-case-study}{% \subsection{Example case study}\label{example-case-study}} -In our case study, the no-effect-toxicity values of a range of herbicides are first estimated and then their relative toxicity is compared. The response data are the maximum effective quantum yield (\({\Delta F / Fm'}\)) of symbiotic dinoflagellates still in the host tissue of the coral \emph{Seriatopora hystrix} (in hospite or in vivo). \({\Delta F / Fm'}\) was calculated from Chlorophyll fluorescence parameters measured using a DIVING-PAM chlorophyll fluorometer (Walz) as described in more detail in \citet{jones2003meps} and \citet{jones2003effects}. The corals were exposed to elevated levels of eight different herbicides (Irgarol 1051, ametryn, diuron, hexazinone, atrazine, simazine, tebuthiuron, ioxynil) at concentrations ranging from 0.3 to 1000 \({\mu g/L}\)) for 10 h. Data for ioxynil were excluded from analysis here as this herbicide did not show sufficient response at the maximum concentration. +In our case study, the no-effect-toxicity values of a range of herbicides are first estimated and then their relative toxicity is compared. The response data are the maximum effective quantum yield (\({\Delta F / Fm'}\)) of symbiotic dinoflagellates still in the host tissue of the coral \emph{Seriatopora hystrix} (in hospite or in vivo). \({\Delta F / Fm'}\) was calculated from Chlorophyll fluorescence parameters measured using a DIVING-PAM chlorophyll fluorometer (Walz) as described in more detail in \citet{jones2003meps} and \citet{jones2003effects}. The corals were exposed to elevated levels of eight different herbicides (Irgarol 1051, ametryn, diuron, hexazinone, atrazine, simazine, tebuthiuron, ioxynil) at concentrations ranging from 0.3 to 1000 \(\mu\)g / L) for 10 h. Data for ioxynil were excluded from analysis here as this herbicide did not show sufficient response at the maximum concentration. \subsection[Single herbicide]{Single herbicide analysis}\label{isingle} -We start by describing the analysis workflow for a single herbicide, ametryn. We first filter ametryn from the larger herbicide data set. The concentration data are log transformed prior to analysis to improve model stability and because this is the natural scaling of the concentration series for these data. As there was little evidence of hormesis (an initial increase in the response at low concentration) in these data (or in the other herbicides, see below \ref{iall}), we used only the \code{decline} model set as candidate models for the model averaging. Setting \code{model = "decline"} results in \pkg{bayesnec} attempting to fit a set of 14 models, and returning an object of class \texttt{bayesmancfit} for the herbicide ametryn. +We start by describing the analysis workflow for a single herbicide, ametryn. We first filter ametryn from the larger herbicide data set. The concentration data are log transformed prior to analysis to improve model stability and because this is the natural scaling of the concentration series for these data. As there was little evidence of hormesis (an initial increase in the response at low concentration) in these data (or in the other herbicides, see below \ref{iall}), we used only the \code{decline} model set as candidate models for the model averaging. Setting \code{model = "decline"} results in \pkg{bayesnec} attempting to fit a set of 14 models, and returning an object of class \texttt{bayesmancfit}. \begin{CodeChunk} \begin{CodeInput} @@ -540,7 +540,7 @@ \subsection{Example case study}\label{example-case-study}} \end{CodeInput} \end{CodeChunk} -Other than selecting a model set to use, here we leave all other \code{bnec} arguments as their default. In this case \pkg{bayesnec} correctly chooses a Beta distribution to use as the \code{family}, defaults to the identity link, and drops the models ``neclin'' and ``ecxlin'' from the complete list of ``decline'' models (as these are not appropriate for a zero bounded distribution, such as the Beta distribution, \ref{mdbnc}). Note that in this example (or the one below, \ref{iall}) we do not show all of the console output and messages generated by both the \code{bnec} and underlying \code{brm} functions, because across 14 models this results in substantial output. +Other than selecting a model set to use, here we leave all other \code{bnec} arguments as their default. In this case \pkg{bayesnec} correctly chooses a Beta distribution to use as the \code{family}, defaults to the identity link, and drops the models ``neclin'' and ``ecxlin'' from the complete list of ``decline'' models (as these are not appropriate for a zero bounded distribution, such as the Beta distribution, \ref{mdbnc}). Note that in this example (or in the one detailed in Section \ref{iall}) we do not show all of the console output and messages generated by both the \code{bnec} and underlying \code{brm} functions, because across 14 models this results in substantial output. Following model fitting, the quality of the fits should be examined using the function \code{check_chains}, as well as \code{check_priors} to ensure there is good chain mixing and that the default priors were suitable. The results from these checks are omitted here for brevity, but can be easily saved to pdf output for visual inspection and inclusion into any analysis supplementary material by setting the argument \texttt{filename} to any non empty string, as in the code below: @@ -559,7 +559,7 @@ \subsection{Example case study}\label{example-case-study}} \end{CodeInput} \end{CodeChunk} -Once we are happy with the model fits, we can examine the model statistics using \code{summary}: +Once we are satisfied with the model fits, we can examine the model statistics using \code{summary}: \begin{CodeChunk} \begin{CodeInput} @@ -575,23 +575,23 @@ \subsection{Example case study}\label{example-case-study}} Model weights (Method: pseudobma_bb_weights): waic wi -nec3param -450.78 0.02 -nec4param -456.58 0.03 -ecxexp -320.61 0.00 -ecx4param -465.28 0.32 -ecxwb1 -443.82 0.01 -ecxwb2 -461.45 0.08 -ecxwb1p3 -323.34 0.00 -ecxwb2p3 -446.92 0.02 -ecxll5 -464.57 0.24 -ecxll4 -465.10 0.29 -ecxll3 -431.95 0.00 +nec3param -450.89 0.02 +nec4param -456.61 0.03 +ecxexp -320.62 0.00 +ecx4param -465.07 0.30 +ecxwb1 -443.51 0.01 +ecxwb2 -461.38 0.08 +ecxwb1p3 -323.35 0.00 +ecxwb2p3 -446.89 0.02 +ecxll5 -464.40 0.23 +ecxll4 -465.19 0.31 +ecxll3 -431.80 0.00 Summary of weighted NEC posterior estimates: NB: Model set contains the ECX models: ecxexp;ecx4param;ecxwb1;ecxwb2;ecxwb1p3;ecxwb2p3;ecxll5;ecxll4;ecxll3; weighted NEC estimates include NSEC surrogates for NEC Estimate Q2.5 Q97.5 -NEC -1.61 -2.11 -0.48 +NEC -1.60 -2.09 -0.47 Bayesian R2 estimates: @@ -610,13 +610,13 @@ \subsection{Example case study}\label{example-case-study}} \end{CodeOutput} \end{CodeChunk} -For a \code{bayesmanecfit} object with multiple model fits, \code{summary} first displays the class, the \code{family} and links that have been used for the model fits, the number of posterior draws contained within each model fit, and a table of the model weights for each model, showing the Widely Applicable Information Criterion (waic, \citet{watanabe2010asymptotic}) from \pkg{loo} and weights (wi) which are based by default on the \code{"pseudobma"} method (\code{method = "pseudobma"}) with Bayesian bootstrap (\code{BB = TRUE}) (see above), but this can be easily modified via argument \code{loo_controls} using \code{amend}. For the amatryn data set, weights are highest for the \textbf{ecx4param} model, followed closely by tge \textbf{ecxll4} model, with some lesser support for the \code{ecxwb2} model, and very a small amount of support for the two \emph{NEC} models (\emph{nec3param} and \emph{nec4parm}). +For a \code{bayesmanecfit} object with multiple model fits, \code{summary} first displays the class, the \code{family} and links that have been used for the model fits, the number of posterior draws contained within each model fit, and a table of the model weights for each model, showing the Widely Applicable Information Criterion \citep[waic,][]{watanabe2010asymptotic} from \pkg{loo} and weights (wi) which are based by default on the \code{"pseudobma"} method (\code{method = "pseudobma"}) with Bayesian bootstrap (\code{BB = TRUE}) (see above), but this can be easily modified via argument \code{loo_controls} using \code{amend}. For the ametryn data set, weights are highest for the \textbf{ecx4param} model, followed closely by the \textbf{ecxll4} model, with some lesser support for the \code{ecxwb2} model, and a very small amount of support for the two \emph{NEC} models (\emph{nec3param} and \emph{nec4parm}). -Because \pkg{bayesnec} was primarily developed for estimating no-effect-concentrations, an estimate of model averaged NEC is also provided, in this case with a note that the weighted model averaged estimate contains NSEC \citep{Fisher2023} values in the case of the fitted \emph{ECx} models. In units of log concentration, the estimated no-effect toxicity value for ametryn is -1.61, which is equivalent to 0.201 \({\mu}gL^{-1}\). The 95\% credible intervals are also provided, based on the 0.025 and 97.5 quantiles of the weighted pooled posterior sample. +Because \pkg{bayesnec} was primarily developed for estimating no-effect-concentrations, an estimate of model averaged NEC is also provided, and in this case with a note that the weighted model averaged estimate contains NSEC \citep{Fisher2023} values in the case of the fitted \emph{ECx} models. In units of log concentration, the estimated no-effect toxicity value for ametryn is -1.6, which is equivalent to 0.202 \(\mu\)g / L. The 95\% credible intervals are also provided, based on the 0.025 and 97.5 quantiles of the weighted pooled posterior sample. Finally, Bayesian \(R^2\) estimates are also provided \citep{gelman2019}, as an indicator of overall model fit. This is useful because model weights are always relative to the models actually included in the model set for the given data. The \(R^2\) provides an indicator of model fit that can be compared objectively across data sets as an indication of the quality of the fit of any of the supplied models to the data. -We can plot all the models contained within the \code{bayesmanecfit} using the \texttt{autplot} function, with all\_models = TRUE (\autoref{fig:fullbayesmanecplotametrynALLplot}): +We can plot all the models contained within the \code{bayesmanecfit} using the \texttt{autoplot} function, with all\_models = TRUE (\autoref{fig:fullbayesmanecplotametrynALLplot}): \begin{CodeChunk} \begin{CodeInput} @@ -635,22 +635,22 @@ \subsection{Example case study}\label{example-case-study}} \begin{CodeChunk} \begin{figure}[!ht] -{\centering \includegraphics{article_files/figure-latex/fullbayesmanecplotametrynALLplot-1} +{\centering \includegraphics[width=0.65\linewidth]{article_files/figure-latex/fullbayesmanecplotametrynALLplot-1} } -\caption{Individual model fits to the ametryn data set, showing the estimated no effect concentration for each. Data are the maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated ametryn (range 0.3 to 1000 ug per L) for 10 h. N(S)EC values presented are the median and 95\% credible intervals of the posterior estimates of the NEC parameter obtained for all \code{NEC} models, and the posterior predicted NSEC values estimated from all smooth \code{ECx} models.}\label{fig:fullbayesmanecplotametrynALLplot} +\caption{Individual model fits to the ametryn data set, showing the estimated no effect concentration for each. Data are the maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated ametryn (range 0.3 to 1000 $\mu$g per L) for 10 h. N(S)EC values presented are the median and 95\% credible intervals of the posterior estimates of the NEC parameter obtained for all \code{NEC} models, and the posterior predicted NSEC values estimated from all smooth \code{ECx} models.}\label{fig:fullbayesmanecplotametrynALLplot} \end{figure} \end{CodeChunk} \begin{CodeChunk} \begin{figure}[!ht] -{\centering \includegraphics{article_files/figure-latex/fullbayesmanecplotametrynplot-1} +{\centering \includegraphics[width=0.6\linewidth]{article_files/figure-latex/fullbayesmanecplotametrynplot-1} } -\caption{Full model averaged bayesmanecfit to the ametryn data set, showing the estimated model-averaged no effect concentration. Data are the maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated ametryn (range 0.3 to 1000 ug per L) for 10 h. N(S)EC values presented are model averaged posterior densities of the NEC parameter obtained from all fitted \code{NEC} models, and the NSEC values estimated from all smooth \code{ECx} models, summarised as a median and 95\% credible intervals. Only the \code{decline} model set was used (ie. hormesis models were excluded).}\label{fig:fullbayesmanecplotametrynplot} +\caption{Full model averaged bayesmanecfit to the ametryn data set, showing the estimated model-averaged no effect concentration. Data are the maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated ametryn (range 0.3 to 1000 $\mu$g per L) for 10 h. N(S)EC values presented are model averaged posterior densities of the NEC parameter obtained from all fitted \code{NEC} models, and the NSEC values estimated from all smooth \code{ECx} models, summarised as a median and 95\% credible intervals. Only the \code{decline} model set was used (i.e., hormesis models were excluded).}\label{fig:fullbayesmanecplotametrynplot} \end{figure} \end{CodeChunk} @@ -675,7 +675,7 @@ \subsection{Example case study}\label{example-case-study}} Because we want to run the analysis for all seven herbicides separately we first split the data, then call the \code{bnec} function for each herbicide using \pkg{purrr} \citep{purrr}. We use the same settings and default arguments as for our single herbicide example above (ametryn, see \ref{isingle}). Note that fitting a large set of models using Bayesian methods can take some time (see below Section \ref{comptime}), and we recommend running the analysis at a convenient time, and saving the resulting output to a .RData file to work with later. -Once we have our list of fitted \code{bayesmanecfit} objects for each herbicide, we can use \code{rhat} to check that all models fitted successfully for each, as well as check the chains and priors for each fitted model, although we have skipped these steps here. It is also possible to simply remove any models that fail the \code{rhat} criteria of \textless1.05 using the function \code{amend}. Note for the decline model set there are no fits with bad \code{rhat} values for this example. If there are models that fail to converge (have high \code{rhat} values for some parameters, divergent transitions or issues identified with chain mixing) it may be possible to improve those fits by re-running the model with a greater number of iterations, modified priors, or adjusting other fitting options within \pkg{brms} such as \texttt{adapt\_delta}. Unfortunately, at this time \code{bnec} will not support model averaging across models fitted using varying numbers of iterations, so to improve the fit of a single model, all models in the set will need to be re-fit with the same (higher) number of iterations. We recommend exploring the required changes using a single \code{bayesnecfit} of any problematic models before re-running the complete \code{bayesmanecfit} set. +Once we have our list of fitted \code{bayesmanecfit} objects for each herbicide, we can use \code{rhat} to check that all models fitted successfully for each, as well as check the chains and priors for each fitted model, although we have skipped these steps here. It is also possible to simply remove any models that fail the \code{rhat} criteria of \textless1.05 using the function \code{amend}. Note for the ``decline'' model set, there are no fits with poor \code{rhat} values for this example. If there are models that fail to converge (have high \code{rhat} values for some parameters, divergent transitions or issues identified with chain mixing) it may be possible to improve those fits by re-running the model with a greater number of iterations, modified priors, or adjusting other fitting options within \pkg{brms} such as \texttt{adapt\_delta}. Unfortunately, at this time \code{bnec} will not support model averaging across models fitted using varying numbers of iterations, so to improve the fit of a single model, all models in the set will need to be re-fit with the same (higher) number of iterations. We recommend exploring the required changes using a single \code{bayesnecfit} of any problematic models before re-running the complete \code{bayesmanecfit} set. \begin{CodeChunk} \begin{CodeInput} @@ -707,38 +707,38 @@ \subsection{Example case study}\label{example-case-study}} \end{CodeInput} \end{CodeChunk} -This collated table of model weights shows that the best fitting models vary substantially across the CR curves for the seven herbicides (\autoref{tab:weightsTab}). Few herbicides showed any weight for the NEC threshold models, with the exception of ametryn which had some, albeit limited, support. The weights for the various \emph{ECx} models varied substantially, with at least some support for more than one model in all cases. This shows clearly the value of the model averaging approach adopted in \pkg{bayesnec}, which effectively accommodates this model uncertainty by seamlessly providing weighted model averaged inferences. Note that for all herbicides there were some models to did not fit successfully using the default \pkg{bayesnec} settings. +This collated table of model weights shows that the best fitting models vary substantially across the CR curves for the seven herbicides (\autoref{tab:weightsTab}). Few herbicides showed any weight for the NEC threshold models, with the exception of ametryn which had some, albeit limited, support. The weights for the various \emph{ECx} models varied substantially, with at least some support for more than one model in all cases. This shows clearly the value of the model averaging approach adopted in \pkg{bayesnec}, which effectively accommodates this model uncertainty by seamlessly providing weighted model averaged inferences. \begin{CodeChunk} \begin{table} -\caption{\label{tab:weightsTab}Fitted valid models and their relative pseudo-BMA weights for CR curves for the effects seven herbicides on maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates of the coral \textit{Seriatopora hystrix}.} +\caption{\label{tab:weightsTab}Fitted valid models and their relative pseudo-BMA weights for CR curves for the effects of seven herbicides on maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates of the coral \textit{Seriatopora hystrix}. Ame. = Ametryn; Atr. = Atrazine; Diu. = Diuron; Hex. = Hexazinone; Irg. = Irgarol; Sim. = Simazine; Teb. = Tebuthiuron.} \centering \begin{tabular}[t]{l|r|r|r|r|r|r|r} \hline Model & Ametryn & Atrazine & Diuron & Hexazinone & Irgarol & Simazine & Tebuthiuron\\ \hline -nec3param & 0.020 & 0.000 & 0.000 & 0.000 & 0.001 & 0.000 & 0.018\\ +nec3param & 0.021 & 0.000 & 0.000 & 0.000 & 0.001 & 0.000 & 0.018\\ \hline -nec4param & 0.027 & 0.001 & 0.000 & 0.000 & 0.001 & 0.000 & 0.003\\ +nec4param & 0.028 & 0.001 & 0.000 & 0.000 & 0.001 & 0.000 & 0.003\\ \hline ecxexp & 0.000 & 0.000 & 0.000 & 0.000 & 0.000 & 0.000 & 0.000\\ \hline -ecx4param & 0.318 & 0.131 & 0.228 & 0.118 & 0.255 & 0.188 & 0.001\\ +ecx4param & 0.296 & 0.142 & 0.251 & 0.113 & 0.255 & 0.175 & 0.001\\ \hline -ecxwb1 & 0.010 & 0.473 & 0.117 & 0.000 & 0.245 & 0.093 & 0.000\\ +ecxwb1 & 0.010 & 0.439 & 0.124 & 0.000 & 0.258 & 0.107 & 0.000\\ \hline -ecxwb2 & 0.083 & 0.001 & 0.001 & 0.014 & 0.027 & 0.017 & 0.168\\ +ecxwb2 & 0.082 & 0.001 & 0.001 & 0.016 & 0.028 & 0.018 & 0.165\\ \hline -ecxwb1p3 & 0.000 & 0.000 & 0.000 & 0.019 & 0.000 & 0.014 & 0.000\\ +ecxwb1p3 & 0.000 & 0.000 & 0.000 & 0.019 & 0.000 & 0.015 & 0.000\\ \hline -ecxwb2p3 & 0.017 & 0.001 & 0.001 & 0.060 & 0.001 & 0.052 & 0.501\\ +ecxwb2p3 & 0.017 & 0.001 & 0.001 & 0.057 & 0.001 & 0.053 & 0.502\\ \hline -ecxll5 & 0.239 & 0.267 & 0.310 & 0.022 & 0.203 & 0.142 & 0.308\\ +ecxll5 & 0.232 & 0.289 & 0.311 & 0.022 & 0.189 & 0.156 & 0.310\\ \hline -ecxll4 & 0.286 & 0.124 & 0.259 & 0.098 & 0.267 & 0.196 & 0.001\\ +ecxll4 & 0.314 & 0.127 & 0.224 & 0.115 & 0.268 & 0.175 & 0.001\\ \hline -ecxll3 & 0.000 & 0.000 & 0.085 & 0.670 & 0.000 & 0.297 & 0.000\\ +ecxll3 & 0.000 & 0.000 & 0.088 & 0.658 & 0.000 & 0.301 & 0.000\\ \hline \end{tabular} \end{table} @@ -773,15 +773,15 @@ \subsection{Example case study}\label{example-case-study}} } -\caption{Full model averaged bayesmanecfits to seven phototoxicity data sets, showing estimated no effect concentrations. Data are the maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated Irgarol 1051, ametryn, diuron, hexazinone, atrazine, simazine, or tebuthiuron (range 0.3 to 1000 ug per L) for 10 h. N(S)EC values presented are model averaged posterior densities of the NEC parameter obtained from all fitted \code{NEC} models, and the NSEC values estimated from all smooth \code{ECx} models, summarised as a median and 95\% credible intervals. Only the \code{decline} model set was used (ie. hormesis models were excluded).}\label{fig:fullbayesmanecplot} +\caption{Full model averaged bayesmanecfits to seven phototoxicity data sets, showing estimated no effect concentrations. Data are the maximum effective quantum yield ($\Delta F / Fm'$) of symbiotic dinoflagellates (in hospite) in \textit{Seriatopora hystrix} exposed to elevated Irgarol 1051, ametryn, diuron, hexazinone, atrazine, simazine, or tebuthiuron (range 0.3 to 1000 $\mu$g per L) for 10 h. N(S)EC values presented are model averaged posterior densities of the NEC parameter obtained from all fitted \code{NEC} models, and the NSEC values estimated from all smooth \code{ECx} models, summarised as a median and 95\% credible intervals. Only the \code{decline} model set was used (i.e., hormesis models were excluded).}\label{fig:fullbayesmanecplot} \end{figure} \end{CodeChunk} -Across the seven herbicides, they \code{bayesmanecfit} model averaged fits model the input data very well, with predictions generally very confident (\autoref{fig:fullbayesmanecplot}). The slight uncertainty in the appropriate model form for the ametryn data set is evident in the weighted average predicted values as a broader confidence band at the estimated position of the NEC threshold point (\autoref{fig:fullbayesmanecplot}). +Across the seven herbicides, the \code{bayesmanecfit} model averaged fits model the input data very well, with predictions generally very confident (\autoref{fig:fullbayesmanecplot}). The slight uncertainty in the appropriate model form for the ametryn data set is evident in the weighted average predicted values as a broader confidence band at the estimated position of the NEC threshold point (\autoref{fig:fullbayesmanecplot}). -The values presented on the plot in \autoref{fig:fullbayesmanecplot} as N(S)EC are model averaged posterior densities of the NEC parameter obtained from all fitted \emph{NEC} models, and the NSEC values estimated from all smooth \textbf{ecx} models. These values are the \pkg{bayesnec} estimates for the no-(significant)-effect concentration required for the integration of this toxicity data into the relevant regulatory framework in Australia, the Australian and New Zealand Water Quality Guidelines \citep{anzg}. While the recommendation that NEC is the preferred toxicity estimate in this framework is well established \citep[\citet{Warne2018c}]{Warne2015}, use of the NSEC is relatively new \citep{Fisher2023} and while yet to gain formal approval for use in the Australian setting presents a potential alternative no-effect estimates for smooth curves. +The values presented on the plot in \autoref{fig:fullbayesmanecplot} as N(S)EC are model averaged posterior densities of the NEC parameter obtained from all fitted \emph{NEC} models, and the NSEC values estimated from all smooth \emph{ECx} models. These values are the \pkg{bayesnec} estimates for the no-(significant)-effect concentration required for the integration of this toxicity data into the relevant regulatory framework in Australia, the Australian and New Zealand Water Quality Guidelines \citep{anzg}. While the recommendation that NEC is the preferred toxicity estimate in this framework is well established \citep{Warne2015, Warne2018c}, use of the NSEC is relatively new \citep{Fisher2023} and while yet to gain formal approval for use in the Australian setting presents a potential alternative no-effect estimate for smooth curves. -Finally, we also use the \texttt{compare\_posterior} function to extract and plot the weighted averaged posterior samples for the N(S)EC toxicity values for all herbicides (\autoref{fig:necplots}). This shows clearly that irgarol, diuron and ametryn are the most toxic, and exhibit relatively similar toxicity, with their posterior densities substantially overlapping (\autoref{fig:necplots}). The herbicide tebuthiuron is the least toxic of these seven, followed by simazine, atrazine and finally hexazinone, which exhibits intermediate toxicity (\autoref{fig:necplots}). \texttt{compare\_posterior} also calculates the probability of difference in toxicity across the herbicides, which confirm the visual results and can be used to infer significant differences in toxicity (\autoref{tab:probdiffs}). +Finally, we also use the \texttt{compare\_posterior} function to extract and plot the weighted averaged posterior samples for the N(S)EC toxicity values for all herbicides (\autoref{fig:necplots}). This shows clearly that irgarol, diuron and ametryn are the most toxic, and exhibit relatively similar toxicity, with their posterior densities substantially overlapping (\autoref{fig:necplots}). The herbicide tebuthiuron is the least toxic of these seven, followed by simazine, atrazine and finally hexazinone, which exhibits intermediate toxicity (\autoref{fig:necplots}). \texttt{compare\_posterior} also calculates the probability of difference in toxicity across the herbicides, which confirm the visual results and can be used to infer significant differences in toxicity response (\autoref{tab:probdiffs}). \begin{CodeChunk} \begin{CodeInput} @@ -808,23 +808,23 @@ \subsection{Example case study}\label{example-case-study}} \begin{CodeChunk} \begin{table} -\caption{\label{tab:probdiffs}Probability of no-difference in no-effect toxicity for seven herbicides. Values are based on the proportional overlap in predicted posterior probability density of the N(S)EC estimates.} +\caption{\label{tab:probdiffs}Probability of no-difference in no-effect toxicity for seven herbicides. Values are based on the proportional overlap in predicted posterior probability density of the N(S)EC estimates. Ame. = Ametryn; Atr. = Atrazine; Diu. = Diuron; Hex. = Hexazinone; Irg. = Irgarol; Sim. = Simazine; Teb. = Tebuthiuron.} \centering \begin{tabular}[t]{l|r|r|r|r|r|r} \hline Herbicide & Atrazine & Diuron & Hexazinone & Irgarol & Simazine & Tebuthiuron\\ \hline -Ametryn & 0.0198775 & 0.8613577 & 0.0502563 & 0.9371171 & 0.0105013 & 0.0096262\\ +Ametryn & 0.0185023 & 0.8663583 & 0.0535067 & 0.9398675 & 0.0120015 & 0.0107513\\ \hline -Atrazine & NA & 0.9873734 & 0.8499812 & 0.9881235 & 0.0336292 & 0.0105013\\ +Atrazine & NA & 0.9886236 & 0.8586073 & 0.9889986 & 0.0381298 & 0.0115014\\ \hline -Diuron & NA & NA & 0.0155019 & 0.7348419 & 0.0095012 & 0.0095012\\ +Diuron & NA & NA & 0.0150019 & 0.7174647 & 0.0115014 & 0.0108764\\ \hline -Hexazinone & NA & NA & NA & 0.9859982 & 0.0147518 & 0.0097512\\ +Hexazinone & NA & NA & NA & 0.9859982 & 0.0177522 & 0.0110014\\ \hline -Irgarol & NA & NA & NA & NA & 0.0093762 & 0.0095012\\ +Irgarol & NA & NA & NA & NA & 0.0111264 & 0.0108764\\ \hline -Simazine & NA & NA & NA & NA & NA & 0.0802600\\ +Simazine & NA & NA & NA & NA & NA & 0.0797600\\ \hline \end{tabular} \end{table} @@ -834,24 +834,24 @@ \subsection{Example case study}\label{example-case-study}} \hypertarget{discussion}{% \section{Discussion}\label{discussion}} -In order to be accessible to a broad community of varying statistical capabilities, we have simplified fitting a \pkg{bayesnec} model as much as possible, whilst retaining the ability to modify a wide range of arguments as necessary. Where possible we have tried to set default values to align with those in \pkg{brms}. Wherever we deviate, this is generally towards being more conservative and/or we have clearly explained our reasoning. Specific examples include: 1) \code{iter}, which we increased from the \pkg{brms} default of 2,000 to 10,000 as we found that a higher number of iterations are generally required for these non-linear models; and 2) the use of \code{pointwise = TRUE} (where possible) and \code{sample_prior = "yes"} to avoid excessive \proglang{R} crashes when used in the Windows operating system and allow the use of the \code{hypothesis} function respectively. We welcome constructive criticism of our selections and users must expect that default settings may change accordingly in later releases. We encourage users to modify these default values themselves whenever appropriate. +In order to be accessible to a broad community of varying statistical capabilities, we have simplified fitting a \pkg{bayesnec} model as much as possible, whilst retaining the ability to modify a wide range of arguments as necessary. Where possible we have tried to set default values to align with those in \pkg{brms}. Wherever we deviate, this is generally towards being more conservative and/or we have clearly explained our reasoning. Specific examples include: 1) \code{iter}, which we increased from the \pkg{brms} default of 2,000 to 10,000 as we found that a higher number of iterations are generally required for these non-linear models; and 2) the use of \code{pointwise = TRUE} (where possible) and \code{sample_prior = "yes"} to avoid excessive crashes in the \proglang{R} programming environment when used in the Windows operating system and allow the use of the \code{hypothesis} function respectively. We welcome constructive criticism of our selections and users must expect that default settings may change accordingly in later releases. We encourage users to modify these default values themselves whenever appropriate. We have made considerable effort to ensure that \pkg{bayesnec} makes a sensible prediction for the appropriate \code{family}, constructs appropriate weakly informative priors, and generates sensible initial values. However, this is a difficult task across such a broad range of non-linear models, and across the potential range of ecotoxicological data that may be used. The user must interrogate their model fits using the wide array of helper functions, and use their own judgement regarding the appropriateness of model inferences for their own application. Of particular importance are examination of model fit statistics through the \code{summary} and \code{rhat} methods, visual inspection of all model fits in \code{bayesmanecfit} objects (via \code{plot(..., all_models = TRUE)} and \code{check_chains(..., all_models = TRUE)}) and an assessment of the posterior versus prior probability densities to ensure default priors are appropriate (using \code{check_priors}). -The model averaging approach implemented in \pkg{bayesnec} is widely used in a range of settings \citep[in ecology for example, see][ for a thorough review]{Dormann2018}. However, model averaging is relatively new to ecotoxicology \citep[but see, for example,][]{Shao2014, Thorley2018, fox2020, Wheeler2009}. In \pkg{bayesnec} we have implemented a broad range of potential models, and the default behaviour is to fit them all (if appropriate for the natural range of the response). More research is required to understand how model-set selection influences model inference. While some studies suggest using a broad range of models may be optimal \citep{Wheeler2009}, others indicate that including multiple models of similar shape may overweight the representation of that shape in model averaged predictions \citep{fox2020}. In addition, it is important to understand that when models are added or removed from the model-set, this can sometimes have a substantial influence on model predictions (potentially changing estimated ECx values, for example). As the model-set in \pkg{bayesnec} may change through time it is important to keep a record of the models that were actually fitted in a given analysis, in the event it is necessary to reproduce a set of results. +The model averaging approach implemented in \pkg{bayesnec} is widely used in a range of settings \citep[in ecology for example, see][ for a thorough review]{Dormann2018}. However, model averaging is relatively new to ecotoxicology \citep[but see, for example,][]{Shao2014, Thorley2018, fox2020, Wheeler2009}. In \pkg{bayesnec} we have implemented a broad range of potential models, and the default behaviour is to fit them all (if appropriate for the natural range of the response). More research is required to understand how model-set selection influences model inference. While some studies suggest using a broad range of models may be optimal \citep{Wheeler2009}, others indicate that including multiple models of similar shape may overweight the representation of that shape in model averaged predictions \citep{fox2020}. In addition, it is important to understand that when models are added or removed from the model-set, this can sometimes have a substantial influence on model predictions (potentially changing estimated ECx values, for example). As the model-set in \pkg{bayesnec} may change with further package development it is important to keep a record of the models that were actually fitted in a given analysis, in the event it is necessary to reproduce a set of results. \subsection[Model suitability for NEC and ECx estimation]{Model suitability for \textit{NEC} and \textit{EC\textsubscript{x}} estimation}\label{modsuit} -In principle all models provide an estimate for ``no-effect'' toxicity concentration. As seen above, for model strings with \textbf{nec} as a prefix, the NEC is directly estimated as parameter \(\eta = \text{NEC}\) in the model, as per \citet{Fox2010}. On the other hand, models strings with \textbf{ecx} as a prefix are continuous curve models with no threshold, typically used for extracting ECx values from concentration-response data. In this instance, the NEC reported is actually the No-Significant-Effect-Concentration (NSEC, see details in \citet{Fisher2023}), defined as the concentration at which there is a user supplied certainty (based on the Bayesian posterior estimate) that the response falls below the estimated value of the upper asymptote (\(\tau = \text{top}\)) of the response (i.e., the response value is significantly lower than that expected in the case of no exposure). The default value for this NSEC proportion is 0.01, which corresponds to an alpha value (Type-I error rate) of 0.01 for a one-sided test of significance. The NSEC concept has been recently explored using simulation studies and case study examples, and when combined with the NEC estimates of threshold models within a model‐ +In principle all models provide an estimate for ``no-effect'' toxicity concentration. As seen above, for model strings with \textbf{nec} as a prefix, the NEC is directly estimated as parameter \(\eta = \text{NEC}\) in the model, as per \citet{Fox2010}. On the other hand, model strings with \textbf{ecx} as a prefix are continuous curve models with no threshold, typically used for extracting ECx values from concentration-response data. In this instance, the NEC reported is actually the No-Significant-Effect-Concentration \citep[NSEC, see details in][]{Fisher2023}, defined as the concentration at which there is a user supplied certainty (based on the Bayesian posterior estimate) that the response falls below the estimated value of the upper asymptote (\(\tau = \text{top}\)) of the response (i.e., the response value is significantly lower than that expected in the case of no exposure). The default value for this NSEC proportion is 0.01, which corresponds to an alpha value (Type-I error rate) of 0.01 for a one-sided test of significance. The NSEC concept has been recently explored using simulation studies and case study examples, and when combined with the NEC estimates of threshold models within a model‐ averaging approach, can yield robust estimates of N(S)EC and of their uncertainty within a single analysis framework \citep{fisher2023ieam}. Both NEC and NSEC can be calculated from fitted models using the functions \code{nec} and \code{nsec}. -ECx estimates can be equally obtained from both \code{"nec"} and \code{"ecx"} models. ECx estimates will usually be lower (more conservative) for \code{"ecx"} models fitted to the same data as \code{"nec"} models. There is ambiguity in the definition of ECx estimates from hormesis models---these allow an initial increase in the response \citep[see][]{Mattson2008} and include models with the string \textbf{horme} in their name---as well as those that have no natural lower bound on the scale of the response (models with the string \textbf{lin} in their name, in the case of Gaussian response data). For this reason the \code{ecx} function has arguments \code{hormesis_def} and \code{type}, both character vectors indicating the desired behaviour. For \code{hormesis_def = "max"}, ECx values are calculated as a decline from the maximum estimates (i.e., the peak at \(\eta = \text{NEC}\)); and \code{hormesis_def = "control"} (the default) indicates that ECx values should be calculated relative to the control, which is assumed to be the lowest observed concentration. For \code{type = "relative"} ECx is calculated as the percentage decrease from the maximum predicted value of the response (\(\tau = \text{top}\)) to the minimum predicted value of the response (i.e., \emph{relative} to the observed data). For \code{type = "absolute"} (the default) ECx is calculated as the percentage decrease from the maximum value of the response (\(\tau = \text{top}\)) to 0 (or \(\delta = \text{bottom}\) for models with that parameter). For \code{type = "direct"}, a direct interpolation of the response on the predictor is obtained. +ECx estimates can be equally obtained from both \code{"nec"} and \code{"ecx"} models. ECx estimates will usually be lower (more conservative) for \code{"ecx"} models fitted to the same data as \code{"nec"} models. There is ambiguity in the definition of ECx estimates from hormesis models---these allow an initial increase in the response \citep[see][]{Mattson2008} and include models with the string \textbf{horme} in their name---as well as those that have no natural lower bound on the scale of the response (models with the string \textbf{lin} in their name, in the case of Gaussian response data). For this reason, the \code{ecx} function has arguments \code{hormesis_def} and \code{type}, both character vectors indicating the desired behaviour. For \code{hormesis_def = "max"}, ECx values are calculated as a decline from the maximum estimates (i.e., the peak at \(\eta = \text{NEC}\)); and \code{hormesis_def = "control"} (the default) indicates that ECx values should be calculated relative to the control, which is assumed to be the lowest observed concentration. For \code{type = "relative"} ECx is calculated as the percentage decrease from the maximum predicted value of the response (\(\tau = \text{top}\)) to the minimum predicted value of the response (i.e., \emph{relative} to the observed data). For \code{type = "absolute"} (the default) ECx is calculated as the percentage decrease from the maximum value of the response (\(\tau = \text{top}\)) to 0. For \code{type = "direct"}, a direct interpolation of the response on the predictor is obtained. \hypertarget{model-suitability-for-response-types}{% \subsection{Model suitability for response types}\label{model-suitability-for-response-types}} -Models that have an exponential decay (most models with parameter \(\beta = \text{beta}\)) with no \(\delta = \text{bottom}\) parameter are 0-bounded and are not suitable for the Gaussian \code{family}, or any \code{family} modelled using a \code{"logit"} or \code{"log"} link because they cannot generate predictions of negative response values. Conversely, models with a linear decay (containing the string \textbf{lin} in their name) are not suitable for modelling families that are 0-bounded (Gamma, Poisson, Negative Binomial, Beta, Binomial, Beta-Binomial) using an \code{"identity"} link. These restrictions do not need to be controlled by the user, as a call to \code{bnec} with \code{models = "all"} in the formula will simply exclude inappropriate models, albeit with a message. +Models that have an exponential decay (most models with parameter \(\beta = \text{beta}\)) and lacking the \(\delta = \text{bot}\) parameter are 0-bounded and are not suitable for the Gaussian \code{family}, or any \code{family} modelled using a \code{"logit"} or \code{"log"} link because they cannot generate predictions of negative response values. Conversely, models with a linear decay (containing the string \textbf{lin} in their name) are not suitable for modelling families that are 0-bounded (Gamma, Poisson, Negative Binomial, Beta, Binomial, Beta-Binomial) using an \code{"identity"} link. These restrictions do not need to be controlled by the user, as a call to \code{bnec} with \code{models = "all"} in the formula will simply exclude inappropriate models, albeit with a message. Strictly speaking, models with a linear hormesis increase are not suitable for modelling responses that are 0, 1-bounded (Binomial-, Beta- and Beta-Binomial-distributed), however they are currently allowed in \pkg{bayesnec}, with a reasonable fit achieved through a combination of the appropriate distribution being applied to the response, and \pkg{bayesnec}'s \code{make_inits} function which ensures initial values passed to \pkg{brms} yield response values within the range of the user-supplied response data. @@ -864,6 +864,8 @@ \subsection{Analytical reproducibility}\label{analytical-reproducibility}} To help with reproducibility \pkg{bayesnec} now allows a seed to be passed to \pkg{brm} and \proglang{Stan}. If a seed is used in the \code{bnec} call, it will also be used internally to generate initial values. Although in \proglang{R} seeds are consistent across versions and operational systems, and therefore the initial values will be the same across different users for a given seed, the underlying \proglang{Stan} model fitting mechanism may yield slightly different parameter estimates for known reasons relating to floating point operations \citep[see chapter 20 in][]{stan2021}. A potentially better strategy for ensuring reproducibility is to build a docker (\url{https://docs.docker.com/get-docker/}) container, an approach representing one strategy towards overcoming the reproducibility crisis \citep{Baker2016}. Also note that while setting a seed can be useful to obtain consistent outputs it might be worth examining how robust the inference is across different seeds. +Due to the fact that the underlying \code{brmsfit} model fitted using \pkg{cmdstanr} does not retain initial values as part of the returned model object, reproducibility may be reduced when using \pkg{cmdstanr}. + \subsection[Computational detail]{Computational details}\label{compdetails} All computations in this paper were performed using rmarkdown \citep{allaire2023} with the following @@ -894,7 +896,7 @@ \subsection{Computation times}\label{computation-times}} 84.2 minutes to fit the full \code{decline} model set to all seven herbicides in the example herbicide data set (see Section \ref{iall}) \end{itemize} -Run times for the same analysis run on an Apple M1 Max with 68.7 Gb RAM were: +When the same analysis was performed on an Apple M1 Max with 68.7 Gb RAM, run times were: \begin{itemize} \item @@ -912,7 +914,7 @@ \subsection{Data requirements}\label{data-requirements}} Due to the relatively long compute times of \pkg{bayesnec} fits, especially when multiple models are fit at once, we recommend that when running \pkg{bayesnec} the resulting model fit is saved as an \code{.RData} object to be read in and used in later workflows to examine model diagnostics, plotting, parameter estimates and inference. -The data requirements for saving model fits can be relatively large. The single model fit (see \ref{example}) generates an object of 62.4 Mb; the full \code{decline} model set fit to the herbicide ametryn (see \ref{isingle}) and object of 74.7 Mb; and the full \code{decline} model set to all seven herbicides in the example data set (see \ref{iall}) and object of 522.9 Mb. +The data requirements for saving model fits can be relatively large. The single model fit (see \ref{example}) generates an object of 62.4 Mb; the full \code{decline} model set fit to the herbicide ametryn (see \ref{isingle}) an object of 74.7 Mb; and the full \code{decline} model set to all seven herbicides in the example data set (see \ref{iall}) an object of 522.9 Mb. \hypertarget{dependencies}{% \subsection{Dependencies}\label{dependencies}} @@ -922,22 +924,12 @@ \subsection{Dependencies}\label{dependencies}} (non-)linear multivariate multilevel models using \proglang{Stan} program \citep{stan2021}, a C++ package for performing full Bayesian inference (see \url{https://mc-stan.org/}). -\pkg{brms} can use two alternative interfaces to proglang\{Stan\}, including \pkg{rstan} \citep{rstan2021} and \pkg{cmdstanr} \citep{cmdstanr2022} +\pkg{brms} can use two alternative interfaces to \proglang{Stan}, including \pkg{rstan} \citep{rstan2021} and \pkg{cmdstanr} \citep{cmdstanr2022} both of which require \proglang{Rtools} and the g++ compiler to be properly configured in \proglang{R}. Making sure \pkg{brms} is properly working on your machine is essential before any attempt to use the \pkg{bayesnec} package for analyses, as if this dependency is not working, \pkg{bayesnec} will not work. Instructions for installing these two packages can be found for \pkg{rstan} at (\url{https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started}) and \pkg{cmdstanr} (\url{https://mc-stan.org/cmdstanr/articles/cmdstanr.html}). -Note that as of the current writing of this article, while \pkg{rstan} is available on CRAN, there is currently an issue running the CRAN version on some Windows machines. For this reason we recommend installing the development version of \pkg{rstan} using: - -\begin{CodeChunk} -\begin{CodeInput} -R> install.packages("rstan", -+ repos = c("https://mc-stan.org/r-packages/", -+ getOption("repos"))) -\end{CodeInput} -\end{CodeChunk} - \hypertarget{future-directions}{% \section{Future directions}\label{future-directions}} @@ -945,9 +937,7 @@ \section{Future directions}\label{future-directions}} \begin{itemize} \item - The addition of other custom families, such as the Tweedie distribution and ordered beta model. Currently \pkg{bayesnec} implements adjustments away from 0 (Gamma, Beta) or 1 (Beta) as a strategy for allowing modelling with these types of data using the closest most convenient statistical distribution. - There are no readily available distributions able to model data that includes 0 and 1 on the continuous scale in \pkg{brms} and \pkg{bayesnec} currently does 0 and 1 adjustments followed by modelling using a Beta distribution. The ordered beta model has been suggested as a better method for modelling continuous data with lower an upper bounds (see \citet{Kubinec}) that could be readily implemented in the \pkg{brms} customs families framework. - For data that are 0 to \(\infty\) on the continuous scale the Tweedie distribution may prove a much better option than the current zero-bounded Gamma, and has been used extensively in fisheries research for biomass data \citep{Shono2008}. As this \code{family} is not currently available in \pkg{brms} this would also need to be implemented as a custom \code{family}, which for the Tweedie is not trivial. + The addition of other families to accommodate a broader range of response variables. This could include a range of distributions already available in \pkg{brms}, such as the zero-inflated Poisson or zero-truncated Gaussian. In addition, there are other useful distributions currently unavailable in \pkg{brms}, such as the Tweedie distribution and ordered beta model. Currently \pkg{bayesnec} implements adjustments away from 0 (Gamma, Beta) or 1 (Beta) as a strategy for allowing modelling with these types of data using the closest most convenient statistical distribution. There are no readily available distributions able to model data that includes 0 and 1 on the continuous scale in \pkg{brms} and \pkg{bayesnec} currently does 0 and 1 adjustments followed by modelling using a Beta distribution. The ordered beta model has been suggested as a better method for modelling continuous data with lower an upper bounds \citep[see][]{Kubinec} that could be readily implemented in the \pkg{brms} customs families framework. For data that are 0 to \(\infty\) on the continuous scale the Tweedie distribution may prove a much better option than the current zero-bounded Gamma, and has been used extensively in fisheries research for biomass data \citep{Shono2008}. As this \code{family} is not currently available in \pkg{brms} this would also need to be implemented as a custom \code{family}, which for the Tweedie is not trivial. \item A hypothesis method for testing against toxicity thresholds. The \pkg{brms} package includes a \code{hypothesis} function that allows for testing parameter estimates against specified criteria. This is used in \pkg{bayesnec} in the \code{check_prior} function, which is a wrapper that examines the deviation of each parameter in the given model relative to 0 as a means of generating posterior and prior probability density plots for comparison. However, an additional wrapper function could be developed that allows toxicity to be assessed, as measured through NEC, or ECx for example, against a required pre-defined threshold. Such a feature may be useful where toxicity testing is used as a trigger in risk management \citep[for example, using whole-effluent-toxicity (WET) testing,][]{Karman2019}. \end{itemize} diff --git a/article/render_tex_pdf.R b/article/render_tex_pdf.R index 2ef523f7..21c927af 100644 --- a/article/render_tex_pdf.R +++ b/article/render_tex_pdf.R @@ -10,9 +10,9 @@ modify_tex <- function(file_in, file_out) { } } } else { - tex[author_ands] <- gsub("AND Diego", "And Diego", tex[author_ands]) %>% - gsub("And Gerard", "AND Gerard", .) %>% - gsub("AND David", "And David", .) + tex[author_ands] <- gsub("AND Diego", "And Diego", tex[author_ands]) |> + gsub("And Gerard", "AND Gerard", x = _) |> + gsub("AND David", "And David", x = _) } tab_begs <- grep("^\\\\begin\\{table", tex) tab_caps <- grep("^\\\\caption\\{", tex) @@ -21,6 +21,16 @@ modify_tex <- function(file_in, file_out) { cap_txt <- tex[tab_caps] tex <- tex[-tab_caps] end_tabs <- grep("^\\\\end\\{tabular", tex) + for (i in seq_along(tab_begs)) { + tmp_tab <- gsub("Ametryn", "Ame.", tex[tab_begs[i]:end_tabs[i]]) |> + gsub("Atrazine", "Atr.", x = _) |> + gsub("Diuron", "Diu.", x = _) |> + gsub("Hexazinone", "Hex.", x = _) |> + gsub("Irgarol", "Irg.", x = _) |> + gsub("Simazine", "Sim.", x = _) |> + gsub("Tebuthiuron", "Teb.", x = _) + tex[tab_begs[i]:end_tabs[i]] <- tmp_tab + } beg_cen_cap <- "\\captionsetup{justification=centering}" tex <- c(tex[1:end_tabs[1]], beg_cen_cap, cap_txt[1], tex[(end_tabs[1] + 1):end_tabs[2]], beg_cen_cap, diff --git a/man/manecsummary-class.Rd b/man/manecsummary-class.Rd index 03cdbc1a..0b91fedf 100644 --- a/man/manecsummary-class.Rd +++ b/man/manecsummary-class.Rd @@ -47,10 +47,26 @@ should the user decide to calculate them (see the non-exported from the single-model case of class \code{\link{bayesnecfit}}, these ECx estimates will be based on the model weights.} +\item{\code{bayesr2}}{A table containing the Bayesian R2 for all models, as +calculated by \code{\link[brms]{bayes_R2}}.} + \item{\code{rhat_issues}}{A \code{\link[base]{list}} detailing whether each fitted model exhibited convergence issues based on the Rhat evaluation.} }} +\references{ +Fisher R, Fox DR (2023). Introducing the no significant effect concentration +(NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +doi: 10.1002/etc.5610. + +Fisher R, Fox DR, Negri AP, van Dam J, Flores F, Koppel D (2023). Methods for +estimating no-effect toxicity concentrations in ecotoxicology. Integrated +Environmental Assessment and Management. doi:10.1002/ieam.4809. + +Fox DR (2010). A Bayesian Approach for Determining the No Effect +Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. +} \seealso{ \code{\link{bayesnec}}, \code{\link{bnec}}, diff --git a/man/necsummary-class.Rd b/man/necsummary-class.Rd index 402212d2..ff5b2b6c 100644 --- a/man/necsummary-class.Rd +++ b/man/necsummary-class.Rd @@ -30,11 +30,30 @@ the fitted non-linear model.} \item{\code{is_ecx}}{A \code{\link[base]{logical}} indicating whether \code{model} is an ECx-type model.} +\item{\code{nec_vals}}{The NEC values. Note that if model is an ECx-type model, +this estimate will be a NSEC proxy.} + \item{\code{ecs}}{A \code{\link[base]{list}} containing the ECx values should the user decide to calculate them (see the non-exported \code{bayesnec:::summary.bayesnecfit} help file for details).} + +\item{\code{bayesr2}}{The model Bayesian R2 as calculated by +\code{\link[brms]{bayes_R2}}.} }} +\references{ +Fisher R, Fox DR (2023). Introducing the no significant effect concentration +(NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +doi: 10.1002/etc.5610. + +Fisher R, Fox DR, Negri AP, van Dam J, Flores F, Koppel D (2023). Methods for +estimating no-effect toxicity concentrations in ecotoxicology. Integrated +Environmental Assessment and Management. doi:10.1002/ieam.4809. + +Fox DR (2010). A Bayesian Approach for Determining the No Effect +Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. +} \seealso{ \code{\link{bayesnec}}, \code{\link{bnec}}, diff --git a/man/summary.Rd b/man/summary.Rd index 15cb671f..f1e9dc45 100644 --- a/man/summary.Rd +++ b/man/summary.Rd @@ -28,8 +28,8 @@ A summary of the fitted model. In the case of a contents of a \code{\link[brms]{brmsfit}} object with the addition of an R2. In the case of a \code{\link{bayesmanecfit}} object, summary displays the family distribution information, model weights and averaging -method, the estimated model-averaged NEC, and R2 estimates for each -individual model. Warning messages are also printed to screen in case +method, and Bayesian R2 estimates for each individual model. +Warning messages are also printed to screen in case model fits are not satisfactory with regards to their Rhats. } \description{ @@ -37,6 +37,18 @@ Generates a summary for objects fitted by \code{\link{bnec}}. \code{object} should be of class \code{\link{bayesnecfit}} or \code{\link{bayesmanecfit}}. } +\details{ +The summary method for both \code{\link{bayesnecfit}} and +\code{\link{bayesmanecfit}} also returns a no-effect toxicity +estimate. Where the fitted model(s) are NEC models (threshold models, +containing a step function) the no-effect estimate is a true +no-effect-concentration (NEC, see Fox 2010). Where the fitted model(s) are +smooth ECx models with no step function, the no-effect estimate is a +no-significant-effect-concentration (NSEC, see Fisher and Fox 2023). In the +case of a \code{\link{bayesmanecfit}} that contains a mixture of both NEC and +ECx models, the no-effect estimate is a model averaged combination of the NEC +and NSEC estimates, and is reported as the N(S)EC (see Fisher et al. 2023). +} \examples{ \donttest{ library(bayesnec) @@ -45,3 +57,16 @@ nec4param <- pull_out(manec_example, "nec4param") summary(nec4param) } } +\references{ +Fisher R, Fox DR (2023). Introducing the no significant effect concentration +(NSEC).Environmental Toxicology and Chemistry, 42(9), 2019–2028. +doi: 10.1002/etc.5610. + +Fisher R, Fox DR, Negri AP, van Dam J, Flores F, Koppel D (2023). Methods for +estimating no-effect toxicity concentrations in ecotoxicology. Integrated +Environmental Assessment and Management. doi:10.1002/ieam.4809. + +Fox DR (2010). A Bayesian Approach for Determining the No Effect +Concentration and Hazardous Concentration in Ecotoxicology. Ecotoxicology +and Environmental Safety, 73(2), 123–131. doi: 10.1016/j.ecoenv.2009.09.012. +} diff --git a/tests/testthat/test-bayesnec_methods.R b/tests/testthat/test-bayesnec_methods.R index 11e30713..72ae6d9e 100644 --- a/tests/testthat/test-bayesnec_methods.R +++ b/tests/testthat/test-bayesnec_methods.R @@ -22,8 +22,8 @@ test_that("plot returns null, is invisible, and is silent", { test_that("summary behaves as expected", { summary_p <- suppressWarnings(summary(nec4param)) expect_equal(class(summary_p), "necsummary") - expect_equal(names(summary_p), c("brmssummary", "model", "is_ecx", "ecs", - "bayesr2")) + expect_equal(names(summary_p), c("brmssummary", "model", "is_ecx", "nec_vals", + "ecs", "bayesr2")) }) test_that("formula/model.frame behaves as expected", { diff --git a/tests/testthat/test-modname.R b/tests/testthat/test-modname.R new file mode 100644 index 00000000..53c0d1e7 --- /dev/null +++ b/tests/testthat/test-modname.R @@ -0,0 +1,4 @@ +test_that("model names remain less than 15 charactes", { + model_list <- unique(unlist(models())) + expect_lte(max(sapply(model_list, nchar)), 15) +}) diff --git a/vignettes/bayesnec.bib b/vignettes/bayesnec.bib index cf123325..84e9913a 100644 --- a/vignettes/bayesnec.bib +++ b/vignettes/bayesnec.bib @@ -195,11 +195,11 @@ @article{fisher2023ieam author = {Fisher, Rebecca and Fox, David R. and Negri, Andrew P. and van Dam, Joost and Flores, Florita and Koppel, Darren}, title = {Methods for estimating no-effect toxicity concentrations in ecotoxicology}, journal = {Integrated Environmental Assessment and Management}, + year = {2023}, volume = {n/a}, number = {n/a}, pages = {}, - keywords = {Concentration–response modeling, Ecosystem protection, No-effect-concentration, Statistical ecotoxicology, Toxicity estimate}, - doi = {https://doi.org/10.1002/ieam.4809}, - url = {https://setac.onlinelibrary.wiley.com/doi/abs/10.1002/ieam.4809}, - eprint = {https://setac.onlinelibrary.wiley.com/doi/pdf/10.1002/ieam.4809} + keywords = {Concentration–response modeling, Ecosystem protection, No-effect-concentration, Statistical ecotoxicology, Toxicity estimate}, + doi = {https://doi.org/10.1002/ieam.4809} } + diff --git a/vignettes/example1.Rmd.orig b/vignettes/example1.Rmd.orig index 640bf27c..efc94546 100644 --- a/vignettes/example1.Rmd.orig +++ b/vignettes/example1.Rmd.orig @@ -40,7 +40,7 @@ The `bayesnec` is an R package to fit concentration(dose) — response curves to Bayesian model fitting can be difficult to automate across a broad range of usage cases, particularly with respect to specifying valid initial values and appropriate priors. This is one reason the use of Bayesian statistics for *NEC* estimation (or even *ECx* estimation) is not currently widely adopted across the broader ecotoxicological community, who rarely have access to specialist statistical expertise. The `bayesnec` package provides an accessible interface specifically for fitting *NEC* models and other concentration—response models using Bayesian methods. A range of models are specified based on the known distribution of the "concentration" or "dose" variable (the predictor) as well as the "response" variable. The package requires a simplified model formula, which together with the data is used to wrangle more complex non-linear model formula(s), as well as to generate priors and initial values required to call a `brms` model. While the distribution of the predictor and response variables can be specified directly, `bayesnec` will automatically attempt to assign the correct distribution to use based on the characteristics of the provided data. -This project started with an implementation of the *NEC* model based on that described in (Pires et al. 2002) and [@Fox2010] using R2jags [@Fisher2020]. The package has been further generalised to allow a large range of response variables to be modelled using the appropriate statistical distribution. While the original `jagsNEC` implementation supported Gaussian-, Poisson-, Binomial-, Gamma-, Negative Binomial- and Beta-distributed response data, `bayesnec` additionally supports the Beta-Binomial distribution, and can be easily extended to include any of the available `brms` families. We have since also further added a range of alternative *NEC* model types, as well as a range of concentration—response models (such as 4-parameter logistic and Weibull models) that are commonly used in frequentist-based packages such as `drc`[@Ritz2016]. These models do not employ segmented linear regression (i.e., use of a `step` function) but model the response as a smooth function of concentration. +This project started with an implementation of the *NEC* model based on that described in (Pires et al. 2002) and [@Fox2010] using R2jags [@Fisher2020]. The package has been further generalised to allow a large range of response variables to be modelled using the appropriate statistical distribution. While the original `jagsNEC` implementation supported Gaussian-, Poisson-, Binomial-, Gamma-, Negative Binomial- and Beta-distributed response data, `bayesnec` additionally supports the Beta-Binomial distribution, and can be easily extended to include any of the available `brms` families. We have since also further added a range of alternative *NEC* model types, as well as a range of concentration—response models (such as 4-parameter logistic and Weibull models) that are commonly used in frequentist-based packages such as `drc`[@Ritz2016]. These models do not employ segmented linear regression (i.e., use of a `step` function) but model the response as a smooth function of concentration. They can be used to derive no-effect toxicity estimates as a no-significant-effect-concentration [@fisherfox2023]. Specific models can be fit directly using `bnec`, which is what we discuss here. Alternatively, it is possible to fit a custom model set, a specific model set, or all the available models. Further information on fitting multi-models using `bayesnec` can be found in the [Multi model usage][e2] vignette. For detailed information on the models available in `bayesnec` see the [Model details][e2b] vignette. @@ -138,7 +138,7 @@ summary(exp_1) Note the Rhat values in this example are one, indicating convergence. -The functions `plot` (extending `base` plot method) and `autoplot` (extending `ggplot2` plot method) can be used to plot the fitted model. You can also make your own plot from the data included in the returned object (class `bayesnecfit`) from the call to `bnec`. **Notice, however, that in this case we log-transformed the predictor `raw_x` in the input formula.** This causes `brms` to pass these transformations onto Stan's native code, and therefore estimates of *NEC* will be on that same scale. So we need to transform them back to the data scale using the additional argument `xform`. +The functions `plot` (extending `base` plot method) and `autoplot` (extending `ggplot2` plot method) can be used to plot the fitted model. You can also make your own plot from the data included in the returned object (class `bayesnecfit`) from the call to `bnec`. **Notice, however, that in this case we log-transformed the predictor `raw_x` in the input formula.** This causes `brms` to pass these transformations onto Stan's native code, and therefore derived toxicity estimates will be on that same scale. So we need to transform them back to the data scale using the additional argument `xform`. ```{r exmp1-binom, fig.width = 5, message = FALSE} autoplot(exp_1, xform = exp) diff --git a/vignettes/example2.Rmd.orig b/vignettes/example2.Rmd.orig index 6a2f7fe0..829023d7 100644 --- a/vignettes/example2.Rmd.orig +++ b/vignettes/example2.Rmd.orig @@ -84,13 +84,15 @@ We have created some plotting method functions for both the `bayesnecfit` and `b autoplot(exp_5) ``` -The default plot looks exactly the same as our regular `bayesnecfit` plot, but the output is based on a weighted average of all the model fits in the `model = "decline"` model set that were able to fit successfully using the default `bnec` settings. The *NEC* estimate on this plot (and in the summary output below) is based on a mix of actual *NEC* estimates, as well as the *NSEC* [@fisherfox2023] estimates that are used as an approximation to *NEC* for all the **ecx**-containing models in the set. The fitted `bayesmanecfit` object contains different elements to the `bayesnecfit`. In particular, `mod_stats` contains the table of model fit statistics for all the fitted models. This includes the model name, the WAIC (as returned from `brms`), wi (the model weight, currently defaulting to `"pseudobma"` using Bayesian bootstrap from `loo`), and the dispersion estimates (only reported if response is modelled with a Poisson or Binomial distribution, otherwise NA). +The default plot looks exactly the same as our regular `bayesnecfit` plot, but the output is based on a weighted average of all the model fits in the `model = "decline"` model set that were able to fit successfully using the default `bnec` settings. Because the model set contains a mix of *NEC* and *ECx* models, no-effect toxicity estimate reported by default on this plot (and in the summary output below) is reported as a N(S)EC (see @fisher2023ieam). + +The fitted `bayesmanecfit` object contains different elements to the `bayesnecfit`. In particular, `mod_stats` contains the table of model fit statistics for all the fitted models. This includes the model name, the WAIC (as returned from `brms`), wi (the model weight, currently defaulting to `"pseudobma"` using Bayesian bootstrap from `loo`), and the dispersion estimates (only reported if response is modelled with a Poisson or Binomial distribution, otherwise NA). ```{r} exp_5$mod_stats ``` -We can obtain a neater summary of the model fit by using the `summary` method for a `bayesmanecfit` object. A list of fitted models, and model weights are provided. In addition, the model averaged *NEC* is reported, however a warning is provided indicating it contains *NSEC* values. For this example all the nec4param and ecxwb2 models have the highest weights. +We can obtain a neater summary of the model fit by using the `summary` method for a `bayesmanecfit` object. A list of fitted models, and model weights are provided. In addition, the model averaged no-effect estimate is reported, in this case as an N(S)EC. For this example the **nec4param** and **ecxwb2** models have the highest weights. All these model fits are satisfactory despite the relatively low number of iterations set in our example, but the summary would also include a warning if there were fits with divergent transitions. @@ -151,7 +153,7 @@ rhat(exp_5, rhat_cutoff = 1.01) |> ### Extracting toxicity values -The models prefixed with **ecx** are all models that do not have the *NEC* as a parameter in the model. That is, they are smooth curves as a function of concentration and have no breakpoint. The *NEC* on the plots above for these models are an approximation based on *NSEC* [@fisherfox2023] and should only be used with careful consideration of the validity of this toxicity value (see the [Model details][e2b] vignette for more details). If a true model averaged estimate of *NEC* based only on threshold models is desired, this can be obtained with `model = "nec"`. We can use the helper functions `pull_out` and `amend` to alter the model set as required. `pull_out` has a `model` argument and can be used to pull a single model out (as above) or to pull out a specific set of models. +The models prefixed with **ecx** are all models that do not have the *NEC* as a parameter in the model. That is, they are smooth curves as a function of concentration and have no breakpoint (no true No-Effect-Concentration, NEC). Thus the reported no-effect toxicity estimate is the NSEC [@fisherfox2023] (see the [Model details][e2b] vignette for more details). If a true model averaged estimate of *NEC* based only on threshold models is desired, this can be obtained with `model = "nec"`. We can use the helper functions `pull_out` and `amend` to alter the model set as required. `pull_out` has a `model` argument and can be used to pull a single model out (as above) or to pull out a specific set of models. We can use this to obtain first a set of *NEC* only models from the existing set. diff --git a/vignettes/example2b.Rmd.orig b/vignettes/example2b.Rmd.orig index 8f61a26d..6c4d8c11 100644 --- a/vignettes/example2b.Rmd.orig +++ b/vignettes/example2b.Rmd.orig @@ -77,7 +77,9 @@ $$ # Model types for *NEC* and *EC~x~* estimation -All models provide an estimate for the No-Effect-Concentration (*NEC*). For model types with **nec** as a prefix, the *NEC* is directly estimated as parameter $\eta = \text{NEC}$ in the model, as per @Fox2010. Models with **ecx** as a prefix are continuous curve models, typically used for extracting *EC~x~* values from concentration-response data. In this instance the *NEC* reported is actually the No-Significant-Effect-Concentration (*NSEC*), defined as the concentration at which there is a user supplied (see `sig_val`) percentage certainty (based on the Bayesian posterior estimate) that the response falls below the estimated value of the upper asymptote ($\tau = \text{top}$) of the response (i.e. the response value is significantly lower than that expected in the case of no exposure) [@fisherfox2023]. The default value for `sig_val` is 0.01, which corresponds to an alpha value (Type-I error rate) of 0.01 for a one-sided test of significance. See `?nsec` and [@fisherfox2023] for more details. To obtain true model averaged estimates of the *NEC* based on threshold models, you must use only the `"nec"` model set for estimation of *NEC* values. +In principle all models provide an estimate for "no-effect" toxicity concentration. As seen above, for model strings with **nec** as a prefix, the NEC is directly estimated as parameter $\eta = \text{NEC}$ in the model, as per @Fox2010. On the other hand, model strings with **ecx** as a prefix are continuous curve models with no threshold, typically used for extracting ECx values from concentration-response data. In this instance, the NEC reported is actually the No-Significant-Effect-Concentration [NSEC, see details in @fisherfox2023], defined as the concentration at which there is a user supplied certainty (based on the Bayesian posterior estimate) that the response falls below the estimated value of the upper asymptote ($\tau = \text{top}$) of the response (i.e., the response value is significantly lower than that expected in the case of no exposure). The default value for this NSEC proportion is 0.01, which corresponds to an alpha value (Type-I error rate) of 0.01 for a one-sided test of significance. The NSEC concept has been recently explored using simulation studies and case study examples, and when combined with the NEC estimates of threshold models within a model‐ +averaging approach, can yield robust estimates of N(S)EC and of their uncertainty within a single +analysis framework [@fisher2023ieam]. Both NEC and NSEC can be calculated from fitted models using the functions \code{nec} and \code{nsec}. The model averaged N(S)EC is automatically returned as part of the fitted model for any \code{bayesmanecfit} that contains a combination of both \code{"nec"} and \code{"ecx"} models. The significance level used can be adjusted from the default value using \code{amend}. *EC~x~* estimates can be equally obtained from both `"nec"` and `"ecx"` models. *EC~x~* estimates will usually be lower (more conservative) for `"ecx"` models fitted to the same data as `"nec"` models (see the [Comparing posterior predictions][e4]) vignette for an example. However, we recommend using `"all"` models where *EC~x~* estimation is required because `"nec"` models can fit some datasets better than `"ecx"` models and the model averaging approach will place the greatest weight for the outcome that best fits the supplied data. This approach will yield *EC~x~* estimates that are the most representative of the underlying relationship in the dataset.