diff --git a/.DS_Store b/.DS_Store index f5ba376..780b2cb 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.Rbuildignore b/.Rbuildignore index 4f75f8e..abb96c4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,14 +1,15 @@ -^data-raw$ -^Meta$ -^doc$ -^docs$ -^_pkgdown\.yml$ -^codecov\.yml$ -^.*\.Rproj$ -^\.Rproj\.user$ -ignore/ -R_ignore/ -^index.md -^\.github$ -^README\.Rmd$ -test_scripts/ +^data-raw$ +^Meta$ +^doc$ +^docs$ +^_pkgdown\.yml$ +^codecov\.yml$ +^.*\.Rproj$ +^\.Rproj\.user$ +ignore/ +R_ignore/ +^index.md +^\.github$ +^README\.Rmd$ +test_scripts/ +LICENSE.md diff --git a/.github/workflows/develop_build.yaml b/.github/workflows/develop_build.yaml index 863e601..28f49a7 100644 --- a/.github/workflows/develop_build.yaml +++ b/.github/workflows/develop_build.yaml @@ -1,81 +1,81 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -on: - push: - branches: - - develop - pull_request: - branches: - - develop - -name: develop_build - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v1 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: + - develop + pull_request: + branches: + - develop + +name: develop_build + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@v1 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + while read -r cmd + do + eval sudo $cmd + done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@main + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check diff --git a/.github/workflows/master_build.yaml b/.github/workflows/master_build.yaml index d57f321..42ceffa 100644 --- a/.github/workflows/master_build.yaml +++ b/.github/workflows/master_build.yaml @@ -1,81 +1,81 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -on: - push: - branches: - - master - pull_request: - branches: - - master - -name: master_build - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v1 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: + - master + pull_request: + branches: + - master + +name: master_build + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@v1 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + while read -r cmd + do + eval sudo $cmd + done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@main + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check diff --git a/DESCRIPTION b/DESCRIPTION index 94ac377..ccff2ea 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: drjacoby Type: Package Title: Flexible Markov Chain Monte Carlo via Reparameterization -Version: 1.5.2 +Version: 1.5.3 Authors@R: c( person("Bob", "Verity", email = "r.verity@imperial.ac.uk", role = c("aut", "cre")), person("Pete", "Winskill", email = "p.winskill@imperial.ac.uk", role = c("aut")) @@ -13,7 +13,7 @@ Description: drjacoby is an R package for performing Bayesian inference via License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.1 +RoxygenNote: 7.3.1 LinkingTo: Rcpp Imports: @@ -21,16 +21,12 @@ Imports: coda, parallel, ggplot2, - RcppXPtrUtils, usethis, tools, rlang, cowplot, dplyr, - magrittr, - readr, - data.table -SystemRequirements: C++11 + magrittr Suggests: testthat, covr, diff --git a/LICENSE b/LICENSE index 796e1f8..e70c1c1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,2 @@ -MIT License - -Copyright (c) 2019 MRC Centre for Outbreak Analysis and Modelling - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +YEAR: 2024 +COPYRIGHT HOLDER: MRC Centre for Outbreak Analysis and Modelling diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..1c446fa --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 MRC Centre for Outbreak Analysis and Modelling + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/R/diagnostic.R b/R/diagnostic.R index 45b3c9b..2287fc1 100644 --- a/R/diagnostic.R +++ b/R/diagnostic.R @@ -1,94 +1,94 @@ -#------------------------------------------------ -#' Gelman-Rubin statistic -#' -#' Estimate sthe Gelman-Rubin (rhat) convergence statistic for a single parameter -#' across multiple chains. Basic method, assuming all chains are of equal length -#' -#' @references Gelman, A., and D. B. Rubin. 1992. -#' Inference from Iterative Simulation Using Multiple Sequences. -#' Statistical Science 7: 457–511. -#' @references \url{https://astrostatistics.psu.edu/RLectures/diagnosticsMCMC.pdf} -#' -#' @param par_matrix Matrix (interations x chains) -#' @param chains number of chains -#' @param samples number of samples -#' -#' @return Gelman-Rubin statistic -gelman_rubin <- function(par_matrix, chains, samples){ - - # Check that >1 chains and >1 samples - assert_gr(chains, 1) - assert_gr(samples, 1) - - # Coerce to matrix - par_matrix <- as.data.frame(par_matrix) - - # Mean over all samples - all_mean <- mean(par_matrix[,2]) - - # Mean of each chain - chain_mean <- tapply(par_matrix[,2], par_matrix[,1], mean) - - # Variance of each chain - chain_var <- tapply(par_matrix[,2], par_matrix[,1], stats::var) - W <- (1 / chains) * sum(chain_var) - B <- samples / (chains - 1) * sum((chain_mean - all_mean)^2) - V <- (1 - 1 / samples) * W + (1 / samples) * B - round(sqrt(V / W), 4) -} - -#------------------------------------------------ -#' @title Estimate autocorrelation -#' -#' @inheritParams plot_autocorrelation -#' @param x Single chain. -acf_data <- function(x, lag){ - stats::acf(x, plot = FALSE, lag.max = lag)$acf -} - -#------------------------------------------------ -# check that geweke p-value non-significant at alpha significance level on -# values x[1:n] -#' @importFrom coda mcmc -#' @noRd -test_convergence <- function(x, n, alpha = 0.01) { - - # check inputs - assert_vector_numeric(x) - assert_single_pos_int(n) - assert_single_bounded(alpha) - - # fail if n = 1 - if (n == 1) { - return(FALSE) - } - - # fail if ESS too small - ESS <- try(coda::effectiveSize(x[1:n]), silent = TRUE) - if (class(ESS) == "try-error") { - return(FALSE) - } - if (ESS < 10) { - return(FALSE) - } - - # fail if geweke p-value < threshold - g <- geweke_pvalue(mcmc(x[1:n])) - ret <- (g > alpha) - - # return - return(ret) -} - -#------------------------------------------------ -# geweke_pvalue -# return p-value of Geweke's diagnostic convergence statistic, estimated from -# package coda -#' @importFrom stats pnorm -#' @importFrom coda geweke.diag -#' @noRd -geweke_pvalue <- function(x) { - ret <- 2*pnorm(abs(coda::geweke.diag(x)$z), lower.tail = FALSE) - return(ret) -} - +#------------------------------------------------ +#' Gelman-Rubin statistic +#' +#' Estimate sthe Gelman-Rubin (rhat) convergence statistic for a single parameter +#' across multiple chains. Basic method, assuming all chains are of equal length +#' +#' @references Gelman, A., and D. B. Rubin. 1992. +#' Inference from Iterative Simulation Using Multiple Sequences. +#' Statistical Science 7: 457–511. +#' @references \url{https://astrostatistics.psu.edu/RLectures/diagnosticsMCMC.pdf} +#' +#' @param par_matrix Matrix (interations x chains) +#' @param chains number of chains +#' @param samples number of samples +#' +#' @return Gelman-Rubin statistic +gelman_rubin <- function(par_matrix, chains, samples){ + + # Check that >1 chains and >1 samples + assert_gr(chains, 1) + assert_gr(samples, 1) + + # Coerce to matrix + par_matrix <- as.data.frame(par_matrix) + + # Mean over all samples + all_mean <- mean(par_matrix[,2]) + + # Mean of each chain + chain_mean <- tapply(par_matrix[,2], par_matrix[,1], mean) + + # Variance of each chain + chain_var <- tapply(par_matrix[,2], par_matrix[,1], stats::var) + W <- (1 / chains) * sum(chain_var) + B <- samples / (chains - 1) * sum((chain_mean - all_mean)^2) + V <- (1 - 1 / samples) * W + (1 / samples) * B + round(sqrt(V / W), 4) +} + +#------------------------------------------------ +#' @title Estimate autocorrelation +#' +#' @inheritParams plot_autocorrelation +#' @param x Single chain. +acf_data <- function(x, lag){ + stats::acf(x, plot = FALSE, lag.max = lag)$acf +} + +#------------------------------------------------ +# check that geweke p-value non-significant at alpha significance level on +# values x[1:n] +#' @importFrom coda mcmc +#' @noRd +test_convergence <- function(x, n, alpha = 0.01) { + + # check inputs + assert_vector_numeric(x) + assert_single_pos_int(n) + assert_single_bounded(alpha) + + # fail if n = 1 + if (n == 1) { + return(FALSE) + } + + # fail if ESS too small + ESS <- try(coda::effectiveSize(x[1:n]), silent = TRUE) + if (inherits(ESS, "try-error")) { + return(FALSE) + } + if (ESS < 10) { + return(FALSE) + } + + # fail if geweke p-value < threshold + g <- geweke_pvalue(mcmc(x[1:n])) + ret <- (g > alpha) + + # return + return(ret) +} + +#------------------------------------------------ +# geweke_pvalue +# return p-value of Geweke's diagnostic convergence statistic, estimated from +# package coda +#' @importFrom stats pnorm +#' @importFrom coda geweke.diag +#' @noRd +geweke_pvalue <- function(x) { + ret <- 2*pnorm(abs(coda::geweke.diag(x)$z), lower.tail = FALSE) + return(ret) +} + diff --git a/R/drjacoby.R b/R/drjacoby.R index 747a8e0..cc6b5c8 100644 --- a/R/drjacoby.R +++ b/R/drjacoby.R @@ -1,22 +1,22 @@ -#------------------------------------------------ -#' @title Flexible Markov Chain Monte Carlo via Reparameterization -#' -#' @description Flexible Markov chain monte carlo via reparameterization using -#' the Jacobean matrix. -#' -#' @docType package -#' @name drjacoby -NULL - -#------------------------------------------------ -#' @useDynLib drjacoby, .registration = TRUE -#' @importFrom Rcpp sourceCpp -#' @importFrom magrittr %>% -NULL - -#------------------------------------------------ -# unload DLL when package is unloaded -#' @noRd -.onUnload <- function(libpath) { - library.dynam.unload("drjacoby", libpath) # nocov -} +#------------------------------------------------ +#' @title Flexible Markov Chain Monte Carlo via Reparameterization +#' +#' @description Flexible Markov chain monte carlo via reparameterization using +#' the Jacobean matrix. +#' +#' _PACKAGE +#' @name drjacoby +NULL + +#------------------------------------------------ +#' @useDynLib drjacoby, .registration = TRUE +#' @importFrom Rcpp sourceCpp +#' @importFrom magrittr %>% +NULL + +#------------------------------------------------ +# unload DLL when package is unloaded +#' @noRd +.onUnload <- function(libpath) { + library.dynam.unload("drjacoby", libpath) # nocov +} diff --git a/R/main.R b/R/main.R index 6539d89..5261b86 100644 --- a/R/main.R +++ b/R/main.R @@ -151,10 +151,7 @@ check_params <- function(x) { #' log-likelihood/log-prior functions *by reference*. This means if you modify #' these objects inside the functions then any changes will persist. #' -#' @param data a named list of numeric data values. When using C++ likelihood -#' and/or prior these values are treated internally as doubles, so while -#' integer and Boolean values can be used, keep in mind that these will be -#' recast as doubles in the likelihood (i.e. \code{TRUE = 1.0}). +#' @param data a named list or data frame or data values. #' @param df_params a data.frame of parameters (see \code{?define_params}). #' @param misc optional list object passed to likelihood and prior. This can be #' useful for passing values that are not strictly data, for example passing a @@ -237,8 +234,7 @@ run_mcmc <- function(data, # check data assert_list_named(data) - assert_numeric(unlist(data)) - + # check misc assert_list(misc) diff --git a/R/plot.R b/R/plot.R index d5d8a14..595d96c 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,553 +1,554 @@ - -#------------------------------------------------ -#' @title Plot loglikelihood 95\% credible intervals -#' -#' @description Plot loglikelihood 95\% credible intervals. -#' -#' @param x an object of class \code{drjacoby_output} -#' @param chain which chain to plot. -#' @param phase which phase to plot. Must be either "burnin" or "sampling". -#' @param x_axis_type how to format the x-axis. 1 = integer rungs, 2 = values of -#' the thermodynamic power. -#' @param y_axis_type how to format the y-axis. 1 = raw values, 2 = truncated at -#' auto-chosen lower limit. 3 = double-log scale. -#' -#' @export - -plot_rung_loglike <- function(x, chain = 1, phase = "sampling", x_axis_type = 1, y_axis_type = 1) { - - # check inputs - assert_class(x, "drjacoby_output") - assert_single_pos_int(chain) - assert_leq(chain, length(x)) - assert_in(phase, c("burnin", "sampling")) - assert_single_pos_int(x_axis_type) - assert_in(x_axis_type, 1:2) - assert_single_pos_int(y_axis_type) - assert_in(y_axis_type, 1:3) - - # get useful quantities - thermo_power <- x$diagnostics$rung_details$thermodynamic_power - rungs <- length(thermo_power) - - # define x-axis type - if (x_axis_type == 1) { - x_vec <- 1:rungs - x_lab <- "rung" - } else { - x_vec <- thermo_power - x_lab <- "thermodynamic power" - } - - # get plotting values (loglikelihoods) - phase_get <- phase - chain_get <- chain - data <- dplyr::filter(x$pt, .data$chain == chain_get, .data$phase == phase_get) - y_lab <- "log-likelihood" - - # move to plotting deviance if specified - if (y_axis_type == 3) { - data$loglikelihood <- -2 * data$loglikelihood - y_lab <- "deviance" - - # if needed, scale by adding/subtracting a power of ten until all values are - # positive - if (min(data$loglikelihood) < 0) { - dev_scale_power <- ceiling( log(abs(min(data$loglikelihood))) / log(10) ) - dev_scale_sign <- -sign(min(data$loglikelihood)) - data$loglikelihood <- data$loglikelihood + dev_scale_sign*10^dev_scale_power - - dev_scale_base <- ifelse(dev_scale_power == 0, 1, 10) - dev_scale_power_char <- ifelse(dev_scale_power <= 1, "", paste("^", dev_scale_power)) - dev_scale_sign_char <- ifelse(dev_scale_sign < 0, "-", "+") - y_lab <- parse(text = paste("deviance", dev_scale_sign_char, dev_scale_base, dev_scale_power_char)) - } - } - - # get 95% credible intervals over plotting values - y_intervals <- data %>% - dplyr::group_by(.data$rung) %>% - dplyr::summarise(Q2.5 = quantile(.data$loglikelihood, 0.025), - Q50 = quantile(.data$loglikelihood, 0.5), - Q97.5 = quantile(.data$loglikelihood, 0.975)) - - # get data into ggplot format and define temperature colours - df <- y_intervals - df$col <- thermo_power - df$x <- x_vec - - # produce plot - plot1 <- df %>% - ggplot() + - geom_vline(aes(xintercept = x_vec), col = grey(0.9)) + - geom_pointrange(aes_(x = ~x, ymin = ~Q2.5, y = ~Q50, ymax = ~Q97.5, col = ~col)) + - xlab(x_lab) + - ylab(y_lab) + - scale_colour_gradientn(colours = c("red", "blue"), name = "thermodynamic\npower", limits = c(0,1)) + - theme_bw() + - theme(panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank()) - - # define y-axis - if (y_axis_type == 2) { - y_min <- quantile(df$Q2.5, probs = 0.5) - y_max <- max(df$Q97.5) - plot1 <- plot1 + coord_cartesian(ylim = c(y_min, y_max)) - } else if (y_axis_type == 3) { - plot1 <- plot1 + scale_y_continuous(trans = "log10") - } - - # return plot object - return(plot1) -} - -#------------------------------------------------ -#' @title Plot Metropolis coupling acceptance rates -#' -#' @description Plot Metropolis coupling acceptance rates between all rungs. -#' -#' @inheritParams plot_rung_loglike -#' @param chain which chain to plot. If \code{NULL} then plot all chains. -#' -#' @import ggplot2 dplyr -#' @importFrom grDevices grey -#' @export - -plot_mc_acceptance <- function(x, chain = NULL, phase = "sampling", x_axis_type = 1) { - - # check inputs - assert_class(x, "drjacoby_output") - if (!is.null(chain)) { - assert_single_pos_int(chain, zero_allowed = FALSE) - assert_in(chain, unique(x$output$chain)) - } - assert_in(phase, c("burnin", "sampling")) - assert_single_pos_int(x_axis_type) - assert_in(x_axis_type, 1:2) - - # get useful quantities - thermo_power <- x$diagnostics$rung_details$thermodynamic_power - thermo_power_mid <- thermo_power[-1] - diff(thermo_power)/2 - rungs <- length(thermo_power) - - # exit if rungs = 1 - if (rungs == 1) { - stop("no metropolis coupling when rungs = 1") - } - - # define x-axis type - if (x_axis_type == 1) { - breaks_vec <- 1:rungs - x_vec <- (2:rungs) - 0.5 - x_lab <- "rung" - } else { - breaks_vec <- thermo_power - x_vec <- thermo_power_mid - x_lab <- "thermodynamic power" - } - - # get chain properties - phase_get <- phase - if (is.null(chain)) { - mc_accept <- dplyr::filter(x$diagnostics$mc_accept, .data$phase == phase_get) %>% - dplyr::pull(.data$value) - } else { - chain_get <- chain - mc_accept <- dplyr::filter(x$diagnostics$mc_accept, .data$phase == phase_get, .data$chain == chain_get) %>% - dplyr::pull(.data$value) - } - - # get data into ggplot format and define temperature colours - df <- data.frame(x_vec = x_vec, mc_accept = mc_accept, col = thermo_power_mid) - - # produce plot - plot1 <- ggplot(df) + - geom_vline(aes(xintercept = x), col = grey(0.9), data = data.frame(x = breaks_vec)) + - scale_y_continuous(limits = c(0,1), expand = c(0,0)) + - geom_point(aes(x = x_vec, y = mc_accept, color = col)) + - xlab(x_lab) + ylab("coupling acceptance rate") + - scale_colour_gradientn(colours = c("red", "blue"), name = "thermodynamic\npower", limits = c(0,1)) + - theme_bw() + - theme(panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank()) - - return(plot1) -} - -#------------------------------------------------ -#' @title Plot autocorrelation -#' -#' @description Plot autocorrelation for specified parameters -#' -#' @inheritParams plot_rung_loglike -#' @param lag maximum lag. Must be an integer between 1 and 500. -#' @param par vector of parameter names. If \code{NULL} all parameters are -#' plotted. -#' -#' @export - -plot_autocorrelation <- function(x, lag = 20, par = NULL, chain = 1, phase = "sampling") { - - # check inputs - assert_class(x, "drjacoby_output") - assert_single_bounded(lag, 1, 500) - if (is.null(par)) { - par <- setdiff(names(x$output), c("chain", "iteration", "phase", - "logprior", "loglikelihood")) - } - assert_vector_string(par) - assert_in(par, names(x$output)) - assert_single_pos_int(chain) - assert_leq(chain, length(x)) - assert_in(phase, c("burnin", "sampling")) - - # get output for the chosen chain, phase - chain_get <- chain - phase_get <- phase - data <- dplyr::filter(x$output, .data$chain == chain_get, .data$phase == phase_get) %>% - dplyr::select(-.data$chain, -.data$iteration, -.data$phase, -.data$logprior, -.data$loglikelihood) %>% - as.data.frame() - - # select parameters - data <- data[, par, drop = FALSE] - - # estimate autocorrelation - out <- as.data.frame(apply(data, 2, acf_data, lag = lag)) - - # format data for plotting - out$lag <- 0:lag - out <- do.call(rbind, mapply(function(i) { - data.frame(lag = out$lag, - parameter = names(out)[i], - autocorrelation = out[,i]) - }, seq_len(ncol(data)), SIMPLIFY = FALSE)) - - # produce plot - ggplot2::ggplot(data = out, - ggplot2::aes(x = .data$lag, y = 0, xend = .data$lag, yend =.data$autocorrelation)) + - ggplot2::geom_hline(yintercept = 0, lty = 2, col = "red") + - ggplot2::geom_segment(size = 1.5) + - ggplot2::theme_bw() + - ggplot2::ylab("Autocorrelation") + - ggplot2::xlab("Lag") + - ggplot2::ylim(min(0, min(out$autocorrelation)), 1) + - ggplot2::facet_wrap(~ .data$parameter) -} - -#------------------------------------------------ -#' @title Plot parameter estimates -#' -#' @description Produce a series of plots corresponding to each parameter, -#' including the raw trace, the posterior histogram and an autocorrelation -#' plot. Plotting objects can be cycled through interactively, or can be -#' returned as an object allowing them to be viewed/edited by the user. -#' -#' @inheritParams plot_rung_loglike -#' @param show optional vector of parameter names to plot. Parameters matching -#' show will be included. -#' @param hide optional vector of parameter names to filter out. Parameters -#' matching hide will be hidden. -#' @param lag maximum lag. Must be an integer between 1 and 500. -#' @param downsample boolean. Whether to downsample chain to make plotting more -#' efficient. -#' @param display boolean. Whether to show plots, if \code{FALSE} then plotting -#' objects are returned without displaying. -#' -#' @export - -plot_par <- function(x, show = NULL, hide = NULL, lag = 20, - downsample = TRUE, phase = "sampling", - chain = NULL, display = TRUE) { - - # check inputs and define defaults - assert_class(x, "drjacoby_output") - param_names <- setdiff(names(x$output), c("chain", "iteration", "phase", "logprior", "loglikelihood")) - if (!is.null(show)) { - assert_vector_string(show) - assert_in(show, param_names) - param_names <- show - } - if (!is.null(hide)) { - assert_vector_string(hide) - assert_in(hide, param_names) - param_names <- setdiff(param_names, hide) - } - assert_gr(length(param_names), 0, message = "at least one parameter must be specified") - if (length(param_names) > 10){ - message("More than 10 parameters to summarise, consider using the show or hide arguments - to select parameters and reduce computation time.") - } - assert_single_bounded(lag, 1, 500) - assert_single_logical(downsample) - assert_in(phase, c("burnin", "sampling", "both")) - if (is.null(chain)) { - chain <- unique(x$output$chain) - } - assert_pos_int(chain, zero_allowed = FALSE) - assert_single_logical(display) - - # deal with phase = "both" situation - if (phase == "both") { - phase <- c("burnin", "sampling") - } - - # subset based on chain and phase - chain_get <- chain - phase_get <- phase - data <- dplyr::filter(x$output, phase %in% phase_get, chain %in% chain_get) - - # get autocorrelation (on full data, before downsampling) - ac_data <- as.data.frame(apply(data[, param_names, drop = FALSE], 2, acf_data, lag = lag)) - ac_data$lag <- 0:lag - - # downsample - if (downsample & nrow(data) > 2000) { - data <- data[round(seq(1, nrow(data), length.out = 2000)),] - } - - # set minimum bin number - b <- min(nrow(data) / 4, 40) - - # produce plots over all parameters - plot_list <- c() - for(j in 1:length(param_names)){ - - # create plotting data - pd <- data[, c("chain", "iteration", param_names[j])] - names(pd) <- c("chain", "iteration", "y") - - pd2 <- ac_data[, c("lag", param_names[j])] - names(pd2) <- c("lag", "Autocorrelation") - - # Histogram - p1 <- ggplot2::ggplot(pd, ggplot2::aes(x = .data$y)) + - ggplot2::geom_histogram(bins = b, fill = "deepskyblue3", col = "darkblue") + - ggplot2::ylab("Count") + - ggplot2::xlab(param_names[j]) + - ggplot2::theme_bw() - - # Trace plots - p2 <- ggplot2::ggplot(pd, ggplot2::aes(x = .data$iteration, y = .data$y, col = as.factor(.data$chain))) + - ggplot2::geom_line() + - scale_color_discrete(name = "Chain") + - ggplot2::xlab("Iteration") + - ggplot2::ylab(param_names[j]) + - ggplot2::theme_bw() - - # Autocorrealtion - p3 <- ggplot2::ggplot(data = pd2, - ggplot2::aes(x = .data$lag, y = 0, xend = .data$lag, yend =.data$Autocorrelation)) + - ggplot2::geom_hline(yintercept = 0, lty = 2, col = "red") + - ggplot2::geom_segment(size = 1.5) + - ggplot2::theme_bw() + - ggplot2::ylab("Autocorrelation") + - ggplot2::xlab("Lag") + - ggplot2::ylim(min(0, min(pd2$Autocorrelation)), 1) - - # Arrange - pc1 <- cowplot::plot_grid(p1, p3, ncol = 2) - pc2 <- cowplot::plot_grid(p2, pc1, nrow = 2) - plot_list[[j]] <- list(trace = p2, - hist = p1, - acf = p3, - combined = pc2) - - # Display plots, asking user for next page if multiple parameters - if(display){ - graphics::plot(plot_list[[j]]$combined) - if (j < length(param_names)) { - z <- readline("Press n for next plot, f to return the list of all plots or any other key to exit ") - if(z == "f"){ - display <- FALSE - } - if(!z %in% c("n", "f")){ - return(invisible()) - } - } - } - } - names(plot_list) <- paste0("Plot_", param_names) - - return(invisible(plot_list)) -} - -#------------------------------------------------ -#' @title Plot parameter correlation -#' -#' @description Plots correlation between two parameters -#' -#' @inheritParams plot_rung_loglike -#' @param parameter1 name of parameter first parameter. -#' @param parameter2 name of parameter second parameter. -#' @param downsample whether to downsample output to speed up plotting. -#' -#' @export - -plot_cor <- function(x, parameter1, parameter2, - downsample = TRUE, phase = "sampling", - chain = NULL) { - - # check inputs - assert_class(x, "drjacoby_output") - assert_single_string(parameter1) - assert_single_string(parameter2) - assert_in(parameter1, names(x$output)) - assert_in(parameter2, names(x$output)) - assert_single_logical(downsample) - assert_in(phase, c("burnin", "sampling", "both")) - if (is.null(chain)) { - chain <- unique(x$output$chain) - } - assert_pos_int(chain) - - # deal with phase = "both" situation - if (phase == "both") { - phase <- c("burnin", "sampling") - } - - # get basic quantities - chain_get <- chain - phase_get <- phase - data <- dplyr::filter(x$output, phase %in% phase_get, chain %in% chain_get) - - # subset to corr params - data <- data[,c("chain", parameter1, parameter2)] - colnames(data) <- c("chain", "x", "y") - - # downsample - if (downsample & nrow(data) > 2000) { - data <- data[round(seq(1, nrow(data), length.out = 2000)),] - } - - # produce plot - ggplot2::ggplot(data = data, - ggplot2::aes(x = .data$x, y = .data$y, col = as.factor(.data$chain))) + - ggplot2::geom_point(alpha = 0.5) + - ggplot2::xlab(parameter1) + - ggplot2::ylab(parameter2) + - scale_color_discrete(name = "Chain") + - ggplot2::theme_bw() - -} - -#------------------------------------------------ -#' @title Plot 95\% credible intervals -#' -#' @description Plots posterior 95\% credible intervals over specified set of -#' parameters (defauls to all parameters). -#' -#' @inheritParams plot_rung_loglike -#' @param show vector of parameter names to plot. -#' @param param_names optional vector of names to replace the default parameter names. -#' -#' @export - -plot_credible <- function(x, show = NULL, phase = "sampling", param_names = NULL) { - - # check inputs - assert_class(x, "drjacoby_output") - if (!is.null(show)) { - assert_string(show) - assert_in(show, names(x$output)) - } - assert_in(phase, c("burnin", "sampling", "both")) - - # define defaults - if (is.null(show)) { - show <- setdiff(names(x$output), c("chain", "rung", "iteration", "phase", "logprior", "loglikelihood")) - } - if (is.null(param_names)) { - param_names <- show - } - - # deal with phase = "both" situation - if (phase == "both") { - phase <- c("burnin", "sampling") - } - - # subset based on phase and rung - phase_get <- phase - data <- dplyr::filter(x$output, phase %in% phase_get) - data <- data[, show, drop = FALSE] - - # get quantiles - df_plot <- as.data.frame(t(apply(data, 2, quantile_95))) - df_plot$param <- factor(param_names, levels = param_names) - - # produce plot - ggplot2::ggplot(data = df_plot) + ggplot2::theme_bw() + - ggplot2::geom_point(ggplot2::aes(x = .data$param, y = .data$Q50)) + - ggplot2::geom_segment(ggplot2::aes(x = .data$param, y = .data$Q2.5, xend = .data$param, yend = .data$Q97.5)) + - ggplot2::xlab("") + - ggplot2::ylab("95% CrI") - -} - -#------------------------------------------------ -#' @title Plot posterior correlation matrix -#' -#' @description Produces a matrix showing the correlation between all parameters -#' from posterior draws. -#' -#' @inheritParams plot_rung_loglike -#' @param show Vector of parameter names to plot. -#' @param param_names Optional vector of names to replace the default parameter names. -#' -#' @importFrom stats cor -#' @export - -plot_cor_mat <- function(x, show = NULL, phase = "sampling", param_names = NULL) { - - # check inputs - assert_class(x, "drjacoby_output") - if (!is.null(show)) { - assert_string(show) - assert_in(show, names(x$output)) - assert_gr(length(show), 1, message = "must show at least two parameters") - } - assert_in(phase, c("burnin", "sampling", "both")) - - # define defaults - if (is.null(show)) { - show <- setdiff(names(x$output), c("chain", "iteration", "phase", "logprior", "loglikelihood")) - } - if (is.null(param_names)) { - param_names <- show - } - - # deal with phase = "both" situation - if (phase == "both") { - phase <- c("burnin", "sampling") - } - - # subset based on phase - phase_get <- phase - data <- dplyr::filter(x$output, phase %in% phase_get) - data <- data[, show, drop = FALSE] - n <- ncol(data) - - # get correlation matrix into dataframe for ggplot - m <- cor(data) - df_plot <- data.frame(x = rep(names(data), each = n), - xi = rep(1:n, each = n), - y = names(data), - yi = 1:n, - z = as.vector(m)) - df_plot <- subset(df_plot, df_plot$xi < df_plot$yi) - df_plot$x <- factor(df_plot$x, levels = names(data)) - df_plot$y <- factor(df_plot$y, levels = names(data)) - - # get colour range - max_range <- max(abs(range(df_plot$z))) - max_plot <- ceiling(max_range * 10) / 10 - - # produce plot - ggplot2::ggplot(df_plot) + ggplot2::theme_bw() + - ggplot2::geom_raster(ggplot2::aes_(x = ~x, y = ~y, fill = ~z)) + - ggplot2::scale_fill_gradientn(colours = c("red", "white", "blue"), - values = c(0, 0.5, 1), - limits = c(-max_plot, max_plot), - name = "correlation") + - ggplot2::xlab("") + ggplot2::ylab("") - -} + +#------------------------------------------------ +#' @title Plot loglikelihood 95\% credible intervals +#' +#' @description Plot loglikelihood 95\% credible intervals. +#' +#' @param x an object of class \code{drjacoby_output} +#' @param chain which chain to plot. +#' @param phase which phase to plot. Must be either "burnin" or "sampling". +#' @param x_axis_type how to format the x-axis. 1 = integer rungs, 2 = values of +#' the thermodynamic power. +#' @param y_axis_type how to format the y-axis. 1 = raw values, 2 = truncated at +#' auto-chosen lower limit. 3 = double-log scale. +#' +#' @export + +plot_rung_loglike <- function(x, chain = 1, phase = "sampling", x_axis_type = 1, y_axis_type = 1) { + + # check inputs + assert_class(x, "drjacoby_output") + assert_single_pos_int(chain) + assert_leq(chain, length(x)) + assert_in(phase, c("burnin", "sampling")) + assert_single_pos_int(x_axis_type) + assert_in(x_axis_type, 1:2) + assert_single_pos_int(y_axis_type) + assert_in(y_axis_type, 1:3) + + # get useful quantities + thermo_power <- x$diagnostics$rung_details$thermodynamic_power + rungs <- length(thermo_power) + + # define x-axis type + if (x_axis_type == 1) { + x_vec <- 1:rungs + x_lab <- "rung" + } else { + x_vec <- thermo_power + x_lab <- "thermodynamic power" + } + + # get plotting values (loglikelihoods) + phase_get <- phase + chain_get <- chain + data <- dplyr::filter(x$pt, .data$chain == chain_get, .data$phase == phase_get) + y_lab <- "log-likelihood" + + # move to plotting deviance if specified + if (y_axis_type == 3) { + data$loglikelihood <- -2 * data$loglikelihood + y_lab <- "deviance" + + # if needed, scale by adding/subtracting a power of ten until all values are + # positive + if (min(data$loglikelihood) < 0) { + dev_scale_power <- ceiling( log(abs(min(data$loglikelihood))) / log(10) ) + dev_scale_sign <- -sign(min(data$loglikelihood)) + data$loglikelihood <- data$loglikelihood + dev_scale_sign*10^dev_scale_power + + dev_scale_base <- ifelse(dev_scale_power == 0, 1, 10) + dev_scale_power_char <- ifelse(dev_scale_power <= 1, "", paste("^", dev_scale_power)) + dev_scale_sign_char <- ifelse(dev_scale_sign < 0, "-", "+") + y_lab <- parse(text = paste("deviance", dev_scale_sign_char, dev_scale_base, dev_scale_power_char)) + } + } + + # get 95% credible intervals over plotting values + y_intervals <- data %>% + dplyr::group_by(.data$rung) %>% + dplyr::summarise(Q2.5 = quantile(.data$loglikelihood, 0.025), + Q50 = quantile(.data$loglikelihood, 0.5), + Q97.5 = quantile(.data$loglikelihood, 0.975)) + + # get data into ggplot format and define temperature colours + df <- y_intervals + df$col <- thermo_power + df$x <- x_vec + + # produce plot + plot1 <- df %>% + ggplot() + + geom_vline(aes(xintercept = x_vec), col = grey(0.9)) + + geom_pointrange(aes_(x = ~x, ymin = ~Q2.5, y = ~Q50, ymax = ~Q97.5, col = ~col)) + + xlab(x_lab) + + ylab(y_lab) + + scale_colour_gradientn(colours = c("red", "blue"), name = "thermodynamic\npower", limits = c(0,1)) + + theme_bw() + + theme(panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank()) + + # define y-axis + if (y_axis_type == 2) { + y_min <- quantile(df$Q2.5, probs = 0.5) + y_max <- max(df$Q97.5) + plot1 <- plot1 + coord_cartesian(ylim = c(y_min, y_max)) + } else if (y_axis_type == 3) { + plot1 <- plot1 + scale_y_continuous(trans = "log10") + } + + # return plot object + return(plot1) +} + +#------------------------------------------------ +#' @title Plot Metropolis coupling acceptance rates +#' +#' @description Plot Metropolis coupling acceptance rates between all rungs. +#' +#' @inheritParams plot_rung_loglike +#' @param chain which chain to plot. If \code{NULL} then plot all chains. +#' +#' @import ggplot2 dplyr +#' @importFrom grDevices grey +#' @export + +plot_mc_acceptance <- function(x, chain = NULL, phase = "sampling", x_axis_type = 1) { + + # check inputs + assert_class(x, "drjacoby_output") + if (!is.null(chain)) { + assert_single_pos_int(chain, zero_allowed = FALSE) + assert_in(chain, unique(x$output$chain)) + } + assert_in(phase, c("burnin", "sampling")) + assert_single_pos_int(x_axis_type) + assert_in(x_axis_type, 1:2) + + # get useful quantities + thermo_power <- x$diagnostics$rung_details$thermodynamic_power + thermo_power_mid <- thermo_power[-1] - diff(thermo_power)/2 + rungs <- length(thermo_power) + + # exit if rungs = 1 + if (rungs == 1) { + stop("no metropolis coupling when rungs = 1") + } + + # define x-axis type + if (x_axis_type == 1) { + breaks_vec <- 1:rungs + x_vec <- (2:rungs) - 0.5 + x_lab <- "rung" + } else { + breaks_vec <- thermo_power + x_vec <- thermo_power_mid + x_lab <- "thermodynamic power" + } + + # get chain properties + phase_get <- phase + if (is.null(chain)) { + mc_accept <- dplyr::filter(x$diagnostics$mc_accept, .data$phase == phase_get) %>% + dplyr::pull(.data$value) + } else { + chain_get <- chain + mc_accept <- dplyr::filter(x$diagnostics$mc_accept, .data$phase == phase_get, .data$chain == chain_get) %>% + dplyr::pull(.data$value) + } + + # get data into ggplot format and define temperature colours + df <- data.frame(x_vec = x_vec, mc_accept = mc_accept, col = thermo_power_mid) + + # produce plot + plot1 <- ggplot(df) + + geom_vline(aes(xintercept = x), col = grey(0.9), data = data.frame(x = breaks_vec)) + + scale_y_continuous(limits = c(0,1), expand = c(0,0)) + + geom_point(aes(x = x_vec, y = mc_accept, color = col)) + + xlab(x_lab) + ylab("coupling acceptance rate") + + scale_colour_gradientn(colours = c("red", "blue"), name = "thermodynamic\npower", limits = c(0,1)) + + theme_bw() + + theme(panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank()) + + return(plot1) +} + +#------------------------------------------------ +#' @title Plot autocorrelation +#' +#' @description Plot autocorrelation for specified parameters +#' +#' @inheritParams plot_rung_loglike +#' @param lag maximum lag. Must be an integer between 1 and 500. +#' @param par vector of parameter names. If \code{NULL} all parameters are +#' plotted. +#' +#' @export + +plot_autocorrelation <- function(x, lag = 20, par = NULL, chain = 1, phase = "sampling") { + + # check inputs + assert_class(x, "drjacoby_output") + assert_single_bounded(lag, 1, 500) + if (is.null(par)) { + par <- setdiff(names(x$output), c("chain", "iteration", "phase", + "logprior", "loglikelihood")) + } + assert_vector_string(par) + assert_in(par, names(x$output)) + assert_single_pos_int(chain) + assert_leq(chain, length(x)) + assert_in(phase, c("burnin", "sampling")) + + # get output for the chosen chain, phase + chain_get <- chain + phase_get <- phase + data <- dplyr::filter(x$output, .data$chain == chain_get, .data$phase == phase_get) %>% + dplyr::select(-.data$chain, -.data$iteration, -.data$phase, -.data$logprior, -.data$loglikelihood) %>% + as.data.frame() + + # select parameters + data <- data[, par, drop = FALSE] + + # estimate autocorrelation + out <- as.data.frame(apply(data, 2, acf_data, lag = lag)) + + # format data for plotting + out$lag <- 0:lag + out <- do.call(rbind, mapply(function(i) { + data.frame(lag = out$lag, + parameter = names(out)[i], + autocorrelation = out[,i]) + }, seq_len(ncol(data)), SIMPLIFY = FALSE)) + + # produce plot + ggplot2::ggplot(data = out, + ggplot2::aes(x = .data$lag, y = 0, xend = .data$lag, yend =.data$autocorrelation)) + + ggplot2::geom_hline(yintercept = 0, lty = 2, col = "red") + + ggplot2::geom_segment(size = 1.5) + + ggplot2::theme_bw() + + ggplot2::ylab("Autocorrelation") + + ggplot2::xlab("Lag") + + ggplot2::ylim(min(0, min(out$autocorrelation)), 1) + + ggplot2::facet_wrap(~ .data$parameter) +} + +#------------------------------------------------ +#' @title Plot parameter estimates +#' +#' @description Produce a series of plots corresponding to each parameter, +#' including the raw trace, the posterior histogram and an autocorrelation +#' plot. Plotting objects can be cycled through interactively, or can be +#' returned as an object allowing them to be viewed/edited by the user. +#' +#' @inheritParams plot_rung_loglike +#' @param show optional vector of parameter names to plot. Parameters matching +#' show will be included. +#' @param hide optional vector of parameter names to filter out. Parameters +#' matching hide will be hidden. +#' @param lag maximum lag. Must be an integer between 1 and 500. +#' @param downsample boolean. Whether to downsample chain to make plotting more +#' efficient. +#' @param phase which phase to plot. Must be either "burnin", "sampling" or "both". +#' @param display boolean. Whether to show plots, if \code{FALSE} then plotting +#' objects are returned without displaying. +#' +#' @export + +plot_par <- function(x, show = NULL, hide = NULL, lag = 20, + downsample = TRUE, phase = "sampling", + chain = NULL, display = TRUE) { + + # check inputs and define defaults + assert_class(x, "drjacoby_output") + param_names <- setdiff(names(x$output), c("chain", "iteration", "phase", "logprior", "loglikelihood")) + if (!is.null(show)) { + assert_vector_string(show) + assert_in(show, param_names) + param_names <- show + } + if (!is.null(hide)) { + assert_vector_string(hide) + assert_in(hide, param_names) + param_names <- setdiff(param_names, hide) + } + assert_gr(length(param_names), 0, message = "at least one parameter must be specified") + if (length(param_names) > 10){ + message("More than 10 parameters to summarise, consider using the show or hide arguments + to select parameters and reduce computation time.") + } + assert_single_bounded(lag, 1, 500) + assert_single_logical(downsample) + assert_in(phase, c("burnin", "sampling", "both")) + if (is.null(chain)) { + chain <- unique(x$output$chain) + } + assert_pos_int(chain, zero_allowed = FALSE) + assert_single_logical(display) + + # deal with phase = "both" situation + if (phase == "both") { + phase <- c("burnin", "sampling") + } + + # subset based on chain and phase + chain_get <- chain + phase_get <- phase + data <- dplyr::filter(x$output, phase %in% phase_get, chain %in% chain_get) + + # get autocorrelation (on full data, before downsampling) + ac_data <- as.data.frame(apply(data[, param_names, drop = FALSE], 2, acf_data, lag = lag)) + ac_data$lag <- 0:lag + + # downsample + if (downsample & nrow(data) > 2000) { + data <- data[round(seq(1, nrow(data), length.out = 2000)),] + } + + # set minimum bin number + b <- min(nrow(data) / 4, 40) + + # produce plots over all parameters + plot_list <- c() + for(j in 1:length(param_names)){ + + # create plotting data + pd <- data[, c("chain", "iteration", param_names[j])] + names(pd) <- c("chain", "iteration", "y") + + pd2 <- ac_data[, c("lag", param_names[j])] + names(pd2) <- c("lag", "Autocorrelation") + + # Histogram + p1 <- ggplot2::ggplot(pd, ggplot2::aes(x = .data$y)) + + ggplot2::geom_histogram(bins = b, fill = "deepskyblue3", col = "darkblue") + + ggplot2::ylab("Count") + + ggplot2::xlab(param_names[j]) + + ggplot2::theme_bw() + + # Trace plots + p2 <- ggplot2::ggplot(pd, ggplot2::aes(x = .data$iteration, y = .data$y, col = as.factor(.data$chain))) + + ggplot2::geom_line() + + scale_color_discrete(name = "Chain") + + ggplot2::xlab("Iteration") + + ggplot2::ylab(param_names[j]) + + ggplot2::theme_bw() + + # Autocorrealtion + p3 <- ggplot2::ggplot(data = pd2, + ggplot2::aes(x = .data$lag, y = 0, xend = .data$lag, yend =.data$Autocorrelation)) + + ggplot2::geom_hline(yintercept = 0, lty = 2, col = "red") + + ggplot2::geom_segment(size = 1.5) + + ggplot2::theme_bw() + + ggplot2::ylab("Autocorrelation") + + ggplot2::xlab("Lag") + + ggplot2::ylim(min(0, min(pd2$Autocorrelation)), 1) + + # Arrange + pc1 <- cowplot::plot_grid(p1, p3, ncol = 2) + pc2 <- cowplot::plot_grid(p2, pc1, nrow = 2) + plot_list[[j]] <- list(trace = p2, + hist = p1, + acf = p3, + combined = pc2) + + # Display plots, asking user for next page if multiple parameters + if(display){ + graphics::plot(plot_list[[j]]$combined) + if (j < length(param_names)) { + z <- readline("Press n for next plot, f to return the list of all plots or any other key to exit ") + if(z == "f"){ + display <- FALSE + } + if(!z %in% c("n", "f")){ + return(invisible()) + } + } + } + } + names(plot_list) <- paste0("Plot_", param_names) + + return(invisible(plot_list)) +} + +#------------------------------------------------ +#' @title Plot parameter correlation +#' +#' @description Plots correlation between two parameters +#' +#' @inheritParams plot_rung_loglike +#' @param parameter1 name of parameter first parameter. +#' @param parameter2 name of parameter second parameter. +#' @param downsample whether to downsample output to speed up plotting. +#' +#' @export + +plot_cor <- function(x, parameter1, parameter2, + downsample = TRUE, phase = "sampling", + chain = NULL) { + + # check inputs + assert_class(x, "drjacoby_output") + assert_single_string(parameter1) + assert_single_string(parameter2) + assert_in(parameter1, names(x$output)) + assert_in(parameter2, names(x$output)) + assert_single_logical(downsample) + assert_in(phase, c("burnin", "sampling", "both")) + if (is.null(chain)) { + chain <- unique(x$output$chain) + } + assert_pos_int(chain) + + # deal with phase = "both" situation + if (phase == "both") { + phase <- c("burnin", "sampling") + } + + # get basic quantities + chain_get <- chain + phase_get <- phase + data <- dplyr::filter(x$output, phase %in% phase_get, chain %in% chain_get) + + # subset to corr params + data <- data[,c("chain", parameter1, parameter2)] + colnames(data) <- c("chain", "x", "y") + + # downsample + if (downsample & nrow(data) > 2000) { + data <- data[round(seq(1, nrow(data), length.out = 2000)),] + } + + # produce plot + ggplot2::ggplot(data = data, + ggplot2::aes(x = .data$x, y = .data$y, col = as.factor(.data$chain))) + + ggplot2::geom_point(alpha = 0.5) + + ggplot2::xlab(parameter1) + + ggplot2::ylab(parameter2) + + scale_color_discrete(name = "Chain") + + ggplot2::theme_bw() + +} + +#------------------------------------------------ +#' @title Plot 95\% credible intervals +#' +#' @description Plots posterior 95\% credible intervals over specified set of +#' parameters (defauls to all parameters). +#' +#' @inheritParams plot_rung_loglike +#' @param show vector of parameter names to plot. +#' @param param_names optional vector of names to replace the default parameter names. +#' +#' @export + +plot_credible <- function(x, show = NULL, phase = "sampling", param_names = NULL) { + + # check inputs + assert_class(x, "drjacoby_output") + if (!is.null(show)) { + assert_string(show) + assert_in(show, names(x$output)) + } + assert_in(phase, c("burnin", "sampling", "both")) + + # define defaults + if (is.null(show)) { + show <- setdiff(names(x$output), c("chain", "rung", "iteration", "phase", "logprior", "loglikelihood")) + } + if (is.null(param_names)) { + param_names <- show + } + + # deal with phase = "both" situation + if (phase == "both") { + phase <- c("burnin", "sampling") + } + + # subset based on phase and rung + phase_get <- phase + data <- dplyr::filter(x$output, phase %in% phase_get) + data <- data[, show, drop = FALSE] + + # get quantiles + df_plot <- as.data.frame(t(apply(data, 2, quantile_95))) + df_plot$param <- factor(param_names, levels = param_names) + + # produce plot + ggplot2::ggplot(data = df_plot) + ggplot2::theme_bw() + + ggplot2::geom_point(ggplot2::aes(x = .data$param, y = .data$Q50)) + + ggplot2::geom_segment(ggplot2::aes(x = .data$param, y = .data$Q2.5, xend = .data$param, yend = .data$Q97.5)) + + ggplot2::xlab("") + + ggplot2::ylab("95% CrI") + +} + +#------------------------------------------------ +#' @title Plot posterior correlation matrix +#' +#' @description Produces a matrix showing the correlation between all parameters +#' from posterior draws. +#' +#' @inheritParams plot_rung_loglike +#' @param show Vector of parameter names to plot. +#' @param param_names Optional vector of names to replace the default parameter names. +#' +#' @importFrom stats cor +#' @export + +plot_cor_mat <- function(x, show = NULL, phase = "sampling", param_names = NULL) { + + # check inputs + assert_class(x, "drjacoby_output") + if (!is.null(show)) { + assert_string(show) + assert_in(show, names(x$output)) + assert_gr(length(show), 1, message = "must show at least two parameters") + } + assert_in(phase, c("burnin", "sampling", "both")) + + # define defaults + if (is.null(show)) { + show <- setdiff(names(x$output), c("chain", "iteration", "phase", "logprior", "loglikelihood")) + } + if (is.null(param_names)) { + param_names <- show + } + + # deal with phase = "both" situation + if (phase == "both") { + phase <- c("burnin", "sampling") + } + + # subset based on phase + phase_get <- phase + data <- dplyr::filter(x$output, phase %in% phase_get) + data <- data[, show, drop = FALSE] + n <- ncol(data) + + # get correlation matrix into dataframe for ggplot + m <- cor(data) + df_plot <- data.frame(x = rep(names(data), each = n), + xi = rep(1:n, each = n), + y = names(data), + yi = 1:n, + z = as.vector(m)) + df_plot <- subset(df_plot, df_plot$xi < df_plot$yi) + df_plot$x <- factor(df_plot$x, levels = names(data)) + df_plot$y <- factor(df_plot$y, levels = names(data)) + + # get colour range + max_range <- max(abs(range(df_plot$z))) + max_plot <- ceiling(max_range * 10) / 10 + + # produce plot + ggplot2::ggplot(df_plot) + ggplot2::theme_bw() + + ggplot2::geom_raster(ggplot2::aes_(x = ~x, y = ~y, fill = ~z)) + + ggplot2::scale_fill_gradientn(colours = c("red", "white", "blue"), + values = c(0, 0.5, 1), + limits = c(-max_plot, max_plot), + name = "correlation") + + ggplot2::xlab("") + ggplot2::ylab("") + +} diff --git a/R_ignore/.DS_Store b/R_ignore/.DS_Store index f7ed7b1..245f826 100644 Binary files a/R_ignore/.DS_Store and b/R_ignore/.DS_Store differ diff --git a/R_ignore/deploy.R b/R_ignore/deploy.R new file mode 100644 index 0000000..93e27b3 --- /dev/null +++ b/R_ignore/deploy.R @@ -0,0 +1,47 @@ + +# deploy.R +# +# Author: Bob Verity +# Date: 2024-06-19 +# +# Purpose: +# Test package functions. + +# ------------------------------------------------------------------ + +library(tidyverse) + +# define R loglike function +r_loglike <- function(params, data, misc) { + sum(dnorm(data$x, mean = params["mu"], sd = params["sigma"], log = TRUE)) +} + +# define R logprior function +r_logprior <- function(params, misc) { + dunif(params["mu"], -10, 10, log = TRUE) + dnorm(params["sigma"], 0, 1, log = TRUE) +} + +# source C++ likelihood and prior +#cpp11::cpp_source("ignore/source/normal_model.cpp") + + +# ------------------------------------------------------------------ + +set.seed(1) + +# define data +x <- rnorm(1e2, mean = 1, sd = 4) + +# define parameters dataframe +df_params <- define_params(name = "mu", min = -10, max = 10, + name = "sigma", min = 0, max = Inf) + + +mcmc <- run_mcmc(data = list(x = x), + df_params = df_params, + loglike = r_loglike, + logprior = r_logprior, + burnin = 1e3, + samples = 1e3) + +plot_par(mcmc, show = "mu", phase = "burnin") diff --git a/man/drjacoby.Rd b/man/drjacoby.Rd index 28d3f51..957310a 100644 --- a/man/drjacoby.Rd +++ b/man/drjacoby.Rd @@ -1,10 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/drjacoby.R -\docType{package} \name{drjacoby} \alias{drjacoby} \title{Flexible Markov Chain Monte Carlo via Reparameterization} \description{ Flexible Markov chain monte carlo via reparameterization using the Jacobean matrix. + +_PACKAGE } diff --git a/man/plot_par.Rd b/man/plot_par.Rd index c30306e..62e977c 100644 --- a/man/plot_par.Rd +++ b/man/plot_par.Rd @@ -29,7 +29,7 @@ matching hide will be hidden.} \item{downsample}{boolean. Whether to downsample chain to make plotting more efficient.} -\item{phase}{which phase to plot. Must be either "burnin" or "sampling".} +\item{phase}{which phase to plot. Must be either "burnin", "sampling" or "both".} \item{chain}{which chain to plot.} diff --git a/man/run_mcmc.Rd b/man/run_mcmc.Rd index 9f36b06..cb4f968 100644 --- a/man/run_mcmc.Rd +++ b/man/run_mcmc.Rd @@ -26,10 +26,7 @@ run_mcmc( ) } \arguments{ -\item{data}{a named list of numeric data values. When using C++ likelihood -and/or prior these values are treated internally as doubles, so while -integer and Boolean values can be used, keep in mind that these will be -recast as doubles in the likelihood (i.e. \code{TRUE = 1.0}).} +\item{data}{a named list or data frame or data values.} \item{df_params}{a data.frame of parameters (see \code{?define_params}).} diff --git a/src/Particle.h b/src/Particle.h index 885aebe..6e7d282 100644 --- a/src/Particle.h +++ b/src/Particle.h @@ -58,6 +58,7 @@ class Particle { // initialise likelihood and prior values template void init_like(TYPE1 get_loglike, TYPE2 get_logprior) { + PutRNGstate(); for (int i = 0; i < d; ++i) { for (unsigned int j = 0; j < s_ptr->block[i].size(); ++j) { int this_block = s_ptr->block[i][j]; @@ -66,7 +67,10 @@ class Particle { } } loglike = sum(loglike_block); + logprior = Rcpp::as(get_logprior(theta, s_ptr->misc)); + GetRNGstate(); + // Catch for -Inf in likelihood or prior given init theta if(loglike == R_NegInf || logprior == R_NegInf){ Rcpp::Rcerr << "\n Current theta " << theta << std::endl; @@ -99,6 +103,7 @@ class Particle { double adj = get_adjustment(i); // calculate loglikelihood in each block + PutRNGstate(); loglike_prop_block = loglike_block; for (unsigned int j = 0; j < s_ptr->block[i].size(); ++j) { int this_block = s_ptr->block[i][j]; @@ -110,6 +115,7 @@ class Particle { loglike_prop = sum(loglike_prop_block); logprior_prop = Rcpp::as(get_logprior(theta_prop, s_ptr->misc)); + GetRNGstate(); // Check for NA/NaN/Inf in likelihood or prior if(R_IsNaN(loglike_prop) || loglike_prop == R_PosInf || R_IsNA(loglike_prop)){