version 0.9.0

changwoo-lee · Dec 24, 2023 · c728c7e · c728c7e
1 parent e33e703
commit c728c7e
Show file tree

Hide file tree

Showing 23 changed files with 653 additions and 105 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,4 @@
 .RData
 .Ruserdata
 docs
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -11,8 +11,15 @@ LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
 Imports: 
-    Matrix,
+    coda,
+    fields,
+    spam,
     spNNGP
 Depends: 
+    Matrix,
     R (>= 2.10)
 URL: https://changwoo-lee.github.io/bspme/
+Suggests: 
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -1 +1,5 @@
-exportPattern("^[[:alpha:]]+")
+# Generated by roxygen2: do not edit by hand
+
+export(blm_me)
+export(vecchia_cov)
+importFrom(Matrix,Diagonal)
diff --git a/R/blm_me.R b/R/blm_me.R
@@ -0,0 +1,213 @@
+#' Bayesian normal linear regression models with (spatially) correlated measurement errors
+#'
+#' This function implements the Bayesian normal linear regression model subject to measurement error with semiconjugate priors.
+#'
+#' Model: Y = beta0 + beta_x X + beta_z Z + epsilon, epsilon ~ N(0, sigma2y)
+#' Measurement error: sigma_Y^2 ~ IG(0.01, 0.01), beta|sigma_Y^2  ~ N(0, sigma_Y^2*diag(var_beta))
+#'
+#' @param Y n by 1 matrix, response
+#' @param X_mean n by 1 matrix or list of n by 1 matrices of length q, mean of X.
+#' @param X_prec n by n matrix or list of n by n matrices of length q, precision matrix of X. Support sparse matrix format from Matrix package.
+#' @param Z n by p matrix, covariates without measurement error
+#' @param nburn integer, burn-in iteration
+#' @param nthin integer, thin-in rate
+#' @param nsave integer, number of posterior samples
+#' @param prior list of prior parameters, default is var_beta = 100,a_Y = 0.01, b_Y = 0.01
+#' @param saveX logical, save X or not
+#'
+#' @return list of (1) posterior, the (nsave)x(q+p) matrix of posterior samples as a coda object,
+#'  (2) cputime, cpu time taken in seconds,
+#'  and optionally (3) X_save, posterior samples of X
+#' @export
+#'
+#' @examples
+#'
+#'
+#'
+#'
+#'
+#'
+blm_me <- function(Y,
+                      X_mean,
+                      X_prec,
+                      Z,
+                      nburn = 2000,
+                      nsave = 2000,
+                      nthin = 5,
+                      prior = NULL,
+                      saveX = F){
+
+  # prior input, default
+  if(is.null(prior)){
+    prior = list(var_beta = 100,a_Y = 0.01, b_Y = 0.01)
+  }
+  var_beta = 100
+  a_Y = 0.01
+  b_Y = 0.01
+
+  n_y = length(Y)
+  if(is.vector(Z)) Z = as.matrix(Z)
+
+  if(!is.list(X_mean) & !is.list(X_prec)){
+    q = 1
+    X_mean = list(X_mean)
+    X_prec = list(X_prec)
+  }else if(is.list(X_mean) & is.list(X_prec)){
+    q = length(X_mean)
+    if(length(X_prec)!=q) stop("list length does not match")
+  }else{
+    stop("X_mean is not vector/matrix or list")
+  }
+  X_prec_X_mean = list()
+  X_spamstruct = vector(mode = 'list', length = q)
+  sparsealgo = rep(T,q)
+
+  for(qq in 1:q){
+    X_prec_X_mean[[qq]] = as.numeric(X_prec[[qq]]%*%X_mean[[qq]])
+
+    if(!("sparseMatrix" %in% is(X_prec[[qq]]))){
+      print(paste0(qq,"th X_prec is not a sparse matrix! Using dense algorithm, which may very slow when n is large"))
+      sparsealgo[qq] = F
+    }else{
+      X_prec[[qq]] = as(as(X_prec[[qq]], "generalMatrix"), "CsparseMatrix")
+      X_prec[[qq]] = spam::as.spam.dgCMatrix(X_prec[[qq]])# spam object
+      X_spamstruct[[qq]] = spam::chol(X_prec[[qq]])
+    }
+  }
+
+  X = matrix(0, n_y, q)
+  for(qq in 1:q) X[,qq] = X_mean[[q]]
+  if(is.null(names(X_mean))){
+    colnames(X) = paste0("exposure.",1:q)
+  }else{
+    colnames(X) =  paste0("exposure.",names(X_mean))
+  }
+
+  p = ncol(Z)
+  if(is.null(colnames(Z))){
+    colnames(Z) = paste0("covariate.",1:p)
+  }else{
+    colnames(Z) =  paste0("covariate.",colnames(Z))
+  }
+  df_temp = as.data.frame(cbind(X,Z))
+  D = model.matrix( ~ ., df_temp)
+
+
+  # prior
+  Sigma_beta = diag(var_beta, ncol(D))# 3 coefficients(beta0, beta1, betaz)
+  Sigma_betainv = solve(Sigma_beta)
+
+  # initialize
+  sigma2_Y = 1
+  beta = rep(0.1, ncol(D))
+
+  sigma2_save = matrix(0, nsave, 1)
+  colnames(sigma2_save) = "sigma2_Y"
+  beta_save = matrix(0, nsave, ncol(D))
+  colnames(beta_save) <- colnames(D)
+  if(saveX){
+    X_save = array(0, dim = c(nsave, n_y, q))
+    dimnames(X_save)[[3]] = names(X_mean)
+  }
+
+  YtY = crossprod(Y)
+  #browser()
+  # sampler starts
+  isave = 0
+  isnegative = numeric(n_y)
+  pb <- txtProgressBar(style=3)
+  t_start = Sys.time()
+  for(imcmc in 1:(nsave*nthin + nburn)){
+    setTxtProgressBar(pb, imcmc/(nsave*nthin + nburn))
+    # sample beta
+    Vbetainv = Sigma_betainv + crossprod(D)/sigma2_Y
+    betatilde = solve(Vbetainv,crossprod(D,Y)/sigma2_Y)
+    beta = as.numeric(spam::rmvnorm.prec(1, mu = betatilde, Q = Vbetainv))
+    # sample sigma2_Y
+    SSR = crossprod(Y - D%*%beta)
+    sigma2_Y = 1/rgamma(1, a_Y + n_y/2, b_Y + SSR/2 )
+
+    for(qq in 1:q){
+      # 1st is intercept
+      b_G =  X_prec_X_mean[[qq]]  + beta[qq + 1]/sigma2_Y*(Y-D[,-(qq+1)]%*%beta[-(qq+1)])
+      Qtilde = X_prec[[qq]] # dense or spam
+      if(sparsealgo[qq]){
+        Qtilde = Qtilde + spam::diag.spam(beta[qq + 1]^2/sigma2_Y, n_y, n_y)
+      }else{
+        diag(Qtilde) = diag(Qtilde) + beta[qq + 1]^2/sigma2_Y
+      }
+      Xstar = spam::rmvnorm.canonical(1, b = as.vector(b_G),
+                                      Q = Qtilde,# dense or spam
+                                      Rstruct = X_spamstruct[[qq]]) #browser()
+      if(imcmc > nburn) isnegative = isnegative + (Xstar<0)
+      D[,(qq+1)] = as.vector(Xstar)
+    }
+
+
+    if((imcmc > nburn)&(imcmc%%nthin==0)){
+      isave = isave + 1
+      sigma2_save[isave] = sigma2_Y
+      beta_save[isave,] = beta
+      if(saveX) X_save[isave,,] = D[,2:(q+1)]
+    }
+  }
+  t_diff = difftime(Sys.time(), t_start, units = "secs")
+  #print(paste0("Exposure components contains negative vaules total ",sum(isnegative)," times among (# exposures) x n_y x (MCMC iter after burnin) = ",q," x ",n_y," x ",nsave*nthin," instances"))
+  out = list()
+  out$posterior = cbind(beta_save, sigma2_save)
+  out$posterior = coda::mcmc(out$posterior)
+  out$cputime = t_diff
+  if(saveX) out$X_save = X_save
+  out
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# data(mtcars)
+#
+# fitme = linreg_me(Y = mtcars$mpg,
+#               X_mean = mtcars$wt,
+#               X_prec = diag(rep(10, 32)),
+#               Z = cbind(mtcars$am, mtcars$vs), var_beta = 10000, nsave = 1000,  nthin = 10, saveX = T)
+# fitme_conj = linreg_me_conj(Y = mtcars$mpg,
+#                   X_mean = mtcars$wt,
+#                   X_prec = Matrix(diag(rep(10, 32))),
+#                   Z = cbind(mtcars$am, mtcars$vs), var_beta = 10000, nsave = 1000,  nthin = 10, saveX = T)
+#
+# fitme2 = linreg_me(Y = mtcars$mpg,
+#                    X_mean = list(wt = mtcars$wt, cyl = mtcars$cyl),
+#                    X_prec = list(wt = diag(rep(10, 32)), cyl = diag(rep(10, 32))),
+#                    Z = cbind(mtcars$am, mtcars$vs), var_beta = 10000, nsave = 1000,  nthin = 10, saveX = T)
+
+#
+#
+# summary(fit$posterior)
+#
+# summary(fitme$posterior)
+#
+# summary(fitme2$posterior)
+# str(fitme2$X_save)
+# plot(fitme2$posterior)
diff --git a/R/bspme-package.R b/R/bspme-package.R
@@ -0,0 +1,37 @@
+#' @keywords internal
+"_PACKAGE"
+
+## usethis namespace: start
+## usethis namespace: end
+NULL
+
+
+#' Dataset, ozone exposure
+#'
+#' This is a subset of "ozone2" dataset in fields package, only containing data from monitoring station with no missing values.
+#' The 8-hour average (surface) ozone (from 9AM-4PM) measured in parts per billion (PPB) for 67 sites in the midwestern US over the period June 3,1987 through August 31, 1987, 89 days.
+#'
+#' @format A data frame with 5963 rows and 6 variables:
+#' \describe{
+#'   \item{date_id}{integer, 1 corresponds to 06/03/1987 and 89 corresponds to 08/31/1987}
+#'   \item{date}{POIXct, date}
+#'   \item{station_id}{character, station id}
+#'   \item{coords_lon}{numeric, longitude of monitoring station}
+#'   \item{coords_lat}{numeric, latitude of monitoring station}
+#'   \item{ozone_ppb}{8-hour average surface ozone from 9am-4pm in parts per billion (PPB)}
+#' }
+"ozone"
+
+
+#' Dataset, simulated health data
+#'
+#' Simulated health data based on ozone exposures on 06/18/1987.
+#'
+#' @format A data frame with 3000 rows and 4 variables:
+#' \describe{
+#'   \item{y}{numeric, simulated health outcome}
+#'   \item{coords_y_lon}{numeric, simulated health subject longitude}
+#'   \item{coords_y_lat}{numeric, simulated health subject latitude,}
+#'   \item{covariate}{numeric, covariate}
+#' }
+"health_sim"
diff --git a/R/hello.R b/R/hello.R
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,3 +3,4 @@ @@
     .RData
     .Ruserdata
     docs
+    inst/doc