## Bayesian_linear_model_simulation.R
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Simulate our data
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# A function that generates n1 (x, x*, y, z) data points, where only the
# first n2 points have the true x recorded
simdat <- function(beta, gamma, taux, tauy, tauxstr,
                   alpha, zmean, zvar, n1=1000, n2=100) {
  z = rnorm(n1, zmean, sqrt(zvar))
  ### Exposure model (taux is a precision, so the sd is 1/sqrt(taux))
  x.true = alpha[1] + alpha[2]*z + rnorm(n1, 0, 1/sqrt(taux))
  # The true x is only observed for the first n2 points
  x.partial = c(x.true[1:n2], rep(NA, n1-n2))
  ### Outcome model
  y.true = beta[1] + beta[2]*x.true + beta[3]*z
  # Observed y-values
  y = y.true + rnorm(n1, 0, 1/sqrt(tauy))
  ### Measurement model: additive bias gamma plus random error
  xstr = x.true + gamma + rnorm(n1, 0, 1/sqrt(tauxstr))
  output <- list(z, x.true, x.partial, xstr, y.true, y)
  names(output) <- c('z', 'x.true', 'x.partial', 'xstr', 'y.true', 'y')
  return(output)
}
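## Quick sanity check of the generator (an illustrative aside, not part of the
## original pipeline): with unit precisions and no bias, x* should track the
## true x closely and OLS on the true x should roughly recover beta.
# set.seed(1)
# s.check <- simdat(beta=c(1,1,1), gamma=0, taux=1, tauy=1, tauxstr=1,
#                   alpha=c(1,1), zmean=1, zvar=1)
# cor(s.check$xstr, s.check$x.true)                 # high correlation expected
# coef(lm(s.check$y ~ s.check$x.true + s.check$z))  # roughly (1, 1, 1)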
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Bayesian model
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
library(rjags)
myburnin=500
mysample=2000
### Set up data, initializations, and model
bayes.estimate <- function(s, n1=1000,
f="C:/Users/Shannon/Documents/STAT_536D/simulation_2.bug") {
# Set the data and prior hyperparameters; the NAs in x are treated by JAGS
# as unobserved nodes and imputed during sampling
jags.data <- list('y'=s$y, 'xstr'=s$xstr, 'x'=s$x.partial, 'z'=s$z,
'n1'=n1,
'betamean'=c(0,0,0),
'betaprec'=diag(rep(0.0001,3)),
'gammamean'=c(0,0),
'gammaprec'=diag(c(0.0001,0.0001)),
'alphamean'=c(0,0),
'alphaprec'=diag(c(0.0001,0.0001)))
# Choose which parameters to monitor
jags.params = c('gamma', 'taux', 'tauxstr', 'tauy', 'beta', 'alpha')
# Keep initial values relatively reasonable across the three chains.
# The model should be well identified, so more extreme inits would still
# converge, but we would have to wait longer (more iterations)!
inits <- list(list(beta=c(0,1,2), alpha=c(2,3), gamma=c(1,-1), taux=3, tauy=3, tauxstr=1),
list(beta=c(-1,2,-1), alpha=c(-1,0), gamma=c(0,3), taux=1, tauy=2, tauxstr=2),
list(beta=c(3,3,-3), alpha=c(0,1), gamma=c(3,0), taux=1, tauy=1, tauxstr=1))
m <- jags.model(f,
data=jags.data, n.chains=3, inits=inits)
### To calibrate run lengths, first make a preliminary run of 20000
### iterations with monitoring, thinned to every 100th draw:
# X <- coda.samples(m, variable.names=jags.params,
#                   n.iter=20000, thin=100)
# Traceplots of that run suggest the chains mix by about iteration 500,
# so we discard myburnin=500 iterations and keep mysample=2000 per chain.
# (Re-run this entire code chunk to reset the model before the final run.)
update(m, n.iter=myburnin)  # burn-in: advance the chains without monitoring
mainsample <- coda.samples(m, jags.params, n.iter=mysample)
return(mainsample)
}
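## For reference, the .bug model files are not included in this script. Given
## the data list and monitored parameters above, the bias-adjusted model is
## presumably along these lines (a hypothetical sketch, NOT the original file;
## the dgamma hyperparameters in particular are assumptions):
# model {
#   for (i in 1:n1) {
#     x[i]    ~ dnorm(alpha[1] + alpha[2]*z[i], taux)               # exposure model
#     xstr[i] ~ dnorm(gamma[1] + gamma[2]*x[i], tauxstr)            # measurement model
#     y[i]    ~ dnorm(beta[1] + beta[2]*x[i] + beta[3]*z[i], tauy)  # outcome model
#   }
#   beta[1:3]  ~ dmnorm(betamean, betaprec)     # vague multivariate normal priors
#   alpha[1:2] ~ dmnorm(alphamean, alphaprec)
#   gamma[1:2] ~ dmnorm(gammamean, gammaprec)
#   taux ~ dgamma(0.5, 0.5)
#   tauy ~ dgamma(0.5, 0.5)
#   tauxstr ~ dgamma(0.5, 0.5)
# }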
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Some code so we can nicely display our findings!
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## A function to get the beta-related posterior quantiles
beta.q <- function(mcmc.s, model='m') {
  # summary()[[2]] is the matrix of posterior quantiles (2.5%, 25%, 50%, 75%, 97.5%)
  mcmc.quantiles <- as.data.frame(summary(mcmc.s)[[2]])
  mcmc.quantiles$var <- rownames(mcmc.quantiles)
  mcmc.quantiles$model <- model
  # Keep only the beta rows, with the 2.5%, 50%, and 97.5% quantiles
  mcmc.quantiles <- mcmc.quantiles[startsWith(mcmc.quantiles$var, 'beta'), c(1,3,5,6,7)]
  names(mcmc.quantiles) <- c('low', 'est', 'high', 'var', 'model')
  return(mcmc.quantiles)
}
## A function to output the beta-coefficient estimates from an OLS fit
# of the form b1 + b2*x + b3*z, with 95% Wald confidence intervals
ols.ests <- function(ols.model, model){do.call(rbind, lapply(1:3, function(i){
  # Pull the coefficient table from the fitted model
  ols.summary <- summary(ols.model)$coef
  # Estimate +/- 1.96 standard errors for coefficient i
  high <- ols.summary[i,1] + 1.96*ols.summary[i,2]
  low <- ols.summary[i,1] - 1.96*ols.summary[i,2]
  est <- ols.summary[i,1]
  return(data.frame(low=low, est=est, high=high,
                    var=c('beta[1]', 'beta[2]', 'beta[3]')[i],
                    model=model))
}))}
library(ggplot2)
library(reshape2)
# Plot the estimates given a table with columns:
#   low est high var model
plot.ests <- function(ests) {
# First, define the label:
ests$var <- gsub('beta\\[1\\]', 'Intercept', ests$var)
ests$var <- gsub('beta\\[2\\]', 'beta_X', ests$var)
ests$var <- gsub('beta\\[3\\]', 'beta_Z', ests$var)
p <- ggplot(ests, aes(group=paste(var,model), col=model)) +
# Draw a line at zero
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
# Draw a line at one (our true value)
geom_hline(yintercept = 1, colour = gray(1/2), lty = 2) +
geom_point(aes(x = var, y = est),
position = position_dodge(width = 1/2),
size=2) +
geom_linerange(aes(x = var,
ymin = low, ymax = high),
lwd = 1, position = position_dodge(width = 1/2)) +
ylab("Coefficient value") + xlab("Coefficient") +
coord_flip() + theme_bw() +
theme(legend.position='none')
return(p)
}
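## Illustrative usage of the three helpers above (a sketch with made-up
## inputs, mirroring what the simulation loop below does):
# s.demo <- simdat(beta=c(1,1,1), gamma=1, taux=1, tauy=1, tauxstr=2,
#                  alpha=c(1,1), zmean=1, zvar=1)
# demo.tab <- rbind(ols.ests(lm(s.demo$y ~ s.demo$xstr + s.demo$z), 'OLS demo'),
#                   beta.q(bayes.estimate(s.demo, f="bayesmod_bias.bug"),
#                          model='Bayes demo'))
# plot.ests(demo.tab)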
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Define parameters for which we want to simulate data and run the models
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Outcome model coefficients (intercept, beta_X, beta_Z)
beta=list(c(1,0,1), c(1,1,1))
# Measurement bias
gamma=list(0,1)
# Measurement error precision
tauxstr=list(0.5,2)
# X=a1+a2*Z
alpha=list(c(1,0), c(1,1))
# Cross all possible combinations of the varied parameters (2^4 = 16 scenarios)
vars=expand.grid(gamma, tauxstr, alpha, beta)
names(vars) <- c("gamma", "tauxstr", "alpha", "beta")
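## Each row of vars is one scenario; expand.grid on lists yields list-columns,
## which is why elements are extracted with [[i]] below. To inspect:
# nrow(vars)      # 16 scenarios
# str(vars[1, ])  # one gamma, tauxstr, alpha, and beta setting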
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Let's go ahead and run our simulation
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ests.2 <- vector("list", nrow(vars))
for (i in seq_len(nrow(vars))) {
# Generate a simulated sample with the desired coefficients
s <- simdat(beta=vars$beta[[i]],  # outcome model: y = b1 + b2*X + b3*Z
            # Measurement model: additive measurement bias
            gamma=vars$gamma[[i]],
            ### Precisions of the noise in the true X and in Y
            taux=1, tauy=1,
            # Precision of the random measurement error in X*
            tauxstr=vars$tauxstr[[i]],
            ### Exposure model: relating Z to X
            alpha=vars$alpha[[i]],
            zmean=1, zvar=1)
# Run Bayesian models
bayes_bias <- bayes.estimate(s, n1=1000,
f="bayesmod_bias.bug")
bayes_nobias <- bayes.estimate(s, n1=1000,
f="bayesmod_nobias.bug")
# Fit OLS using the error-prone measurement X* (full sample)
ols <- lm(s$y~s$xstr + s$z)
# Fit OLS on the small subsample where the true X is observed
ols.small <- lm(s$y[1:100]~s$x.partial[1:100] + s$z[1:100])
# Gather the estimates and save them as a table in our ests list!
ests.2[[i]] <- rbind(ols.ests(ols, 'OLS using X* (n=1000)'),
ols.ests(ols.small, 'OLS using true X (n=100)'),
beta.q(bayes_bias, model='Bayes, measurement bias'),
beta.q(bayes_nobias, model='Bayes, no measurement bias'))
}
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Plot our estimated coefficients!
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
plots <- lapply(seq_len(nrow(vars)), function(i){plot.ests(ests.2[[i]])})
for (i in seq_len(nrow(vars))) {
  png(paste0("C:/Users/Shannon/Documents/STAT_536D/coefplots/plot_", i, ".png"))
  # Add a quick description of the scenario's true values:
  # exposure model, measurement model, and the true beta_X
  est.lab <- paste0('X|Z~N(', vars$alpha[[i]][1], '+', vars$alpha[[i]][2], 'Z,1)',
                    '\n X*|X~N(X+', vars$gamma[[i]], ',', vars$tauxstr[[i]], ')',
                    '\n beta_X=', vars$beta[[i]][2])
  print(plots[[i]] + annotate('text', x=3.3, y=0.25, label=est.lab, size=3))
  dev.off()
}
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## An aside: don't forget to check traceplots
## (as written below, this inspects only the chains from the last scenario)
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
library(MCMCvis)
MCMCtrace(bayes_bias)
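## A complementary quantitative check (a sketch using the coda package, which
## rjags loads): potential scale reduction factors near 1 and large effective
## sample sizes support convergence.
# gelman.diag(bayes_bias)      # Gelman-Rubin diagnostic across the 3 chains
# effectiveSize(bayes_bias)    # effective sample size per parameter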