Skip to content

Commit

Permalink
posteriori data generation and structures defined
Browse files Browse the repository at this point in the history
  • Loading branch information
Piotr Chlebicki committed May 16, 2024
1 parent 2b8e19d commit 737509a
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 60 deletions.
38 changes: 25 additions & 13 deletions src/UnobservedCountEstimation.jl
Original file line number Diff line number Diff line change
@@ -1,30 +1,42 @@
module UnobservedCountEstimation

# Write your package code here.

# Imports
using Optim
using Statistics
using GLM
using Distributions
using Copulas
using DataFrames
using Integrals, FastGaussQuadrature
using SpecialFunctions
using Distributions
using FastGaussQuadrature
using GLM
using Integrals
using LinearAlgebra
using Copulas
using Optim
using ProgressMeter
using SpecialFunctions
using Statistics
# TODO:: StatsBase.jl compatibility
# TODO:: Turing.jl compatibility

include("zhang_likelihood.jl")
include("original_model.jl")

include("binomial_model_sampling_no_random_effect.jl")
include("binomial_model_sampling_random_effect.jl")
include("binomial_model.jl")
include("struct.jl")

include("interface.jl")

export placeholder
export zhang_model
export binomial_model
export
# functions
zhang_model,
binomial_model,
placeholder,
posterior_data_generation

# Concrete types
ZhangUnobservedCountModel,
BayesianUnobservedCountModel,

# Abstract supertype
AbstractUnobservedCountModel

end
end # end module
55 changes: 45 additions & 10 deletions src/binomial_model.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
function binomial_model(m, N, n; start = "glm", iter = 2000,
warm_up = floor(Int, iter / 2), grid,
warm_up = 1, grid,
save_simulation = true,
k_prior = :auto, θ_prior = :auto,
Σ_prior = :auto, rand_eff = false)
k_prior = :auto, theta_prior = :auto,
sigma_prior = :auto, rand_eff = false,
u_method = "exact")
# TODO:: add X, Z arguments and then methods for type X/Z nothing or formula
df = DataFrame(
y = m,
x1 = log.(N),
x2 = log.(n ./ N)
)
# TODO:: check length of n, m, N
Q = length(n)

X = ones(length(n))
X = X[:, :]
Expand All @@ -28,12 +31,12 @@ function binomial_model(m, N, n; start = "glm", iter = 2000,
end # end

# Asigning priors
if Σ_prior == :auto
Σ_prior = vcov(mm)
if sigma_prior == :auto
sigma_prior = vcov(mm)
end

if θ_prior == :auto
θ_prior = diag(Σ_prior) ./ start[(end-1):end]
if theta_prior == :auto
theta_prior = diag(sigma_prior) ./ start[(end-1):end]
end

if k_prior == :auto
Expand All @@ -43,13 +46,45 @@ function binomial_model(m, N, n; start = "glm", iter = 2000,
# TODO warnings if bad prior
res = nothing
if !rand_eff
res = gibbs_sampler_binomial_model(start, grid, iter, n, N, m, k_prior, θ_prior, Σ_prior)
res = gibbs_sampler_binomial_model(start, grid, iter, n, N, m, k_prior, theta_prior, sigma_prior)
else
push!(start, ones(Float64, length(m)))
res = gibbs_sampler_binomial_model_random_eff(start, grid, iter, n, N, m, k_prior, θ_prior, Σ_prior)
res = gibbs_sampler_binomial_model_random_eff(start, grid, iter, n, N, m, k_prior, theta_prior, sigma_prior, u_method)
end # end if

#M = start[1]
#γ₁ = start[2]
#γ₂ = start[3]
#u = start[4]


# TODO:: Add standard errors and add code to compute standard errors of medians via:
# Var(median) = (4 * n * dP/dλ(median) ^ 2)
# and standard errors of means
coefs = Dict(
# push conditiona; expected value estimates
"Mean" => reduce(vcat,
[[mean(res[k][warm_up:end]) for k in 1:Q], mean(res[Q+1][warm_up:end]), mean(res[Q+2][warm_up:end])]
),
# push maximum a posteriori estimates
"MAP" => reduce(vcat,
[[median(res[k][warm_up:end]) for k in 1:Q], median(res[Q+1][warm_up:end]), median(res[Q+2][warm_up:end])]
)
)

if rand_eff
# append estimates for u
append!(coefs["Mean"], [ mean(res[Q+2+k][warm_up:end]) for k in 1:Q])
append!(coefs[ "MAP"], [median(res[Q+2+k][warm_up:end]) for k in 1:Q])
end # end if
# return object with summary statistics and
res

BayesianUnobservedCountModel(
res, # sim_res
[m, n, N], # data
coefs, # coefs
[sigma_prior, k_prior, theta_prior], #prior
iter, # iter
nrow(df) # Q
)
end # end function
Empty file.
57 changes: 37 additions & 20 deletions src/binomial_model_sampling_random_effect.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ function sample_gamma_1_cond_random_eff(grid, n, N, γ₂, M, m, u, k_prior, θ_
res = BigFloat.(m .* log.(μ) + (M - m) .* log.(1 .- u .* μ) - exp.(lξ) + M .* lξ)
exp(sum(res)) * pdf(copula, c) * pdf(distr_prior_marginal, x)
end # end funciton
evaluated_denisty = density_function.(grid)

evaluated_denisty = density_function.(grid)
# temporary solution
evaluated_denisty[isnan.(evaluated_denisty)] .= 0
#println(evaluated_denisty)
evaluated_denisty ./= sum(evaluated_denisty)

# sample acording to evaluation
grid[rand(Categorical(evaluated_denisty))]
end # end funciton
Expand All @@ -39,10 +40,10 @@ function sample_gamma_2_cond_random_eff(grid, n, N, γ₁, M, m, u, k_prior, θ_
res = BigFloat.(m .* log.(μ) + (M - m) .* log.(1 .- u .* μ))
exp(sum(res)) * pdf(copula, c) * pdf(distr_prior_marginal, x)
end # end funciton

evaluated_denisty = density_function.(grid)
# temporary solution
evaluated_denisty[isnan.(evaluated_denisty)] .= 0
#println(evaluated_denisty)
evaluated_denisty ./= sum(evaluated_denisty)
# sample acording to evaluation
grid[rand(Categorical(evaluated_denisty))]
Expand All @@ -59,45 +60,61 @@ function sample_M_cond_random_eff(n, N, m, γ₁, γ₂, u)
m + M_minus_m
end # end funciton

function sample_u_cond_random_eff(n, N, m, M, γ₁, γ₂; prec = 40)
function sample_u_cond_random_eff(n, N, m, M, γ₁, γ₂; prec = 40, method)
# compute normalization factor
μ = (N .^ γ₁) .* ((n ./ N) .^ γ₂)
μ = 1 ./ (1 .+ 1 ./ μ)
beta_distr = Beta.(n, N - n)
U = rand(Uniform(), length(N))

res = Vector{Float64}()
for k in eachindex(M)
setprecision(prec) do
# TODO:: this could be much faster if broadcasted but GaussLegendre() doesn't work in R^d where d=>2
f(x, p) = exp(BigFloat(m[k] * log(x) + (M - m)[k] * log(1 - x * μ[k]) + logpdf(beta_distr[k], x)))
# TODO this fails for older integrals.jl versions
prob(x) = IntegralProblem(f, (0, x))
ff(x) = solve(prob(x), GaussLegendre(), reltol = 1e-10, abstol = 1e-10)
R = ff(1)
a = optimize(x -> (ff(x)[1] / R[1] - U[k]) ^ 2, 0, 1, Brent(), rel_tol = 1e-10)
push!(res, a.minimizer)
end # end set precision
end # end for

if method == "exact"
for k in eachindex(M)
setprecision(prec) do
# TODO:: this could be much faster if broadcasted but GaussLegendre() doesn't work in R^d where d=>2
f(x, p) = exp(BigFloat(m[k] * log(x) + (M - m)[k] * log(1 - x * μ[k]) + logpdf(beta_distr[k], x)))
# this is deprecated
# prob(x) = IntegralProblem(f, (0, x))
# Pkg.status("Integrals")
# for older versions:
prob(x) = IntegralProblem(f, 0, x)
ff(x) = solve(prob(x), GaussLegendre(), reltol = 1e-10, abstol = 1e-10)
R = ff(1)
a = optimize(x -> (ff(x)[1] / R[1] - U[k]) ^ 2, 0, 1, Brent(), rel_tol = 1e-10)
push!(res, a.minimizer)
end # end set precision
end # end for
else
grid = 0.001:0.001:(1-0.001)
f(x, k) = exp(BigFloat(m[k] * log(x) + (M - m)[k] * log(1 - x * μ[k]) + logpdf(beta_distr[k], x)))
for k in eachindex(M)
evaluated_denisty = f.(grid, k)
evaluated_denisty[isnan.(evaluated_denisty)] .= 0
evaluated_denisty ./= sum(evaluated_denisty)

push!(res, grid[rand(Categorical(evaluated_denisty))])
end # ned for
end # end if
res
end # end funciton

function gibbs_sampler_binomial_model_random_eff(start, grid, iter, n, N, m, k_prior, θ_prior, Σ_prior)
function gibbs_sampler_binomial_model_random_eff(start, grid, iter, n, N, m, k_prior, θ_prior, Σ_prior, u_method)
# create storage vectors
M = start[1]
γ₁ = start[2]
γ₂ = start[3]
u = start[4]

storage = reduce(vcat, [[[M[k]] for k in eachindex(M)], [[u[k]] for k in eachindex(M)], [[γ₁]], [[γ₂]]])
prog = Progress(iter, "Sampling progress ...")
prog = Progress(iter; desc = "Sampling progress ...")

for k in 1:iter
for _ in 1:iter
# sample M conditional on γ₁ and γ₂
M = sample_M_cond_random_eff(n, N, m, γ₁, γ₂, u)
#println(M)
u = sample_u_cond_random_eff(
n, N, m, M, γ₁, γ₂; prec = 40
n, N, m, M, γ₁, γ₂; prec = 40, method = u_method
)
# sample γ₁ conditional on M and γ₂
γ₁ = sample_gamma_1_cond_random_eff(
Expand Down
4 changes: 3 additions & 1 deletion src/original_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,7 @@ function zhang_model(m, N, n; start = "glm")
ξ̂ = sum(N .^ α̂)
#[coef(ols), coef(mm)]
#[start, log_l, grad_l, hes_l]
[[α̂, β̂, ϕ̂, ξ̂], optim_problem]

# return structure
ZhangUnobservedCountModel([α̂, β̂, ϕ̂, ξ̂], optim_problem)
end # end function
22 changes: 22 additions & 0 deletions src/posterior_data_generation.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
function posterior_data_generation(object::ZhangUnobservedCountModel)
"foo"
end


# MAP or Mean
function posterior_data_generation(object::BayesianUnobservedCountModel; est = "MAP", rep = 500)
par = object.coefs[est]
Q = length.(object.:data)[1]

M = par[1:Q]
γ₁ = par[end - 1]
γ₂ = par[end]
u = 1
if length(par) != Q + 2
u = par[(Q + 1):(end - 2)]
end # end if
μ = (object.data[3] .^ γ₁) .* ((object.data[2] ./ object.data[3]) .^ γ₂)
μ = 1 ./ (1 .+ 1 ./ μ)

rand.(Binomial.(M, u .* μ), rep)
end
21 changes: 21 additions & 0 deletions src/struct.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# TODO:: rename after deciding on type name

abstract type
AbstractUnobservedCountModel
end


mutable struct ZhangUnobservedCountModel <: AbstractUnobservedCountModel
coefs
optim_result
end


mutable struct BayesianUnobservedCountModel <: AbstractUnobservedCountModel
sim_res
data
coefs
prior
iter::Int
Q::Int
end
44 changes: 28 additions & 16 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@ using CSV, DataFrames, Random, Statistics

@testset "zhang_model.jl" begin
df = CSV.read(pwd() * "/test_csv_zhang.csv", DataFrame)
#df = CSV.read(pwd() * "/test/test_csv_zhang.csv", DataFrame)

a = zhang_model(df[:, :m], df[:, :N], df[:, :n]; start = "glm")
b = zhang_model(df[:, :m], df[:, :N], df[:, :n]; start = "lm")

@test a[1][4] sum(df[:, ]) rtol=.075
@test a[1][4] sum(df[:, :M]) rtol=.075
@test sum(a.coefs["MAP"][1:20]) sum(df[:, ]) rtol=.075
@test sum(a.coefs["MAP"][1:20]) sum(df[:, :M]) rtol=.075

@test b[1][4] sum(df[:, ]) rtol=.075
@test b[1][4] sum(df[:, :M]) rtol=.075
end
@test sum(b.coefs["MAP"][1:20]) sum(df[:, ]) rtol=.075
@test sum(b.coefs["MAP"][1:20]) sum(df[:, :M]) rtol=.075
end # end test "zhang_model.jl"

@testset "binomial_model.jl" begin
Random.seed!(1234)
Expand All @@ -27,32 +28,43 @@ end
θ = [0.04, 0.09523809523809523]
Q = 20

a = binomial_model(df[:, :m], df[:, :N], df[:, :n]; start = "glm", grid = .3:.01:3, k_prior = k, θ_prior = θ, Σ_prior = Σ, iter = 1_000)
a = binomial_model(
df[:, :m], df[:, :N], df[:, :n];
start = "glm", grid = .3:.01:3,
k_prior = k, theta_prior = θ,
sigma_prior = Σ, iter = 1_000
)

res_a = reduce(vcat, [[mean(a[k]) for k in 1:Q], [mean(a[end-1])], [mean(a[end])]])
res_a = a.coefs["Mean"]
parm = reduce(vcat, [df[:, :M], [γ₁], [γ₂]])

@test res_a parm rtol = .25
@test all(quantile.(a[1:Q], 2.5 / 100) <= df[:, :M] <= quantile.(a[1:Q], 97.5 / 100))
@test all(quantile.(a.sim_res[1:Q], 2.5 / 100) <= df[:, :M] <= quantile.(a.sim_res[1:Q], 97.5 / 100))

@test res_a[(end-1):end] [γ₁, γ₂] rtol = .06
# this does not fit in 95% interval but this is probably just due to chance
@test quantile(a[end - 1], 0) <= γ₁ <= quantile(a[end], 95 / 100)
@test quantile(a[end], 2.5 / 100) <= γ₂ <= quantile(a[end], 97.5 / 100)
@test quantile(a.sim_res[end - 1], 0) <= γ₁ <= quantile(a.sim_res[end], 95 / 100)
@test quantile(a.sim_res[end], 2.5 / 100) <= γ₂ <= quantile(a.sim_res[end], 97.5 / 100)

# new data
df = CSV.read(pwd() * "/test_csv_binomial_with_random_effect.csv", DataFrame)
#df = CSV.read(pwd() * "/test/test_csv_binomial_with_random_effect.csv", DataFrame)
γ₁ = 0.8983650801874796
b = binomial_model(df[:, :m], df[:, :N], df[:, :n]; start = "lm", grid = .3:.01:3, k_prior = k, θ_prior = θ, Σ_prior = Σ, iter = 1_000, rand_eff = true)
b = binomial_model(
df[:, :m], df[:, :N], df[:, :n];
start = "lm", grid = .3:.01:3,
k_prior = k, theta_prior = θ,
sigma_prior = Σ, iter = 1_000,
rand_eff = true, u_method = "grid"
)

res_b = reduce(vcat, [[mean(b[k]) for k in 1:Q], [mean(b[end-1])]])
res_b = b.coefs["Mean"]
parm = reduce(vcat, [df[:, :M], [γ₁]])

@test res_b parm rtol = .25
@test sum(quantile.(b[1:Q], 2.5 / 100) .<= df[:, :M] .<= quantile.(b[1:Q], 97.5 / 100)) > 18
@test sum(quantile.(b.sim_res[1:Q], 2.5 / 100) .<= df[:, :M] .<= quantile.(b.sim_res[1:Q], 97.5 / 100)) > 18

@test res_b[end] γ₁ rtol = .08
@test quantile(b[end-1], 2.5 / 100) <= γ₁ <= quantile(b[end-1], 97.5 / 100)
end
@test res_b[Q + 1] γ₁ rtol = .08
@test quantile(b.sim_res[end-1], 2.5 / 100) <= γ₁ <= quantile(b.sim_res[end-1], 97.5 / 100)
end # end test "binomial_model.jl"

0 comments on commit 737509a

Please sign in to comment.