Skip to content

Commit

Permalink
Some work on bayesian binomial model
Browse files Browse the repository at this point in the history
  • Loading branch information
Piotr Chlebicki committed Apr 19, 2024
1 parent 7f197bf commit d3a1223
Show file tree
Hide file tree
Showing 8 changed files with 422 additions and 103 deletions.
263 changes: 257 additions & 6 deletions Manifest.toml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
Integrals = "de52edbc-65ea-441a-8357-d3a637375a31"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Expand Down
3 changes: 2 additions & 1 deletion src/UnobservedCountEstimation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ module UnobservedCountEstimation

# Imports
using Optim, Statistics, GLM, Distributions, DataFrames, SpecialFunctions, LinearAlgebra
using Integrals

include("zhang_likelihood.jl")
include("original_model.jl")

include("binomial_likelihood.jl")
include("binomial_model_sampling.jl")
include("binomial_model.jl")

include("interface.jl")
Expand Down
75 changes: 0 additions & 75 deletions src/binomial_likelihood.jl

This file was deleted.

28 changes: 8 additions & 20 deletions src/binomial_model.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
function binomial_model(m, N, n; start = "glm")
function binomial_model(m, N, n; start = "glm", iter = 2000,
warm_up = floor(Int, iter / 2), grid,
save_simulation = true)
# TODO:: add X, Z arguments and then methods for type X/Z nothing or formula
df = DataFrame(
y = m,
Expand All @@ -12,29 +14,15 @@ function binomial_model(m, N, n; start = "glm")
Z = ones(length(n))
Z = Z[:, :]

log_l_f = x -> log_lik_binomial_model(x[1:(end - 2)], x[end - 1], x[end], m, N, n, X, Z) * (-1.0)
grad_l_f = x -> grad_log_lik_binomial_model(x[1:(end - 2)], x[end - 1], x[end], m, N, n, X, Z) * (-1.0)
hes_l_f = x -> hess_log_lik_binomial_model(x[1:(end - 2)], x[end - 1], x[end], m, N, n, X, Z) * (-1.0)

#= result = optimize(log_l_f, [start[1], start[2], 1], Newton(); inplace = false)
result_1 = optimize(log_l_f, grad_l_f, [start[1], start[2], 1], Newton(); inplace = false) =#
# TODO :: dependent α, β

start = Float64[]
append!(start, zeros(length(N)))
start = [M]

if start == "glm"
append!(start, coef(glm(@formula(y ~ x1 + x2 + 0), df, Poisson(), LogLink())))
else
append!(start, coef(lm(@formula(log(y) ~ x1 + x2 + 0), df)))
end # end if
end # end

res = gibbs_sampler_binomial_model(start, grid, iter, n, N, m)

optim_problem = optimize(log_l_f, grad_l_f, hes_l_f, start, NewtonTrustRegion(); inplace = false)
α̂ = optim_problem.minimizer[end - 1]
β̂ = optim_problem.minimizer[end]
= optim_problem.minimizer[1:length(N)]
ξ̂ = N .^ α̂
#[coef(ols), coef(mm)]
#[start, log_l, grad_l, hes_l]
[[α̂, β̂, ξ̂, M̂, sum(ξ̂), sum(M̂)], optim_problem]
# return object with summary statistics and
end # end function
117 changes: 117 additions & 0 deletions src/binomial_model_sampling.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
function sample_gamma_1_cond(grid, n, N, γ₂, M, m, μ_γ₁, μ_γ₂, ρ, σ_γ₁, σ_γ₂, ε = 1e-6)
# get posteriori normal parameters
μ_γ₁_post = μ_γ₁ + ρ * σ_γ₂ / σ_γ₁ * (γ₂ - μ_γ₂)
σ_γ₁_post = sqrt(1 - ρ ^ 2) * σ_γ₁
distr = Normal(μ_γ₁, σ_γ₁)
# compute R_i's
#log_N_sq = log.(N) .^ 2
#= R = zeros(BigFloat, length(M))
next_iter = true
t = 0
while next_iter
println(t)
# Rprev = copy(R)
# iterate sum
R_add = cgf.(distr, (M .- t) .* log.(N)) .- logfactorial(t)
R .+= exp.(R_add) .* (-1) ^ t
println(R_add)
# check convergence
next_iter = any(exp.(R_add) .< ε) | t > 1
t += 1
end # end while
println(R) =#
f(x, p) = exp.(x .* M .* log.(N) - N .^ x) .* pdf(distr, x)
prob = IntegralProblem(f, [-Inf, Inf])
R = solve(prob, HCubatureJL(), reltol = ε, abstol = ε)
#println(R)
R .*= exp.(-logfactorial.(M)) # <--- if this fails this is probably why
#println(R)
#error("bcd")

# shift grid towards the mean since it is most probable
grid1 = grid .+ μ_γ₁_post
# get unscaled density function and evaluate it on a grid
function density_function(x)
μ = (N .^ x) .* ((n ./ N) .^ γ₂)
μ = 1 ./ (1 .+ exp.(-μ))
= x .* log.(N)

#res = (μ .^ m) .* ((1 .- μ) .^ (M .- m))
res = zeros(BigFloat, length(M))
res .+= m .* log.(μ) .+ (M .- m) .* log.(1 .- μ) .- log.(R) .- logfactorial.(M .- m) .- exp.(lξ) .+ M .*
#println(exp(sum(res)))
exp(sum(res)) .* pdf(Normal(μ_γ₁_post, σ_γ₁_post), x)
end # end funciton
evaluated_denisty = density_function.(grid1)
#println(evaluated_denisty)
evaluated_denisty ./= sum(evaluated_denisty)
# sample acording to evaluation
grid1[rand(Categorical(evaluated_denisty))]
end # end funciton

function sample_gamma_2_cond(grid, n, N, γ₁, M, m, μ_γ₁, μ_γ₂, ρ, σ_γ₁, σ_γ₂)
# get posteriori normal parameters
μ_γ₂_post = μ_γ₂ + ρ * σ_γ₁ / σ_γ₂ * (γ₁ - μ_γ₁)
σ_γ₂_post = sqrt(1 - ρ ^ 2) * σ_γ₂
# shift grid towards the mean since it is most probable
grid1 = grid .- μ_γ₂_post
# get unscaled density function and evaluate it on a grid
function density_function(x)
μ = (N .^ γ₁) .* ((n ./ N) .^ x)
μ = 1 ./ (1 .+ exp.(-μ))
#ξ = N .^ γ₁

res = logfactorial.(M) .- logfactorial.(M .- m) .- logfactorial.(m) .+ m .* log.(μ).+ (M .- m) .* log.(1 .- μ)
exp(sum(res)) * pdf(Normal(μ_γ₂_post, σ_γ₂_post), x)
end # end funciton
evaluated_denisty = density_function.(grid1)
evaluated_denisty ./= sum(evaluated_denisty)
# sample acording to evaluation
grid1[rand(Categorical(evaluated_denisty))]
end # end funciton

function sample_M_cond(n, N, m, γ₁, γ₂)
# compute ξ, μ
μ = (N .^ γ₁) .* ((n ./ N) .^ γ₂)
μ = 1 ./ (1 .+ exp.(-μ))
ξ = N .^ γ₁
# draw M-m vector from poisson intependently
M_minus_m = reduce(vcat, rand.(Poisson.(ξ .* (1 .- μ)), 1))
# return M = (M-m) + increment
m + M_minus_m
end # end funciton

function gibbs_sampler_binomial_model(start, grid, iter, n, N, m, μ_γ₁, μ_γ₂, σ_γ₁, σ_γ₂, ρ, ε = 1e-6)
# create storage vectors
M = start[1]
γ₁ = start[2]
γ₂ = start[3]

storage = [[M], [γ₁], [γ₂]]

for k in iter
# sample M conditional on γ₁ and γ₂
M = sample_M_cond(n, N, m, γ₁, γ₂)
#println(M)
# sample γ₁ conditional on M and γ₂
γ₁ = sample_gamma_1_cond(
grid, n, N, γ₂,
M, m, μ_γ₁, μ_γ₂,
ρ, σ_γ₁, σ_γ₂, ε
)
#println(γ₁)
# sample γ₂ conditional on γ₁ and M
γ₂ = sample_gamma_2_cond(
grid, n, N, γ₁,
M, m, μ_γ₁, μ_γ₂,
ρ, σ_γ₁, σ_γ₂
)
#println(γ₂)
# store them
append!(storage[1], M)
append!(storage[2], γ₁)
append!(storage[3], γ₂)
end # end for
# return stored values
storage
end # end funciton
17 changes: 16 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using Test
# TODO:: CSV is only used for testing, maybe specify that in Project.toml
using CSV, DataFrames

@testset "UnobservedCountEstimation.jl" begin
@testset "zhang_model.jl" begin
df = CSV.read(pwd() * "/test_csv_zhang.csv", DataFrame)

a = zhang_model(df[:, :m], df[:, :N], df[:, :n]; start = "glm")
Expand All @@ -15,3 +15,18 @@ using CSV, DataFrames
@test b[1][4] sum(df[:, ]) rtol=.075
@test b[1][4] sum(df[:, :M]) rtol=.075
end

# for now
#= @testset "binomial_model.jl" begin
df = CSV.read(pwd() * "/test_csv_binomial.csv", DataFrame)
a = binomial_model(df[:, :m], df[:, :N], df[:, :n]; start = "glm")
b = binomial_model(df[:, :m], df[:, :N], df[:, :n]; start = "lm")
@test a[1][4] ≈ sum(df[:, :ξ]) rtol=.075
@test a[1][4] ≈ sum(df[:, :M]) rtol=.075
@test b[1][4] ≈ sum(df[:, :ξ]) rtol=.075
@test b[1][4] ≈ sum(df[:, :M]) rtol=.075
end =#

21 changes: 21 additions & 0 deletions test/test_csv_binomial.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
N,n,m,M,ξ
4946,501,616,907,902.4087332179555
4886,480,661,965,893.6403502565424
4935,506,608,887,900.8027961854027
4943,488,598,895,901.9708212977752
5079,535,647,943,921.7700017557884
5054,516,587,897,918.1384813234845
5027,521,658,922,914.2124012586877
4890,507,604,862,894.2255764308815
5022,490,593,868,913.4848871407867
4928,477,589,888,899.7804635417899
4983,513,636,930,907.8052912210046
4968,485,595,917,905.6184666779144
5144,543,716,970,931.195284188665
4993,507,643,942,909.2624427471206
4854,472,605,879,888.9550830898875
4988,499,588,884,908.5339400169311
4977,489,617,917,906.9307196114336
5053,524,617,902,917.9931458823555
4950,531,629,880,902.9925331538545
4845,492,551,837,887.6362400289374

0 comments on commit d3a1223

Please sign in to comment.