Skip to content

Commit

Permalink
Initial work on binomial model
Browse files Browse the repository at this point in the history
  • Loading branch information
Piotr Chlebicki committed Apr 10, 2024
1 parent 3a16c5e commit 7f197bf
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
[![Build Status](https://github.com/ncn-foreigners/UnobservedCountEstimation.Jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/ncn-foreigners/UnobservedCountEstimation.jl/actions/workflows/CI.yml?query=branch%3Amain)
[![Coverage](https://codecov.io/gh/ncn-foreigners/UnobservedCountEstimation.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/ncn-foreigners/UnobservedCountEstimation.jl)

Julia package implementing statistical methods for population size estimation from "[Developing methods for determining the number of unau- thorized foreigners in norway](https://www.ssb.no/a/english/publikasjoner/pdf/doc_200811_en/doc_200811_en.pdf)" and extentions from working paper "[Estimation of the number of irregular foreigners in Poland using non-linear count regression models](https://arxiv.org/abs/2008.09407)".
Julia package implementing statistical methods for population size estimation from "[Developing methods for determining the number of unauthorized foreigners in norway](https://www.ssb.no/a/english/publikasjoner/pdf/doc_200811_en/doc_200811_en.pdf)" and extentions from working paper "[Estimation of the number of irregular foreigners in Poland using non-linear count regression models](https://arxiv.org/abs/2008.09407)".
5 changes: 5 additions & 0 deletions src/UnobservedCountEstimation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,14 @@ using Optim, Statistics, GLM, Distributions, DataFrames, SpecialFunctions, Linea

include("zhang_likelihood.jl")
include("original_model.jl")

include("binomial_likelihood.jl")
include("binomial_model.jl")

include("interface.jl")

export placeholder
export zhang_model
export binomial_model

end
75 changes: 75 additions & 0 deletions src/binomial_likelihood.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# likelihood and derivatives go here

function log_lik_binomial_model(M, α, β, m, N, n, X, Z)
μ = (N .^ (X * α)) .* ((n ./ N) .^ (Z * β))
M = exp.(M) .+ m
μ = exp.(μ) ./ (1 .+ exp.(μ))
p = μ ./ M

sum(loggamma.(M .+ 1) .- loggamma.(m .+ 1.0) .- loggamma.(M .- m .+ 1.0) .+ m .* log.(p) .+ (M .- m) .* log.(1 .- p))
end # end function

function grad_log_lik_binomial_model(M, α, β, m, N, n, X, Z)
μ = (N .^ (X * α)) .* ((n ./ N) .^ (Z * β))
Mprev = copy(M)
M = exp.(M) .+ m
μ = exp.(μ) ./ (1 .+ exp.(μ))
p = μ ./ M

dp = m ./ p .- (M .- m) ./ (1 .- p)

= dp .* (1 .- μ) .* μ .* (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* log.(N) ./ M
= dp .* (1 .- μ) .* μ .* (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* log.(n ./ N) ./ M

dM = digamma.(M .+ 1) .- digamma.(M .- m .+ 1) .- (m ./ M) .+ (M .- m) .* ((1 .- p) .^ -1) .* (p ./ M) .+ log.(1 .- p)
# TODO:: derivative correction M, exp link

vcat(dM .* exp.(Mprev), X' * dα, Z' * dβ)
end # end function

function hess_log_lik_binomial_model(M, α, β, m, N, n, X, Z)
μ = (N .^ (X * α)) .* ((n ./ N) .^ (Z * β))
μ = exp.(μ) ./ (1 .+ exp.(μ))
Mprev = copy(M)
M = exp.(M) .+ m
p = μ ./ M

dp = m ./ p .- (M .- m) ./ (1 .- p)
dp_2 = -m ./ p .^ 2 .- (M .- m) ./ (1 .- p) .^ 2

dα_2 = -2 .* μ .^ 2 .* (1 .- μ) .* (log.(N) .^ 2) .* (N .^ (2 * X * α)) .* ((n ./ N) .^ (2 * Z * β))
dα_2 += (log.(N) .^ 2) .* (N .^ (2 * X * α)) .* ((n ./ N) .^ (2 * Z * β)) .* μ .* (1 .- μ)
dα_2 += (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* (log.(N) .^ 2) .* μ .* (1 .- μ)
dα_2 .*= dp ./ M
dα_2 += dp_2 .* ((1 .- μ) .* μ .* (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* log.(N) ./ M) .^ 2

dβ_2 = -2 .* μ .^ 2 .* (1 .- μ) .* (log.(n ./ N) .^ 2) .* (N .^ (2 * X * α)) .* ((n ./ N) .^ (2 * Z * β))
dβ_2 += (log.(n ./ N) .^ 2) .* (N .^ (2 * X * α)) .* ((n ./ N) .^ (2 * Z * β)) .* μ .* (1 .- μ)
dβ_2 += (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* (log.(n ./ N) .^ 2) .* μ .* (1 .- μ)
dβ_2 .*= dp ./ M
dβ_2 += dp_2 .* ((1 .- μ) .* μ .* (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* log.(N) ./ M) .^ 2

dαdβ = dp .* (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* log.(N) .* log.(n ./ N) .* μ .* (1 .- μ) ./ M
dαdβ .*= ((N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* (exp.((N .^ (X * α)) .* ((n ./ N) .^ (Z * β))) .- 1) - exp.((N .^ (X * α)) .* ((n ./ N) .^ (Z * β))) .- 1)
dαdβ += dp_2 .* ((1 .- μ) .* μ) .^ 2 .* (N .^ (2 * X * α)) .* ((n ./ N) .^ (2 * Z * β)) .* log.(N) .* log.(n ./ N) ./ (M .^ 2)

dpdM = (1 .- m ./ M) .* ((1 .- p) .^ -2) .- (1 .- p) .^ -1
dpdM .*= exp.(Mprev)

dαdM = dpdM .* (1 .- μ) .* μ .* (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* log.(N) ./ M
dβdM = dpdM .* (1 .- μ) .* μ .* (N .^ (X * α)) .* ((n ./ N) .^ (Z * β)) .* log.(n ./ N) ./ M

dM_2 = trigamma.(M .+ 1) .- trigamma.(M .- m .+ 1) .+ m ./ M .^ 2
dM_2 += (m ./ M .^ 2) .* p ./ (1 .- p) .+ μ ./ ((1 .- p) .* M .^ 2)
dM_2 -= (1 .- m ./ M) .* (p ./ M) ./ (1 .- p) .^ 2
dM = digamma.(M .+ 1) .- digamma.(M .- m .+ 1) .- (m ./ M) .+ (M .- m) .* ((1 .- p) .^ -1) .* (p ./ M) .+ log.(1 .- p)
dM_2 = dM_2 .* exp.(2 .* Mprev) .+ dM .* exp.(Mprev)
# TODO:: derivative correction M, exp link

## TODO if M is a vector then dM_2 is a diagonal matrix and dαdM
vcat(
hcat(Diagonal(dM_2[:, 1]), Diagonal(dαdM[:, 1]) * X, Diagonal(dβdM[:, 1]) * Z),
hcat(X' * Diagonal(dαdM[:, 1]), X' * (dα_2 .* X), (X' * (dαdβ .* Z))'),
hcat(Z' * Diagonal(dβdM[:, 1]), (X' * (dαdβ .* Z))', Z' * (dβ_2 .* Z))
)
end # end function
40 changes: 40 additions & 0 deletions src/binomial_model.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
function binomial_model(m, N, n; start = "glm")
# TODO:: add X, Z arguments and then methods for type X/Z nothing or formula
df = DataFrame(
y = m,
x1 = log.(N),
x2 = log.(n ./ N)
)

X = ones(length(n))
X = X[:, :]

Z = ones(length(n))
Z = Z[:, :]

log_l_f = x -> log_lik_binomial_model(x[1:(end - 2)], x[end - 1], x[end], m, N, n, X, Z) * (-1.0)
grad_l_f = x -> grad_log_lik_binomial_model(x[1:(end - 2)], x[end - 1], x[end], m, N, n, X, Z) * (-1.0)
hes_l_f = x -> hess_log_lik_binomial_model(x[1:(end - 2)], x[end - 1], x[end], m, N, n, X, Z) * (-1.0)

#= result = optimize(log_l_f, [start[1], start[2], 1], Newton(); inplace = false)
result_1 = optimize(log_l_f, grad_l_f, [start[1], start[2], 1], Newton(); inplace = false) =#
# TODO :: dependent α, β

start = Float64[]
append!(start, zeros(length(N)))

if start == "glm"
append!(start, coef(glm(@formula(y ~ x1 + x2 + 0), df, Poisson(), LogLink())))
else
append!(start, coef(lm(@formula(log(y) ~ x1 + x2 + 0), df)))
end # end if

optim_problem = optimize(log_l_f, grad_l_f, hes_l_f, start, NewtonTrustRegion(); inplace = false)
α̂ = optim_problem.minimizer[end - 1]
β̂ = optim_problem.minimizer[end]
= optim_problem.minimizer[1:length(N)]
ξ̂ = N .^ α̂
#[coef(ols), coef(mm)]
#[start, log_l, grad_l, hes_l]
[[α̂, β̂, ξ̂, M̂, sum(ξ̂), sum(M̂)], optim_problem]
end # end function

0 comments on commit 7f197bf

Please sign in to comment.