From 8c4296debf24c1f0273aa57d935902f026271121 Mon Sep 17 00:00:00 2001 From: Helios De Rosario Date: Fri, 24 Nov 2023 08:14:21 +0100 Subject: [PATCH 1/7] add generalized chi squared distribution --- src/univariate/continuous/generalizedchisq.jl | 313 ++++++++++++++++++ .../univariate/continuous/generalizedchisq.jl | 41 +++ 2 files changed, 354 insertions(+) create mode 100644 src/univariate/continuous/generalizedchisq.jl create mode 100644 test/univariate/continuous/generalizedchisq.jl diff --git a/src/univariate/continuous/generalizedchisq.jl b/src/univariate/continuous/generalizedchisq.jl new file mode 100644 index 000000000..985eed3d4 --- /dev/null +++ b/src/univariate/continuous/generalizedchisq.jl @@ -0,0 +1,313 @@ +using Distributions, Random # remove +import Distributions: sampler, cf, cdf, pdf, quadgk, quantile_newton, quantile_bisect # remove + +""" + GeneralizedChisq(w, ν, λ, μ, σ) + +The *Generalized chi-squared distribution* is the distribution of a sum of independent noncentral chi-squared variables and a normal variable: + +```math +\\xi =\\sum_{i}w_{i}y_{i}+x,\\quad y_{i}\\sim \\chi '^{2}(\\nu_{i},\\lambda _{i}),\\quad x\\sim N(\\mu,\\sigma^{2}). +``` + +```julia +GeneralizedChisq(w, ν, λ, μ, σ) + +``` + +External links + +* [Generalized chi-squared distribution on Wikipedia](https://en.wikipedia.org/wiki/Generalized_chi-squared_distribution) + +""" +struct GeneralizedChisq{T<:Real, V<:AbstractVector{<:Real}} <: ContinuousUnivariateDistribution + w::V + ν::V + λ::V + μ::T + σ::T + + function GeneralizedChisq{T, V}(w, ν, λ, μ, σ; check_args::Bool=true) where {T, V} + # @check_args GeneralizedChisq (ν, all(ν .> 0)) (σ, σ ≥ zero(σ)) (length(w) == length(ν) == length(λ)) + new{T, V}(w, ν, λ, μ, σ) + end +end + +# Manage parameter types, ensuring that the parameters of normal are floats. + +function GeneralizedChisq(w, ν, λ, μ::Tμ, σ::Tσ; check_args...) where {Tμ<:Real, Tσ<:Real} + V = promote_type(typeof(w), typeof(ν), typeof(λ)) + T = promote_type(Tμ, Tσ, eltype(w), eltype(ν), eltype(λ)) + GeneralizedChisq{T, V}(w, ν, λ, μ, σ...; check_args...) +end + +GeneralizedChisq(w, ν, λ, μ::Integer, σ::Integer; check_args...) = GeneralizedChisq(w, ν, λ, float(μ), float(σ); check_args...) + +Base.eltype(::GeneralizedChisq{T,V}) where {T,V} = T + +""" + GeneralizedChisqSampler + +Sampler of a generalized chi-squared distribution, +created by `sampler(::GeneralizedChisq)`. +""" +struct GeneralizedChisqSampler{T<:Real, SC<:Sampleable{Univariate, Continuous}, SN<:Sampleable{Univariate, Continuous}} <: Sampleable{Univariate, Continuous} + μ::T + nchisqsamplers::Vector{Tuple{T, SC}} + normalsampler::SN # (zero-mean, since μ is defined apart) + skipnormal::Bool # Normal needs not be sampled if σ == 0 +end + +## Required functions + +# sampler that predefines the distributions for batch sampling +function sampler(d::GeneralizedChisq{T}) where T + μ = d.μ + nchisqsamplers = [(d.w[i], sampler(NoncentralChisq(d.ν[i], d.λ[i]))) for i in eachindex(d.w)] + normalsampler = sampler(Normal(zero(T), d.σ)) # zero-mean + skipnormal = iszero(d.σ) + GeneralizedChisqSampler(μ, nchisqsamplers, normalsampler, skipnormal) +end + +function rand(rng::AbstractRNG, s::GeneralizedChisqSampler) + result = s.μ + for (w, ncs) in s.nchisqsamplers + result += w * rand(rng, ncs) + end + if !s.skipnormal + result += rand(rng, s.normalsampler) + end + return result +end + +rand(rng::AbstractRNG, d::GeneralizedChisq) = rand(rng, sampler(d)) + +# cdf algorithm derived from https://github.com/abhranildas/gx2, by Abhranil Das. +function cdf(d::GeneralizedChisq{T}, x::Real) where T + # special cases + if iszero(d.σ) + (x < minimum(d)) && return zero(T) + (x > maximum(d)) && return one(T) + if all(==(first(d.w)), d.w) + iszero(first(d.w)) && return cdf(Normal(d.μ, d.σ), x) + nchisq = NoncentralChisq{T}(sum(d.ν), sum(d.λ)) + (first(d.w) > zero(T)) && return cdf(nchisq, (x - d.μ)/first(d.w)) + (first(d.w) < zero(T)) && return ccdf(nchisq, (x - d.μ)/first(d.w)) + end + end + # general case + GChisqComputations.daviescdf(d, x) +end + +function pdf(d::GeneralizedChisq{T}, x::Real) where T + # special cases + if iszero(d.σ) + !insupport(d, x) && return zero(T) + if all(==(first(d.w)), d.w) + iszero(first(d.w)) && return pdf(Normal(d.μ, d.σ), x) + nchisq = NoncentralChisq{T}(sum(d.ν), sum(d.λ)) + return pdf(nchisq, (x - d.μ)/first(d.w)) / abs(first(d.w)) + end + end + # general case + GChisqComputations.daviespdf(d, x) +end + +logpdf(d::GeneralizedChisq, x::Real) = log(pdf(d, x)) + +function quantile(d::GeneralizedChisq, p::Real) + # search starting point meeting convergence criterion + x0 = mean(d) + error0, curv0, converges = GChisqComputations.newtonconvergence(d, p, x0) + if !converges + bracket = GChisqComputations.definebracket(d, p, x0, error0, curv0, sqrt(var(d))) + x0 = GChisqComputations.searchnewtonconvergence(d, p, bracket...) + end + quantile_newton(d, p, x0) +end + +function quantile_b(d::GeneralizedChisq, p::Real) + # search starting point meeting convergence criterion + x0 = mean(d) + error0, curv0, converges = GChisqComputations.newtonconvergence(d, p, x0) + if converges + return quantile_newton(d, p, x0) + else + (x1, x2), _, _ = GChisqComputations.definebracket(d, p, x0, error0, curv0, sqrt(var(d))) + xl, xr = (x1 < x2) ? (x1, x2) : (x2, x1) + return quantile_bisect(d, p, xl, xr) + end +end + + +function minimum(d::GeneralizedChisq{T}) where T + d.σ > zero(T) || any(<(zero(T)), d.w) ? typemin(T) : d.μ +end + +function maximum(d::GeneralizedChisq{T}) where T + d.σ > zero(T) || any(>(zero(T)), d.w) ? typemax(T) : d.μ +end + +insupport(d::GeneralizedChisq, x::Real) = minimum(d) ≤ x ≤ maximum(d) + +## Recommended functions + +mean(d::GeneralizedChisq) = d.μ + sum(d.w[i] * (d.ν[i] + d.λ[i]) for i in eachindex(d.w)) +var(d::GeneralizedChisq) = d.σ^2 + 2 * sum(d.w[i]^2 * (d.ν[i] + 2*d.λ[i]) for i in eachindex(d.w)) + +# modes(d::GeneralizedChisq) +# mode(d::GeneralizedChisq) +# skewness(d::GeneralizedChisq) +# kurtosis(d::GeneralizedChisq, ::Bool) +# entropy(d::GeneralizedChisq, ::Real) +# mgf(d::GeneralizedChisq, ::Any) +cf(d::GeneralizedChisq, t) = GChisqComputations.cf_explicit(d, t) + +module GChisqComputations + import ..Normal, ..NoncentralChisq, ..GeneralizedChisq + import ..cf, ..cdf, ..insupport + import ..quadgk + + # Characteristic function - explicit formula + function cf_explicit(d, t) + arg = im * d.μ * t + denom = Complex(one(d.μ) * one(t)) # unit with stable type in later operations + for i in eachindex(d.w) + arg += im * d.w[i] * d.λ[i] * t / (1 - 2im * d.w[i] * t) + denom *= (1 - 2im * d.w[i] * t)^(d.ν[i]/2) + end + arg -= d.σ^2 * t^2 / 2 + return exp(arg) / denom + end + + # Characteristic function - by inheritance + function cf_inherit(d::GeneralizedChisq{T}, t) where T + result = exp(im * d.μ * t) + for i in eachindex(d.w) + result *= cf(NoncentralChisq{T}(d.ν[i], d.λ[i]), d.w[i] * t) + end + if !iszero(d.σ) + result *= cf(Normal(zero(d.μ), d.σ), t) + end + return result + end + + #= + Terms of the formula (13) in Davies (1980) to calculate + the cdf of a generalized chi-squared distribution, as: + F(x) = 1/2 - 1/π *∫sin(θ)/(u*ρ)du + + where `θ` and `ρ` are the outputs of this function. + + Those terms are related to the characteristic function of the distribution as: + exp(θ*im)/ρ = exp(-ux*im)*cf(u) + + They can be also used to calculate the pdf as: + f(x) = 1/π *∫cos(θ)/ρ du + + And its derivative as: + f'(x) = 1/π *∫u*sin(θ)/ρ du + =# + function daviesterms(d::GeneralizedChisq{T}, u, x) where {T<:Real} + θ = -T(u*(x - d.μ)) + ρ = exp(T(u*d.σ)^2 / 2) + for i in eachindex(d.w) + wi, νi, λi = T(d.w[i]), T(d.ν[i]), T(d.λ[i]) + τ = 1 + 4 * wi^2 * u^2 + θ += νi*atan(2*wi*u)/2 + (λi*wi*u)/τ + ρ *= τ^(νi/4) * exp(2λi*wi^2*u^2/τ) + end + return θ, ρ + end + + function daviescdf(d, x) + integral, _ = quadgk(0, Inf) do u + θ, ρ = daviesterms(d, u, x) + return sin(θ)/(u*ρ) + end + return 1/2 - integral/π + end + + function daviespdf(d, x) + integral, _ = quadgk(0, Inf) do u + θ, ρ = daviesterms(d, u, x) + return cos(θ)/ρ + end + return integral/π + end + + # derivative of pdf + function daviesdpdf(d, x) + integral, _ = quadgk(0, Inf) do u + θ, ρ = daviesterms(d, u, x) + return u*sin(θ)/ρ + end + return integral/π + end + + # function dpdf(d::GeneralizedChisq{T}, x::Real) where T + # # special case + # iszero(d.σ) && !insupport(d, x) && return zero(T) + # # general case + # GChisqComputations.daviesdpdf(x, d.w, d.ν, d.λ, d.μ, d.σ) + # end + + # functions to look for starting point where + # the Newton method for quantiles converges: + # sign(F(x) - q) == sign(f'(x)) + + # Convergence criterion: + function newtonconvergence(d::GeneralizedChisq{T}, p, x) where T + error = cdf(d, x) - T(p) + # out of bounds + iszero(d.σ) && !insupport(d, x) && return error, zero(T), false + # general case + curvature = daviesdpdf(d, x) + converges = sign(error) == sign(curvature) + return error, curvature, converges + end + + # Bracket containing solution for q, with x as one of the ends + function definebracket(d::GeneralizedChisq{T}, p, x, errorx, curvx, initialwidth) where T + # set direction of span and initialize bracket + span = (errorx < 0) ? abs(initialwidth) : -abs(initialwidth) + xbis = x + span + errorxbis, curvxbis, _ = newtonconvergence(d, p, xbis) + # greedier search if the bracket does not contain the solution + iterations = 0 + while sign(errorxbis) == sign(errorx) + iterations += 1 + span *= 2*errorxbis / (errorx - errorxbis) + x, errorx, curvx = xbis, errorxbis, curvxbis + xbis = x + span + errorxbis, curvxbis, _ = newtonconvergence(d, p, xbis) + end + @debug "Iterations to define bracket" iterations + # bracket end out of bounds + if !insupport(d, xbis) + xbis = d.μ + end + return (x, xbis), (errorx, errorxbis), (curvx, curvxbis) + end + + # Bisect bracket until convergence point is reached + function searchnewtonconvergence(d, p, (x,xbis), (errorx, errorxbis), (curvx, curvxbis)) + iterations = 0 + while true + iterations += 1 + # Assumes that the first point is *outside* the region of convergence + if sign(errorxbis) == sign(curvxbis) + @debug "Iterations to find convergence point" iterations + return xbis + end + xmid = (x + xbis) / 2 + errorxmid, curvxmid, _ = newtonconvergence(d, p, xmid) + # switch first point if the new end of the bracket is on the same side + if sign(errorxmid) == sign(errorx) + x, errorx, curvx = xbis, errorxbis, curvxbis + end + xbis, errorxbis, curvxbis = xmid, errorxmid, curvxmid + end + end + +end diff --git a/test/univariate/continuous/generalizedchisq.jl b/test/univariate/continuous/generalizedchisq.jl new file mode 100644 index 000000000..c6c44428e --- /dev/null +++ b/test/univariate/continuous/generalizedchisq.jl @@ -0,0 +1,41 @@ +using Distributions +import Distributions.quadgk +using Tests +using Random + +@testset "Generalized chi-squared" begin + rng = Random.MersenneTwister(0) + # check types of corner cases + gx2 = GeneralizedChisq([1,1], [1,1], [0,1], 10, 1) + @test typeof(gx2.μ) <: AbstractFloat + gx2 = GeneralizedChisq([1,1], [1,1], BigFloat[0,1], 10, 1) + @test typeof(gx2.μ) <: BigFloat + gx2 = GeneralizedChisq(Int[], Int[], Int[], 0, 1.0) + @test typeof(gx2.μ) == typeof(1.0) + # test sampler + # test maximum-minimum + # test cf, cdf, pdf + gx2 = GeneralizedChisq([1,-1], [1,2], [1.5, 1.5], 10, 1) + @test let t = 10 + randn() + cf(gx2, t) ≈ GChisqComputations.cf_inherit(gx2, t) + end + @test cdf(gx2, 15) - cdf(gx2, 5) ≈ first(quadgk(x->pdf(gx2, x), 5, 15)) + @test pdf(gx2, 15) - pdf(gx2, 5) ≈ first(quadgk(x->GChisqComputations.daviesdpdf(gx2, x), 5, 15)) + # test search of convergence for starting point of quantile + for p in 0.1:0.1:0.9, x in 0:15 + println(x, ", ", p, ", ", GChisqComputations.newtonconvergence(gx2, p, x)) + end + for p in 0.0:0.05:1.0 + println(p, ", ", GChisqComputations.newtonconvergence(gx2, p, gx2.μ)) + end + # using BenchmarkTools + for p in 0.05:0.05:0.9 + qn = quantile(gx2, p) + tn = @elapsed quantile(gx2, p) + qb = quantile_b(gx2, p) + tb = @elapsed quantile_b(gx2, p) + println(p, ", ", qn, ", ", tn, ", ", cdf(gx2, qn)) + println(p, ", ", qb, ", ", tb, ", ", cdf(gx2, qb)) + println() + end +end \ No newline at end of file From 813fee4fef91b570942a68716e8207d1f14bdc3c Mon Sep 17 00:00:00 2001 From: Helios De Rosario Date: Sun, 26 Nov 2023 14:08:10 +0100 Subject: [PATCH 2/7] clean generalizedchisq.jl --- src/univariate/continuous/generalizedchisq.jl | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/src/univariate/continuous/generalizedchisq.jl b/src/univariate/continuous/generalizedchisq.jl index 985eed3d4..26a1de058 100644 --- a/src/univariate/continuous/generalizedchisq.jl +++ b/src/univariate/continuous/generalizedchisq.jl @@ -126,20 +126,6 @@ function quantile(d::GeneralizedChisq, p::Real) quantile_newton(d, p, x0) end -function quantile_b(d::GeneralizedChisq, p::Real) - # search starting point meeting convergence criterion - x0 = mean(d) - error0, curv0, converges = GChisqComputations.newtonconvergence(d, p, x0) - if converges - return quantile_newton(d, p, x0) - else - (x1, x2), _, _ = GChisqComputations.definebracket(d, p, x0, error0, curv0, sqrt(var(d))) - xl, xr = (x1 < x2) ? (x1, x2) : (x2, x1) - return quantile_bisect(d, p, xl, xr) - end -end - - function minimum(d::GeneralizedChisq{T}) where T d.σ > zero(T) || any(<(zero(T)), d.w) ? typemin(T) : d.μ end @@ -200,7 +186,7 @@ module GChisqComputations where `θ` and `ρ` are the outputs of this function. Those terms are related to the characteristic function of the distribution as: - exp(θ*im)/ρ = exp(-ux*im)*cf(u) + exp(θ*im)/ρ = exp(-u*x*im)*cf(u) They can be also used to calculate the pdf as: f(x) = 1/π *∫cos(θ)/ρ du @@ -245,13 +231,6 @@ module GChisqComputations return integral/π end - # function dpdf(d::GeneralizedChisq{T}, x::Real) where T - # # special case - # iszero(d.σ) && !insupport(d, x) && return zero(T) - # # general case - # GChisqComputations.daviesdpdf(x, d.w, d.ν, d.λ, d.μ, d.σ) - # end - # functions to look for starting point where # the Newton method for quantiles converges: # sign(F(x) - q) == sign(f'(x)) From 63f0ccbb6bfaac89b46bc7c885afa94f87854699 Mon Sep 17 00:00:00 2001 From: Helios De Rosario Date: Sun, 26 Nov 2023 22:10:19 +0100 Subject: [PATCH 3/7] optimize quantile(::GeneralizedChisq, ...) --- src/univariate/continuous/generalizedchisq.jl | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/univariate/continuous/generalizedchisq.jl b/src/univariate/continuous/generalizedchisq.jl index 26a1de058..4479c4125 100644 --- a/src/univariate/continuous/generalizedchisq.jl +++ b/src/univariate/continuous/generalizedchisq.jl @@ -115,15 +115,23 @@ end logpdf(d::GeneralizedChisq, x::Real) = log(pdf(d, x)) -function quantile(d::GeneralizedChisq, p::Real) - # search starting point meeting convergence criterion - x0 = mean(d) - error0, curv0, converges = GChisqComputations.newtonconvergence(d, p, x0) - if !converges - bracket = GChisqComputations.definebracket(d, p, x0, error0, curv0, sqrt(var(d))) - x0 = GChisqComputations.searchnewtonconvergence(d, p, bracket...) +function quantile(d::GeneralizedChisq{T}, p::Real) where T + if 0 < p < 1 + # search starting point meeting convergence criterion + x0 = mean(d) + error0, curv0, converges = GChisqComputations.newtonconvergence(d, p, x0) + if !converges + bracket = GChisqComputations.definebracket(d, p, x0, error0, curv0, sqrt(var(d))) + x0 = GChisqComputations.searchnewtonconvergence(d, p, bracket...) + end + return quantile_newton(d, p, x0) + elseif p == 0 + return minimum(d) + elseif p == 1 + return maximum(d) + else + return T(NaN) end - quantile_newton(d, p, x0) end function minimum(d::GeneralizedChisq{T}) where T @@ -207,7 +215,8 @@ module GChisqComputations end function daviescdf(d, x) - integral, _ = quadgk(0, Inf) do u + atol = eps(eltype(d)) # uniform tolerance to avoid issues at cdf ≈ 0.5 (integral ≈ 0) + integral, _ = quadgk(0, Inf; atol=atol) do u θ, ρ = daviesterms(d, u, x) return sin(θ)/(u*ρ) end From 963273913be8a212f00abec03a556d9a494c1e36 Mon Sep 17 00:00:00 2001 From: Helios De Rosario Date: Mon, 27 Nov 2023 22:33:58 +0100 Subject: [PATCH 4/7] working code and tests of GeneralizedChisq --- src/Distributions.jl | 1 + src/univariate/continuous/generalizedchisq.jl | 99 ++++++++++----- src/univariates.jl | 1 + test/runtests.jl | 1 + .../univariate/continuous/generalizedchisq.jl | 115 +++++++++++++----- 5 files changed, 157 insertions(+), 60 deletions(-) diff --git a/src/Distributions.jl b/src/Distributions.jl index ceb6063c7..40783abfa 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -97,6 +97,7 @@ export FullNormalCanon, Gamma, DiscreteNonParametric, + GeneralizedChisq, GeneralizedPareto, GeneralizedExtremeValue, Geometric, diff --git a/src/univariate/continuous/generalizedchisq.jl b/src/univariate/continuous/generalizedchisq.jl index 4479c4125..b22d5ee4d 100644 --- a/src/univariate/continuous/generalizedchisq.jl +++ b/src/univariate/continuous/generalizedchisq.jl @@ -1,6 +1,3 @@ -using Distributions, Random # remove -import Distributions: sampler, cf, cdf, pdf, quadgk, quantile_newton, quantile_bisect # remove - """ GeneralizedChisq(w, ν, λ, μ, σ) @@ -63,7 +60,7 @@ end # sampler that predefines the distributions for batch sampling function sampler(d::GeneralizedChisq{T}) where T μ = d.μ - nchisqsamplers = [(d.w[i], sampler(NoncentralChisq(d.ν[i], d.λ[i]))) for i in eachindex(d.w)] + nchisqsamplers = [(T(d.w[i]), sampler(NoncentralChisq(d.ν[i], d.λ[i]))) for i in eachindex(d.w)] normalsampler = sampler(Normal(zero(T), d.σ)) # zero-mean skipnormal = iszero(d.σ) GeneralizedChisqSampler(μ, nchisqsamplers, normalsampler, skipnormal) @@ -85,31 +82,41 @@ rand(rng::AbstractRNG, d::GeneralizedChisq) = rand(rng, sampler(d)) # cdf algorithm derived from https://github.com/abhranildas/gx2, by Abhranil Das. function cdf(d::GeneralizedChisq{T}, x::Real) where T # special cases - if iszero(d.σ) + if isempty(d.w) + return cdf(Normal(d.μ, d.σ), x) + elseif iszero(d.σ) + # out of support (or all weights equal to zero) (x < minimum(d)) && return zero(T) - (x > maximum(d)) && return one(T) + (x ≥ maximum(d)) && return one(T) + # inside support if all(==(first(d.w)), d.w) - iszero(first(d.w)) && return cdf(Normal(d.μ, d.σ), x) - nchisq = NoncentralChisq{T}(sum(d.ν), sum(d.λ)) + nchisq = NoncentralChisq(sum(T, d.ν), sum(T, d.λ)) (first(d.w) > zero(T)) && return cdf(nchisq, (x - d.μ)/first(d.w)) (first(d.w) < zero(T)) && return ccdf(nchisq, (x - d.μ)/first(d.w)) end end # general case + (x == -Inf) && return zero(T) + (x == Inf) && return one(T) GChisqComputations.daviescdf(d, x) end function pdf(d::GeneralizedChisq{T}, x::Real) where T # special cases - if iszero(d.σ) + if isempty(d.w) + return pdf(Normal(d.μ, d.σ), x) + elseif iszero(d.σ) + # out of support !insupport(d, x) && return zero(T) + # inside support if all(==(first(d.w)), d.w) - iszero(first(d.w)) && return pdf(Normal(d.μ, d.σ), x) - nchisq = NoncentralChisq{T}(sum(d.ν), sum(d.λ)) + iszero(first(d.w)) && return typemax(T) # zero weights (delta at d.μ) + nchisq = NoncentralChisq(sum(T, d.ν), sum(T, d.λ)) return pdf(nchisq, (x - d.μ)/first(d.w)) / abs(first(d.w)) end end # general case + !isfinite(x) && return zero(T) GChisqComputations.daviespdf(d, x) end @@ -117,14 +124,17 @@ logpdf(d::GeneralizedChisq, x::Real) = log(pdf(d, x)) function quantile(d::GeneralizedChisq{T}, p::Real) where T if 0 < p < 1 - # search starting point meeting convergence criterion - x0 = mean(d) - error0, curv0, converges = GChisqComputations.newtonconvergence(d, p, x0) - if !converges - bracket = GChisqComputations.definebracket(d, p, x0, error0, curv0, sqrt(var(d))) - x0 = GChisqComputations.searchnewtonconvergence(d, p, bracket...) + # special cases + if isempty(d.w) + return quantile(Normal(d.μ, d.σ), x) + elseif iszero(d.σ) && all(==(first(d.w)), d.w) + iszero(first(d.w)) && return d.μ # zero weights (delta at d.μ) + nchisq = NoncentralChisq(sum(T, d.ν), sum(T, d.λ)) + (first(d.w) > zero(T)) && return quantile(nchisq, p)*first(d.w) + d.μ + (first(d.w) < zero(T)) && return cquantile(nchisq, p)*first(d.w) + d.μ end - return quantile_newton(d, p, x0) + # general cases + return GChisqComputations.quantile(d, p) elseif p == 0 return minimum(d) elseif p == 1 @@ -148,19 +158,27 @@ insupport(d::GeneralizedChisq, x::Real) = minimum(d) ≤ x ≤ maximum(d) mean(d::GeneralizedChisq) = d.μ + sum(d.w[i] * (d.ν[i] + d.λ[i]) for i in eachindex(d.w)) var(d::GeneralizedChisq) = d.σ^2 + 2 * sum(d.w[i]^2 * (d.ν[i] + 2*d.λ[i]) for i in eachindex(d.w)) +cf(d::GeneralizedChisq, t) = GChisqComputations.cf_explicit(d, t) +# # [Missing] # modes(d::GeneralizedChisq) # mode(d::GeneralizedChisq) # skewness(d::GeneralizedChisq) # kurtosis(d::GeneralizedChisq, ::Bool) # entropy(d::GeneralizedChisq, ::Real) # mgf(d::GeneralizedChisq, ::Any) -cf(d::GeneralizedChisq, t) = GChisqComputations.cf_explicit(d, t) +""" + GChisqComputations + +Computations for the Generalized Chi-squared distribution. +This module is meant to be private, not part of the API. +""" module GChisqComputations - import ..Normal, ..NoncentralChisq, ..GeneralizedChisq - import ..cf, ..cdf, ..insupport - import ..quadgk + import ..Distributions # to use `Normal` and `NoncentralChisq` (which cannot be imported) + import ..GeneralizedChisq + import ..cf, ..cdf, ..insupport, ..quantile_newton + import ..quadgk, ..mean, ..var # Characteristic function - explicit formula function cf_explicit(d, t) @@ -175,33 +193,46 @@ module GChisqComputations end # Characteristic function - by inheritance - function cf_inherit(d::GeneralizedChisq{T}, t) where T + function cf_inherit(d::GeneralizedChisq, t) result = exp(im * d.μ * t) for i in eachindex(d.w) - result *= cf(NoncentralChisq{T}(d.ν[i], d.λ[i]), d.w[i] * t) + result *= cf(Distributions.NoncentralChisq(d.ν[i], d.λ[i]), d.w[i] * t) end if !iszero(d.σ) - result *= cf(Normal(zero(d.μ), d.σ), t) + result *= cf(Distributions.Normal(zero(d.μ), d.σ), t) end return result end - #= + """ + daviesterms(d::GeneralizedChisq, u, x) + Terms of the formula (13) in Davies (1980) to calculate the cdf of a generalized chi-squared distribution, as: + F(x) = 1/2 - 1/π *∫sin(θ)/(u*ρ)du where `θ` and `ρ` are the outputs of this function. Those terms are related to the characteristic function of the distribution as: + exp(θ*im)/ρ = exp(-u*x*im)*cf(u) They can be also used to calculate the pdf as: + f(x) = 1/π *∫cos(θ)/ρ du And its derivative as: + f'(x) = 1/π *∫u*sin(θ)/ρ du - =# + + Reference: + + Robert B. Davies (1980). + Algorithm AS 155: The Distribution of a Linear Combination of χ2Random Variables. + *Journal of the Royal Statistical Society. Series C (Applied Statistics)*, 29(3), 323–333 + DOI:10.2307/2346911 + """ function daviesterms(d::GeneralizedChisq{T}, u, x) where {T<:Real} θ = -T(u*(x - d.μ)) ρ = exp(T(u*d.σ)^2 / 2) @@ -240,7 +271,7 @@ module GChisqComputations return integral/π end - # functions to look for starting point where + # functions to look for the starting point where # the Newton method for quantiles converges: # sign(F(x) - q) == sign(f'(x)) @@ -256,7 +287,7 @@ module GChisqComputations end # Bracket containing solution for q, with x as one of the ends - function definebracket(d::GeneralizedChisq{T}, p, x, errorx, curvx, initialwidth) where T + function definebracket(d::GeneralizedChisq, p, x, errorx, curvx, initialwidth) # set direction of span and initialize bracket span = (errorx < 0) ? abs(initialwidth) : -abs(initialwidth) xbis = x + span @@ -298,4 +329,14 @@ module GChisqComputations end end + function quantile(d, p) + # search starting point meeting convergence criterion + x0 = mean(d) + error0, curv0, converges = GChisqComputations.newtonconvergence(d, p, x0) + if !converges + bracket = GChisqComputations.definebracket(d, p, x0, error0, curv0, sqrt(var(d))) + x0 = GChisqComputations.searchnewtonconvergence(d, p, bracket...) + end + return quantile_newton(d, p, x0) + end end diff --git a/src/univariates.jl b/src/univariates.jl index 0e2ae05e3..5ce6b4068 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -682,6 +682,7 @@ const continuous_distributions = [ "fdist", "frechet", "gamma", "erlang", + "generalizedchisq", "pgeneralizedgaussian", # GeneralizedGaussian depends on Gamma "generalizedpareto", "generalizedextremevalue", diff --git a/test/runtests.jl b/test/runtests.jl index ce3f16b79..9c7d66be7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -75,6 +75,7 @@ const tests = [ "univariate/continuous/erlang", "univariate/continuous/exponential", "univariate/continuous/gamma", + "univariate/continuous/generalizedchisq.jl", "univariate/continuous/gumbel", "univariate/continuous/lindley", "univariate/continuous/logistic", diff --git a/test/univariate/continuous/generalizedchisq.jl b/test/univariate/continuous/generalizedchisq.jl index c6c44428e..2c5492c79 100644 --- a/test/univariate/continuous/generalizedchisq.jl +++ b/test/univariate/continuous/generalizedchisq.jl @@ -1,41 +1,94 @@ using Distributions -import Distributions.quadgk -using Tests +using Test using Random @testset "Generalized chi-squared" begin rng = Random.MersenneTwister(0) - # check types of corner cases - gx2 = GeneralizedChisq([1,1], [1,1], [0,1], 10, 1) - @test typeof(gx2.μ) <: AbstractFloat - gx2 = GeneralizedChisq([1,1], [1,1], BigFloat[0,1], 10, 1) - @test typeof(gx2.μ) <: BigFloat - gx2 = GeneralizedChisq(Int[], Int[], Int[], 0, 1.0) - @test typeof(gx2.μ) == typeof(1.0) - # test sampler - # test maximum-minimum - # test cf, cdf, pdf + Computations = Distributions.GChisqComputations + quadgk = Distributions.quadgk + # General case gx2 = GeneralizedChisq([1,-1], [1,2], [1.5, 1.5], 10, 1) - @test let t = 10 + randn() - cf(gx2, t) ≈ GChisqComputations.cf_inherit(gx2, t) + @test eltype(gx2) == typeof(gx2.μ) <: AbstractFloat + @test minimum(gx2) == -Inf + @test maximum(gx2) == Inf + @test insupport(gx2, rand(gx2)) + @test let t = randn() + cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) end @test cdf(gx2, 15) - cdf(gx2, 5) ≈ first(quadgk(x->pdf(gx2, x), 5, 15)) - @test pdf(gx2, 15) - pdf(gx2, 5) ≈ first(quadgk(x->GChisqComputations.daviesdpdf(gx2, x), 5, 15)) - # test search of convergence for starting point of quantile - for p in 0.1:0.1:0.9, x in 0:15 - println(x, ", ", p, ", ", GChisqComputations.newtonconvergence(gx2, p, x)) - end - for p in 0.0:0.05:1.0 - println(p, ", ", GChisqComputations.newtonconvergence(gx2, p, gx2.μ)) - end - # using BenchmarkTools - for p in 0.05:0.05:0.9 - qn = quantile(gx2, p) - tn = @elapsed quantile(gx2, p) - qb = quantile_b(gx2, p) - tb = @elapsed quantile_b(gx2, p) - println(p, ", ", qn, ", ", tn, ", ", cdf(gx2, qn)) - println(p, ", ", qb, ", ", tb, ", ", cdf(gx2, qb)) - println() + @test pdf(gx2, 15) - pdf(gx2, 5) ≈ first(quadgk(x->Computations.daviesdpdf(gx2, x), 5, 15)) + @testset for p in 0:0.05:1 + q = quantile(gx2, p) + @test cdf(gx2, q) ≈ p + end + # Other types + gx2 = GeneralizedChisq([1,1], [1,1], BigFloat[0,1], 10, 1) + @test eltype(gx2) == typeof(gx2.μ) <: BigFloat + # [Don't test with BigFloat] + gx2 = GeneralizedChisq([1,1], [1,1], [0,1], 10, 1f0) + @test eltype(gx2) == typeof(gx2.μ) <: Float32 + @test insupport(gx2, rand(gx2)) + # Special cases: + # σ == 0, positive weights + gx2 = GeneralizedChisq([1,1], [1,2], [0,1], 10, 0) + @test all(≥(10), rand(gx2, 10)) + @test minimum(gx2) == 10 + @test let t = randn() + cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) + end + @test cdf(gx2, 15) - cdf(gx2, 5) ≈ first(quadgk(x->pdf(gx2, x), 5, 15)) ≈ first(quadgk(x->pdf(gx2, x), 10, 15)) + @testset for p in 0:0.05:1 + q = quantile(gx2, p) + @test cdf(gx2, q) ≈ p + end + # σ == 0, negative weights + gx2 = GeneralizedChisq([-1,-1], [1,2], [0,1], 10, 0) + @test all(≤(10), rand(gx2, 10)) + @test maximum(gx2) == 10 + @test let t = randn() + cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) + end + @test cdf(gx2, 15) - cdf(gx2, 5) ≈ first(quadgk(x->pdf(gx2, x), 5, 15)) ≈ first(quadgk(x->pdf(gx2, x), 5, 10)) + @testset for p in 0:0.05:1 + q = quantile(gx2, p) + @test cdf(gx2, q) ≈ p + end + # All zero weights + gx2 = GeneralizedChisq([0,0], [1,2], [0,1], 10, 1) + gx2b = GeneralizedChisq(Int[], Int[], Int[], 10, 1) + normalequiv = Normal(gx2.μ, gx2.σ) + @test let t = randn() + cf(gx2, t) ≈ cf(gx2b, t) ≈ Computations.cf_inherit(gx2, t) ≈ cf(normalequiv, t) + end + let x = 10 + randn() + @test cdf(gx2, x) ≈ cdf(gx2b, x) ≈ Computations.daviescdf(gx2, x) ≈ cdf(normalequiv, x) + @test pdf(gx2, x) ≈ pdf(gx2b, x) ≈ Computations.daviespdf(gx2, x) ≈ pdf(normalequiv, x) + end + @testset for p in 0:0.05:1 + q = quantile(gx2, p) + @test q == quantile(gx2, p) + @test cdf(gx2, q) ≈ cdf(gx2b, q) ≈ p + end + # delta at μ + gx2 = GeneralizedChisq([0,0], [1,2], [0,1], 10, 0) + gx2b = GeneralizedChisq(Int[], Int[], Int[], 10, 0) + @test let t = randn() + cf(gx2, t) ≈ cf(gx2b, t) ≈ Computations.cf_inherit(gx2, t) + end + let x = 9 + @test cdf(gx2, x) == cdf(gx2b, x) == 0 + @test pdf(gx2, x) == pdf(gx2b, x) == 0 + end + let x = 10 + @test cdf(gx2, x) == cdf(gx2b, x) == 1 + @test pdf(gx2, x) == pdf(gx2b, x) == Inf + end + let x = 11 + @test cdf(gx2, x) == cdf(gx2b, x) == 1 + @test pdf(gx2, x) == pdf(gx2b, x) == 0 + end + @testset for p in 0:0.05:1 + q = quantile(gx2, p) + @test q == quantile(gx2, p) == 10 end end \ No newline at end of file From ee71d8efd081646915a83dc1da93cbfa9acd6155 Mon Sep 17 00:00:00 2001 From: Helios De Rosario Date: Mon, 27 Nov 2023 23:14:16 +0100 Subject: [PATCH 5/7] minor fixes and docs * add `params` method * remove debug code --- docs/src/univariate.md | 7 +++++++ src/univariate/continuous/generalizedchisq.jl | 10 +++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/src/univariate.md b/docs/src/univariate.md index 0b2c48c6e..d656b5509 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -237,6 +237,13 @@ Gamma plotdensity((0, 18), Gamma, (7.5, 1)) # hide ``` +```@docs +GeneralizedChisq +``` +```@example plotdensity +plotdensity((-20, 20), GeneralizedChisq, ([1,-2], [1,2], [0,2], 10, 1)) # hide +``` + ```@docs GeneralizedExtremeValue ``` diff --git a/src/univariate/continuous/generalizedchisq.jl b/src/univariate/continuous/generalizedchisq.jl index b22d5ee4d..23206651a 100644 --- a/src/univariate/continuous/generalizedchisq.jl +++ b/src/univariate/continuous/generalizedchisq.jl @@ -4,7 +4,7 @@ The *Generalized chi-squared distribution* is the distribution of a sum of independent noncentral chi-squared variables and a normal variable: ```math -\\xi =\\sum_{i}w_{i}y_{i}+x,\\quad y_{i}\\sim \\chi '^{2}(\\nu_{i},\\lambda _{i}),\\quad x\\sim N(\\mu,\\sigma^{2}). +\\xi =\\sum_{i}w_{i}y_{i}+x,\\quad y_{i}\\sim \\chi^{2}(\\nu_{i},\\lambda _{i}),\\quad x\\sim N(\\mu,\\sigma^{2}). ``` ```julia @@ -42,6 +42,8 @@ GeneralizedChisq(w, ν, λ, μ::Integer, σ::Integer; check_args...) = Generaliz Base.eltype(::GeneralizedChisq{T,V}) where {T,V} = T +params(d::GeneralizedChisq) = (d.w, d.ν, d.λ, d.μ, d.σ) + """ GeneralizedChisqSampler @@ -293,15 +295,12 @@ module GChisqComputations xbis = x + span errorxbis, curvxbis, _ = newtonconvergence(d, p, xbis) # greedier search if the bracket does not contain the solution - iterations = 0 while sign(errorxbis) == sign(errorx) - iterations += 1 span *= 2*errorxbis / (errorx - errorxbis) x, errorx, curvx = xbis, errorxbis, curvxbis xbis = x + span errorxbis, curvxbis, _ = newtonconvergence(d, p, xbis) end - @debug "Iterations to define bracket" iterations # bracket end out of bounds if !insupport(d, xbis) xbis = d.μ @@ -311,12 +310,9 @@ module GChisqComputations # Bisect bracket until convergence point is reached function searchnewtonconvergence(d, p, (x,xbis), (errorx, errorxbis), (curvx, curvxbis)) - iterations = 0 while true - iterations += 1 # Assumes that the first point is *outside* the region of convergence if sign(errorxbis) == sign(curvxbis) - @debug "Iterations to find convergence point" iterations return xbis end xmid = (x + xbis) / 2 From 251c061da6b27d7e406d6e1f8d804dd9f634017c Mon Sep 17 00:00:00 2001 From: Helios De Rosario Date: Tue, 28 Nov 2023 11:38:28 +0100 Subject: [PATCH 6/7] fix type in runtests.jl --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 9c7d66be7..725f80dbf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -75,7 +75,7 @@ const tests = [ "univariate/continuous/erlang", "univariate/continuous/exponential", "univariate/continuous/gamma", - "univariate/continuous/generalizedchisq.jl", + "univariate/continuous/generalizedchisq", "univariate/continuous/gumbel", "univariate/continuous/lindley", "univariate/continuous/logistic", From 3ab5b062dcbcf9fc887588f22091b677e78744c5 Mon Sep 17 00:00:00 2001 From: Helios De Rosario Date: Wed, 29 Nov 2023 08:15:34 +0100 Subject: [PATCH 7/7] test the computation of Davies' terms --- test/univariate/continuous/generalizedchisq.jl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/test/univariate/continuous/generalizedchisq.jl b/test/univariate/continuous/generalizedchisq.jl index 2c5492c79..92bd18b66 100644 --- a/test/univariate/continuous/generalizedchisq.jl +++ b/test/univariate/continuous/generalizedchisq.jl @@ -3,8 +3,14 @@ using Test using Random @testset "Generalized chi-squared" begin - rng = Random.MersenneTwister(0) Computations = Distributions.GChisqComputations + # Function obtain the cf from Davies' terms, given: + # exp(θ*im)/ρ = exp(-u*x*im)*cf(u) + function cfdavies(d, u, x=randn()) + θ, ρ = Computations.daviesterms(d, u, x) + return exp(θ*im + u*x*im)/ρ + end + rng = Random.MersenneTwister(0) quadgk = Distributions.quadgk # General case gx2 = GeneralizedChisq([1,-1], [1,2], [1.5, 1.5], 10, 1) @@ -13,7 +19,7 @@ using Random @test maximum(gx2) == Inf @test insupport(gx2, rand(gx2)) @test let t = randn() - cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) + cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) ≈ cfdavies(gx2, t) end @test cdf(gx2, 15) - cdf(gx2, 5) ≈ first(quadgk(x->pdf(gx2, x), 5, 15)) @test pdf(gx2, 15) - pdf(gx2, 5) ≈ first(quadgk(x->Computations.daviesdpdf(gx2, x), 5, 15)) @@ -34,7 +40,7 @@ using Random @test all(≥(10), rand(gx2, 10)) @test minimum(gx2) == 10 @test let t = randn() - cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) + cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) ≈ cfdavies(gx2, t) end @test cdf(gx2, 15) - cdf(gx2, 5) ≈ first(quadgk(x->pdf(gx2, x), 5, 15)) ≈ first(quadgk(x->pdf(gx2, x), 10, 15)) @testset for p in 0:0.05:1 @@ -46,7 +52,7 @@ using Random @test all(≤(10), rand(gx2, 10)) @test maximum(gx2) == 10 @test let t = randn() - cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) + cf(gx2, t) ≈ Computations.cf_inherit(gx2, t) ≈ cfdavies(gx2, t) end @test cdf(gx2, 15) - cdf(gx2, 5) ≈ first(quadgk(x->pdf(gx2, x), 5, 15)) ≈ first(quadgk(x->pdf(gx2, x), 5, 10)) @testset for p in 0:0.05:1 @@ -58,7 +64,7 @@ using Random gx2b = GeneralizedChisq(Int[], Int[], Int[], 10, 1) normalequiv = Normal(gx2.μ, gx2.σ) @test let t = randn() - cf(gx2, t) ≈ cf(gx2b, t) ≈ Computations.cf_inherit(gx2, t) ≈ cf(normalequiv, t) + cf(gx2, t) ≈ cf(gx2b, t) ≈ Computations.cf_inherit(gx2, t) ≈ cfdavies(gx2, t) ≈ cf(normalequiv, t) end let x = 10 + randn() @test cdf(gx2, x) ≈ cdf(gx2b, x) ≈ Computations.daviescdf(gx2, x) ≈ cdf(normalequiv, x) @@ -73,7 +79,7 @@ using Random gx2 = GeneralizedChisq([0,0], [1,2], [0,1], 10, 0) gx2b = GeneralizedChisq(Int[], Int[], Int[], 10, 0) @test let t = randn() - cf(gx2, t) ≈ cf(gx2b, t) ≈ Computations.cf_inherit(gx2, t) + cf(gx2, t) ≈ cf(gx2b, t) ≈ Computations.cf_inherit(gx2, t) ≈ cfdavies(gx2, t) end let x = 9 @test cdf(gx2, x) == cdf(gx2b, x) == 0