From bbaf1a935ef52d6ff2c51544616b113780adbffa Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 12:33:54 -0500 Subject: [PATCH 01/15] coef! method --- src/linearmixedmodel.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index 3827f41ec..60e874506 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -266,9 +266,11 @@ function StatsAPI.fit( end end -function StatsAPI.coef(m::LinearMixedModel{T}) where {T} +StatsAPI.coef(m::LinearMixedModel{T}) where {T} = coef!(Vector{T}(undef, length(m.feterm.piv)), m) + +function coef!(v::Vector{T}, m::LinearMixedModel{T}) where {T} piv = m.feterm.piv - return invpermute!(fixef!(similar(piv, T), m), piv) + return invpermute!(fixef!(v, m), piv) end βs(m::LinearMixedModel) = NamedTuple{(Symbol.(coefnames(m))...,)}(coef(m)) From 7abb5ebd36ae95f0231534e132a6b987304c66b7 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 12:34:14 -0500 Subject: [PATCH 02/15] use coef! instead of fixef! in bootstrap --- src/bootstrap.jl | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index b009199ef..ee909e8b7 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -80,7 +80,7 @@ been constructed with the same structure as the source of the saved replicates. The two-argument method constructs a [`MixedModelBootstrap`](@ref) with the same eltype as `m`. If an eltype is specified as the third argument, then a `MixedModelBootstrap` is returned. -If a subtype of `MixedModelFitCollection` is specified as the third argument, then that +If a subtype of `MixedModelFitCollection` is specified as the third argument, then that is the return type. See also [`savereplicates`](@ref), [`restoreoptsum!`](@ref). @@ -91,7 +91,7 @@ end # why this weird second method? it allows us to define custom types and write methods # to load into those types directly. For example, we could define a `PowerAnalysis <: MixedModelFitCollection` -# in MixedModelsSim and then overload this method to get a convenient object. +# in MixedModelsSim and then overload this method to get a convenient object. # Also, this allows us to write `restorereplicateS(f, m, ::Type{<:MixedModelNonparametricBootstrap})` for # entities in MixedModels bootstrap function restorereplicates( @@ -103,7 +103,7 @@ function restorereplicates( rep = first(Tables.rows(tbl)) allgood = length(rep.θ) == length(m.θ) && - string.(propertynames(rep.β)) == Tuple(coefnames(m)) + string.(propertynames(rep.β)) == Tuple(fixefnames(m)) allgood || throw(ArgumentError("Model is not compatible with saved replicates.")) @@ -120,14 +120,14 @@ end """ savereplicates(f, b::MixedModelFitCollection) -Save the replicates associated with a [`MixedModelFitCollection`](@ref), -e.g. [`MixedModelBootstrap`](@ref) as an Arrow file. +Save the replicates associated with a [`MixedModelFitCollection`](@ref), +e.g. [`MixedModelBootstrap`](@ref) as an Arrow file. See also [`restorereplicates`](@ref), [`saveoptsum`](@ref) !!! note **Only** the replicates are saved, not the entire contents of the `MixedModelFitCollection`. - `restorereplicates` requires a model compatible with the bootstrap to restore the full object. + `restorereplicates` requires a model compatible with the bootstrap to restore the full object. """ savereplicates(f, b::MixedModelFitCollection) = Arrow.write(f, b.fits) @@ -200,6 +200,10 @@ fit during the bootstrapping process. For example, `optsum_overrides=(;ftol_rel= reduces the convergence criterion, which can greatly speed up the bootstrap fits. Taking advantage of this speed up to increase `n` can often lead to better estimates of coverage intervals. + + +!!! note + All coefficients are bootstrapped, including inestimatable ones from the rank deficient case. """ function parametricbootstrap( rng::AbstractRNG, @@ -234,7 +238,7 @@ function parametricbootstrap( # this seemed to slow things down?! # _copy_away_from_lowerbd!(m.optsum.initial, morig.optsum.final, m.lowerbd; incr=0.05) - β_names = Tuple(Symbol.(fixefnames(morig))) + β_names = Tuple(Symbol.(coefnames(morig))) use_threads && Base.depwarn( "use_threads is deprecated and will be removed in a future release", @@ -247,7 +251,7 @@ function parametricbootstrap( ( objective=ftype.(m.objective), σ=ismissing(m.σ) ? missing : ftype(m.σ), - β=NamedTuple{β_names}(fixef!(βsc, m)), + β=NamedTuple{β_names}(coef!(βsc, m)), se=SVector{p,ftype}(stderror!(βsc, m)), θ=SVector{k,ftype}(getθ!(θsc, m)), ) @@ -331,8 +335,8 @@ end Compute bootstrap confidence intervals for coefficients and variance components, with confidence level level (by default 95%). -The keyword argument `method` determines whether the `:shortest`, i.e. highest density, interval is used -or the `:equaltail`, i.e. quantile-based, interval is used. For historical reasons, the default is `:shortest`, +The keyword argument `method` determines whether the `:shortest`, i.e. highest density, interval is used +or the `:equaltail`, i.e. quantile-based, interval is used. For historical reasons, the default is `:shortest`, but `:equaltail` gives the interval that is most comparable to the profile and Wald confidence intervals. !!! note @@ -621,7 +625,7 @@ end function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T} """ σρ!(v, t, σ) - push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v` + push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v` """ dat = t.data for i in axes(dat, 1) From 2beeab5c8677839f19c4d474b9750562f2058a02 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 12:34:49 -0500 Subject: [PATCH 03/15] ignore version specific manifests --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 119a53c1a..a4fdd21fb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ docs/src/assets/*.svg test/scratch.jl tune.json Manifest.toml +Manifest-v1.*.toml settings.json docs/jmd LocalPreferences.toml From df55dab9d0876451c288da721c1aca8e91889706 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 12:35:00 -0500 Subject: [PATCH 04/15] slight efficiency tweak --- src/simulate.jl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/simulate.jl b/src/simulate.jl index 1906ab943..d2dfc63d3 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -163,9 +163,7 @@ function simulate!( if length(β) ≠ length(coef(m)) padding = length(coef(m)) - length(β) - for ii in 1:padding - push!(β, -0.0) - end + append!(β, fill(-0.0, padding)) end # initialize y to standard normal @@ -256,9 +254,7 @@ function _simulate!( if length(β) ≠ length(coef(m)) padding = length(coef(m)) - length(β) - for ii in 1:padding - push!(β, -0.0) - end + append!(β, fill(-0.0, padding)) end fast = (length(m.θ) == length(m.optsum.final)) From 75a4b0c482a3b7dcaf7c83c74a909b8d8e0eca74 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 12:44:19 -0500 Subject: [PATCH 05/15] coef! fixes --- src/linearmixedmodel.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index 60e874506..0c39fa7c3 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -268,7 +268,7 @@ end StatsAPI.coef(m::LinearMixedModel{T}) where {T} = coef!(Vector{T}(undef, length(m.feterm.piv)), m) -function coef!(v::Vector{T}, m::LinearMixedModel{T}) where {T} +function coef!(v::AbstractVector{Tv}, m::MixedModel{T}) where {Tv,T} piv = m.feterm.piv return invpermute!(fixef!(v, m), piv) end From a9897ac9579a6e259e1cb6bc08eadc2cdeda84df Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 12:48:52 -0500 Subject: [PATCH 06/15] fixef! with different target eltype for GLMM --- src/generalizedlinearmixedmodel.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl index 6486fd4a0..7b39e48c0 100644 --- a/src/generalizedlinearmixedmodel.jl +++ b/src/generalizedlinearmixedmodel.jl @@ -113,8 +113,8 @@ StatsAPI.deviance(m::GeneralizedLinearMixedModel) = deviance(m, m.optsum.nAGQ) fixef(m::GeneralizedLinearMixedModel) = m.β -function fixef!(v::AbstractVector{T}, m::GeneralizedLinearMixedModel{T}) where {T} - return copyto!(fill!(v, -zero(T)), m.β) +function fixef!(v::AbstractVector{Tv}, m::GeneralizedLinearMixedModel{T}) where {Tv, T} + return copyto!(fill!(v, -zero(Tv)), m.β) end objective(m::GeneralizedLinearMixedModel) = deviance(m) From 3b2182cabe8c8b44050ec401d02b652cb09bcda8 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 13:03:35 -0500 Subject: [PATCH 07/15] simulate! tweaks --- src/simulate.jl | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/simulate.jl b/src/simulate.jl index d2dfc63d3..58a93c757 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -110,9 +110,6 @@ scale. The `wts` argument is currently ignored except for `GeneralizedLinearMixedModel` models with a `Binomial` distribution. -!!! warning - Models are assumed to be full rank. - !!! note Note that `simulate!` methods with a `y::AbstractVector` as the first argument (besides the RNG) and `simulate` methods return the simulated response. This is @@ -152,8 +149,7 @@ end function simulate!( rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=m.θ ) where {T} - length(β) == length(fixef(m)) || - length(β) == length(coef(m)) || + length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank || throw(ArgumentError("You must specify all (non-singular) βs")) β = convert(Vector{T}, β) @@ -161,8 +157,8 @@ function simulate!( θ = convert(Vector{T}, θ) isempty(θ) || setθ!(m, θ) - if length(β) ≠ length(coef(m)) - padding = length(coef(m)) - length(β) + if length(β) ≠ length(m.feterm.piv) + padding = length(model.feterm.piv) - m.feterm.rank append!(β, fill(-0.0, padding)) end @@ -232,8 +228,7 @@ function _simulate!( θ, resp=nothing, ) where {T} - length(β) == length(fixef(m)) || - length(β) == length(coef(m)) || + length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank || throw(ArgumentError("You must specify all (non-singular) βs")) dispersion_parameter(m) || @@ -252,8 +247,8 @@ function _simulate!( d = m.resp.d - if length(β) ≠ length(coef(m)) - padding = length(coef(m)) - length(β) + if length(β) ≠ length(m.feterm.piv) + padding = length(model.feterm.piv) - m.feterm.rank append!(β, fill(-0.0, padding)) end From f4cb66d5dc1f46846de5c69378b467724c6ff98e Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 13:15:26 -0500 Subject: [PATCH 08/15] add test for rank deficient bootstrap --- test/bootstrap.jl | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/test/bootstrap.jl b/test/bootstrap.jl index f3a0da076..6683d7a9b 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -189,7 +189,7 @@ end @test pb1 == restorereplicates(seekstart(io), MixedModels.unfit!(deepcopy(m1))) @test pb1 ≈ restorereplicates(seekstart(io), m1, Float16) rtol=1 -end + end @testset "Bernoulli simulate! and GLMM bootstrap" begin contra = dataset(:contra) @@ -220,6 +220,22 @@ end σ=2, progress=false) @test sum(issingular(bs)) == 0 end + + @testset "Rank deficient" begin + rng = MersenneTwister(0); + x = rand(rng, 100); + data = (x = x, x2 = 1.5 .* x, y = rand(rng, 100), z = repeat('A':'T', 5)) + model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data; progress=false) + boot = quickboot(model, 10) + + dropped_idx = model.feterm.piv[end] + dropped_coef = coefnames(model)[dropped_idx] + @test all(boot.β) do nt + # if we're the dropped coef, then we must be -0.0 + # need isequal because of -0.0 + return nt.coefname != dropped_coef || isequal(nt.β, -0.0) + end + end end @testset "show and summary" begin @@ -236,7 +252,7 @@ end @test nrow(df) == 151 @test propertynames(df) == collect(propertynames(pr.tbl)) - @testset "CI method comparison" begin + @testset "CI method comparison" begin level = 0.68 ci_boot_equaltail = confint(pb; level, method=:equaltail) ci_boot_shortest = confint(pb; level, method=:shortest) From 13e11c4552b71059240e6e8f09882082d110ca4c Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 13:15:31 -0500 Subject: [PATCH 09/15] behavior note --- src/bootstrap.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index ee909e8b7..c2985ca44 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -203,7 +203,11 @@ of coverage intervals. !!! note - All coefficients are bootstrapped, including inestimatable ones from the rank deficient case. + All coefficients are bootstrapped. In the rank deficient case, the inestimatable coefficients are + treated as -0.0 in the simulations underlying the bootstrap, which will generally result + in their estimate from the simulated data also being being inestimable and thus set to -0.0. + **However this behavior behavior may change in future releases to explicitly drop the + extraneous columns before simulation and thus not include their estimates in the bootstrap result.** """ function parametricbootstrap( rng::AbstractRNG, From 20c7ed4101b0a6c4cabfe0601a95a3a43dd87399 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 13:17:50 -0500 Subject: [PATCH 10/15] version bump --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 30112b4d9..10dac745d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MixedModels" uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" author = ["Phillip Alday ", "Douglas Bates ", "Jose Bayoan Santiago Calderon "] -version = "4.22.5" +version = "4.23.0" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" From 3379fd66a73ff7e98c43caaf511fa44ccea940e0 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 13:18:41 -0500 Subject: [PATCH 11/15] NEWS update --- NEWS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index e880cbc99..8df424dd8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +MixedModels v4.23.0 Release Notes +============================== +* Support for rank deficiency in the parametric bootstrap. [#755] + MixedModels v4.22.5 Release Notes ============================== * Use `muladd` where possible to enable fused multiply-add (FMA) on architectures with hardware support. FMA will generally improve computational speed and gives more accurate rounding. [#740] @@ -504,3 +508,4 @@ Package dependencies [#740]: https://github.com/JuliaStats/MixedModels.jl/issues/740 [#744]: https://github.com/JuliaStats/MixedModels.jl/issues/744 [#748]: https://github.com/JuliaStats/MixedModels.jl/issues/748 +[#755]: https://github.com/JuliaStats/MixedModels.jl/issues/755 From 82dd03da6be768d9c3ccbe32eb05b65654d9f853 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 13:19:10 -0500 Subject: [PATCH 12/15] formatting Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/generalizedlinearmixedmodel.jl | 2 +- src/linearmixedmodel.jl | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl index 7b39e48c0..7973fecaf 100644 --- a/src/generalizedlinearmixedmodel.jl +++ b/src/generalizedlinearmixedmodel.jl @@ -113,7 +113,7 @@ StatsAPI.deviance(m::GeneralizedLinearMixedModel) = deviance(m, m.optsum.nAGQ) fixef(m::GeneralizedLinearMixedModel) = m.β -function fixef!(v::AbstractVector{Tv}, m::GeneralizedLinearMixedModel{T}) where {Tv, T} +function fixef!(v::AbstractVector{Tv}, m::GeneralizedLinearMixedModel{T}) where {Tv,T} return copyto!(fill!(v, -zero(Tv)), m.β) end diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index 0c39fa7c3..558a434e3 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -266,7 +266,9 @@ function StatsAPI.fit( end end -StatsAPI.coef(m::LinearMixedModel{T}) where {T} = coef!(Vector{T}(undef, length(m.feterm.piv)), m) +function StatsAPI.coef(m::LinearMixedModel{T}) where {T} + return coef!(Vector{T}(undef, length(m.feterm.piv)), m) +end function coef!(v::AbstractVector{Tv}, m::MixedModel{T}) where {Tv,T} piv = m.feterm.piv From 0fc8d0d426633246a7b9bea08a6080c1f5b8d61a Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 13:24:20 -0500 Subject: [PATCH 13/15] Update src/bootstrap.jl --- src/bootstrap.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index c2985ca44..c186a53f2 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -103,7 +103,7 @@ function restorereplicates( rep = first(Tables.rows(tbl)) allgood = length(rep.θ) == length(m.θ) && - string.(propertynames(rep.β)) == Tuple(fixefnames(m)) + string.(propertynames(rep.β)) == Tuple(coefnames(m)) allgood || throw(ArgumentError("Model is not compatible with saved replicates.")) From 986015365aa2cbc1260e81564a526b3f6e431cfe Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 13:51:04 -0500 Subject: [PATCH 14/15] tweak nightly runner --- .github/workflows/nightly.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 883418267..563a6b81e 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -30,8 +30,7 @@ jobs: version: ${{ matrix.julia-version }} - uses: julia-actions/cache@v1 with: - cache-compiled: "true" - - uses: julia-actions/julia-buildpkg@v1 + cache-compiled: true - uses: julia-actions/julia-runtest@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From cdc40d643dd07d776b34e6a1741c36b3f3697f8e Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 19 Mar 2024 15:04:24 -0500 Subject: [PATCH 15/15] Apply suggestions from code review Co-authored-by: Alex Arslan --- src/bootstrap.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index c186a53f2..f5af4730b 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -206,7 +206,7 @@ of coverage intervals. All coefficients are bootstrapped. In the rank deficient case, the inestimatable coefficients are treated as -0.0 in the simulations underlying the bootstrap, which will generally result in their estimate from the simulated data also being being inestimable and thus set to -0.0. - **However this behavior behavior may change in future releases to explicitly drop the + **However this behavior may change in future releases to explicitly drop the extraneous columns before simulation and thus not include their estimates in the bootstrap result.** """ function parametricbootstrap(