From e661296696baa77f942e90baabd5219dc5054d9f Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Tue, 7 May 2024 09:30:16 +0200 Subject: [PATCH 1/5] Remove white space --- src/ftest.jl | 2 +- src/glmfit.jl | 2 +- src/glmtools.jl | 4 +-- src/linpred.jl | 6 ++--- src/lm.jl | 4 +-- test/runtests.jl | 69 ++++++++++++++++++++++++------------------------ 6 files changed, 44 insertions(+), 43 deletions(-) diff --git a/src/ftest.jl b/src/ftest.jl index b29f6644..97ce89da 100644 --- a/src/ftest.jl +++ b/src/ftest.jl @@ -189,7 +189,7 @@ function show(io::IO, ftr::FTestResult{N}) where N # get rid of negative zero -- doesn't matter mathematically, # but messes up doctests and various other things - # cf. Issue #461 + # cf. Issue #461 r2vals = [replace(@sprintf("%.4f", val), "-0.0000" => "0.0000") for val in ftr.r2] outrows[2, :] = ["[1]", @sprintf("%.0d", ftr.dof[1]), " ", diff --git a/src/glmfit.jl b/src/glmfit.jl index 5406fcbb..bdb70434 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -622,7 +622,7 @@ function fit(::Type{M}, off = offset === nothing ? similar(y, 0) : offset wts = wts === nothing ? similar(y, 0) : wts rr = GlmResp(y, d, l, off, wts) - + if method === :cholesky res = M(rr, cholpred(X, dropcollinear), f, false) elseif method === :qr diff --git a/src/glmtools.jl b/src/glmtools.jl index 2c7c2ea0..544cb1ee 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -501,7 +501,7 @@ dispersion_parameter(::Union{Bernoulli, Binomial, Poisson}) = false """ _safe_int(x::T) - + Convert to Int, when `x` is within 1 eps of an integer. """ function _safe_int(x::T) where {T<:AbstractFloat} @@ -527,7 +527,7 @@ function loglik_obs end loglik_obs(::Bernoulli, y, μ, wt, ϕ) = wt*logpdf(Bernoulli(μ), y) loglik_obs(::Binomial, y, μ, wt, ϕ) = logpdf(Binomial(Int(wt), μ), _safe_int(y*wt)) loglik_obs(::Gamma, y, μ, wt, ϕ) = wt*logpdf(Gamma(inv(ϕ), μ*ϕ), y) -# In Distributions.jl, a Geometric distribution characterizes the number of failures before +# In Distributions.jl, a Geometric distribution characterizes the number of failures before # the first success in a sequence of independent Bernoulli trials with success rate p. # The mean of Geometric distribution is (1 - p) / p. # Hence, p = 1 / (1 + μ). diff --git a/src/linpred.jl b/src/linpred.jl index 9973d5d2..9c84d8b9 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -38,7 +38,7 @@ mutable struct DensePredQR{T<:BlasReal,Q<:Union{QRCompactWY, QRPivoted}} <: Dens scratchbeta::Vector{T} qr::Q scratchm1::Matrix{T} - + function DensePredQR(X::AbstractMatrix, pivot::Bool=false) n, p = size(X) T = typeof(float(zero(eltype(X)))) @@ -74,7 +74,7 @@ function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T}) rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) X = p.X W = Diagonal(wt) - sqrtW = Diagonal(sqrt.(wt)) + sqrtW = Diagonal(sqrt.(wt)) mul!(p.scratchm1, sqrtW, X) mul!(p.delbeta, X'W, r) qnr = qr(p.scratchm1) @@ -88,7 +88,7 @@ function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal if rnk == length(p.delbeta) p.delbeta = p.qr\r else - R = @view p.qr.R[:, 1:rnk] + R = @view p.qr.R[:, 1:rnk] Q = @view p.qr.Q[:, 1:size(R, 1)] piv = p.qr.p p.delbeta = zeros(size(p.delbeta)) diff --git a/src/lm.jl b/src/lm.jl index f337862c..f26fb8e5 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -136,7 +136,7 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep end - + if method === :cholesky fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear), nothing)) elseif method === :qr @@ -299,7 +299,7 @@ function StatsModels.predict!(res::Union{AbstractVector, length(res) == size(newx, 1) || throw(DimensionMismatch("length of `res` must equal the number of rows in `newx`")) res .= newx * coef(mm) - elseif mm.pp isa DensePredChol && + elseif mm.pp isa DensePredChol && mm.pp.chol isa CholeskyPivoted && mm.pp.chol.rank < size(mm.pp.chol, 2) throw(ArgumentError("prediction intervals are currently not implemented " * diff --git a/test/runtests.jl b/test/runtests.jl index 235c3745..8a326a48 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,7 @@ using CategoricalArrays, CSV, DataFrames, LinearAlgebra, SparseArrays, StableRNG using GLM using StatsFuns: logistic using Distributions: TDist +using Downloads test_show(x) = show(IOBuffer(), x) @@ -28,7 +29,7 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form; method=dmethod) test_show(lm1) @test isapprox(coef(lm1), linreg(form.Carb, form.OptDen)) - + @test isapprox(vcov(lm1), Σ) @test isapprox(cor(lm1), Diagonal(diag(Σ))^(-1/2)*Σ*Diagonal(diag(Σ))^(-1/2)) @test dof(lm1) == 3 @@ -72,14 +73,14 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y end @testset "Cook's Distance in Linear Model with $dmethod" for dmethod in (:cholesky, :qr) - st_df = DataFrame( + st_df = DataFrame( Y=[6.4, 7.4, 10.4, 15.1, 12.3 , 11.4], XA=[1.5, 6.5, 11.5, 19.9, 17.0, 15.5], - XB=[1.8, 7.8, 11.8, 20.5, 17.3, 15.8], + XB=[1.8, 7.8, 11.8, 20.5, 17.3, 15.8], XC=[3., 13., 23., 39.8, 34., 31.], # values from SAS proc reg - CooksD_base=[1.4068501943, 0.176809102, 0.0026655177, 1.0704009915, 0.0875726457, 0.1331183932], - CooksD_noint=[0.0076891801, 0.0302993877, 0.0410262965, 0.0294348488, 0.0691589296, 0.0273045538], + CooksD_base=[1.4068501943, 0.176809102, 0.0026655177, 1.0704009915, 0.0875726457, 0.1331183932], + CooksD_noint=[0.0076891801, 0.0302993877, 0.0410262965, 0.0294348488, 0.0691589296, 0.0273045538], CooksD_multi=[1.7122291956, 18.983407026, 0.000118078, 0.8470797843, 0.0715921999, 0.1105843157], ) @@ -87,7 +88,7 @@ end t_lm_base = lm(@formula(Y ~ XA), st_df; method=dmethod) @test isapprox(st_df.CooksD_base, cooksdistance(t_lm_base)) - # linear regression, no intercept + # linear regression, no intercept t_lm_noint = lm(@formula(Y ~ XA +0), st_df; method=dmethod) @test isapprox(st_df.CooksD_noint, cooksdistance(t_lm_noint)) @@ -102,12 +103,12 @@ end @test_throws ArgumentError isapprox(st_df.CooksD_base, cooksdistance(t_lm_colli)) end -@testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr) +@testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr) df = dataset("quantreg", "engel") N = nrow(df) df.weights = repeat(1:5, Int(N/5)) f = @formula(FoodExp ~ Income) - + lm_model = lm(f, df, wts = df.weights; method=dmethod) glm_model = glm(f, df, Normal(), wts = df.weights; method=dmethod) @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) @@ -115,7 +116,7 @@ end @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) @test isapprox(r2(lm_model), 0.8330258148644486) @test isapprox(adjr2(lm_model), 0.832788298242634) - @test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + @test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; -0.06772589439264813 6.670664781664879e-5]) @test isapprox(first(predict(lm_model)), 357.57694841780994) @test isapprox(loglikelihood(lm_model), -4353.946729075838) @@ -153,7 +154,7 @@ end X = ModelMatrix(ModelFrame(f, dfrm)).m y = X * (1:size(X, 2)) + 0.1 * randn(rng, size(X, 1)) inds = deleteat!(collect(1:length(y)), 7:8) - + m1 = fit(LinearModel, X, y; method=dmethod) @test isapprox(deviance(m1), 0.12160301538297297) Xmissingcell = X[inds, :] @@ -170,9 +171,9 @@ end @test isa(m2p.pp.chol, CholeskyPivoted) @test isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281, 3.9661379199401257, 5.079410103608552, 6.1944618141188625, - 0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, + 0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, 10.84972230524356, 11.844809275711485]) - + @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) @test isa(m2p_dep_pos_kw.pp.chol, CholeskyPivoted) elseif dmethod == :qr @@ -180,13 +181,13 @@ end method = dmethod, dropcollinear=false) @test isapprox(coef(m2p), [0.9772643585228962, 11.889730016918342, 3.027347397503282, 3.9661379199401177, 5.079410103608539, 6.194461814118862, - -2.9863884084219015, 7.930328728005132, 8.87999491860477, + -2.9863884084219015, 7.930328728005132, 8.87999491860477, 0.0, 10.849722305243555, 11.844809275711487]) || isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281, 3.9661379199401257, 5.079410103608552, 6.1944618141188625, - 0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, + 0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, 10.84972230524356, 11.844809275711485]) - + @test isa(m2p.pp.qr, QRPivoted) @test isa(m2p_dep_pos.pp.qr, QRPivoted) @@ -198,7 +199,7 @@ end @test GLM.linpred_rank(m2p.pp) == 11 @test isapprox(deviance(m2p), 0.1215758392280204) - + @test GLM.linpred_rank(m2p_dep_pos.pp) == GLM.linpred_rank(m2p.pp) @test isapprox(deviance(m2p_dep_pos), deviance(m2p)) @test isapprox(coef(m2p_dep_pos), coef(m2p)) @@ -210,7 +211,7 @@ end @testset "Saturated linear model with $dmethod" for dmethod in (:cholesky, :qr) df1 = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) - + model = lm(@formula(y ~ x), df1; method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @@ -279,7 +280,7 @@ end # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat data = DataFrame(x = 60:70, y = 130:140) - + mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky) @test coef(mdl) ≈ [2.07438016528926] @test stderror(mdl) ≈ [0.165289256198347E-01] @@ -303,7 +304,7 @@ end # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt2.dat data = DataFrame(x = [4, 5, 6], y = [3, 4, 4]) - + mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky) @test coef(mdl) ≈ [0.727272727272727] @test stderror(mdl) ≈ [0.420827318078432E-01] @@ -324,7 +325,7 @@ end X = [4 5 6]' y = [3, 4, 4] data = DataFrame(x = [4, 5, 6], y = [3, 4, 4]) - + mdl1 = lm(@formula(y ~ 0 + x), data; method=dmethod) mdl2 = lm(X, y) @@ -829,8 +830,8 @@ end @test aic(gm22) ≈ 1112.7422553284146 @test aicc(gm22) ≈ 1113.7933502189255 @test bic(gm22) ≈ 1136.6111083020812 - @test coef(gm22)[1:7] ≈ [2.8978546663153897, -0.5701067649409168, 0.08040181505082235, - -0.4497584898742737, 0.08622664933901254, 0.3558996662512287, + @test coef(gm22)[1:7] ≈ [2.8978546663153897, -0.5701067649409168, 0.08040181505082235, + -0.4497584898742737, 0.08622664933901254, 0.3558996662512287, 0.29016080736927813] @test stderror(gm22) ≈ [0.22754287093719366, 0.15274755092180423, 0.15928431669166637, 0.23853372776980591, 0.2354231414867577, 0.24750780320597515, @@ -1423,7 +1424,7 @@ end ──────────────────────────────────────────────────────────────── DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F) ──────────────────────────────────────────────────────────────── - [1] 2 3.2292 0.0000 + [1] 2 3.2292 0.0000 [2] 3 1 0.1283 -3.1008 0.9603 0.9603 241.6234 <1e-7 ────────────────────────────────────────────────────────────────""" end @@ -1870,7 +1871,7 @@ end end @testset "dropcollinear in GLM with Cholesky" begin - data = DataFrame(x1=[4, 5, 9, 6, 5], x2=[5, 3, 6, 7, 1], + data = DataFrame(x1=[4, 5, 9, 6, 5], x2=[5, 3, 6, 7, 1], x3=[4.2, 4.6, 8.4, 6.2, 4.2], y=[14, 14, 24, 20, 11]) @testset "Check normal with identity link against equivalent linear model" begin @@ -2019,7 +2020,7 @@ end # see issue 503 y, μ, wt, ϕ = 0.6376811594202898, 0.8492925285671102, 69.0, NaN # due to floating point: - # 1. y * wt == 43.99999999999999 + # 1. y * wt == 43.99999999999999 # 2. 44 / y == wt # 3. 44 / wt == y @test GLM.loglik_obs(Binomial(), y, μ, wt, ϕ) ≈ GLM.logpdf(Binomial(Int(wt), μ), 44) @@ -2036,8 +2037,8 @@ end # > data(Duncan) # > lm1 = lm(prestige ~ 1 + income + education, Duncan) # > vif(lm1) - # income education - # 2.1049 2.1049 + # income education + # 2.1049 2.1049 # > lm2 = lm(prestige ~ 1 + income + education + type, Duncan) # > vif(lm2) # GVIF Df GVIF^(1/(2*Df)) @@ -2048,26 +2049,26 @@ end lm1 = lm(@formula(Prestige ~ 1 + Income + Education), duncan) @test termnames(lm1)[2] == coefnames(lm1) @test vif(lm1) ≈ gvif(lm1) - + lm1_noform = lm(modelmatrix(lm1), response(lm1)) @test vif(lm1) ≈ vif(lm1_noform) @test_throws ArgumentError("model was fitted without a formula") gvif(lm1_noform) - + lm1log = lm(@formula(Prestige ~ 1 + exp(log(Income)) + exp(log(Education))), duncan) @test termnames(lm1log)[2] == coefnames(lm1log) == ["(Intercept)", "exp(log(Income))", "exp(log(Education))"] @test vif(lm1) ≈ vif(lm1log) - + gm1 = glm(modelmatrix(lm1), response(lm1), Normal()) @test vif(lm1) ≈ vif(gm1) - + lm2 = lm(@formula(Prestige ~ 1 + Income + Education + Type), duncan) @test termnames(lm2)[2] != coefnames(lm2) @test gvif(lm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4 - + gm2 = glm(@formula(Prestige ~ 1 + Income + Education + Type), duncan, Normal()) @test termnames(gm2)[2] != coefnames(gm2) - @test gvif(gm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4 - + @test gvif(gm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4 + # the VIF definition depends on modelmatrix, vcov and stderror returning valid # values. It doesn't care about links, offsets, etc. as long as the model matrix, # vcov matrix and stderrors are well defined. From 10f8528865922b455255956f0826560c1e707c61 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Tue, 7 May 2024 13:04:19 +0200 Subject: [PATCH 2/5] Fix QR based fitting Avoid erroring out for low rank design matrices when dropcollinear=false. Avoid unnecessary triangular solves. Avoid indexing in the Q. Avoid slicing R matrix in a way that triggers a minimum norm solution. Remove unnecessary temporaries. --- Project.toml | 3 +- src/linpred.jl | 76 ++++++++++++++++++------------------------------ test/runtests.jl | 21 +++++++++++-- 3 files changed, 50 insertions(+), 50 deletions(-) diff --git a/Project.toml b/Project.toml index 02d75dd4..ddd8ca51 100644 --- a/Project.toml +++ b/Project.toml @@ -36,9 +36,10 @@ julia = "1.6" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["CategoricalArrays", "CSV", "DataFrames", "RDatasets", "StableRNGs", "Test"] +test = ["CategoricalArrays", "CSV", "DataFrames", "Downloads", "RDatasets", "StableRNGs", "Test"] diff --git a/src/linpred.jl b/src/linpred.jl index 9c84d8b9..19e82aa1 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -63,23 +63,19 @@ function delbeta! end function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}) where T<:BlasReal rnk = rank(p.qr.R) - rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) - p.delbeta = p.qr\r - mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) + p.delbeta = p.qr \ r return p end function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal rnk = rank(p.qr.R) - rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) X = p.X W = Diagonal(wt) sqrtW = Diagonal(sqrt.(wt)) mul!(p.scratchm1, sqrtW, X) - mul!(p.delbeta, X'W, r) - qnr = qr(p.scratchm1) - Rinv = inv(qnr.R) - p.delbeta = Rinv * Rinv' * p.delbeta + ỹ = sqrtW * r + p.qr = qr!(p.scratchm1) + p.delbeta = p.qr \ ỹ return p end @@ -88,44 +84,32 @@ function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal if rnk == length(p.delbeta) p.delbeta = p.qr\r else - R = @view p.qr.R[:, 1:rnk] - Q = @view p.qr.Q[:, 1:size(R, 1)] + R = UpperTriangular(view(parent(p.qr.R), 1:rnk, 1:rnk)) piv = p.qr.p - p.delbeta = zeros(size(p.delbeta)) - p.delbeta[1:rnk] = R \ Q'r + fill!(p.delbeta, 0) + p.delbeta[1:rnk] = R \ view(p.qr.Q'r, 1:rnk) invpermute!(p.delbeta, piv) end - mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) return p end function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - rnk = rank(p.qr.R) X = p.X W = Diagonal(wt) sqrtW = Diagonal(sqrt.(wt)) - delbeta = p.delbeta - scratchm2 = similar(X, T) mul!(p.scratchm1, sqrtW, X) - mul!(scratchm2, W, X) - mul!(delbeta, transpose(scratchm2), r) - - if rnk == length(p.delbeta) - qnr = qr(p.scratchm1) - Rinv = inv(qnr.R) - p.delbeta = Rinv * Rinv' * delbeta - else - qnr = pivoted_qr!(copy(p.scratchm1)) - R = @view qnr.R[1:rnk, 1:rnk] - Rinv = inv(R) - piv = qnr.p - permute!(delbeta, piv) - for k=(rnk+1):length(delbeta) - delbeta[k] = -zero(T) - end - p.delbeta[1:rnk] = Rinv * Rinv' * view(delbeta, 1:rnk) - invpermute!(delbeta, piv) + r̃ = sqrtW * r + + p.qr = pivoted_qr!(copy(p.scratchm1)) + rnk = rank(p.qr.R) # FIXME! Don't use svd for this + R = UpperTriangular(view(parent(p.qr.R), 1:rnk, 1:rnk)) + permute!(p.delbeta, p.qr.p) + for k = (rnk + 1):length(p.delbeta) + p.delbeta[k] = -zero(T) end + p.delbeta[1:rnk] = R \ (p.qr.Q'*r̃)[1:rnk] + invpermute!(p.delbeta, p.qr.p) + return p end @@ -279,27 +263,25 @@ end LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol) LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol -function invqr(x::DensePredQR{T,<: QRCompactWY}) where T - Q,R = qr(x.scratchm1) - Rinv = inv(R) +function invqr(p::DensePredQR{T,<: QRCompactWY}) where T + Rinv = inv(p.qr.R) Rinv*Rinv' end -function invqr(x::DensePredQR{T,<: QRPivoted}) where T - Q,R,pv = pivoted_qr!(copy(x.scratchm1)) - rnk = rank(R) - p = length(x.delbeta) - if rnk == p - Rinv = inv(R) +function invqr(p::DensePredQR{T,<: QRPivoted}) where T + rnk = rank(p.qr.R) + k = length(p.delbeta) + if rnk == k + Rinv = inv(p.qr.R) xinv = Rinv*Rinv' - ipiv = invperm(pv) + ipiv = invperm(p.qr.p) return xinv[ipiv, ipiv] else - Rsub = R[1:rnk, 1:rnk] + Rsub = UpperTriangular(view(p.qr.R, 1:rnk, 1:rnk)) RsubInv = inv(Rsub) - xinv = fill(convert(T, NaN), (p,p)) + xinv = fill(convert(T, NaN), (k, k)) xinv[1:rnk, 1:rnk] = RsubInv*RsubInv' - ipiv = invperm(pv) + ipiv = invperm(p.qr.p) return xinv[ipiv, ipiv] end end diff --git a/test/runtests.jl b/test/runtests.jl index 8a326a48..d345672a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -177,8 +177,8 @@ end @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) @test isa(m2p_dep_pos_kw.pp.chol, CholeskyPivoted) elseif dmethod == :qr - @test_throws RankDeficientException m2 = fit(LinearModel, Xmissingcell, ymissingcell; - method = dmethod, dropcollinear=false) + @test fit(LinearModel, Xmissingcell, ymissingcell; + method = dmethod, dropcollinear=false) isa LinearModel @test isapprox(coef(m2p), [0.9772643585228962, 11.889730016918342, 3.027347397503282, 3.9661379199401177, 5.079410103608539, 6.194461814118862, -2.9863884084219015, 7.930328728005132, 8.87999491860477, @@ -2073,3 +2073,20 @@ end # values. It doesn't care about links, offsets, etc. as long as the model matrix, # vcov matrix and stderrors are well defined. end + +@testset "NIST - Filip. Issue 558" begin + fn = Downloads.download("https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Filip.dat") + filip_estimates_df = CSV.read(fn, DataFrame; skipto = 31, limit = 11, header = ["parameter", "estimate", "se"], delim = " ", ignorerepeated = true) + filip_data_df = CSV.read(fn, DataFrame; skipto = 61, header = ["y", "x"], delim = " ", ignorerepeated = true) + X = [filip_data_df.x[i]^j for i in 1:length(filip_data_df.x), j in 0:10] + + # No weights + f1 = lm(X, filip_data_df.y, dropcollinear = false, method = :qr) + @test coef(f1) ≈ filip_estimates_df.estimate rtol = 1e-7 + @test stderror(f1) ≈ filip_estimates_df.se rtol = 1e-7 + + # Weights + f2 = lm(X, filip_data_df.y, dropcollinear = false, method = :qr, wts = ones(length(filip_data_df.y))) + @test coef(f2) ≈ filip_estimates_df.estimate rtol = 1e-7 + @test stderror(f2) ≈ filip_estimates_df.se rtol = 1e-7 +end \ No newline at end of file From fffd07d55819203c25832ed01d15f732ae585c03 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Tue, 5 Nov 2024 14:31:22 +0100 Subject: [PATCH 3/5] Address review comments --- src/linpred.jl | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 19e82aa1..bb06fa17 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -62,18 +62,16 @@ Evaluate and return `p.delbeta` the increment to the coefficient vector from res function delbeta! end function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}) where T<:BlasReal - rnk = rank(p.qr.R) p.delbeta = p.qr \ r return p end function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - rnk = rank(p.qr.R) X = p.X - W = Diagonal(wt) - sqrtW = Diagonal(sqrt.(wt)) + wtsqrt = sqrt.(wt) + sqrtW = Diagonal(wtsqrt) mul!(p.scratchm1, sqrtW, X) - ỹ = sqrtW * r + ỹ = (wtsqrt .*= r) # to reuse wtsqrt's memory p.qr = qr!(p.scratchm1) p.delbeta = p.qr \ ỹ return p @@ -82,7 +80,7 @@ end function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal rnk = rank(p.qr.R) if rnk == length(p.delbeta) - p.delbeta = p.qr\r + p.delbeta = p.qr \ r else R = UpperTriangular(view(parent(p.qr.R), 1:rnk, 1:rnk)) piv = p.qr.p @@ -96,18 +94,19 @@ end function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal X = p.X W = Diagonal(wt) - sqrtW = Diagonal(sqrt.(wt)) + wtsqrt = sqrt.(wt) + sqrtW = Diagonal(wtsqrt) mul!(p.scratchm1, sqrtW, X) - r̃ = sqrtW * r + r̃ = (wtsqrt .*= r) # to reuse wtsqrt's memory - p.qr = pivoted_qr!(copy(p.scratchm1)) + p.qr = pivoted_qr!(p.scratchm1) rnk = rank(p.qr.R) # FIXME! Don't use svd for this R = UpperTriangular(view(parent(p.qr.R), 1:rnk, 1:rnk)) permute!(p.delbeta, p.qr.p) for k = (rnk + 1):length(p.delbeta) - p.delbeta[k] = -zero(T) + p.delbeta[k] = zero(T) end - p.delbeta[1:rnk] = R \ (p.qr.Q'*r̃)[1:rnk] + p.delbeta[1:rnk] = R \ view(p.qr.Q'*r̃, 1:rnk) invpermute!(p.delbeta, p.qr.p) return p From 0c95f73c057d33c585e796f1e58f7d73d67a33fe Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Tue, 5 Nov 2024 15:09:14 +0100 Subject: [PATCH 4/5] Try to run docs on lts --- .github/workflows/Documenter.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index 39582f93..28ee5b8a 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -14,8 +14,11 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: julia-actions/julia-buildpkg@latest - - uses: julia-actions/julia-docdeploy@latest + - uses: julia-actions/setup-julia@v2 + with: + version: 'lts' + show-versioninfo: true + - uses: julia-actions/julia-docdeploy@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} From 2b61e906e708cc89248153b2e60e288976d629da Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Tue, 5 Nov 2024 17:13:18 +0100 Subject: [PATCH 5/5] Use a "mirror" for the test data --- test/runtests.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index d345672a..11f6b0bb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2075,7 +2075,8 @@ end end @testset "NIST - Filip. Issue 558" begin - fn = Downloads.download("https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Filip.dat") + # Since "https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Filip.dat" seems to block download, we'll use a "mirror" + fn = Downloads.download("https://gist.githubusercontent.com/andreasnoack/9dadfb922fc45ca52bc7b2659b7c5b67/raw/f1dcdd2179b606ecbb7e23316da1ede971e7ffe8/Filip.dat") filip_estimates_df = CSV.read(fn, DataFrame; skipto = 31, limit = 11, header = ["parameter", "estimate", "se"], delim = " ", ignorerepeated = true) filip_data_df = CSV.read(fn, DataFrame; skipto = 61, header = ["y", "x"], delim = " ", ignorerepeated = true) X = [filip_data_df.x[i]^j for i in 1:length(filip_data_df.x), j in 0:10]