Skip to content

Commit

Permalink
Merge branch 'master' of github.com:JuliaStats/GLM.jl into pa/resid
Browse files Browse the repository at this point in the history
  • Loading branch information
palday committed Sep 2, 2023
2 parents e9865be + c13577e commit c3fbc4a
Show file tree
Hide file tree
Showing 16 changed files with 775 additions and 282 deletions.
7 changes: 7 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/" # Location of package manifests
schedule:
interval: "daily"
4 changes: 2 additions & 2 deletions .github/workflows/CI-future.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ jobs:
arch: ['x64']
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
- uses: actions/cache@v3
env:
cache-name: cache-artifacts
with:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/CI-stable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ jobs:
arch: ['x64']
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
- uses: actions/cache@v3
env:
cache-name: cache-artifacts
with:
Expand All @@ -43,7 +43,7 @@ jobs:
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
if: ${{ matrix.os == 'ubuntu-latest' && matrix.version == '1' && matrix.arch == 'x64' }}
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3
if: ${{ matrix.os == 'ubuntu-latest' && matrix.version == '1' && matrix.arch == 'x64' }}
with:
file: lcov.info
51 changes: 36 additions & 15 deletions .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
@@ -1,24 +1,45 @@
name: CompatHelper

on:
schedule:
- cron: '00 * * * *'

- cron: 0 0 * * *
workflow_dispatch:
permissions:
contents: write
pull-requests: write
jobs:
CompatHelper:
runs-on: ${{ matrix.os }}
strategy:
matrix:
julia-version: [1.2.0]
julia-arch: [x86]
os: [ubuntu-latest]
runs-on: ubuntu-latest
steps:
- uses: julia-actions/setup-julia@latest
- name: Check if Julia is already available in the PATH
id: julia_in_path
run: which julia
continue-on-error: true
- name: Install Julia, but only if it is not already available in the PATH
uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.julia-version }}
- name: Pkg.add("CompatHelper")
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- name: CompatHelper.main()
version: '1'
arch: ${{ runner.arch }}
if: steps.julia_in_path.outcome != 'success'
- name: "Add the General registry via Git"
run: |
import Pkg
ENV["JULIA_PKG_SERVER"] = ""
Pkg.Registry.add("General")
shell: julia --color=yes {0}
- name: "Install CompatHelper"
run: |
import Pkg
name = "CompatHelper"
uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
version = "3"
Pkg.add(; name, uuid, version)
shell: julia --color=yes {0}
- name: "Run CompatHelper"
run: |
import CompatHelper
CompatHelper.main()
shell: julia --color=yes {0}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: julia -e 'using CompatHelper; CompatHelper.main()'
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
# COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
2 changes: 1 addition & 1 deletion .github/workflows/Documenter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
name: Documentation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-docdeploy@latest
env:
Expand Down
26 changes: 24 additions & 2 deletions .github/workflows/TagBot.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,33 @@
name: TagBot
on:
schedule:
- cron: 0 * * * *
issue_comment:
types:
- created
workflow_dispatch:
inputs:
lookback:
default: 3
permissions:
actions: read
checks: read
contents: write
deployments: read
issues: read
discussions: read
packages: read
pages: read
pull-requests: read
repository-projects: read
security-events: read
statuses: read
jobs:
TagBot:
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
runs-on: ubuntu-latest
steps:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
# Edit the following line to reflect the actual name of the GitHub Secret containing your private key
ssh: ${{ secrets.DOCUMENTER_KEY }}
# ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }}
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "GLM"
uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
version = "1.8.1"
version = "2.0.0-DEV"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Expand All @@ -25,9 +25,9 @@ RDatasets = "0.5, 0.6, 0.7"
Reexport = "0.1, 0.2, 1.0"
SpecialFunctions = "0.6, 0.7, 0.8, 0.9, 0.10, 1, 2.0"
StatsAPI = "1.4"
StatsBase = "0.33.5"
StatsBase = "0.33.5, 0.34"
StatsFuns = "0.6, 0.7, 0.8, 0.9, 1.0"
StatsModels = "0.6.23"
StatsModels = "0.6.23, 0.7"
Tables = "1"
julia = "1.6"

Expand Down
1 change: 0 additions & 1 deletion docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ fit
StatsBase.deviance
GLM.dispersion
GLM.ftest
GLM.installbeta!
StatsBase.nobs
StatsBase.nulldeviance
StatsBase.predict
Expand Down
123 changes: 123 additions & 0 deletions docs/src/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,129 @@ julia> round.(vcov(ols); digits=5)
0.38889 -0.16667
-0.16667 0.08333
```
By default, the `lm` method uses the Cholesky factorization, which is fast but numerically unstable, especially for ill-conditioned design matrices. The Cholesky method also aggressively detects multicollinearity. You can use the `method` keyword argument to apply the more stable (but slower) QR factorization method.

```jldoctest
julia> data = DataFrame(X=[1,2,3], Y=[2,4,7]);
julia> ols = lm(@formula(Y ~ X), data; method=:qr)
LinearModel
Y ~ 1 + X
Coefficients:
─────────────────────────────────────────────────────────────────────────
Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95%
─────────────────────────────────────────────────────────────────────────
(Intercept) -0.666667 0.62361 -1.07 0.4788 -8.59038 7.25704
X 2.5 0.288675 8.66 0.0732 -1.16797 6.16797
─────────────────────────────────────────────────────────────────────────
```
The following example shows that QR decomposition works better for an ill-conditioned design matrix. The linear model fitted with the QR method is a better fit than the one fitted with the Cholesky method, since its estimated loglikelihood is higher.
Note that the condition number of the design matrix is quite high (≈ 3.52e7).

```
julia> X = [-0.4011512997627107 0.6368622664511552;
-0.0808472925693535 0.12835204623364604;
-0.16931095045225217 0.2687956795496601;
-0.4110745650568839 0.6526163576003452;
-0.4035951747670475 0.6407421349445884;
-0.4649907741370211 0.7382129928076485;
-0.15772708898883683 0.25040532268222715;
-0.38144358562952446 0.6055745630707645;
-0.1012787681395544 0.16078875117643368;
-0.2741403589052255 0.4352214984054432];
julia> y = [4.362866166172215,
0.8792840060172619,
1.8414020451091684,
4.470790758717395,
4.3894454833815395,
5.0571760643993455,
1.7154177874916376,
4.148527704012107,
1.1014936742570425,
2.9815131910316097];
julia> modelqr = lm(X, y; method=:qr)
LinearModel
Coefficients:
────────────────────────────────────────────────────────────────
Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95%
────────────────────────────────────────────────────────────────
x1 5.00389 0.0560164 89.33 <1e-12 4.87472 5.13307
x2 10.0025 0.035284 283.48 <1e-16 9.92109 10.0838
────────────────────────────────────────────────────────────────
julia> modelchol = lm(X, y; method=:cholesky)
LinearModel
Coefficients:
────────────────────────────────────────────────────────────────
Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95%
────────────────────────────────────────────────────────────────
x1 5.17647 0.0849184 60.96 <1e-11 4.98065 5.37229
x2 10.1112 0.053489 189.03 <1e-15 9.98781 10.2345
────────────────────────────────────────────────────────────────
julia> loglikelihood(modelqr) > loglikelihood(modelchol)
true
```
Since the Cholesky method with `dropcollinear = true` aggressively detects multicollinearity,
if you encounter multicollinearity in a GLM model fitted with the Cholesky method,
it is worth trying the same model with QR decomposition.
The following example is taken from *Introductory Econometrics: A Modern Approach, 7e* by Jeffrey M. Wooldridge.
The dataset is used to study the relationship between firm size—often measured by annual sales—and spending on
research and development (R&D).
The following shows that, for the given model,
the Cholesky method with `dropcollinear=true` detects multicollinearity in the design matrix
and hence, unlike the QR method, does not estimate all parameters.

```jldoctest
julia> y = [9.42190647125244, 2.084805727005, 3.9376676082611, 2.61976027488708, 4.04761934280395, 2.15384602546691,
2.66240668296813, 4.39475727081298, 5.74520826339721, 3.59616208076477, 1.54265284538269, 2.59368276596069,
1.80476510524749, 1.69270837306976, 3.04201245307922, 2.18389105796813, 2.73844122886657, 2.88134002685546,
2.46666669845581, 3.80616021156311, 5.12149810791015, 6.80378007888793, 3.73669862747192, 1.21332454681396,
2.54629635810852, 5.1612901687622, 1.86798071861267, 1.21465551853179, 6.31019830703735, 1.02669405937194,
2.50623273849487, 1.5936255455017];
julia> x = [4570.2001953125, 2830, 596.799987792968, 133.600006103515, 42, 390, 93.9000015258789, 907.900024414062,
19773, 39709, 2936.5, 2513.80004882812, 1124.80004882812, 921.599975585937, 2432.60009765625, 6754,
1066.30004882812, 3199.89990234375, 150, 509.700012207031, 1452.69995117187, 8995, 1212.30004882812,
906.599975585937, 2592, 201.5, 2617.80004882812, 502.200012207031, 2824, 292.200012207031, 7621, 1631.5];
julia> rdchem = DataFrame(rdintens=y, sales=x);
julia> mdl = lm(@formula(rdintens ~ sales + sales^2), rdchem; method=:cholesky)
LinearModel
rdintens ~ 1 + sales + :(sales ^ 2)
Coefficients:
───────────────────────────────────────────────────────────────────────────────────────
Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95%
───────────────────────────────────────────────────────────────────────────────────────
(Intercept) 0.0 NaN NaN NaN NaN NaN
sales 0.000852509 0.000156784 5.44 <1e-05 0.000532313 0.00117271
sales ^ 2 -1.97385e-8 4.56287e-9 -4.33 0.0002 -2.90571e-8 -1.04199e-8
───────────────────────────────────────────────────────────────────────────────────────
julia> mdl = lm(@formula(rdintens ~ sales + sales^2), rdchem; method=:qr)
LinearModel
rdintens ~ 1 + sales + :(sales ^ 2)
Coefficients:
─────────────────────────────────────────────────────────────────────────────────
Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95%
─────────────────────────────────────────────────────────────────────────────────
(Intercept) 2.61251 0.429442 6.08 <1e-05 1.73421 3.49082
sales 0.000300571 0.000139295 2.16 0.0394 1.56805e-5 0.000585462
sales ^ 2 -6.94594e-9 3.72614e-9 -1.86 0.0725 -1.45667e-8 6.7487e-10
─────────────────────────────────────────────────────────────────────────────────
```


## Probit regression
```jldoctest
Expand Down
10 changes: 10 additions & 0 deletions src/GLM.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,23 @@ module GLM
pivoted_cholesky!(A; kwargs...) = cholesky!(A, RowMaximum(); kwargs...)
end

@static if VERSION < v"1.7.0"
pivoted_qr!(A; kwargs...) = qr!(A, Val(true); kwargs...)
else
pivoted_qr!(A; kwargs...) = qr!(A, ColumnNorm(); kwargs...)
end

const COMMON_FIT_KWARGS_DOCS = """
- `dropcollinear::Bool=true`: Controls whether or not a model matrix
less-than-full rank is accepted.
If `true` (the default) the coefficient for redundant linearly dependent columns is
`0.0` and all associated statistics are set to `NaN`.
Typically from a set of linearly-dependent columns the last ones are identified as redundant
(however, the exact selection of columns identified as redundant is not guaranteed).
- `method::Symbol=:cholesky`: Controls which decomposition method to use.
If `method=:cholesky` (the default), then the `Cholesky` decomposition method will be used.
If `method=:qr`, then the `QR` decomposition method (which is more stable
but slower) will be used.
- `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
Such weights are equivalent to repeating each observation a number of times equal
to its weight. Do note that this interpretation gives equal point estimates but
Expand Down
3 changes: 3 additions & 0 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@ function Base.getproperty(mm::LinPredModel, f::Symbol)
return getfield(mm, f)
end
end

@deprecate installbeta!(p) (p.beta0 .= p.delbeta) false
@deprecate installbeta!(p, f) (p.beta0 .+= p.delbeta .* f) false
Loading

0 comments on commit c3fbc4a

Please sign in to comment.