From f067f5d4218239716c5dc59250aaa296cca9fbb5 Mon Sep 17 00:00:00 2001 From: Mousum Dutta <44145580+mousum-github@users.noreply.github.com> Date: Thu, 4 May 2023 12:26:04 +0530 Subject: [PATCH] `QR` decomposition method in GLM. (#507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * PowerLink refers to a class of techniques that use a power function * added a few more testcases * Fixing doctests for PowerLink * Making subheading smaller than heading. * Rounding off a test case result to 2 digits Test was failing in Julia Nightly as: 1) GLM.linkinv(InverseLink(), 10) was 0.01 while GLM.linkinv(PowerLink(-1), 10) was 0.010000000000000002 2) GLM.linkinv(InverseSquareLink(), 10) was -10.01 while GLM.linkinv(PowerLink(-2), 10) was -0.010000000000000002 Rounding off to 2 digits should solve this. Note: These tests were passing in other versions of Julia. * adding Optim dependancy to docs * No need to have optim as a dependancy for the entire package. Only need it for the docs * Rounding off GLM.linkinv(PowerLink(-1), 10) and GLM.linkinv(PowerLink(-2), 10 to 2 digits * Rounding off GLM.mueta(PowerLink(-1), 10) to make the test work on Julia Nightly * Using inexact equality comparison instead of rounding. * Apply suggestions from code review Correcting order of PowerLink, ProbitLink, SqrtLink. Co-authored-by: Milan Bouchet-Valat * Apply suggestions from code review Using ≈ instead of isapprox. Co-authored-by: Milan Bouchet-Valat * Apply suggestions from code review Using alphabetical order in references. Co-authored-by: Milan Bouchet-Valat * Update docs/src/examples.md Writing the example description in plain text. Co-authored-by: Milan Bouchet-Valat * Update docs/src/examples.md Removing extra space to be consistent with the style. Co-authored-by: Milan Bouchet-Valat * Adding the missing comma. * Update src/glmtools.jl Using a better variable name for 1 by lambda. Co-authored-by: Milan Bouchet-Valat * Update src/glmtools.jl Using inline function. Co-authored-by: Milan Bouchet-Valat * Apply suggestions from code review Making the doctest code concise. Co-authored-by: Milan Bouchet-Valat * Follwing alphabetical order in references * Adding suggested changes by nalimilan * Update test/runtests.jl Removing the R code. Co-authored-by: Milan Bouchet-Valat * Update test/runtests.jl Removing test of hashes. Co-authored-by: Milan Bouchet-Valat * replaced `isapprox` function by `≈` symbol whereever possible, added few more metrics to test, also replaced all `=` by `≈` for real numbers * Update src/glmfit.jl Since we are storing the link - the `GLM.Link` function can be defined uniformly for all link functions Co-authored-by: Milan Bouchet-Valat * Update src/glmfit.jl Changing the sentence to make it compact Co-authored-by: Milan Bouchet-Valat * updated test cases, added a few more metrics to check, move link member after d (distribution) and changed in corresponding constructor, remove updateμ! function for PowerLink and modified general updateμ! function. * Rephrased the description of PowerLink * Update docs/src/examples.md Adding break line and removing full stop Co-authored-by: Milan Bouchet-Valat * Removed `GLM.Link(mm::AbstractGLM) = mm.l`, redefine defination of PowerLink and updated test cases * update test case * Update src/glmtools.jl The suggestion is committed. Co-authored-by: Milan Bouchet-Valat * changed the `inverselink` function for PowerLink as suggested in PR * Add files via upload NIST datasets for testing linear models * added qrpred * use qrpred instead of cholpred * removed tests which use chol * added qr decomp with weights * added dropcollinear for qr decomp * added pivot cholesky for pivot qr * added dof for qr * added chol for qr * updated test cases related to QR method * added float conversion * fixed test * updated QR decompositions * removed redundant functions * updating documention * fixed bug * refactored DensePredQR constructor * changed DensePredQR constructor to take Abstract types * removed unused functions * removed unused functions * updated doc string * conditional pivoting in QR * conditional QR pivoting * updated example of Lineam model with QR decomposotion * removed nasty.csv file and commented code * Update src/GLM.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/lm.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * some changes suggested by @nalimilan * Changed pivoted_qr to pivoted_qr! * Changed :stable to :qr and :fast to cholesky * Changed in predict function in lm for qr method, also added some testcases * re arranged all tests related to linear models (Cholesky and QR * An intermidiate commit after merging with master branch * without predict with qr test case. need to re-write predict function in lm * updated example.md file * Added test cases with predictions for QR method * Removed some comments * removed comments, replace Rsub\I by inv(Rsub) * Update test/runtests.jl Co-authored-by: Milan Bouchet-Valat * Update test/runtests.jl Co-authored-by: Milan Bouchet-Valat * Update test/runtests.jl Co-authored-by: Milan Bouchet-Valat * Update test/runtests.jl Co-authored-by: Milan Bouchet-Valat * Update src/GLM.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/lm.jl Co-authored-by: Milan Bouchet-Valat * Update src/lm.jl Co-authored-by: Milan Bouchet-Valat * Update src/lm.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/lm.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Update test/runtests.jl Co-authored-by: Milan Bouchet-Valat * updated test cases, qrpred method * removed extra lines in lm file * updated predict! function in lm in compact form * Optimizing the performnce for full rank matrix * changed outer constructor to inner * Incorporated QR decomposition method in glm related functions * NegativeBinomial Parameter estimation test is failing in a few system. Trying a different minstepfac value * NegativeBinomial Parameter estimation test is failing in a few system. Trying another smaller different minstepfac value * NegativeBinomial Parameter estimation test is failing in a few systemse * added one more example to show when QR method works better * update for doc test failure * Fixing doc test * Fixing doc test failure * Fixing doc test failure * Fixing for doc test failure * updated example for :qr method * Updated example for QR decomposition * Updated example for QR decomposition * Updated example for QR decomposition * Updated instead of generic error * Trying to resolve the conflict * Updated example to show QR performs better * Update src/GLM.jl Co-authored-by: Milan Bouchet-Valat * Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat * Updated test cases. Test cases with cholesky and qr methods are written in compact form * Updated test case due to different pivot in different systems * replaced equality check to egality check in glmfit * updated example and added doctest * updated example without doctest * Avoiding multiple copyies of design matrix * added one more example in qr vs cholesky * updated example, remove unused constructor and added one more test case * updated example to pass doctest * updated example to pass doctest * updated example to pass doctest * updated example to pass doctest * example * updated example with rdchem data * Update docs/src/examples.md Co-authored-by: Bogumił Kamiński --------- Co-authored-by: Mousum Co-authored-by: ayushpatnaikgit Co-authored-by: Milan Bouchet-Valat Co-authored-by: Ayush Patnaik Co-authored-by: harsharora21 Co-authored-by: Bogumił Kamiński (cherry picked from commit a8016bd7b755306b4854742a2915b470fa51ba02) --- docs/src/examples.md | 123 ++++++++++ src/GLM.jl | 6 + src/glmfit.jl | 17 +- src/linpred.jl | 179 ++++++++++++-- src/lm.jl | 61 +++-- src/negbinfit.jl | 13 +- test/runtests.jl | 558 ++++++++++++++++++++++++++++--------------- 7 files changed, 707 insertions(+), 250 deletions(-) diff --git a/docs/src/examples.md b/docs/src/examples.md index 338d01c0..edbfdacf 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -86,6 +86,129 @@ julia> round.(vcov(ols); digits=5) 0.38889 -0.16667 -0.16667 0.08333 ``` +By default, the `lm` method uses the Cholesky factorization which is known as fast but numerically unstable, especially for ill-conditioned design matrices. Also, the Cholesky method aggressively detects multicollinearity. You can use the `method` keyword argument to apply a more stable QR factorization method. + +```jldoctest +julia> data = DataFrame(X=[1,2,3], Y=[2,4,7]); + +julia> ols = lm(@formula(Y ~ X), data; method=:qr) +LinearModel + +Y ~ 1 + X + +Coefficients: +───────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +───────────────────────────────────────────────────────────────────────── +(Intercept) -0.666667 0.62361 -1.07 0.4788 -8.59038 7.25704 +X 2.5 0.288675 8.66 0.0732 -1.16797 6.16797 +───────────────────────────────────────────────────────────────────────── +``` +The following example shows that QR decomposition works better for an ill-conditioned design matrix. The linear model with the QR method is a better model than the linear model with Cholesky decomposition method since the estimated loglikelihood of previous model is higher. +Note that, the condition number of the design matrix is quite high (≈ 3.52e7). + +``` +julia> X = [-0.4011512997627107 0.6368622664511552; + -0.0808472925693535 0.12835204623364604; + -0.16931095045225217 0.2687956795496601; + -0.4110745650568839 0.6526163576003452; + -0.4035951747670475 0.6407421349445884; + -0.4649907741370211 0.7382129928076485; + -0.15772708898883683 0.25040532268222715; + -0.38144358562952446 0.6055745630707645; + -0.1012787681395544 0.16078875117643368; + -0.2741403589052255 0.4352214984054432]; + +julia> y = [4.362866166172215, + 0.8792840060172619, + 1.8414020451091684, + 4.470790758717395, + 4.3894454833815395, + 5.0571760643993455, + 1.7154177874916376, + 4.148527704012107, + 1.1014936742570425, + 2.9815131910316097]; + +julia> modelqr = lm(X, y; method=:qr) +LinearModel + +Coefficients: +──────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +──────────────────────────────────────────────────────────────── +x1 5.00389 0.0560164 89.33 <1e-12 4.87472 5.13307 +x2 10.0025 0.035284 283.48 <1e-16 9.92109 10.0838 +──────────────────────────────────────────────────────────────── + +julia> modelchol = lm(X, y; method=:cholesky) +LinearModel + +Coefficients: +──────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +──────────────────────────────────────────────────────────────── +x1 5.17647 0.0849184 60.96 <1e-11 4.98065 5.37229 +x2 10.1112 0.053489 189.03 <1e-15 9.98781 10.2345 +──────────────────────────────────────────────────────────────── + +julia> loglikelihood(modelqr) > loglikelihood(modelchol) +true +``` +Since the Cholesky method with `dropcollinear = true` aggressively detects multicollinearity, +if you ever encounter multicollinearity in any GLM model with Cholesky, +it is worth trying the same model with QR decomposition. +The following example is taken from `Introductory Econometrics: A Modern Approach, 7e" by Jeffrey M. Wooldridge`. +The dataset is used to study the relationship between firm size—often measured by annual sales—and spending on +research and development (R&D). +The following shows that for the given model, +the Cholesky method detects multicollinearity in the design matrix with `dropcollinear=true` +and hence does not estimate all parameters as opposed to QR. + +```jldoctest +julia> y = [9.42190647125244, 2.084805727005, 3.9376676082611, 2.61976027488708, 4.04761934280395, 2.15384602546691, + 2.66240668296813, 4.39475727081298, 5.74520826339721, 3.59616208076477, 1.54265284538269, 2.59368276596069, + 1.80476510524749, 1.69270837306976, 3.04201245307922, 2.18389105796813, 2.73844122886657, 2.88134002685546, + 2.46666669845581, 3.80616021156311, 5.12149810791015, 6.80378007888793, 3.73669862747192, 1.21332454681396, + 2.54629635810852, 5.1612901687622, 1.86798071861267, 1.21465551853179, 6.31019830703735, 1.02669405937194, + 2.50623273849487, 1.5936255455017]; + +julia> x = [4570.2001953125, 2830, 596.799987792968, 133.600006103515, 42, 390, 93.9000015258789, 907.900024414062, + 19773, 39709, 2936.5, 2513.80004882812, 1124.80004882812, 921.599975585937, 2432.60009765625, 6754, + 1066.30004882812, 3199.89990234375, 150, 509.700012207031, 1452.69995117187, 8995, 1212.30004882812, + 906.599975585937, 2592, 201.5, 2617.80004882812, 502.200012207031, 2824, 292.200012207031, 7621, 1631.5]; + +julia> rdchem = DataFrame(rdintens=y, sales=x); + +julia> mdl = lm(@formula(rdintens ~ sales + sales^2), rdchem; method=:cholesky) +LinearModel + +rdintens ~ 1 + sales + :(sales ^ 2) + +Coefficients: +─────────────────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +─────────────────────────────────────────────────────────────────────────────────────── +(Intercept) 0.0 NaN NaN NaN NaN NaN +sales 0.000852509 0.000156784 5.44 <1e-05 0.000532313 0.00117271 +sales ^ 2 -1.97385e-8 4.56287e-9 -4.33 0.0002 -2.90571e-8 -1.04199e-8 +─────────────────────────────────────────────────────────────────────────────────────── + +julia> mdl = lm(@formula(rdintens ~ sales + sales^2), rdchem; method=:qr) +LinearModel + +rdintens ~ 1 + sales + :(sales ^ 2) + +Coefficients: +───────────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +───────────────────────────────────────────────────────────────────────────────── +(Intercept) 2.61251 0.429442 6.08 <1e-05 1.73421 3.49082 +sales 0.000300571 0.000139295 2.16 0.0394 1.56805e-5 0.000585462 +sales ^ 2 -6.94594e-9 3.72614e-9 -1.86 0.0725 -1.45667e-8 6.7487e-10 +───────────────────────────────────────────────────────────────────────────────── +``` + ## Probit regression ```jldoctest diff --git a/src/GLM.jl b/src/GLM.jl index 4b83d5dc..c69efd97 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -89,6 +89,12 @@ module GLM pivoted_cholesky!(A; kwargs...) = cholesky!(A, RowMaximum(); kwargs...) end + @static if VERSION < v"1.7.0" + pivoted_qr!(A; kwargs...) = qr!(A, Val(true); kwargs...) + else + pivoted_qr!(A; kwargs...) = qr!(A, ColumnNorm(); kwargs...) + end + include("linpred.jl") include("lm.jl") include("glmtools.jl") diff --git a/src/glmfit.jl b/src/glmfit.jl index 33f744e2..2c625d92 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -283,7 +283,8 @@ function nulldeviance(m::GeneralizedLinearModel) X = fill(1.0, length(y), hasint ? 1 : 0) nullm = fit(GeneralizedLinearModel, X, y, d, r.link; wts=wts, offset=offset, - dropcollinear=isa(m.pp.chol, CholeskyPivoted), + dropcollinear=ispivoted(m.pp), + method=decomposition_method(m.pp), maxiter=m.maxiter, minstepfac=m.minstepfac, atol=m.atol, rtol=m.rtol) dev = deviance(nullm) @@ -338,7 +339,8 @@ function nullloglikelihood(m::GeneralizedLinearModel) X = fill(1.0, length(y), hasint ? 1 : 0) nullm = fit(GeneralizedLinearModel, X, y, d, r.link; wts=wts, offset=offset, - dropcollinear=isa(m.pp.chol, CholeskyPivoted), + dropcollinear=ispivoted(m.pp), + method=decomposition_method(m.pp), maxiter=m.maxiter, minstepfac=m.minstepfac, atol=m.atol, rtol=m.rtol) ll = loglikelihood(nullm) @@ -570,6 +572,7 @@ function fit(::Type{M}, l::Link = canonicallink(d); dropcollinear::Bool = true, dofit::Bool = true, + method::Symbol = :cholesky, wts::AbstractVector{<:Real} = similar(y, 0), offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} @@ -580,7 +583,15 @@ function fit(::Type{M}, end rr = GlmResp(y, d, l, offset, wts) - res = M(rr, cholpred(X, dropcollinear), false) + + if method === :cholesky + res = M(rr, cholpred(X, dropcollinear), false) + elseif method === :qr + res = M(rr, qrpred(X, dropcollinear), false) + else + throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) + end + return dofit ? fit!(res; fitargs...) : res end diff --git a/src/linpred.jl b/src/linpred.jl index 770e4062..25425c78 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -35,7 +35,7 @@ end """ DensePredQR -A `LinPred` type with a dense, unpivoted QR decomposition of `X` +A `LinPred` type with a dense QR decomposition of `X` # Members @@ -43,28 +43,32 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X` - `beta0`: base coefficient vector of length `p` - `delbeta`: increment to coefficient vector, also of length `p` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method -- `qr`: a `QRCompactWY` object created from `X`, with optional row weights. +- `qr`: either a `QRCompactWY` or `QRPivoted` object created from `X`, with optional row weights. +- `scratchm1`: scratch Matrix{T} of the same size as `X` """ -mutable struct DensePredQR{T<:BlasReal} <: DensePred +mutable struct DensePredQR{T<:BlasReal,Q<:Union{QRCompactWY, QRPivoted}} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} - qr::QRCompactWY{T} - function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}) where T - n, p = size(X) - length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) - new{T}(X, beta0, zeros(T,p), zeros(T,p), qr(X)) - end - function DensePredQR{T}(X::Matrix{T}) where T + qr::Q + scratchm1::Matrix{T} + + function DensePredQR(X::AbstractMatrix, pivot::Bool=false) n, p = size(X) - new{T}(X, zeros(T, p), zeros(T,p), zeros(T,p), qr(X)) + T = typeof(float(zero(eltype(X)))) + Q = pivot ? QRPivoted : QRCompactWY + fX = float(X) + cfX = fX === X ? copy(fX) : fX + F = pivot ? pivoted_qr!(cfX) : qr!(cfX) + new{T,Q}(Matrix{T}(X), + zeros(T, p), + zeros(T, p), + zeros(T, p), + F, + similar(X, T)) end end -DensePredQR(X::Matrix, beta0::Vector) = DensePredQR{eltype(X)}(X, beta0) -DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2))) -convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) - """ delbeta!(p::LinPred, r::Vector) @@ -72,8 +76,71 @@ Evaluate and return `p.delbeta` the increment to the coefficient vector from res """ function delbeta! end -function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}) where T<:BlasReal + rnk = rank(p.qr.R) + rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) p.delbeta = p.qr\r + mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) + return p +end + +function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + rnk = rank(p.qr.R) + rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) + X = p.X + W = Diagonal(wt) + sqrtW = Diagonal(sqrt.(wt)) + mul!(p.scratchm1, sqrtW, X) + mul!(p.delbeta, X'W, r) + qnr = qr(p.scratchm1) + Rinv = inv(qnr.R) + p.delbeta = Rinv * Rinv' * p.delbeta + return p +end + +function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal + rnk = rank(p.qr.R) + if rnk == length(p.delbeta) + p.delbeta = p.qr\r + else + R = @view p.qr.R[:, 1:rnk] + Q = @view p.qr.Q[:, 1:size(R, 1)] + piv = p.qr.p + p.delbeta = zeros(size(p.delbeta)) + p.delbeta[1:rnk] = R \ Q'r + invpermute!(p.delbeta, piv) + end + mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) + return p +end + +function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + rnk = rank(p.qr.R) + X = p.X + W = Diagonal(wt) + sqrtW = Diagonal(sqrt.(wt)) + delbeta = p.delbeta + scratchm2 = similar(X, T) + mul!(p.scratchm1, sqrtW, X) + mul!(scratchm2, W, X) + mul!(delbeta, transpose(scratchm2), r) + + if rnk == length(p.delbeta) + qnr = qr(p.scratchm1) + Rinv = inv(qnr.R) + p.delbeta = Rinv * Rinv' * delbeta + else + qnr = pivoted_qr!(copy(p.scratchm1)) + R = @view qnr.R[1:rnk, 1:rnk] + Rinv = inv(R) + piv = qnr.p + permute!(delbeta, piv) + for k=(rnk+1):length(delbeta) + delbeta[k] = -zero(T) + end + p.delbeta[1:rnk] = Rinv * Rinv' * view(delbeta, 1:rnk) + invpermute!(delbeta, piv) + end return p end @@ -115,6 +182,7 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool) end cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot) +qrpred(X::AbstractMatrix, pivot::Bool=false) = DensePredQR(X, pivot) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -124,7 +192,6 @@ function cholesky(p::DensePredChol{T}) where T<:FP c = p.chol Cholesky(copy(cholfactors(c)), c.uplo, c.info) end -cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0) function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r)) @@ -227,7 +294,33 @@ end LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol) LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol +function invqr(x::DensePredQR{T,<: QRCompactWY}) where T + Q,R = qr(x.scratchm1) + Rinv = inv(R) + Rinv*Rinv' +end + +function invqr(x::DensePredQR{T,<: QRPivoted}) where T + Q,R,pv = pivoted_qr!(copy(x.scratchm1)) + rnk = rank(R) + p = length(x.delbeta) + if rnk == p + Rinv = inv(R) + xinv = Rinv*Rinv' + ipiv = invperm(pv) + return xinv[ipiv, ipiv] + else + Rsub = R[1:rnk, 1:rnk] + RsubInv = inv(Rsub) + xinv = fill(convert(T, NaN), (p,p)) + xinv[1:rnk, 1:rnk] = RsubInv*RsubInv' + ipiv = invperm(pv) + return xinv[ipiv, ipiv] + end +end + invchol(x::DensePred) = inv(cholesky!(x)) + function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T ch = x.chol rnk = rank(ch) @@ -242,8 +335,14 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T ipiv = invperm(ch.p) res[ipiv, ipiv] end + invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) -vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true)) + +inverse(x::DensePred) = invchol(x) +inverse(x::DensePredQR) = invqr(x) +inverse(x::SparsePredChol) = invchol(x) + +vcov(x::LinPredModel) = rmul!(inverse(x.pp), dispersion(x, true)) function cor(x::LinPredModel) Σ = vcov(x) @@ -289,4 +388,46 @@ dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2)) linpred_rank(x::LinPred) = length(x.beta0) -linpred_rank(x::DensePredChol{<:Any, <:CholeskyPivoted}) = x.chol.rank +linpred_rank(x::DensePredChol{<:Any, <:CholeskyPivoted}) = rank(x.chol) +linpred_rank(x::DensePredChol{<:Any, <:Cholesky}) = rank(x.chol.U) +linpred_rank(x::DensePredQR{<:Any,<:QRPivoted}) = rank(x.qr.R) + +ispivoted(x::LinPred) = false +ispivoted(x::DensePredChol{<:Any, <:CholeskyPivoted}) = true +ispivoted(x::DensePredQR{<:Any,<:QRPivoted}) = true + +decomposition_method(x::LinPred) = isa(x, DensePredQR) ? :qr : :cholesky + +_coltype(::ContinuousTerm{T}) where {T} = T + +# Function common to all LinPred models, but documented separately +# for LinearModel and GeneralizedLinearModel +function StatsBase.predict(mm::LinPredModel, data; + interval::Union{Symbol,Nothing}=nothing, + kwargs...) + Tables.istable(data) || + throw(ArgumentError("expected data in a Table, got $(typeof(data))")) + + f = formula(mm) + t = Tables.columntable(data) + cols, nonmissings = StatsModels.missing_omit(t, f.rhs) + newx = modelcols(f.rhs, cols) + prediction = Tables.allocatecolumn(Union{_coltype(f.lhs), Missing}, length(nonmissings)) + fill!(prediction, missing) + if interval === nothing + predict!(view(prediction, nonmissings), mm, newx; + interval=interval, kwargs...) + return prediction + else + # Finding integer indices once is faster + nonmissinginds = findall(nonmissings) + lower = Vector{Union{Float64, Missing}}(missing, length(nonmissings)) + upper = Vector{Union{Float64, Missing}}(missing, length(nonmissings)) + tup = (prediction=view(prediction, nonmissinginds), + lower=view(lower, nonmissinginds), + upper=view(upper, nonmissinginds)) + predict!(tup, mm, newx; + interval=interval, kwargs...) + return (prediction=prediction, lower=lower, upper=upper) + end +end diff --git a/src/lm.jl b/src/lm.jl index 2ee28022..8d96cace 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -122,10 +122,10 @@ const FIT_LM_DOC = """ """ """ - fit(LinearModel, formula, data, allowrankdeficient=false; - [wts::AbstractVector], dropcollinear::Bool=true) + fit(LinearModel, formula, data; + [wts::AbstractVector], dropcollinear::Bool=true, method::Symbol=:cholesky) fit(LinearModel, X::AbstractMatrix, y::AbstractVector; - wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true) + wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true, method::Symbol=:cholesky) Fit a linear model to data. @@ -134,23 +134,31 @@ $FIT_LM_DOC function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, allowrankdeficient_dep::Union{Bool,Nothing}=nothing; wts::AbstractVector{<:Real}=similar(y, 0), - dropcollinear::Bool=true) + dropcollinear::Bool=true, + method::Symbol=:cholesky) if allowrankdeficient_dep !== nothing @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep end - fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear))) + + if method === :cholesky + fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear))) + elseif method === :qr + fit!(LinearModel(LmResp(y, wts), qrpred(X, dropcollinear))) + else + throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) + end end """ - lm(formula, data, allowrankdeficient=false; - [wts::AbstractVector], dropcollinear::Bool=true) + lm(formula, data; + [wts::AbstractVector], dropcollinear::Bool=true, method::Symbol=:cholesky, lm(X::AbstractMatrix, y::AbstractVector; - wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true) + wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true, method::Symbol=:cholesky) Fit a linear model to data. -An alias for `fit(LinearModel, X, y; wts=wts, dropcollinear=dropcollinear)` +An alias for `fit(LinearModel, X, y; wts=wts, dropcollinear=dropcollinear, method=method)` $FIT_LM_DOC """ @@ -256,31 +264,32 @@ function predict(mm::LinearModel, newx::AbstractMatrix; end if interval === nothing return retmean - elseif mm.pp.chol isa CholeskyPivoted && + elseif mm.pp isa DensePredChol && + mm.pp.chol isa CholeskyPivoted && mm.pp.chol.rank < size(mm.pp.chol, 2) + throw(ArgumentError("prediction intervals are currently not implemented " * + "when some independent variables have been dropped " * + "from the model due to collinearity")) + elseif mm.pp isa DensePredQR && rank(mm.pp.qr.R) < size(mm.pp.qr.R, 2) throw(ArgumentError("prediction intervals are currently not implemented " * "when some independent variables have been dropped " * "from the model due to collinearity")) end length(mm.rr.wts) == 0 || error("prediction with confidence intervals not yet implemented for weighted regression") - chol = cholesky!(mm.pp) - # get the R matrix from the QR factorization - if chol isa CholeskyPivoted - ip = invperm(chol.p) - R = chol.U[ip, ip] - else - R = chol.U - end - residvar = ones(size(newx,2)) * deviance(mm)/dof_residual(mm) - if interval == :confidence - retvariance = (newx/R).^2 * residvar - elseif interval == :prediction - retvariance = (newx/R).^2 * residvar .+ deviance(mm)/dof_residual(mm) - else + + dev = deviance(mm) + dofr = dof_residual(mm) + ret = diag(newx*vcov(mm)*newx') + if interval == :prediction + ret .+= dev/dofr + elseif interval != :confidence error("only :confidence and :prediction intervals are defined") end - retinterval = quantile(TDist(dof_residual(mm)), (1. - level)/2) * sqrt.(retvariance) - (prediction = retmean, lower = retmean .+ retinterval, upper = retmean .- retinterval) + ret .= quantile(TDist(dofr), (1 - level)/2) .* sqrt.(ret) + retmean .= newx * coef(mm) + lower = retmean .+ ret + upper = retmean -+ ret + (prediction = retmean, lower = lower, upper = upper) end function confint(obj::LinearModel; level::Real=0.95) diff --git a/src/negbinfit.jl b/src/negbinfit.jl index 7c36127c..52f5cad5 100644 --- a/src/negbinfit.jl +++ b/src/negbinfit.jl @@ -60,6 +60,8 @@ In both cases, `link` may specify the link function # Keyword Arguments - `initialθ::Real=Inf`: Starting value for shape parameter θ. If it is `Inf` then the initial value will be estimated by fitting a Poisson distribution. +- `dropcollinear::Bool=true`: See `dropcollinear` for [`glm`](@ref) +- `method::Symbol=:cholesky`: See `method` for [`glm`](@ref) - `maxiter::Integer=30`: See `maxiter` for [`glm`](@ref) - `atol::Real=1.0e-6`: See `atol` for [`glm`](@ref) - `rtol::Real=1.0e-6`: See `rtol` for [`glm`](@ref) @@ -69,6 +71,8 @@ function negbin(F, D, args...; initialθ::Real=Inf, + dropcollinear::Bool=true, + method::Symbol=:cholesky, maxiter::Integer=30, minstepfac::Real=0.001, atol::Real=1e-6, @@ -103,10 +107,12 @@ function negbin(F, # fit a Poisson regression model if the user does not specify an initial θ if isinf(initialθ) regmodel = glm(F, D, Poisson(), args...; - maxiter=maxiter, atol=atol, rtol=rtol, verbose=verbose, kwargs...) + dropcollinear=dropcollinear, method=method, maxiter=maxiter, + atol=atol, rtol=rtol, verbose=verbose, kwargs...) else regmodel = glm(F, D, NegativeBinomial(initialθ), args...; - maxiter=maxiter, atol=atol, rtol=rtol, verbose=verbose, kwargs...) + dropcollinear=dropcollinear, method=method, maxiter=maxiter, + atol=atol, rtol=rtol, verbose=verbose, kwargs...) end μ = regmodel.model.rr.mu @@ -131,7 +137,8 @@ function negbin(F, end verbose && println("[ Alternating iteration ", i, ", θ = ", θ, " ]") regmodel = glm(F, D, NegativeBinomial(θ), args...; - maxiter=maxiter, atol=atol, rtol=rtol, verbose=verbose, kwargs...) + dropcollinear=dropcollinear, method=method, maxiter=maxiter, + atol=atol, rtol=rtol, verbose=verbose, kwargs...) μ = regmodel.model.rr.mu prevθ = θ θ = mle_for_θ(y, μ, wts; maxiter=maxiter, tol=rtol) diff --git a/test/runtests.jl b/test/runtests.jl index e89e3466..3820a7b7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,12 +21,14 @@ end linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y -@testset "lm" begin - lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form) - test_show(lm1) - @test isapprox(coef(lm1), linreg(form.Carb, form.OptDen)) +@testset "LM with $dmethod" for dmethod in (:cholesky, :qr) Σ = [6.136653061224592e-05 -9.464489795918525e-05 -9.464489795918525e-05 1.831836734693908e-04] + + lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form; method=dmethod) + test_show(lm1) + @test isapprox(coef(lm1), linreg(form.Carb, form.OptDen)) + @test isapprox(vcov(lm1), Σ) @test isapprox(cor(lm1.model), Diagonal(diag(Σ))^(-1/2)*Σ*Diagonal(diag(Σ))^(-1/2)) @test dof(lm1) == 3 @@ -41,16 +43,22 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y @test isapprox(aic(lm1), -36.409684288095946) @test isapprox(aicc(lm1), -24.409684288095946) @test isapprox(bic(lm1), -37.03440588041178) - lm2 = fit(LinearModel, hcat(ones(6), 10form.Carb), form.OptDen, true) - @test isa(lm2.pp.chol, CholeskyPivoted) - @test lm2.pp.chol.piv == [2, 1] + lm2 = fit(LinearModel, hcat(ones(6), 10form.Carb), form.OptDen, true; method=dmethod) + if dmethod == :cholesky + @test isa(lm2.pp.chol, CholeskyPivoted) + piv = lm2.pp.chol.piv + elseif dmethod == :qr + @test isa(lm2.pp.qr, QRPivoted) + piv = lm2.pp.qr.p + end + @test piv == [2, 1] @test isapprox(coef(lm1), coef(lm2) .* [1., 10.]) lm3 = lm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), contrasts=Dict(:x=>DummyCoding())) lm4 = lm(@formula(y~x), (y=1:25, x=categorical(repeat(1:5, 5)))) @test coef(lm3) == coef(lm4) ≈ [11, 1, 2, 3, 4] end -@testset "Linear Model Cook's Distance" begin +@testset "Cook's Distance in Linear Model with $dmethod" for dmethod in (:cholesky, :qr) st_df = DataFrame( Y=[6.4, 7.4, 10.4, 15.1, 12.3 , 11.4], XA=[1.5, 6.5, 11.5, 19.9, 17.0, 15.5], @@ -63,32 +71,32 @@ end ) # linear regression - t_lm_base = lm(@formula(Y ~ XA), st_df) + t_lm_base = lm(@formula(Y ~ XA), st_df; method=dmethod) @test isapprox(st_df.CooksD_base, cooksdistance(t_lm_base)) # linear regression, no intercept - t_lm_noint = lm(@formula(Y ~ XA +0), st_df) + t_lm_noint = lm(@formula(Y ~ XA +0), st_df; method=dmethod) @test isapprox(st_df.CooksD_noint, cooksdistance(t_lm_noint)) # linear regression, two collinear variables (Variance inflation factor ≊ 250) - t_lm_multi = lm(@formula(Y ~ XA + XB), st_df) + t_lm_multi = lm(@formula(Y ~ XA + XB), st_df; method=dmethod) @test isapprox(st_df.CooksD_multi, cooksdistance(t_lm_multi)) # linear regression, two full collinear variables (XC = 2 XA) hence should get the same results as the original # after pivoting - t_lm_colli = lm(@formula(Y ~ XA + XC), st_df, dropcollinear=true) + t_lm_colli = lm(@formula(Y ~ XA + XC), st_df; dropcollinear=true, method=dmethod) # Currently fails as the collinear variable is not dropped from `modelmatrix(obj)` @test_throws ArgumentError isapprox(st_df.CooksD_base, cooksdistance(t_lm_colli)) - end -@testset "linear model with weights" begin +@testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr) df = dataset("quantreg", "engel") N = nrow(df) df.weights = repeat(1:5, Int(N/5)) f = @formula(FoodExp ~ Income) - lm_model = lm(f, df, wts = df.weights) - glm_model = glm(f, df, Normal(), wts = df.weights) + + lm_model = lm(f, df, wts = df.weights; method=dmethod) + glm_model = glm(f, df, Normal(), wts = df.weights; method=dmethod) @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) @@ -100,10 +108,29 @@ end @test isapprox(loglikelihood(lm_model), -4353.946729075838) @test isapprox(loglikelihood(glm_model), -4353.946729075838) @test isapprox(nullloglikelihood(lm_model), -4984.892139711452) - @test isapprox(mean(residuals(lm_model)), -5.412966629787718) + @test isapprox(mean(residuals(lm_model)), -5.412966629787718) +end + +@testset "Linear model with dropcollinearity and $dmethod" for dmethod in (:cholesky, :qr) + # for full rank design matrix, both should give same results + lm1 = lm(@formula(OptDen ~ Carb), form; method=dmethod, dropcollinear=true) + lm2 = lm(@formula(OptDen ~ Carb), form; method=dmethod, dropcollinear=false) + @test coef(lm1) ≈ coef(lm2) + @test stderror(lm1) ≈ stderror(lm2) + @test r2(lm1) ≈ r2(lm2) + @test adjr2(lm1) ≈ adjr2(lm2) + @test vcov(lm1) ≈ vcov(lm2) + @test predict(lm1) ≈ predict(lm2) + @test loglikelihood(lm1) ≈ loglikelihood(lm2) + @test nullloglikelihood(lm1) ≈ nullloglikelihood(lm2) + @test residuals(lm1) ≈ residuals(lm2) + @test aic(lm1) ≈ aic(lm2) + @test aicc(lm1) ≈ aicc(lm2) + @test bic(lm1) ≈ bic(lm2) + @test GLM.dispersion(lm1.model) ≈ GLM.dispersion(lm2.model) end -@testset "rankdeficient" begin +@testset "Linear model with $dmethod and rankdeficieny" for dmethod in (:cholesky, :qr) rng = StableRNG(1234321) # an example of rank deficiency caused by a missing cell in a table dfrm = DataFrame([categorical(repeat(string.('A':'D'), inner = 6)), @@ -113,107 +140,134 @@ end X = ModelMatrix(ModelFrame(f, dfrm)).m y = X * (1:size(X, 2)) + 0.1 * randn(rng, size(X, 1)) inds = deleteat!(collect(1:length(y)), 7:8) - m1 = fit(LinearModel, X, y) + + m1 = fit(LinearModel, X, y; method=dmethod) @test isapprox(deviance(m1), 0.12160301538297297) Xmissingcell = X[inds, :] ymissingcell = y[inds] - @test_throws PosDefException m2 = fit(LinearModel, Xmissingcell, ymissingcell; dropcollinear=false) - m2p = fit(LinearModel, Xmissingcell, ymissingcell) - @test isa(m2p.pp.chol, CholeskyPivoted) - @test rank(m2p.pp.chol) == 11 - @test isapprox(deviance(m2p), 0.1215758392280204) - @test isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281, - 3.9661379199401257, 5.079410103608552, 6.1944618141188625, 0.0, 7.930328728005131, - 8.879994918604757, 2.986388408421915, 10.84972230524356, 11.844809275711485]) - @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[7,:]) - - m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true) + m2p = fit(LinearModel, Xmissingcell, ymissingcell; method=dmethod) + m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true; method=dmethod) @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * - "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) - @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) - @test rank(m2p_dep_pos.pp.chol) == rank(m2p.pp.chol) + "argument `dropcollinear` instead. Proceeding with positional argument value: true") + m2p_dep_pos_kw = fit(LinearModel, Xmissingcell, ymissingcell, true; method=dmethod, dropcollinear = false) + + if dmethod == :cholesky + @test_throws PosDefException m2 = fit(LinearModel, Xmissingcell, ymissingcell; + method = dmethod, dropcollinear=false) + @test isa(m2p.pp.chol, CholeskyPivoted) + @test isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281, + 3.9661379199401257, 5.079410103608552, 6.1944618141188625, + 0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, + 10.84972230524356, 11.844809275711485]) + + @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) + @test isa(m2p_dep_pos_kw.pp.chol, CholeskyPivoted) + elseif dmethod == :qr + @test_throws RankDeficientException m2 = fit(LinearModel, Xmissingcell, ymissingcell; + method = dmethod, dropcollinear=false) + @test isapprox(coef(m2p), [0.9772643585228962, 11.889730016918342, 3.027347397503282, + 3.9661379199401177, 5.079410103608539, 6.194461814118862, + -2.9863884084219015, 7.930328728005132, 8.87999491860477, + 0.0, 10.849722305243555, 11.844809275711487]) || + isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281, + 3.9661379199401257, 5.079410103608552, 6.1944618141188625, + 0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, + 10.84972230524356, 11.844809275711485]) + + @test isa(m2p.pp.qr, QRPivoted) + + @test isa(m2p_dep_pos.pp.qr, QRPivoted) + @test isa(m2p_dep_pos_kw.pp.qr, QRPivoted) + end + + indx = findfirst(item -> item == 0.0, coef(m2p)) + @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[indx,:]) + + @test GLM.linpred_rank(m2p.pp) == 11 + @test isapprox(deviance(m2p), 0.1215758392280204) + + @test GLM.linpred_rank(m2p_dep_pos.pp) == GLM.linpred_rank(m2p.pp) @test isapprox(deviance(m2p_dep_pos), deviance(m2p)) @test isapprox(coef(m2p_dep_pos), coef(m2p)) - m2p_dep_pos_kw = fit(LinearModel, Xmissingcell, ymissingcell, true; dropcollinear = false) - @test isa(m2p_dep_pos_kw.pp.chol, CholeskyPivoted) - @test rank(m2p_dep_pos_kw.pp.chol) == rank(m2p.pp.chol) + @test GLM.linpred_rank(m2p_dep_pos_kw.pp) == GLM.linpred_rank(m2p.pp) @test isapprox(deviance(m2p_dep_pos_kw), deviance(m2p)) @test isapprox(coef(m2p_dep_pos_kw), coef(m2p)) end -@testset "saturated linear model" begin - df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) - model = lm(@formula(y ~ x), df) +@testset "Saturated linear model with $dmethod" for dmethod in (:cholesky, :qr) + df1 = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) + + model = lm(@formula(y ~ x), df1; method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 1, 2] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf]) - model = lm(@formula(y ~ 0 + x), df) + model = lm(@formula(y ~ 0 + x), df1; method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 2, 3] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf]) - model = glm(@formula(y ~ x), df, Normal(), IdentityLink()) + model = glm(@formula(y ~ x), df1, Normal(), IdentityLink(); method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 1, 2] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf]) - model = glm(@formula(y ~ 0 + x), df, Normal(), IdentityLink()) + model = glm(@formula(y ~ 0 + x), df1, Normal(), IdentityLink(); method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 2, 3] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf]) # Saturated and rank-deficient model - df = DataFrame(x1=["a", "b", "c"], x2=["a", "b", "c"], y=[1, 2, 3]) - model = lm(@formula(y ~ x1 + x2), df) - ct = coeftable(model) - @test dof_residual(model) == 0 - @test dof(model) == 4 - @test isinf(GLM.dispersion(model.model)) - @test coef(model) ≈ [1, 1, 2, 0, 0] - @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - NaN NaN NaN NaN NaN - NaN NaN NaN NaN NaN]) - - # TODO: add tests similar to the one above once this model can be fitted - @test_broken glm(@formula(y ~ x1 + x2), df, Normal(), IdentityLink()) + df2 = DataFrame(x1=["a", "b", "c"], x2=["a", "b", "c"], y=[1, 2, 3]) + for model in (lm(@formula(y ~ x1 + x2), df2; method=dmethod), + glm(@formula(y ~ x1 + x2), df2, Normal(), IdentityLink(); method=dmethod)) + ct = coeftable(model) + @test dof_residual(model) == 0 + @test dof(model) == 4 + @test isinf(GLM.dispersion(model.model)) + @test coef(model) ≈ [1, 1, 2, 0, 0] + @test isequal(hcat(ct.cols[2:end]...), + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + NaN NaN NaN NaN NaN + NaN NaN NaN NaN NaN]) + end end -@testset "Linear model with no intercept" begin +@testset "Linear model without intercept and $dmethod" for dmethod in (:cholesky, :qr) @testset "Test with NoInt1 Dataset" begin # test case to test r2 for no intercept model # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat data = DataFrame(x = 60:70, y = 130:140) - mdl = lm(@formula(y ~ 0 + x), data) + + mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky) @test coef(mdl) ≈ [2.07438016528926] @test stderror(mdl) ≈ [0.165289256198347E-01] @test GLM.dispersion(mdl.model) ≈ 3.56753034006338 @@ -236,7 +290,8 @@ end # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt2.dat data = DataFrame(x = [4, 5, 6], y = [3, 4, 4]) - mdl = lm(@formula(y ~ 0 + x), data) + + mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky) @test coef(mdl) ≈ [0.727272727272727] @test stderror(mdl) ≈ [0.420827318078432E-01] @test GLM.dispersion(mdl.model) ≈ 0.369274472937998 @@ -249,14 +304,15 @@ end @test aic(mdl) ≈ 5.3199453808329 @test loglikelihood(mdl) ≈ -0.6599726904164597 @test nullloglikelihood(mdl) ≈ -8.179255266668315 - @test predict(mdl) ≈ [2.909090909090908, 3.636363636363635, 4.363636363636362] + @test predict(mdl) ≈ [2.909090909090908, 3.636363636363635, + 4.363636363636362] end @testset "Test with without formula" begin X = [4 5 6]' y = [3, 4, 4] - data = DataFrame(x = [4, 5, 6], y = [3, 4, 4]) - mdl1 = lm(@formula(y ~ 0 + x), data) + + mdl1 = lm(@formula(y ~ 0 + x), data; method=dmethod) mdl2 = lm(X, y) @test coef(mdl1) ≈ coef(mdl2) @@ -275,13 +331,24 @@ end end end +@testset "Linear model with QR method and NASTY data" begin + x = [1, 2, 3, 4, 5, 6, 7, 8, 9] + nasty = DataFrame(X = x, TINY = 1.0E-12*x) + mdl = lm(@formula(X ~ TINY), nasty; method=:qr) + + @test coef(mdl) ≈ [0, 1.0E+12] + @test dof(mdl) ≈ 3 + @test r2(mdl) ≈ 1.0 + @test adjr2(mdl) ≈ 1.0 +end + dobson = DataFrame(Counts = [18.,17,15,20,10,20,25,13,12], Outcome = categorical(repeat(string.('A':'C'), outer = 3)), Treatment = categorical(repeat(string.('a':'c'), inner = 3))) -@testset "Poisson GLM" begin +@testset "Poisson GLM with $dmethod" for dmethod in (:cholesky, :qr) gm1 = fit(GeneralizedLinearModel, @formula(Counts ~ 1 + Outcome + Treatment), - dobson, Poisson()) + dobson, Poisson(); method=dmethod) @test GLM.cancancel(gm1.model.rr) test_show(gm1) @test dof(gm1) == 5 @@ -301,25 +368,28 @@ admit = CSV.read(joinpath(glm_datadir, "admit.csv"), DataFrame) admit.rank = categorical(admit.rank) @testset "$distr with LogitLink" for distr in (Binomial, Bernoulli) - gm2 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + gre + gpa + rank), admit, distr()) - @test GLM.cancancel(gm2.model.rr) - test_show(gm2) - @test dof(gm2) == 6 - @test deviance(gm2) ≈ 458.5174924758994 - @test nulldeviance(gm2) ≈ 499.9765175549154 - @test loglikelihood(gm2) ≈ -229.25874623794968 - @test nullloglikelihood(gm2) ≈ -249.9882587774585 - @test isapprox(aic(gm2), 470.51749247589936) - @test isapprox(aicc(gm2), 470.7312329339146) - @test isapprox(bic(gm2), 494.4662797585473) - @test isapprox(coef(gm2), - [-3.9899786606380756, 0.0022644256521549004, 0.804037453515578, - -0.6754428594116578, -1.340203811748108, -1.5514636444657495]) + for dmethod in (:cholesky, :qr) + gm2 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + gre + gpa + rank), admit, distr(); + method=dmethod) + @test GLM.cancancel(gm2.model.rr) + test_show(gm2) + @test dof(gm2) == 6 + @test deviance(gm2) ≈ 458.5174924758994 + @test nulldeviance(gm2) ≈ 499.9765175549154 + @test loglikelihood(gm2) ≈ -229.25874623794968 + @test nullloglikelihood(gm2) ≈ -249.9882587774585 + @test isapprox(aic(gm2), 470.51749247589936) + @test isapprox(aicc(gm2), 470.7312329339146) + @test isapprox(bic(gm2), 494.4662797585473) + @test isapprox(coef(gm2), + [-3.9899786606380756, 0.0022644256521549004, 0.804037453515578, + -0.6754428594116578, -1.340203811748108, -1.5514636444657495]) + end end -@testset "Bernoulli ProbitLink" begin +@testset "Bernoulli ProbitLink with $dmethod" for dmethod in (:cholesky, :qr) gm3 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + gre + gpa + rank), admit, - Binomial(), ProbitLink()) + Binomial(), ProbitLink(); method=dmethod) test_show(gm3) @test !GLM.cancancel(gm3.model.rr) @test dof(gm3) == 6 @@ -335,9 +405,9 @@ end -0.4154125854823675, -0.8121458010130354, -0.9359047862425297]) end -@testset "Bernoulli CauchitLink" begin +@testset "Bernoulli CauchitLink with $dmethod" for dmethod in (:cholesky, :qr) gm4 = fit(GeneralizedLinearModel, @formula(admit ~ gre + gpa + rank), admit, - Binomial(), CauchitLink()) + Binomial(), CauchitLink(); method=dmethod) @test !GLM.cancancel(gm4.model.rr) test_show(gm4) @test dof(gm4) == 6 @@ -350,9 +420,9 @@ end @test isapprox(bic(gm4), 495.28889855776214) end -@testset "Bernoulli CloglogLink" begin +@testset "Bernoulli CloglogLink with $dmethod" for dmethod in (:cholesky, :qr) gm5 = fit(GeneralizedLinearModel, @formula(admit ~ gre + gpa + rank), admit, - Binomial(), CloglogLink()) + Binomial(), CloglogLink(); method=dmethod) @test !GLM.cancancel(gm5.model.rr) test_show(gm5) @test dof(gm5) == 6 @@ -373,17 +443,17 @@ end X = [ones(n) randn(rng, n)] y = logistic.(X*ones(2) + 1/10*randn(rng, n)) .> 1/2 - @test coeftable(glm(X, y, Binomial(), CloglogLink())).cols[4][2] < 0.05 + @test coeftable(glm(X, y, Binomial(), CloglogLink(); method=dmethod)).cols[4][2] < 0.05 end end ## Example with offsets from Venables & Ripley (2002, p.189) anorexia = CSV.read(joinpath(glm_datadir, "anorexia.csv"), DataFrame) -@testset "Normal offset" begin +@testset "Normal offset with $dmethod" for dmethod in (:cholesky, :qr) gm6 = fit(GeneralizedLinearModel, @formula(Postwt ~ 1 + Prewt + Treat), anorexia, - Normal(), IdentityLink(), offset=Array{Float64}(anorexia.Prewt)) - + Normal(), IdentityLink(), method=dmethod, + offset=Array{Float64}(anorexia.Prewt)) @test GLM.cancancel(gm6.model.rr) test_show(gm6) @test dof(gm6) == 5 @@ -401,10 +471,9 @@ anorexia = CSV.read(joinpath(glm_datadir, "anorexia.csv"), DataFrame) [13.3909581, 0.1611824, 1.8934926, 2.1333359]) end -@testset "Normal LogLink offset" begin +@testset "Normal LogLink offset with $dmethod" for dmethod in (:cholesky, :qr) gm7 = fit(GeneralizedLinearModel, @formula(Postwt ~ 1 + Prewt + Treat), anorexia, - Normal(), LogLink(), offset=anorexia.Prewt, rtol=1e-8) - + Normal(), LogLink(), method=dmethod, offset=anorexia.Prewt, rtol=1e-8) @test !GLM.cancancel(gm7.model.rr) test_show(gm7) @test isapprox(deviance(gm7), 3265.207242977156) @@ -419,9 +488,9 @@ end atol=1e-6) end -@testset "Poisson LogLink offset" begin +@testset "Poisson LogLink offset with $dmethod" for dmethod in (:cholesky, :qr) gm7p = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia, - Poisson(), LogLink(), offset=log.(anorexia.Prewt), rtol=1e-8) + Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt), rtol=1e-8) @test GLM.cancancel(gm7p.model.rr) test_show(gm7p) @@ -435,9 +504,9 @@ end [0.2091138392, 0.0025136984, 0.0297381842, 0.0324618795] end -@testset "Poisson LogLink offset with weights" begin +@testset "Poisson LogLink offset with weights with $dmethod" for dmethod in (:cholesky, :qr) gm7pw = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia, - Poisson(), LogLink(), offset=log.(anorexia.Prewt), + Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt), wts=repeat(1:4, outer=18), rtol=1e-8) @test GLM.cancancel(gm7pw.model.rr) @@ -456,8 +525,8 @@ end clotting = DataFrame(u = log.([5,10,15,20,30,40,60,80,100]), lot1 = [118,58,42,35,27,25,21,19,18]) -@testset "Gamma" begin - gm8 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma()) +@testset "Gamma with $dmethod" for dmethod in (:cholesky, :qr) + gm8 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(); method=dmethod) @test !GLM.cancancel(gm8.model.rr) @test isa(GLM.Link(gm8.model), InverseLink) test_show(gm8) @@ -474,8 +543,9 @@ clotting = DataFrame(u = log.([5,10,15,20,30,40,60,80,100]), @test isapprox(stderror(gm8), [0.00092754223, 0.000414957683], atol=1e-6) end -@testset "InverseGaussian" begin - gm8a = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, InverseGaussian()) +@testset "InverseGaussian with $dmethod" for dmethod in (:cholesky, :qr) + gm8a = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, InverseGaussian(); + method=dmethod) @test !GLM.cancancel(gm8a.model.rr) @test isa(GLM.Link(gm8a.model), InverseSquareLink) test_show(gm8a) @@ -492,9 +562,9 @@ end @test isapprox(stderror(gm8a), [0.0001675339726910311,9.468485015919463e-5], atol=1e-6) end -@testset "Gamma LogLink" begin +@testset "Gamma LogLink with $dmethod" for dmethod in (:cholesky, :qr) gm9 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), - rtol=1e-8, atol=0.0) + method=dmethod, rtol=1e-8, atol=0.0) @test !GLM.cancancel(gm9.model.rr) test_show(gm9) @test dof(gm9) == 3 @@ -510,9 +580,9 @@ end @test stderror(gm9) ≈ [0.19030107482720, 0.05530784660144] end -@testset "Gamma IdentityLink" begin - gm10 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), - rtol=1e-8, atol=0.0) +@testset "Gamma IdentityLink with $dmethod" for dmethod in (:cholesky, :qr) + gm10 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(); + method=dmethod, rtol=1e-8, atol=0.0) @test !GLM.cancancel(gm10.model.rr) test_show(gm10) @test dof(gm10) == 3 @@ -533,10 +603,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), rank = categorical(repeat(1:4, outer=2))) -@testset "Aggregated Binomial LogitLink" begin +@testset "Aggregated Binomial LogitLink with $dmethod" for dmethod in (:cholesky, :qr) for distr in (Binomial, Bernoulli) - gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=Array(admit_agr.count)) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(); + method=dmethod, wts=Array(admit_agr.count)) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -557,9 +627,9 @@ admit_agr2 = DataFrame(Any[[61., 151, 121, 67], [33., 54, 28, 12], categorical(1 admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off error -@testset "Binomial LogitLink aggregated" begin +@testset "Binomial LogitLink aggregated with $dmethod" for dmethod in (:cholesky, :qr) gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - wts=admit_agr2.count) + method=dmethod, wts=admit_agr2.count) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -574,9 +644,9 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count end # Weighted Gamma example (weights are totally made up) -@testset "Gamma InverseLink Weights" begin +@testset "Gamma InverseLink Weights with $dmethod" for dmethod in (:cholesky, :qr) gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + method=dmethod, wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -591,9 +661,9 @@ end end # Weighted Poisson example (weights are totally made up) -@testset "Poisson LogLink Weights" begin - gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) +@testset "Poisson LogLink Weights with $dmethod" for dmethod in (:cholesky, :qr) + gm17 = glm(@formula(Counts ~ Outcome + Treatment), dobson, Poisson(), + method=dmethod, wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -604,13 +674,14 @@ end @test isapprox(aicc(gm17), 181.39578038136298) @test isapprox(bic(gm17), 186.5854647596431) @test isapprox(coef(gm17), [3.1218557035404793, -0.5270435906931427,-0.40300384148562746, - -0.017850203824417415,-0.03507851122782909]) + -0.017850203824417415,-0.03507851122782909]) end # "quine" dataset discussed in Section 7.4 of "Modern Applied Statistics with S" quine = dataset("MASS", "quine") -@testset "NegativeBinomial LogLink Fixed θ" begin - gm18 = fit(GeneralizedLinearModel, @formula(Days ~ Eth+Sex+Age+Lrn), quine, NegativeBinomial(2.0), LogLink()) +@testset "NegativeBinomial LogLink Fixed θ with $dmethod" for dmethod in (:cholesky, :qr) + gm18 = fit(GeneralizedLinearModel, @formula(Days ~ Eth+Sex+Age+Lrn), quine, + NegativeBinomial(2.0), LogLink(), method=dmethod) @test !GLM.cancancel(gm18.model.rr) test_show(gm18) @test dof(gm18) == 8 @@ -641,11 +712,11 @@ end @test isapprox(bic(gm19), 1146.96262250587) @test isapprox(coef(gm19)[1:7], [-0.12737182842213654, -0.055871700989224705, 0.01561618806384601, - -0.041113722732799125, 0.024042387142113462, 0.04400234618798099, 0.035765875508382027, -]) + -0.041113722732799125, 0.024042387142113462, 0.04400234618798099, + 0.035765875508382027]) end -@testset "NegativeBinomial LogLink, θ to be estimated" begin +@testset "NegativeBinomial LogLink, θ to be estimated with Cholesky" begin gm20 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink()) test_show(gm20) @test dof(gm20) == 8 @@ -680,9 +751,25 @@ end end end -@testset "NegativeBinomial NegativeBinomialLink, θ to be estimated" begin +@testset "NegativeBinomial LogLink, θ to be estimated with QR" begin + gm20 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); method=:qr) + test_show(gm20) + @test dof(gm20) == 8 + @test isapprox(deviance(gm20), 167.9518430624193, rtol = 1e-7) + @test isapprox(nulldeviance(gm20), 195.28668602703388, rtol = 1e-7) + @test isapprox(loglikelihood(gm20), -546.57550938017, rtol = 1e-7) + @test isapprox(nullloglikelihood(gm20), -560.2429308624774, rtol = 1e-7) + @test isapprox(aic(gm20), 1109.15101876034) + @test isapprox(aicc(gm20), 1110.202113650851) + @test isapprox(bic(gm20), 1133.0198717340068) + @test isapprox(coef(gm20)[1:7], + [2.894527697811509, -0.5693411448715979, 0.08238813087070128, -0.4484636623590206, + 0.08805060372902418, 0.3569553124412582, 0.2921383118842893]) +end + +@testset "NegativeBinomial NegativeBinomialLink, θ to be estimated with $dmethod" for dmethod in (:cholesky, :qr) # the default/canonical link is NegativeBinomialLink - gm21 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine) + gm21 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine; method=dmethod) test_show(gm21) @test dof(gm21) == 8 @test isapprox(deviance(gm21), 168.0465485656672, rtol = 1e-7) @@ -697,9 +784,10 @@ end 0.01582155341041012, 0.029074956147127032, 0.023628812427424876]) end -@testset "Geometric LogLink" begin +@testset "Geometric LogLink with $dmethod" for dmethod in (:cholesky, :qr) # the default/canonical link is LogLink - gm22 = fit(GeneralizedLinearModel, @formula(Days ~ Eth + Sex + Age + Lrn), quine, Geometric()) + gm22 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, Geometric(); + method=dmethod) test_show(gm22) @test dof(gm22) == 8 @test deviance(gm22) ≈ 137.8781581814965 @@ -715,9 +803,11 @@ end 0.18553339017028742] end -@testset "Geometric is a special case of NegativeBinomial with θ = 1" begin - gm23 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, Geometric(), InverseLink()) - gm24 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(1), InverseLink()) +@testset "Geometric is a special case of NegativeBinomial with θ = 1 and $dmethod" for dmethod in (:cholesky, :qr) + gm23 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, Geometric(), + InverseLink(); method=dmethod) + gm24 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(1), + InverseLink(); method=dmethod) @test coef(gm23) ≈ coef(gm24) @test stderror(gm23) ≈ stderror(gm24) @test confint(gm23) ≈ confint(gm24) @@ -730,7 +820,7 @@ end @test predict(gm23) ≈ predict(gm24) end -@testset "GLM with no intercept" begin +@testset "GLM with no intercept with Cholesky" begin # Gamma with single numeric predictor nointglm1 = fit(GeneralizedLinearModel, @formula(lot1 ~ 0 + u), clotting, Gamma()) @test !hasintercept(nointglm1.model) @@ -785,13 +875,68 @@ end [0.0015910084415445974, 0.13185097176418983, 0.13016395889443858, 0.1336778089431681] end +@testset "GLM with no intercept with $dmethod" for dmethod in (:cholesky, :qr) + # Gamma with single numeric predictor + nointglm1 = glm(@formula(lot1 ~ 0 + u), clotting, Gamma(); method=dmethod) + @test !hasintercept(nointglm1.model) + @test !GLM.cancancel(nointglm1.model.rr) + @test isa(GLM.Link(nointglm1.model), InverseLink) + test_show(nointglm1) + @test dof(nointglm1) == 2 + @test deviance(nointglm1) ≈ 0.6629903395245351 + @test isnan(nulldeviance(nointglm1)) + @test loglikelihood(nointglm1) ≈ -32.60688972888763 + @test_throws DomainError nullloglikelihood(nointglm1) + @test aic(nointglm1) ≈ 69.21377945777526 + @test aicc(nointglm1) ≈ 71.21377945777526 + @test bic(nointglm1) ≈ 69.6082286124477 + @test coef(nointglm1) ≈ [0.009200201253724151] + @test GLM.dispersion(nointglm1.model, true) ≈ 0.10198331431820506 + @test stderror(nointglm1) ≈ [0.000979309363228589] + + # Bernoulli with numeric predictors + nointglm2 = glm(@formula(admit ~ 0 + gre + gpa), admit, Bernoulli(); method=dmethod) + @test !hasintercept(nointglm2.model) + @test GLM.cancancel(nointglm2.model.rr) + test_show(nointglm2) + @test dof(nointglm2) == 2 + @test deviance(nointglm2) ≈ 503.5584368354113 + @test nulldeviance(nointglm2) ≈ 554.5177444479574 + @test loglikelihood(nointglm2) ≈ -251.77921841770578 + @test nullloglikelihood(nointglm2) ≈ -277.2588722239787 + @test aic(nointglm2) ≈ 507.55843683541156 + @test aicc(nointglm2) ≈ 507.58866353566344 + @test bic(nointglm2) ≈ 515.5413659296275 + @test coef(nointglm2) ≈ [0.0015622695743609228, -0.4822556276412118] + @test stderror(nointglm2) ≈ [0.000987218133602179, 0.17522675354523715] + + # Poisson with categorical predictors, weights and offset + nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia, + Poisson(), LogLink(); method=dmethod, offset=log.(anorexia.Prewt), + wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false) + @test !hasintercept(nointglm3.model) + @test GLM.cancancel(nointglm3.model.rr) + test_show(nointglm3) + @test deviance(nointglm3) ≈ 90.17048668870225 + @test nulldeviance(nointglm3) ≈ 159.32999067102548 + @test loglikelihood(nointglm3) ≈ -610.3058020030296 + @test nullloglikelihood(nointglm3) ≈ -644.885553994191 + @test aic(nointglm3) ≈ 1228.6116040060592 + @test aicc(nointglm3) ≈ 1228.8401754346307 + @test bic(nointglm3) ≈ 1241.38343140962 + @test coef(nointglm3) ≈ + [-0.007008396492196935, 0.6038154674863438, 0.5654250124481003, 0.6931599989992452] + @test stderror(nointglm3) ≈ + [0.0015910084415445974, 0.13185097176418983, 0.13016395889443858, 0.1336778089431681] +end + @testset "Sparse GLM" begin rng = StableRNG(1) X = sprand(rng, 1000, 10, 0.01) β = randn(rng, 10) y = Bool[rand(rng) < logistic(x) for x in X * β] - gmsparse = fit(GeneralizedLinearModel, X, y, Binomial()) - gmdense = fit(GeneralizedLinearModel, Matrix(X), y, Binomial()) + gmsparse = fit(GeneralizedLinearModel, X, y, Binomial(); method=:cholesky) + gmdense = fit(GeneralizedLinearModel, Matrix(X), y, Binomial(); method=:cholesky) @test isapprox(deviance(gmsparse), deviance(gmdense)) @test isapprox(coef(gmsparse), coef(gmdense)) @@ -815,12 +960,13 @@ end end end -@testset "Predict" begin +@testset "Predict with $dmethod" for dmethod in (:cholesky, :qr) + # Binomial GLM rng = StableRNG(123) X = rand(rng, 10, 2) Y = logistic.(X * [3; -3]) - gm11 = fit(GeneralizedLinearModel, X, Y, Binomial()) + gm11 = fit(GeneralizedLinearModel, X, Y, Binomial(); method=dmethod) @test isapprox(predict(gm11), Y) @test predict(gm11) == fitted(gm11) @@ -852,7 +998,7 @@ end @test_throws ArgumentError predict(gm11, newX, offset=newoff) - gm12 = fit(GeneralizedLinearModel, X, Y, Binomial(), offset=off) + gm12 = fit(GeneralizedLinearModel, X, Y, Binomial(); method=dmethod, offset=off) @test_throws ArgumentError predict(gm12, newX) @test isapprox(predict(gm12, newX, offset=newoff), logistic.(newX * coef(gm12) .+ newoff)) @@ -861,15 +1007,17 @@ end d = DataFrame(X, :auto) d.y = Y - gm13 = fit(GeneralizedLinearModel, @formula(y ~ 0 + x1 + x2), d, Binomial()) + gm13 = fit(GeneralizedLinearModel, @formula(y ~ 0 + x1 + x2), d, Binomial(); + method=dmethod) @test predict(gm13) ≈ predict(gm13, d[:,[:x1, :x2]]) @test predict(gm13) ≈ predict(gm13, d) + @test predict(gm13) ≈ predict(gm11) newd = DataFrame(newX, :auto) predict(gm13, newd) Ylm = X * [0.8, 1.6] + 0.8randn(rng, 10) - mm = fit(LinearModel, X, Ylm) + mm = fit(LinearModel, X, Ylm; method=dmethod) pred1 = predict(mm, newX) pred2 = predict(mm, newX, interval=:confidence) se_pred = sqrt.(diag(newX*vcov(mm)*newX')) @@ -900,8 +1048,8 @@ end 1.0 2.0 1.0 -1.0] y = [1.0, 3.0, -2.0] - m1 = lm(x, y, dropcollinear=true) - m2 = lm(x, y, dropcollinear=false) + m1 = lm(x, y, dropcollinear=true, method=dmethod) + m2 = lm(x, y, dropcollinear=false, method=dmethod) p1 = predict(m1, x, interval=:confidence) p2 = predict(m2, x, interval=:confidence) @@ -916,8 +1064,8 @@ end 1.0 -1000.0 4.6 1.0 5000 2.4] y = [1.0, 3.0, -2.0, 4.5] - m1 = lm(x, y, dropcollinear=true) - m2 = lm(x, y, dropcollinear=false) + m1 = lm(x, y, dropcollinear=true, method=dmethod) + m2 = lm(x, y, dropcollinear=false, method=dmethod) p1 = predict(m1, x, interval=:confidence) p2 = predict(m2, x, interval=:confidence) @@ -935,8 +1083,8 @@ end 1.0 2.0 3.0 1.0 -1.0 0.0] y = [1.0, 3.0, -2.0] - m1 = lm(x, y) - m2 = lm(x[:, 1:2], y) + m1 = lm(x, y, method=dmethod) + m2 = lm(x[:, 1:2], y, method=dmethod) @test predict(m1) ≈ predict(m2) @test_broken predict(m1, interval=:confidence) ≈ @@ -947,10 +1095,10 @@ end @test_throws ArgumentError predict(m1, x, interval=:prediction) end -@testset "GLM confidence intervals" begin +@testset "GLM confidence intervals with $dmethod" for dmethod in (:cholesky, :qr) X = [fill(1,50) range(0,1, length=50)] Y = vec([0 0 0 1 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1]) - gm = fit(GeneralizedLinearModel, X, Y, Binomial()) + gm = fit(GeneralizedLinearModel, X, Y, Binomial(); method=dmethod) newX = [fill(1,5) [0.0000000, 0.2405063, 0.4936709, 0.7468354, 1.0000000]] @@ -973,7 +1121,7 @@ end @test_throws ArgumentError predict(gm, newX, interval=:undefined) end -@testset "F test comparing to null model" begin +@testset "F test with $dmethod comparing to null model" for dmethod in (:cholesky, :qr) d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.], Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2], Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1])) @@ -1229,7 +1377,7 @@ end @testset "Issue 153" begin X = [ones(10) randn(10)] - Test.@inferred cholesky(GLM.DensePredQR{Float64}(X)) + Test.@inferred cholesky(GLM.DensePredQR(X)) end @testset "Issue 224" begin @@ -1301,54 +1449,54 @@ end @test hasintercept(secondcolinterceptmod) end -@testset "Views" begin +@testset "Views with $dmethod method" for dmethod in (:cholesky, :qr) @testset "#444" begin X = randn(10, 2) y = X*ones(2) + randn(10) - @test coef(glm(X, y, Normal(), IdentityLink())) == - coef(glm(view(X, 1:10, :), view(y, 1:10), Normal(), IdentityLink())) + @test coef(glm(X, y, Normal(), IdentityLink(), method=dmethod)) == + coef(glm(view(X, 1:10, :), view(y, 1:10), Normal(), IdentityLink(); method=dmethod)) x, y, w = rand(100, 2), rand(100), rand(100) - lm1 = lm(x, y) - lm2 = lm(x, view(y, :)) - lm3 = lm(view(x, :, :), y) - lm4 = lm(view(x, :, :), view(y, :)) + lm1 = lm(x, y; method=dmethod) + lm2 = lm(x, view(y, :); method=dmethod) + lm3 = lm(view(x, :, :), y; method=dmethod) + lm4 = lm(view(x, :, :), view(y, :); method=dmethod) @test coef(lm1) == coef(lm2) == coef(lm3) == coef(lm4) - lm5 = lm(x, y, wts=w) - lm6 = lm(x, view(y, :), wts=w) - lm7 = lm(view(x, :, :), y, wts=w) - lm8 = lm(view(x, :, :), view(y, :), wts=w) - lm9 = lm(x, y, wts=view(w, :)) - lm10 = lm(x, view(y, :), wts=view(w, :)) - lm11 = lm(view(x, :, :), y, wts=view(w, :)) - lm12 = lm(view(x, :, :), view(y, :), wts=view(w, :)) + lm5 = lm(x, y, wts=w, method=dmethod) + lm6 = lm(x, view(y, :), method=dmethod, wts=w) + lm7 = lm(view(x, :, :), y, method=dmethod, wts=w) + lm8 = lm(view(x, :, :), view(y, :), method=dmethod, wts=w) + lm9 = lm(x, y, method=dmethod, wts=view(w, :)) + lm10 = lm(x, view(y, :), method=dmethod, wts=view(w, :)) + lm11 = lm(view(x, :, :), y, method=dmethod, wts=view(w, :)) + lm12 = lm(view(x, :, :), view(y, :), method=dmethod, wts=view(w, :)) @test coef(lm5) == coef(lm6) == coef(lm7) == coef(lm8) == coef(lm9) == coef(lm10) == coef(lm11) == coef(lm12) x, y, w = rand(100, 2), rand(Bool, 100), rand(100) - glm1 = glm(x, y, Binomial()) - glm2 = glm(x, view(y, :), Binomial()) - glm3 = glm(view(x, :, :), y, Binomial()) - glm4 = glm(view(x, :, :), view(y, :), Binomial()) + glm1 = glm(x, y, Binomial(), method=dmethod) + glm2 = glm(x, view(y, :), Binomial(), method=dmethod) + glm3 = glm(view(x, :, :), y, Binomial(), method=dmethod) + glm4 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod) @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4) - glm5 = glm(x, y, Binomial(), wts=w) - glm6 = glm(x, view(y, :), Binomial(), wts=w) - glm7 = glm(view(x, :, :), y, Binomial(), wts=w) - glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=w) - glm9 = glm(x, y, Binomial(), wts=view(w, :)) - glm10 = glm(x, view(y, :), Binomial(), wts=view(w, :)) - glm11 = glm(view(x, :, :), y, Binomial(), wts=view(w, :)) - glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=view(w, :)) + glm5 = glm(x, y, Binomial(), method=dmethod, wts=w) + glm6 = glm(x, view(y, :), Binomial(), method=dmethod, wts=w) + glm7 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=w) + glm8 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=w) + glm9 = glm(x, y, Binomial(), method=dmethod, wts=view(w, :)) + glm10 = glm(x, view(y, :), Binomial(), method=dmethod, wts=view(w, :)) + glm11 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=view(w, :)) + glm12 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=view(w, :)) @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12) end @testset "Views: #213, #470" begin xs = randn(46, 3) ys = randn(46) - glm_dense = lm(xs, ys) - glm_views = lm(@view(xs[1:end, 1:end]), ys) + glm_dense = lm(xs, ys; method=dmethod) + glm_views = lm(@view(xs[1:end, 1:end]), ys; method=dmethod) @test coef(glm_dense) == coef(glm_views) rows = 1:2:size(xs,1) cols = 1:2:size(xs,2) @@ -1356,14 +1504,14 @@ end xs_altview = @view xs[rows, cols] ys_altcopy = ys[rows] ys_altview = @view ys[rows] - glm_dense_alt = lm(xs_altcopy, ys_altcopy) - glm_views_alt = lm(xs_altview, ys_altview) + glm_dense_alt = lm(xs_altcopy, ys_altcopy; method=dmethod) + glm_views_alt = lm(xs_altview, ys_altview; method=dmethod) # exact equality fails in the final decimal digit for Julia 1.9 @test coef(glm_dense_alt) ≈ coef(glm_views_alt) end end -@testset "PowerLink" begin +@testset "PowerLink with $dmethod" for dmethod in (:cholesky, :qr) @testset "Functions related to PowerLink" begin @test GLM.linkfun(IdentityLink(), 10) ≈ GLM.linkfun(PowerLink(1), 10) @test GLM.linkfun(SqrtLink(), 10) ≈ GLM.linkfun(PowerLink(0.5), 10) @@ -1393,7 +1541,8 @@ end end trees = dataset("datasets", "trees") @testset "GLM with PowerLink" begin - mdl = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1 / 3); rtol=1.0e-12, atol=1.0e-12) + mdl = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1 / 3); + method=dmethod, rtol=1.0e-12, atol=1.0e-12) @test coef(mdl) ≈ [-0.05132238692134761, 0.01428684676273272, 0.15033126098228242] @test stderror(mdl) ≈ [0.224095414423756, 0.003342439119757, 0.005838227761632] atol=1.0e-8 @test dof(mdl) == 4 @@ -1404,8 +1553,10 @@ end @test predict(mdl)[1] ≈ 10.59735275421753 end @testset "Compare PowerLink(0) and LogLink" begin - mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(0)) - mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), LogLink()) + mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(0); + method=dmethod) + mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), LogLink(); + method=dmethod) @test coef(mdl1) ≈ coef(mdl2) @test stderror(mdl1) ≈ stderror(mdl2) @test dof(mdl1) == dof(mdl2) @@ -1418,8 +1569,10 @@ end @test predict(mdl1) ≈ predict(mdl2) end @testset "Compare PowerLink(0.5) and SqrtLink" begin - mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(0.5)) - mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), SqrtLink()) + mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(0.5); + method=dmethod) + mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), SqrtLink(); + method=dmethod) @test coef(mdl1) ≈ coef(mdl2) @test stderror(mdl1) ≈ stderror(mdl2) @test dof(mdl1) == dof(mdl2) @@ -1432,8 +1585,10 @@ end @test predict(mdl1) ≈ predict(mdl2) end @testset "Compare PowerLink(1) and IdentityLink" begin - mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1)) - mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), IdentityLink()) + mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1); + method=dmethod) + mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), IdentityLink(); + method=dmethod) @test coef(mdl1) ≈ coef(mdl2) @test stderror(mdl1) ≈ stderror(mdl2) @test dof(mdl1) == dof(mdl2) @@ -1447,7 +1602,7 @@ end end end -@testset "dropcollinear with GLMs" begin +@testset "dropcollinear in GLM with Cholesky" begin data = DataFrame(x1=[4, 5, 9, 6, 5], x2=[5, 3, 6, 7, 1], x3=[4.2, 4.6, 8.4, 6.2, 4.2], y=[14, 14, 24, 20, 11]) @@ -1603,3 +1758,8 @@ end # 3. 44 / wt == y @test GLM.loglik_obs(Binomial(), y, μ, wt, ϕ) ≈ GLM.logpdf(Binomial(Int(wt), μ), 44) end + +@testset "GLM with wrong option value in method argument" begin + @test_throws ArgumentError lm(@formula(OptDen ~ Carb), form; method=:pr) + @test_throws ArgumentError glm(@formula(OptDen ~ Carb), form, Normal(); method=:pr) +end