diff --git a/doc/whats_new/upcoming_changes/sklearn.linear_model/30100.fix.rst b/doc/whats_new/upcoming_changes/sklearn.linear_model/30100.fix.rst
new file mode 100644
index 0000000000000..4ec508ad984a2
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/sklearn.linear_model/30100.fix.rst
@@ -0,0 +1,5 @@
+- :class:`linear_model.LogisticRegression` and other linear models that
+  accept `solver="newton-cholesky"` now report the correct number of iterations
+  when they fall back to the `"lbfgs"` solver because of a rank-deficient
+  Hessian matrix.
+  By :user:`Olivier Grisel <ogrisel>`
diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py
index faed4b3697b1a..2967b91225fdb 100644
--- a/sklearn/linear_model/_glm/_newton_solver.py
+++ b/sklearn/linear_model/_glm/_newton_solver.py
@@ -184,7 +184,7 @@ def fallback_lbfgs_solve(self, X, y, sample_weight):
             method="L-BFGS-B",
             jac=True,
             options={
-                "maxiter": self.max_iter,
+                "maxiter": self.max_iter - self.iteration,
                 "maxls": 50,  # default is 20
                 "iprint": self.verbose - 1,
                 "gtol": self.tol,
@@ -192,7 +192,7 @@ def fallback_lbfgs_solve(self, X, y, sample_weight):
             },
             args=(X, y, sample_weight, self.l2_reg_strength, self.n_threads),
         )
-        self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
+        self.iteration += _check_optimize_result("lbfgs", opt_res)
         self.coef = opt_res.x
         self.converged = opt_res.status == 0
diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py
index 4f97eacaebf80..38325e4fe4cfd 100644
--- a/sklearn/linear_model/tests/test_logistic.py
+++ b/sklearn/linear_model/tests/test_logistic.py
@@ -12,7 +12,7 @@
     assert_array_equal,
 )
 from scipy import sparse
-from scipy.linalg import svd
+from scipy.linalg import LinAlgWarning, svd

 from sklearn import config_context
 from sklearn._loss import HalfMultinomialLoss
@@ -2374,3 +2374,45 @@ def test_multi_class_deprecated():
     lrCV = LogisticRegressionCV(multi_class="multinomial")
     with pytest.warns(FutureWarning, match=msg):
         lrCV.fit(X, y)
+
+
+def test_newton_cholesky_fallback_to_lbfgs(global_random_seed):
+    # A wide data matrix should lead to a rank-deficient Hessian matrix,
+    # hence make the Newton-Cholesky solver raise a warning and fall back
+    # to lbfgs.
+    X, y = make_classification(
+        n_samples=10, n_features=20, random_state=global_random_seed
+    )
+    C = 1e30  # very high C to nearly disable regularization
+
+    # Check that lbfgs can converge without any warning on this problem.
+    lr_lbfgs = LogisticRegression(solver="lbfgs", C=C)
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        lr_lbfgs.fit(X, y)
+    n_iter_lbfgs = lr_lbfgs.n_iter_[0]
+
+    assert n_iter_lbfgs >= 1
+
+    # Check that the Newton-Cholesky solver raises a warning and falls back
+    # to lbfgs. This should converge in the same number of iterations as the
+    # lbfgs call above, since for the problem at hand the Newton-Cholesky
+    # solver triggers the fallback before completing its first iteration.
+    lr_nc = LogisticRegression(solver="newton-cholesky", C=C)
+    with ignore_warnings(category=LinAlgWarning):
+        lr_nc.fit(X, y)
+    n_iter_nc = lr_nc.n_iter_[0]
+
+    assert n_iter_nc == n_iter_lbfgs
+
+    # Trying to fit the same model again with a small iteration budget should
+    # therefore raise a ConvergenceWarning:
+    lr_nc_limited = LogisticRegression(
+        solver="newton-cholesky", C=C, max_iter=n_iter_lbfgs - 1
+    )
+    with ignore_warnings(category=LinAlgWarning):
+        with pytest.warns(ConvergenceWarning, match="lbfgs failed to converge"):
+            lr_nc_limited.fit(X, y)
+    n_iter_nc_limited = lr_nc_limited.n_iter_[0]
+
+    assert n_iter_nc_limited == lr_nc_limited.max_iter - 1
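For reference, here is a minimal standalone sketch (not part of the diff) of how the fixed behaviour can be observed. The data shape and the large `C` mirror the new test above; the seed and the `max_iter` value are arbitrary illustration choices:

```python
import warnings

from scipy.linalg import LinAlgWarning

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# Wide data (more features than samples) makes the nearly unpenalized
# Hessian rank deficient, so the Newton-Cholesky solver emits a
# LinAlgWarning and falls back to lbfgs internally.
X, y = make_classification(n_samples=10, n_features=20, random_state=0)

clf = LogisticRegression(solver="newton-cholesky", C=1e30, max_iter=100)
with warnings.catch_warnings():
    warnings.simplefilter("ignore", LinAlgWarning)
    clf.fit(X, y)

# With this fix, n_iter_ counts the Newton iterations already spent plus
# the lbfgs iterations of the fallback, and the fallback's iteration
# budget is reduced accordingly so the total stays within max_iter.
print(clf.n_iter_)
```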