Skip to content

Commit

Permalink
Merge branch 'scikit-learn:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
virchan authored Aug 29, 2024
2 parents 2b32a50 + 6b3f9bd commit 5c46559
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 18 deletions.
8 changes: 8 additions & 0 deletions doc/whats_new/v1.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ Version 1.5.2

**release date of 1.5.2**

Changes impacting many modules
------------------------------

- |Fix| Fixed performance regression in a few Cython modules in
`sklearn._loss`, `sklearn.manifold`, `sklearn.metrics` and `sklearn.utils`,
which were built without OpenMP support.
  :pr:`29694` by :user:`Loïc Estève <lesteve>`.

Changelog
---------

Expand Down
10 changes: 10 additions & 0 deletions doc/whats_new/v1.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ Changelog
now accepts string format or callable to generate feature names. :pr:`28934` by
:user:`Marc Bresson <MarcBresson>`.

:mod:`sklearn.cross_decomposition`
..................................

- |Fix| :class:`cross_decomposition.PLSRegression` properly raises an error when
`n_components` is larger than `n_samples`. :pr:`29710` by `Thomas Fan`_.

:mod:`sklearn.datasets`
.......................

Expand Down Expand Up @@ -292,6 +298,10 @@ Changelog
will show the function name in the label.
:pr:`29158` by :user:`Yao Xiao <Charlie-XIAO>`.

- |Fix| :class:`preprocessing.PowerTransformer` now uses `scipy.special.inv_boxcox`
to output `nan` if the input of BoxCox's inverse is invalid.
:pr:`27875` by :user:`Xuefeng Xu <xuefeng-xu>`.

:mod:`sklearn.semi_supervised`
..............................

Expand Down
1 change: 1 addition & 0 deletions sklearn/_loss/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ _loss_pyx = custom_target(
py.extension_module(
'_loss',
_loss_pyx,
dependencies: [openmp_dep],
cython_args: cython_args,
install: true,
subdir: 'sklearn/_loss',
Expand Down
4 changes: 3 additions & 1 deletion sklearn/cross_decomposition/_pls.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,9 @@ def fit(self, X, y=None, Y=None):
# With PLSRegression n_components is bounded by the rank of (X.T X) see
# Wegelin page 25. With CCA and PLSCanonical, n_components is bounded
# by the rank of X and the rank of Y: see Wegelin page 12
rank_upper_bound = p if self.deflation_mode == "regression" else min(n, p, q)
rank_upper_bound = (
min(n, p) if self.deflation_mode == "regression" else min(n, p, q)
)
if n_components > rank_upper_bound:
raise ValueError(
f"`n_components` upper bound is {rank_upper_bound}. "
Expand Down
11 changes: 11 additions & 0 deletions sklearn/cross_decomposition/tests/test_pls.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,17 @@ def test_n_components_upper_bounds(Estimator):
est.fit(X, Y)


def test_n_components_upper_PLSRegression():
    """Check the validation of `n_components` upper bounds for PLSRegression."""
    # 20 samples with 64 features: the regression-mode bound is min(n, p) = 20.
    rng = np.random.RandomState(0)
    X, Y = rng.randn(20, 64), rng.randn(20, 3)
    err_msg = "`n_components` upper bound is 20. Got 30 instead. Reduce `n_components`."
    with pytest.raises(ValueError, match=err_msg):
        PLSRegression(n_components=30).fit(X, Y)


@pytest.mark.parametrize("n_samples, n_features", [(100, 10), (100, 200)])
def test_singular_value_helpers(n_samples, n_features, global_random_seed):
# Make sure SVD and power method give approximately the same results
Expand Down
2 changes: 1 addition & 1 deletion sklearn/manifold/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ py.extension_module(
py.extension_module(
'_barnes_hut_tsne',
'_barnes_hut_tsne.pyx',
dependencies: [np_dep],
dependencies: [np_dep, openmp_dep],
cython_args: cython_args,
subdir: 'sklearn/manifold',
install: true
Expand Down
4 changes: 2 additions & 2 deletions sklearn/metrics/_pairwise_distances_reduction/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ _argkmin_classmode_pyx = custom_target(
_argkmin_classmode = py.extension_module(
'_argkmin_classmode',
_argkmin_classmode_pyx,
dependencies: [np_dep],
dependencies: [np_dep, openmp_dep],
override_options: ['cython_language=cpp'],
cython_args: cython_args,
# XXX: for some reason -fno-sized-deallocation is needed otherwise there is
Expand All @@ -199,7 +199,7 @@ _radius_neighbors_classmode_pyx = custom_target(
_radius_neighbors_classmode = py.extension_module(
'_radius_neighbors_classmode',
_radius_neighbors_classmode_pyx,
dependencies: [np_dep],
dependencies: [np_dep, openmp_dep],
override_options: ['cython_language=cpp'],
cython_args: cython_args,
subdir: 'sklearn/metrics/_pairwise_distances_reduction',
Expand Down
1 change: 1 addition & 0 deletions sklearn/metrics/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ _dist_metrics = py.extension_module(
py.extension_module(
'_pairwise_fast',
['_pairwise_fast.pyx', metrics_cython_tree],
dependencies: [openmp_dep],
cython_args: cython_args,
subdir: 'sklearn/metrics',
install: true
Expand Down
15 changes: 2 additions & 13 deletions sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np
from scipy import optimize, sparse, stats
from scipy.special import boxcox
from scipy.special import boxcox, inv_boxcox

from ..base import (
BaseEstimator,
Expand Down Expand Up @@ -3376,7 +3376,7 @@ def inverse_transform(self, X):
X = self._scaler.inverse_transform(X)

inv_fun = {
"box-cox": self._box_cox_inverse_tranform,
"box-cox": inv_boxcox,
"yeo-johnson": self._yeo_johnson_inverse_transform,
}[self.method]
for i, lmbda in enumerate(self.lambdas_):
Expand All @@ -3385,17 +3385,6 @@ def inverse_transform(self, X):

return X

def _box_cox_inverse_tranform(self, x, lmbda):
"""Return inverse-transformed input x following Box-Cox inverse
transform with parameter lambda.
"""
if lmbda == 0:
x_inv = np.exp(x)
else:
x_inv = (x * lmbda + 1) ** (1 / lmbda)

return x_inv

def _yeo_johnson_inverse_transform(self, x, lmbda):
"""Return inverse-transformed input x following Yeo-Johnson inverse
transform with parameter lambda.
Expand Down
13 changes: 13 additions & 0 deletions sklearn/preprocessing/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2329,6 +2329,11 @@ def test_optimization_power_transformer(method, lmbda):
n_samples = 20000
X = rng.normal(loc=0, scale=1, size=(n_samples, 1))

if method == "box-cox":
# For box-cox, means that lmbda * y + 1 > 0 or y > - 1 / lmbda
# Clip the data here to make sure the inequality is valid.
X = np.clip(X, -1 / lmbda + 1e-5, None)

pt = PowerTransformer(method=method, standardize=False)
pt.lambdas_ = [lmbda]
X_inv = pt.inverse_transform(X)
Expand All @@ -2341,6 +2346,14 @@ def test_optimization_power_transformer(method, lmbda):
assert_almost_equal(1, X_inv_trans.std(), decimal=1)


def test_invserse_box_cox():
    # An invalid inverse Box-Cox input must yield nan instead of raising.
    transformer = PowerTransformer(method="box-cox", standardize=False)
    transformer.lambdas_ = [0.5]
    result = transformer.inverse_transform([[-2.1]])
    assert np.isnan(result)


def test_yeo_johnson_darwin_example():
# test from original paper "A new family of power transformations to
# improve normality or symmetry" by Yeo and Johnson.
Expand Down
2 changes: 1 addition & 1 deletion sklearn/utils/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ utils_extension_metadata = {
'sparsefuncs_fast':
{'sources': ['sparsefuncs_fast.pyx']},
'_cython_blas': {'sources': ['_cython_blas.pyx']},
'arrayfuncs': {'sources': ['arrayfuncs.pyx']},
'arrayfuncs': {'sources': ['arrayfuncs.pyx'], 'dependencies': [openmp_dep]},
'murmurhash': {
'sources': ['murmurhash.pyx', 'src' / 'MurmurHash3.cpp'],
},
Expand Down

0 comments on commit 5c46559

Please sign in to comment.