Skip to content

Commit

Permalink
ENH Use scipy.special.inv_boxcox in PowerTransformer (scikit-learn#27875
Browse files Browse the repository at this point in the history
)
  • Loading branch information
xuefeng-xu authored Aug 29, 2024
1 parent 602aaaa commit 6b3f9bd
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 13 deletions.
4 changes: 4 additions & 0 deletions doc/whats_new/v1.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ Changelog
will show the function name in the label.
:pr:`29158` by :user:`Yao Xiao <Charlie-XIAO>`.

- |Fix| :class:`preprocessing.PowerTransformer` now uses `scipy.special.inv_boxcox`
to compute the inverse Box-Cox transform, so `inverse_transform` outputs `nan`
when its input is outside the valid domain of the inverse.
:pr:`27875` by :user:`Xuefeng Xu <xuefeng-xu>`.

:mod:`sklearn.semi_supervised`
..............................

Expand Down
15 changes: 2 additions & 13 deletions sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np
from scipy import optimize, sparse, stats
from scipy.special import boxcox
from scipy.special import boxcox, inv_boxcox

from ..base import (
BaseEstimator,
Expand Down Expand Up @@ -3376,7 +3376,7 @@ def inverse_transform(self, X):
X = self._scaler.inverse_transform(X)

inv_fun = {
"box-cox": self._box_cox_inverse_tranform,
"box-cox": inv_boxcox,
"yeo-johnson": self._yeo_johnson_inverse_transform,
}[self.method]
for i, lmbda in enumerate(self.lambdas_):
Expand All @@ -3385,17 +3385,6 @@ def inverse_transform(self, X):

return X

def _box_cox_inverse_tranform(self, x, lmbda):
"""Return inverse-transformed input x following Box-Cox inverse
transform with parameter lambda.
"""
if lmbda == 0:
x_inv = np.exp(x)
else:
x_inv = (x * lmbda + 1) ** (1 / lmbda)

return x_inv

def _yeo_johnson_inverse_transform(self, x, lmbda):
"""Return inverse-transformed input x following Yeo-Johnson inverse
transform with parameter lambda.
Expand Down
13 changes: 13 additions & 0 deletions sklearn/preprocessing/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2329,6 +2329,11 @@ def test_optimization_power_transformer(method, lmbda):
n_samples = 20000
X = rng.normal(loc=0, scale=1, size=(n_samples, 1))

if method == "box-cox":
# The Box-Cox inverse is only defined where lmbda * y + 1 > 0, i.e.
# y > -1 / lmbda when lmbda > 0. Clip the data so this inequality holds.
X = np.clip(X, -1 / lmbda + 1e-5, None)

pt = PowerTransformer(method=method, standardize=False)
pt.lambdas_ = [lmbda]
X_inv = pt.inverse_transform(X)
Expand All @@ -2341,6 +2346,14 @@ def test_optimization_power_transformer(method, lmbda):
assert_almost_equal(1, X_inv_trans.std(), decimal=1)


def test_invserse_box_cox():
    """Check that the Box-Cox inverse yields NaN for out-of-domain input."""
    # With lambda = 0.5 the value -2.1 gives lmbda * x + 1 = -0.05 < 0,
    # which has no valid Box-Cox inverse.
    transformer = PowerTransformer(method="box-cox", standardize=False)
    transformer.lambdas_ = [0.5]
    inverted = transformer.inverse_transform([[-2.1]])
    assert np.isnan(inverted)


def test_yeo_johnson_darwin_example():
# test from original paper "A new family of power transformations to
# improve normality or symmetry" by Yeo and Johnson.
Expand Down

0 comments on commit 6b3f9bd

Please sign in to comment.