Skip to content

Commit

Permalink
MAINT remove deprecated params for 0.13 (#1112)
Browse files: browse the repository at this point in the history
  • Loading branch information
glemaitre authored Dec 20, 2024
1 parent ffa98a3 commit 03078d5
Show file tree
Hide file tree
Showing 19 changed files with 666 additions and 1,015 deletions.
1 change: 0 additions & 1 deletion imblearn/combine/_smote_enn.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ def _validate_estimator(self):
self.smote_ = SMOTE(
sampling_strategy=self.sampling_strategy,
random_state=self.random_state,
n_jobs=self.n_jobs,
)

if self.enn is not None:
Expand Down
1 change: 0 additions & 1 deletion imblearn/combine/_smote_tomek.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ def _validate_estimator(self):
self.smote_ = SMOTE(
sampling_strategy=self.sampling_strategy,
random_state=self.random_state,
n_jobs=self.n_jobs,
)

if self.tomek is not None:
Expand Down
2 changes: 0 additions & 2 deletions imblearn/combine/tests/test_smote_enn.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,10 @@ def test_parallelisation():
smt = SMOTEENN(random_state=RND_SEED)
smt._validate_estimator()
assert smt.n_jobs is None
assert smt.smote_.n_jobs is None
assert smt.enn_.n_jobs is None

# Check if job count is set
smt = SMOTEENN(random_state=RND_SEED, n_jobs=8)
smt._validate_estimator()
assert smt.n_jobs == 8
assert smt.smote_.n_jobs == 8
assert smt.enn_.n_jobs == 8
2 changes: 0 additions & 2 deletions imblearn/combine/tests/test_smote_tomek.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,10 @@ def test_parallelisation():
smt = SMOTETomek(random_state=RND_SEED)
smt._validate_estimator()
assert smt.n_jobs is None
assert smt.smote_.n_jobs is None
assert smt.tomek_.n_jobs is None

# Check if job count is set
smt = SMOTETomek(random_state=RND_SEED, n_jobs=8)
smt._validate_estimator()
assert smt.n_jobs == 8
assert smt.smote_.n_jobs == 8
assert smt.tomek_.n_jobs == 8
57 changes: 2 additions & 55 deletions imblearn/ensemble/_bagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,17 @@
import numpy as np
from sklearn.base import clone
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble._bagging import _parallel_decision_function
from sklearn.ensemble._base import _partition_estimators
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._param_validation import HasMethods, Interval, StrOptions
from sklearn.utils.fixes import parse_version
from sklearn.utils.metaestimators import available_if
from sklearn.utils.parallel import Parallel, delayed
from sklearn.utils.validation import check_is_fitted

from ..pipeline import Pipeline
from ..under_sampling import RandomUnderSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import Substitution, check_sampling_strategy, check_target_type
from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
from ..utils._sklearn_compat import _fit_context, sklearn_version, validate_data
from ._common import _bagging_parameter_constraints, _estimator_has
from ..utils._sklearn_compat import _fit_context, sklearn_version
from ._common import _bagging_parameter_constraints


@Substitution(
Expand Down Expand Up @@ -356,54 +351,6 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
# None.
return super()._fit(X, y, self.max_samples)

# TODO: remove when minimum supported version of scikit-learn is 1.1
@available_if(_estimator_has("decision_function"))
def decision_function(self, X):
"""Average of the decision functions of the base classifiers.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The training input samples. Sparse matrices are accepted only if
they are supported by the base estimator.
Returns
-------
score : ndarray of shape (n_samples, k)
The decision function of the input samples. The columns correspond
to the classes in sorted order, as they appear in the attribute
``classes_``. Regression and binary classification are special
cases with ``k == 1``, otherwise ``k==n_classes``.
"""
check_is_fitted(self)

# Check data
X = validate_data(
self,
X=X,
accept_sparse=["csr", "csc"],
dtype=None,
ensure_all_finite=False,
reset=False,
)

# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_decision_function)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
)
for i in range(n_jobs)
)

# Reduce
decisions = sum(all_decisions) / self.n_estimators

return decisions

@property
def base_estimator_(self):
"""Attribute for older sklearn version compatibility."""
Expand Down
15 changes: 0 additions & 15 deletions imblearn/ensemble/_easy_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import copy
import numbers
import warnings

import numpy as np
from sklearn.base import clone
Expand Down Expand Up @@ -248,20 +247,6 @@ def _validate_estimator(self, default=AdaBoostClassifier(algorithm="SAMME")):
)
self.estimator_ = Pipeline([("sampler", sampler), ("classifier", estimator)])

# TODO: remove when supporting scikit-learn>=1.2
@property
def n_features_(self):
"""Number of features when ``fit`` is performed."""
warnings.warn(
(
"`n_features_` was deprecated in scikit-learn 1.0. This attribute will "
"not be accessible when the minimum supported version of scikit-learn "
"is 1.2."
),
FutureWarning,
)
return self.n_features_in_

@_fit_context(prefer_skip_nested_validation=False)
def fit(self, X, y):
"""Build a Bagging ensemble of estimators from the training set (X, y).
Expand Down
59 changes: 8 additions & 51 deletions imblearn/ensemble/_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,10 +437,10 @@ def __init__(
max_features="sqrt",
max_leaf_nodes=None,
min_impurity_decrease=0.0,
bootstrap="warn",
bootstrap=False,
oob_score=False,
sampling_strategy="warn",
replacement="warn",
sampling_strategy="all",
replacement=True,
n_jobs=None,
random_state=None,
verbose=0,
Expand Down Expand Up @@ -498,7 +498,7 @@ def _validate_estimator(self, default=DecisionTreeClassifier()):

self.base_sampler_ = RandomUnderSampler(
sampling_strategy=self._sampling_strategy,
replacement=self._replacement,
replacement=self.replacement,
)

def _make_sampler_estimator(self, random_state=None):
Expand Down Expand Up @@ -544,49 +544,6 @@ def fit(self, X, y, sample_weight=None):
The fitted instance.
"""
self._validate_params()
# TODO: remove in 0.13
if self.sampling_strategy == "warn":
warn(
(
"The default of `sampling_strategy` will change from `'auto'` to"
" `'all'` in version 0.13. This change will follow the"
" implementation proposed in the original paper. Set to `'all'` to"
" silence this warning and adopt the future behaviour."
),
FutureWarning,
)
self._sampling_strategy = "auto"
else:
self._sampling_strategy = self.sampling_strategy

if self.replacement == "warn":
warn(
(
"The default of `replacement` will change from `False` to `True` in"
" version 0.13. This change will follow the implementation proposed"
" in the original paper. Set to `True` to silence this warning and"
" adopt the future behaviour."
),
FutureWarning,
)
self._replacement = False
else:
self._replacement = self.replacement

if self.bootstrap == "warn":
warn(
(
"The default of `bootstrap` will change from `True` to `False` in"
" version 0.13. This change will follow the implementation proposed"
" in the original paper. Set to `False` to silence this warning and"
" adopt the future behaviour."
),
FutureWarning,
)
self._bootstrap = True
else:
self._bootstrap = self.bootstrap

# Validate or convert input data
if issparse(y):
raise ValueError("sparse multilabel-indicator for y is not supported.")
Expand Down Expand Up @@ -657,7 +614,7 @@ def fit(self, X, y, sample_weight=None):
if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
y_encoded = np.ascontiguousarray(y_encoded, dtype=DOUBLE)

if isinstance(self._sampling_strategy, dict):
if isinstance(self.sampling_strategy, dict):
self._sampling_strategy = {
np.where(self.classes_[0] == key)[0][0]: value
for key, value in check_sampling_strategy(
Expand All @@ -667,7 +624,7 @@ def fit(self, X, y, sample_weight=None):
).items()
}
else:
self._sampling_strategy = self._sampling_strategy
self._sampling_strategy = self.sampling_strategy

if expanded_class_weight is not None:
if sample_weight is not None:
Expand All @@ -683,7 +640,7 @@ def fit(self, X, y, sample_weight=None):
# Check parameters
self._validate_estimator()

if not self._bootstrap and self.oob_score:
if not self.bootstrap and self.oob_score:
raise ValueError("Out of bag estimation only available if bootstrap=True")

random_state = check_random_state(self.random_state)
Expand Down Expand Up @@ -735,7 +692,7 @@ def fit(self, X, y, sample_weight=None):
delayed(_local_parallel_build_trees)(
s,
t,
self._bootstrap,
self.bootstrap,
X,
y_encoded,
sample_weight,
Expand Down
18 changes: 0 additions & 18 deletions imblearn/ensemble/tests/test_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,24 +217,6 @@ def test_balanced_random_forest_oob_binomial(ratio):
assert np.abs(erf.oob_score_ - 0.5) < 0.1


# TODO: remove in 0.13
def test_balanced_random_forest_change_behaviour(imbalanced_dataset):
"""Check that we raise a change of behaviour for the parameters `sampling_strategy`
and `replacement`.
"""
estimator = BalancedRandomForestClassifier(sampling_strategy="all", bootstrap=False)
with pytest.warns(FutureWarning, match="The default of `replacement`"):
estimator.fit(*imbalanced_dataset)
estimator = BalancedRandomForestClassifier(replacement=True, bootstrap=False)
with pytest.warns(FutureWarning, match="The default of `sampling_strategy`"):
estimator.fit(*imbalanced_dataset)
estimator = BalancedRandomForestClassifier(
sampling_strategy="all", replacement=True
)
with pytest.warns(FutureWarning, match="The default of `bootstrap`"):
estimator.fit(*imbalanced_dataset)


@pytest.mark.skipif(
parse_version(sklearn_version.base_version) < parse_version("1.4"),
reason="scikit-learn should be >= 1.4",
Expand Down
26 changes: 1 addition & 25 deletions imblearn/over_sampling/_adasyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,19 @@
# License: MIT

import numbers
import warnings

import numpy as np
from scipy import sparse
from sklearn.utils import _safe_indexing, check_random_state
from sklearn.utils._param_validation import HasMethods, Interval

from ..utils import Substitution, check_neighbors_object
from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
from ..utils._docstring import _random_state_docstring
from .base import BaseOverSampler


@Substitution(
sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class ADASYN(BaseOverSampler):
Expand Down Expand Up @@ -50,14 +48,6 @@ class ADASYN(BaseOverSampler):
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
any compatible class.
{n_jobs}
.. deprecated:: 0.10
`n_jobs` has been deprecated in 0.10 and will be removed in 0.12.
It was previously used to set `n_jobs` of nearest neighbors
algorithm. From now on, you can pass an estimator where `n_jobs` is
already set instead.
Attributes
----------
sampling_strategy_ : dict
Expand Down Expand Up @@ -128,7 +118,6 @@ class ADASYN(BaseOverSampler):
Interval(numbers.Integral, 1, None, closed="left"),
HasMethods(["kneighbors", "kneighbors_graph"]),
],
"n_jobs": [numbers.Integral, None],
}

def __init__(
Expand All @@ -137,12 +126,10 @@ def __init__(
sampling_strategy="auto",
random_state=None,
n_neighbors=5,
n_jobs=None,
):
super().__init__(sampling_strategy=sampling_strategy)
self.random_state = random_state
self.n_neighbors = n_neighbors
self.n_jobs = n_jobs

def _validate_estimator(self):
"""Create the necessary objects for ADASYN"""
Expand All @@ -151,17 +138,6 @@ def _validate_estimator(self):
)

def _fit_resample(self, X, y):
# FIXME: to be removed in 0.12
if self.n_jobs is not None:
warnings.warn(
(
"The parameter `n_jobs` has been deprecated in 0.10 and will be"
" removed in 0.12. You can pass an nearest neighbors estimator"
" where `n_jobs` is already set instead."
),
FutureWarning,
)

self._validate_estimator()
random_state = check_random_state(self.random_state)

Expand Down
Loading

0 comments on commit 03078d5

Please sign in to comment.