Skip to content

Commit

Permalink
ENH Implement inverse_transform in DictionaryLearning, `SparseCod…
Browse files Browse the repository at this point in the history
…er` and `MiniBatchDictionaryLearning` (scikit-learn#30443)

Co-authored-by: Jérémie du Boisberranger <[email protected]>
  • Loading branch information
rflamary and jeremiedbb authored Dec 19, 2024
1 parent 7f0215f commit 4ad187a
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- :class:`~sklearn.decomposition.DictionaryLearning`,
  :class:`~sklearn.decomposition.SparseCoder` and
  :class:`~sklearn.decomposition.MiniBatchDictionaryLearning` now have an
  ``inverse_transform`` method. By :user:`Rémi Flamary <rflamary>`.
54 changes: 54 additions & 0 deletions sklearn/decomposition/_dict_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,44 @@ def transform(self, X):
check_is_fitted(self)
return self._transform(X, self.components_)

def _inverse_transform(self, code, dictionary):
    """Private method allowing to accommodate both DictionaryLearning and
    SparseCoder.

    Parameters
    ----------
    code : array-like of shape (n_samples, n_components)
        Sparse codes to map back to signal space. When ``split_sign`` is
        True, the positive and negative parts are expected to be
        concatenated, so the code has twice as many columns.

    dictionary : ndarray of shape (n_components, n_features)
        Dictionary atoms used to reconstruct the signals.

    Returns
    -------
    ndarray of shape (n_samples, n_features)
        Reconstructed data, i.e. ``code @ dictionary``.
    """
    code = check_array(code)

    # Number of columns the code must have; the positive/negative split
    # doubles the expected column count.
    expected_n_components = dictionary.shape[0]
    if self.split_sign:
        expected_n_components += expected_n_components
    # Use `!=` rather than `not ... ==`; also note the space after the
    # first sentence so the message does not read "dictionary.Expected".
    if code.shape[1] != expected_n_components:
        raise ValueError(
            "The number of components in the code is different from the "
            "number of components in the dictionary. "
            f"Expected {expected_n_components}, got {code.shape[1]}."
        )

    if self.split_sign:
        # Recombine the split representation: code = positive_part - negative_part.
        n_features = code.shape[1] // 2
        code = code[:, :n_features] - code[:, n_features:]

    return code @ dictionary

def inverse_transform(self, X):
    """Transform data back to its original space.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_components)
        Data to be transformed back. Must have the same number of
        components as the data used to train the model.

    Returns
    -------
    X_new : ndarray of shape (n_samples, n_features)
        Transformed data.
    """
    check_is_fitted(self)
    # Delegate to the shared helper with the fitted dictionary atoms.
    return self._inverse_transform(X, self.components_)


class SparseCoder(_BaseSparseCoding, BaseEstimator):
"""Sparse coding.
Expand Down Expand Up @@ -1329,6 +1367,22 @@ def transform(self, X, y=None):
"""
return super()._transform(X, self.dictionary)

def inverse_transform(self, X):
    """Transform data back to its original space.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_components)
        Data to be transformed back. Must have the same number of
        components as the data used to train the model.

    Returns
    -------
    X_new : ndarray of shape (n_samples, n_features)
        Transformed data.
    """
    # SparseCoder holds a user-provided dictionary, so no fitted-check is
    # needed (requires_fit is False for this estimator).
    return self._inverse_transform(X, self.dictionary)

def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
tags.requires_fit = False
Expand Down
20 changes: 17 additions & 3 deletions sklearn/decomposition/tests/test_dict_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,16 @@ def test_dict_learning_reconstruction():
)
code = dico.fit(X).transform(X)
assert_array_almost_equal(np.dot(code, dico.components_), X)
assert_array_almost_equal(dico.inverse_transform(code), X)

dico.set_params(transform_algorithm="lasso_lars")
code = dico.transform(X)
assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
assert_array_almost_equal(dico.inverse_transform(code), X, decimal=2)

# test error raised for wrong code size
with pytest.raises(ValueError, match="Expected 12, got 11."):
dico.inverse_transform(code[:, :-1])

# used to test lars here too, but there's no guarantee the number of
# nonzero atoms is right.
Expand Down Expand Up @@ -268,13 +274,18 @@ def test_dict_learning_split():
n_components, transform_algorithm="threshold", random_state=0
)
code = dico.fit(X).transform(X)
Xr = dico.inverse_transform(code)

dico.split_sign = True
split_code = dico.transform(X)

assert_array_almost_equal(
split_code[:, :n_components] - split_code[:, n_components:], code
)

Xr2 = dico.inverse_transform(split_code)
assert_array_almost_equal(Xr, Xr2)


def test_dict_learning_online_shapes():
rng = np.random.RandomState(0)
Expand Down Expand Up @@ -591,9 +602,12 @@ def test_sparse_coder_estimator():
V /= np.sum(V**2, axis=1)[:, np.newaxis]
coder = SparseCoder(
dictionary=V, transform_algorithm="lasso_lars", transform_alpha=0.001
).transform(X)
assert not np.all(coder == 0)
assert np.sqrt(np.sum((np.dot(coder, V) - X) ** 2)) < 0.1
)
code = coder.fit_transform(X)
Xr = coder.inverse_transform(code)
assert not np.all(code == 0)
assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1
np.testing.assert_allclose(Xr, np.dot(code, V))


def test_sparse_coder_estimator_clone():
Expand Down

0 comments on commit 4ad187a

Please sign in to comment.