Skip to content

Commit

Permalink
ENH Implement inverse_transform in DictionaryLearning, `SparseCod…
Browse files Browse the repository at this point in the history
…er` and `MiniBatchDictionaryLearning` (scikit-learn#30443)

Co-authored-by: Jérémie du Boisberranger <[email protected]>
  • Loading branch information
rflamary and jeremiedbb authored Dec 19, 2024
1 parent 7f0215f commit 4ad187a
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- :class:`~sklearn.decomposition.DictionaryLearning`,
  :class:`~sklearn.decomposition.SparseCoder` and
  :class:`~sklearn.decomposition.MiniBatchDictionaryLearning` now have an
  ``inverse_transform`` method. By :user:`Rémi Flamary <rflamary>`.
54 changes: 54 additions & 0 deletions sklearn/decomposition/_dict_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,44 @@ def transform(self, X):
check_is_fitted(self)
return self._transform(X, self.components_)

def _inverse_transform(self, code, dictionary):
    """Private method allowing to accommodate both DictionaryLearning and
    SparseCoder.

    Parameters
    ----------
    code : array-like of shape (n_samples, n_components)
        Sparse codes to map back to signal space. When ``split_sign`` is
        True, the positive and negative parts are expected to be
        concatenated, so the code has twice as many columns.

    dictionary : ndarray of shape (n_components, n_features)
        Dictionary atoms used to reconstruct the signals.

    Returns
    -------
    ndarray of shape (n_samples, n_features)
        Reconstructed data, i.e. ``code @ dictionary``.
    """
    code = check_array(code)

    # Number of columns the code must have; the positive/negative split
    # doubles the expected column count.
    expected_n_components = dictionary.shape[0]
    if self.split_sign:
        expected_n_components += expected_n_components
    # Use `!=` rather than `not ... ==`; also note the space after the
    # first sentence so the message does not read "dictionary.Expected".
    if code.shape[1] != expected_n_components:
        raise ValueError(
            "The number of components in the code is different from the "
            "number of components in the dictionary. "
            f"Expected {expected_n_components}, got {code.shape[1]}."
        )

    if self.split_sign:
        # Recombine the split representation: code = positive_part - negative_part.
        n_features = code.shape[1] // 2
        code = code[:, :n_features] - code[:, n_features:]

    return code @ dictionary

def inverse_transform(self, X):
    """Transform data back to its original space.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_components)
        Data to be transformed back. Must have the same number of
        components as the data used to train the model.

    Returns
    -------
    X_new : ndarray of shape (n_samples, n_features)
        Transformed data.
    """
    check_is_fitted(self)
    # Delegate to the shared helper with the fitted dictionary atoms.
    return self._inverse_transform(X, self.components_)


class SparseCoder(_BaseSparseCoding, BaseEstimator):
"""Sparse coding.
Expand Down Expand Up @@ -1329,6 +1367,22 @@ def transform(self, X, y=None):
"""
return super()._transform(X, self.dictionary)

def inverse_transform(self, X):
    """Transform data back to its original space.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_components)
        Data to be transformed back. Must have the same number of
        components as the data used to train the model.

    Returns
    -------
    X_new : ndarray of shape (n_samples, n_features)
        Transformed data.
    """
    # SparseCoder holds a user-provided dictionary, so no fitted-check is
    # needed (requires_fit is False for this estimator).
    return self._inverse_transform(X, self.dictionary)

def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
tags.requires_fit = False
Expand Down
20 changes: 17 additions & 3 deletions sklearn/decomposition/tests/test_dict_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,16 @@ def test_dict_learning_reconstruction():
)
code = dico.fit(X).transform(X)
assert_array_almost_equal(np.dot(code, dico.components_), X)
assert_array_almost_equal(dico.inverse_transform(code), X)

dico.set_params(transform_algorithm="lasso_lars")
code = dico.transform(X)
assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
assert_array_almost_equal(dico.inverse_transform(code), X, decimal=2)

# test error raised for wrong code size
with pytest.raises(ValueError, match="Expected 12, got 11."):
dico.inverse_transform(code[:, :-1])

# used to test lars here too, but there's no guarantee the number of
# nonzero atoms is right.
Expand Down Expand Up @@ -268,13 +274,18 @@ def test_dict_learning_split():
n_components, transform_algorithm="threshold", random_state=0
)
code = dico.fit(X).transform(X)
Xr = dico.inverse_transform(code)

dico.split_sign = True
split_code = dico.transform(X)

assert_array_almost_equal(
split_code[:, :n_components] - split_code[:, n_components:], code
)

Xr2 = dico.inverse_transform(split_code)
assert_array_almost_equal(Xr, Xr2)


def test_dict_learning_online_shapes():
rng = np.random.RandomState(0)
Expand Down Expand Up @@ -591,9 +602,12 @@ def test_sparse_coder_estimator():
V /= np.sum(V**2, axis=1)[:, np.newaxis]
coder = SparseCoder(
dictionary=V, transform_algorithm="lasso_lars", transform_alpha=0.001
).transform(X)
assert not np.all(coder == 0)
assert np.sqrt(np.sum((np.dot(coder, V) - X) ** 2)) < 0.1
)
code = coder.fit_transform(X)
Xr = coder.inverse_transform(code)
assert not np.all(code == 0)
assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1
np.testing.assert_allclose(Xr, np.dot(code, V))


def test_sparse_coder_estimator_clone():
Expand Down

0 comments on commit 4ad187a

Please sign in to comment.