From 0d9d09d426dc72030880c01019e76230e5c14b8d Mon Sep 17 00:00:00 2001 From: William Hicks Date: Thu, 23 Jan 2025 14:18:27 -0800 Subject: [PATCH] Enable CPU execution of IncrementalPCA --- .../cuml/decomposition/incremental_pca.py | 46 +++-- python/cuml/cuml/internals/global_settings.py | 6 +- .../cuml/cuml/tests/test_incremental_pca.py | 169 ++++++++++-------- 3 files changed, 128 insertions(+), 93 deletions(-) diff --git a/python/cuml/cuml/decomposition/incremental_pca.py b/python/cuml/cuml/decomposition/incremental_pca.py index 925219d0bc..b36012abc8 100644 --- a/python/cuml/cuml/decomposition/incremental_pca.py +++ b/python/cuml/cuml/decomposition/incremental_pca.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,6 +20,11 @@ from cuml.internals.input_utils import input_to_cupy_array from cuml.common import input_to_cuml_array from cuml import Base +from cuml.internals.api_decorators import ( + device_interop_preparation, + enable_device_interop, +) +from cuml.internals.global_settings import GlobalSettings from cuml.internals.safe_imports import cpu_only_import import numbers @@ -195,6 +200,9 @@ class IncrementalPCA(PCA): 0.0037122774558343763 """ + _cpu_estimator_import_path = "sklearn.decomposition.IncrementalPCA" + + @device_interop_preparation def __init__( self, *, @@ -218,6 +226,7 @@ def __init__( self.batch_size = batch_size self._sparse_model = True + @enable_device_interop def fit(self, X, y=None, convert_dtype=True) -> "IncrementalPCA": """ Fit the model with X, using minibatches of size batch_size. @@ -255,10 +264,10 @@ def fit(self, X, y=None, convert_dtype=True) -> "IncrementalPCA": check_dtype=[cp.float32, cp.float64], ) - n_samples, n_features = X.shape + n_samples, self.n_features_in_ = X.shape if self.batch_size is None: - self.batch_size_ = 5 * n_features + self.batch_size_ = 5 * self.n_features_in_ else: self.batch_size_ = self.batch_size @@ -305,25 +314,30 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA": self._set_output_type(X) - X, n_samples, n_features, self.dtype = input_to_cupy_array( + ( + X, + n_samples, + self.n_features_in_, + self.dtype, + ) = input_to_cupy_array( X, order="K", check_dtype=[cp.float32, cp.float64] ) else: - n_samples, n_features = X.shape + n_samples, self.n_features_in_ = X.shape if not hasattr(self, "components_"): self.components_ = None if self.n_components is None: if self.components_ is None: - self.n_components_ = min(n_samples, n_features) + self.n_components_ = min(n_samples, self.n_features_in_) else: self.n_components_ = self.components_.shape[0] - elif not 1 <= self.n_components <= n_features: + elif not 1 <= self.n_components <= self.n_features_in_: raise ValueError( "n_components=%r invalid for n_features=%d, need " "more rows than columns for IncrementalPCA " - "processing" % (self.n_components, n_features) + "processing" % (self.n_components, self.n_features_in_) ) elif not self.n_components <= n_samples: raise ValueError( @@ -394,7 +408,7 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA": self.explained_variance_ratio_ = explained_variance_ratio[ : self.n_components_ ] - if self.n_components_ < n_features: + if self.n_components_ < self.n_features_in_: self.noise_variance_ = explained_variance[ self.n_components_ : ].mean() @@ -403,6 +417,7 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA": return self + @enable_device_interop def transform(self, X, convert_dtype=False) -> CumlArray: """ Apply dimensionality reduction to X. @@ -678,16 +693,17 @@ def _svd_flip(u, v, u_based_decision=True): u_adjusted, v_adjusted : arrays with the same dimensions as the input. """ + xpy = GlobalSettings().xpy if u_based_decision: # columns of u, rows of v - max_abs_cols = cp.argmax(cp.abs(u), axis=0) - signs = cp.sign(u[max_abs_cols, list(range(u.shape[1]))]) + max_abs_cols = xpy.argmax(xpy.abs(u), axis=0) + signs = xpy.sign(u[max_abs_cols, list(range(u.shape[1]))]) u *= signs - v *= signs[:, cp.newaxis] + v *= signs[:, xpy.newaxis] else: # rows of v, columns of u - max_abs_rows = cp.argmax(cp.abs(v), axis=1) - signs = cp.sign(v[list(range(v.shape[0])), max_abs_rows]) + max_abs_rows = xpy.argmax(xpy.abs(v), axis=1) + signs = xpy.sign(v[list(range(v.shape[0])), max_abs_rows]) u *= signs - v *= signs[:, cp.newaxis] + v *= signs[:, xpy.newaxis] return u, v diff --git a/python/cuml/cuml/internals/global_settings.py b/python/cuml/cuml/internals/global_settings.py index 9dae3ceac1..1ab5b9578a 100644 --- a/python/cuml/cuml/internals/global_settings.py +++ b/python/cuml/cuml/internals/global_settings.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -134,3 +134,7 @@ def output_type(self, value): @property def xpy(self): return self.memory_type.xpy + + @property + def xsparse(self): + return self.memory_type.xsparse diff --git a/python/cuml/cuml/tests/test_incremental_pca.py b/python/cuml/cuml/tests/test_incremental_pca.py index 8adc19b01f..cc5a3ef9d3 100644 --- a/python/cuml/cuml/tests/test_incremental_pca.py +++ b/python/cuml/cuml/tests/test_incremental_pca.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,9 @@ # limitations under the License. # +from cuml import global_settings from cuml.common.exceptions import NotFittedError +from cuml.common.device_selection import using_device_type from cuml.testing.utils import array_equal from cuml.decomposition.incremental_pca import _svd_flip from cuml.decomposition import IncrementalPCA as cuIPCA @@ -40,6 +42,7 @@ (500, 250, 14, True, 0.07, "csr", 1, True), ], ) +@pytest.mark.parametrize("device", ["gpu", "cpu"]) @pytest.mark.no_bad_cuml_array_check def test_fit( nrows, @@ -50,48 +53,53 @@ def test_fit( sparse_format, batch_size_divider, whiten, + device, ): - - if sparse_format == "csc": - pytest.skip( - "cupyx.scipy.sparse.csc.csc_matrix does not support" - " indexing as of cupy 7.6.0" + with using_device_type(device): + + if sparse_format == "csc": + pytest.skip( + "cupyx.scipy.sparse.csc.csc_matrix does not support" + " indexing as of cupy 7.6.0" + ) + + if sparse_input: + X = global_settings.xsparse.random( + nrows, + ncols, + density=density, + random_state=10, + format=sparse_format, + ) + else: + X, _ = make_blobs( + n_samples=nrows, n_features=ncols, random_state=10 + ) + + cu_ipca = cuIPCA( + n_components=n_components, + whiten=whiten, + batch_size=int(nrows / batch_size_divider), ) - - if sparse_input: - X = cupyx.scipy.sparse.random( - nrows, - ncols, - density=density, - random_state=10, - format=sparse_format, + cu_ipca.fit(X) + cu_t = cu_ipca.transform(X) + cu_inv = cu_ipca.inverse_transform(cu_t) + + sk_ipca = skIPCA( + n_components=n_components, + whiten=whiten, + batch_size=int(nrows / batch_size_divider), ) - else: - X, _ = make_blobs(n_samples=nrows, n_features=ncols, random_state=10) + if device == "gpu": + if sparse_input: + X = X.get() + else: + X = cp.asnumpy(X) + sk_ipca.fit(X) + sk_t = sk_ipca.transform(X) + sk_inv = sk_ipca.inverse_transform(sk_t) - cu_ipca = cuIPCA( - n_components=n_components, - whiten=whiten, - batch_size=int(nrows / batch_size_divider), - ) - cu_ipca.fit(X) - cu_t = cu_ipca.transform(X) - cu_inv = cu_ipca.inverse_transform(cu_t) - - sk_ipca = skIPCA( - n_components=n_components, - whiten=whiten, - batch_size=int(nrows / batch_size_divider), - ) - if sparse_input: - X = X.get() - else: - X = cp.asnumpy(X) - sk_ipca.fit(X) - sk_t = sk_ipca.transform(X) - sk_inv = sk_ipca.inverse_transform(sk_t) - - assert array_equal(cu_inv, sk_inv, 5e-5, with_sign=True) + assert array_equal(cu_inv, sk_inv, 5e-5, with_sign=True) @pytest.mark.parametrize( @@ -105,62 +113,69 @@ def test_fit( (5000, 4, 2, 0.1, 100, False), ], ) +@pytest.mark.parametrize("device", ["gpu", "cpu"]) @pytest.mark.no_bad_cuml_array_check def test_partial_fit( - nrows, ncols, n_components, density, batch_size_divider, whiten + nrows, ncols, n_components, density, batch_size_divider, whiten, device ): - X, _ = make_blobs(n_samples=nrows, n_features=ncols, random_state=10) + with using_device_type(device): + X, _ = make_blobs(n_samples=nrows, n_features=ncols, random_state=10) - cu_ipca = cuIPCA(n_components=n_components, whiten=whiten) + cu_ipca = cuIPCA(n_components=n_components, whiten=whiten) - sample_size = int(nrows / batch_size_divider) - for i in range(0, nrows, sample_size): - cu_ipca.partial_fit(X[i : i + sample_size].copy()) + sample_size = int(nrows / batch_size_divider) + for i in range(0, nrows, sample_size): + cu_ipca.partial_fit(X[i : i + sample_size].copy()) - cu_t = cu_ipca.transform(X) - cu_inv = cu_ipca.inverse_transform(cu_t) + cu_t = cu_ipca.transform(X) + cu_inv = cu_ipca.inverse_transform(cu_t) - sk_ipca = skIPCA(n_components=n_components, whiten=whiten) + sk_ipca = skIPCA(n_components=n_components, whiten=whiten) - X = cp.asnumpy(X) + if device == "gpu": + X = cp.asnumpy(X) - for i in range(0, nrows, sample_size): - sk_ipca.partial_fit(X[i : i + sample_size].copy()) + for i in range(0, nrows, sample_size): + sk_ipca.partial_fit(X[i : i + sample_size].copy()) - sk_t = sk_ipca.transform(X) - sk_inv = sk_ipca.inverse_transform(sk_t) + sk_t = sk_ipca.transform(X) + sk_inv = sk_ipca.inverse_transform(sk_t) - assert array_equal(cu_inv, sk_inv, 6e-5, with_sign=True) + assert array_equal(cu_inv, sk_inv, 6e-5, with_sign=True) -def test_exceptions(): - X = cupyx.scipy.sparse.eye(10) - ipca = cuIPCA() - with pytest.raises(TypeError): - ipca.partial_fit(X) +@pytest.mark.parametrize("device", ["gpu", "cpu"]) +def test_exceptions(device): + with using_device_type(device): + X = global_settings.xsparse.eye(10) + ipca = cuIPCA() + with pytest.raises(TypeError): + ipca.partial_fit(X) - X = X.toarray() - with pytest.raises(NotFittedError): - ipca.transform(X) + X = X.toarray() + with pytest.raises(ValueError): + ipca.transform(X) - with pytest.raises(NotFittedError): - ipca.inverse_transform(X) + with pytest.raises(ValueError): + ipca.inverse_transform(X) - with pytest.raises(ValueError): - cuIPCA(n_components=8).fit(X[:5]) + with pytest.raises(ValueError): + cuIPCA(n_components=8).fit(X[:5]) - with pytest.raises(ValueError): - cuIPCA(n_components=8).fit(X[:, :5]) + with pytest.raises(ValueError): + cuIPCA(n_components=8).fit(X[:, :5]) -def test_svd_flip(): - x = cp.array(range(-10, 80)).reshape((9, 10)) - u, s, v = cp.linalg.svd(x, full_matrices=False) - u_true, v_true = _svd_flip(u, v, u_based_decision=True) - reco_true = cp.dot(u_true * s, v_true) - u_false, v_false = _svd_flip(u, v, u_based_decision=False) - reco_false = cp.dot(u_false * s, v_false) +@pytest.mark.parametrize("device", ["gpu", "cpu"]) +def test_svd_flip(device): + with using_device_type(device): + x = global_settings.xpy.array(range(-10, 80)).reshape((9, 10)) + u, s, v = global_settings.xpy.linalg.svd(x, full_matrices=False) + u_true, v_true = _svd_flip(u, v, u_based_decision=True) + reco_true = global_settings.xpy.dot(u_true * s, v_true) + u_false, v_false = _svd_flip(u, v, u_based_decision=False) + reco_false = global_settings.xpy.dot(u_false * s, v_false) - assert array_equal(reco_true, x) - assert array_equal(reco_false, x) + assert array_equal(reco_true, x) + assert array_equal(reco_false, x)