diff --git a/docs/cudf/source/developer_guide/cudf_pandas.md b/docs/cudf/source/developer_guide/cudf_pandas.md index 911a64fa152..b653b786129 100644 --- a/docs/cudf/source/developer_guide/cudf_pandas.md +++ b/docs/cudf/source/developer_guide/cudf_pandas.md @@ -11,7 +11,8 @@ In the rest of this document, to maintain a concrete pair of libraries in mind, For example, future support could include pairs such as CuPy (as the "fast" library) and NumPy (as the "slow" library). ```{note} -We currently do not wrap the entire NumPy library because it exposes a C API. But we do wrap NumPy's `numpy.ndarray` and CuPy's `cupy.ndarray` in a proxy type. +1. We currently do not wrap the entire NumPy library because it exposes a C API. But we do wrap NumPy's `numpy.ndarray` and CuPy's `cupy.ndarray` in a proxy type. +2. A `custom_iter` method is defined so that iteration always uses the slow object's `iter` method. This avoids moving the object to the GPU, triggering an error there, and then moving it back to the CPU to complete the iteration. ``` ### Types: diff --git a/python/cudf/cudf/pandas/_wrappers/common.py b/python/cudf/cudf/pandas/_wrappers/common.py index 66a51a83896..b801654068e 100644 --- a/python/cudf/cudf/pandas/_wrappers/common.py +++ b/python/cudf/cudf/pandas/_wrappers/common.py @@ -52,4 +52,13 @@ def array_interface(self: _FastSlowProxy): def custom_iter(self: _FastSlowProxy): - return iter(self._fsproxy_slow) + """ + Custom iter method to handle the case where only the slow + object's iter method is used. + """ + # NOTE: Do not remove this method. This is required to avoid + # falling back to GPU for iter method. 
+ return _maybe_wrap_result( + iter(self._fsproxy_slow), + None, # type: ignore + ) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 73afde407db..99c0cb82f41 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -1099,7 +1099,9 @@ def _maybe_wrap_result(result: Any, func: Callable, /, *args, **kwargs) -> Any: """ Wraps "result" in a fast-slow proxy if is a "proxiable" object. """ - if _is_final_type(result): + if isinstance(result, (int, str, float, bool, type(None))): + return result + elif _is_final_type(result): typ = get_final_type_map()[type(result)] return typ._fsproxy_wrap(result, func) elif _is_intermediate_type(result): diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 3e7d1cf3c4c..e260b448219 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1777,3 +1777,21 @@ def test_cudf_pandas_util_version(attrs): assert not hasattr(pd.util, attrs) else: assert hasattr(pd.util, attrs) + + +def test_iteration_over_dataframe_dtypes_produces_proxy_objects(dataframe): + _, xdf = dataframe + xdf["b"] = xpd.IntervalIndex.from_arrays(xdf["a"], xdf["b"]) + xdf["a"] = xpd.Series([1, 1, 1, 2, 3], dtype="category") + dtype_series = xdf.dtypes + assert all(is_proxy_object(x) for x in dtype_series) + assert isinstance(dtype_series.iloc[0], xpd.CategoricalDtype) + assert isinstance(dtype_series.iloc[1], xpd.IntervalDtype) + + +def test_iter_doesnot_raise(monkeypatch): + s = xpd.Series([1, 2, 3]) + with monkeypatch.context() as monkeycontext: + monkeycontext.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True") + for _ in s: + pass