Skip to content

Commit

Permalink
Merge branch 'branch-24.12' of github.com:rapidsai/cudf into wheel-va…
Browse files Browse the repository at this point in the history
…lidation
  • Loading branch information
jameslamb committed Nov 8, 2024
2 parents 700078d + 0f1ae26 commit 48ca41f
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 3 deletions.
3 changes: 2 additions & 1 deletion docs/cudf/source/developer_guide/cudf_pandas.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ In the rest of this document, to maintain a concrete pair of libraries in mind,
For example, future support could include pairs such as CuPy (as the "fast" library) and NumPy (as the "slow" library).

```{note}
We currently do not wrap the entire NumPy library because it exposes a C API. But we do wrap NumPy's `numpy.ndarray` and CuPy's `cupy.ndarray` in a proxy type.
1. We currently do not wrap the entire NumPy library because it exposes a C API. But we do wrap NumPy's `numpy.ndarray` and CuPy's `cupy.ndarray` in a proxy type.
2. A `custom_iter` method is defined so that iteration always uses the slow object's `iter` method. This avoids moving the object to the GPU, triggering an error, and then moving it back to the CPU to complete the iteration.
```

### Types:
Expand Down
11 changes: 10 additions & 1 deletion python/cudf/cudf/pandas/_wrappers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,13 @@ def array_interface(self: _FastSlowProxy):


def custom_iter(self: _FastSlowProxy):
# NOTE(review): this is a diff hunk shown without +/- markers or indentation.
# The hunk header reports 10 additions and 1 deletion, so the next line
# appears to be the single removed line (the previous body) -- confirm
# against the commit.
return iter(self._fsproxy_slow)
# NOTE(review): the remaining lines appear to be the 10 added lines (the new
# body): the slow object's iterator is passed through _maybe_wrap_result
# with None as the ``func`` argument instead of being returned directly.
"""
Custom iter method to handle the case where only the slow
object's iter method is used.
"""
# NOTE: Do not remove this method. This is required to avoid
# falling back to GPU for iter method.
return _maybe_wrap_result(
iter(self._fsproxy_slow),
None, # type: ignore
)
4 changes: 3 additions & 1 deletion python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,9 @@ def _maybe_wrap_result(result: Any, func: Callable, /, *args, **kwargs) -> Any:
"""
Wraps "result" in a fast-slow proxy if is a "proxiable" object.
"""
if _is_final_type(result):
if isinstance(result, (int, str, float, bool, type(None))):
return result
elif _is_final_type(result):
typ = get_final_type_map()[type(result)]
return typ._fsproxy_wrap(result, func)
elif _is_intermediate_type(result):
Expand Down
18 changes: 18 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,3 +1777,21 @@ def test_cudf_pandas_util_version(attrs):
assert not hasattr(pd.util, attrs)
else:
assert hasattr(pd.util, attrs)


def test_iteration_over_dataframe_dtypes_produces_proxy_objects(dataframe):
    """Check that iterating ``DataFrame.dtypes`` yields proxy objects.

    Column "b" is replaced by an ``IntervalIndex`` built from columns "a"
    and "b", and column "a" by a categorical ``Series``. Every element
    obtained by iterating the resulting ``dtypes`` must satisfy
    ``is_proxy_object``, and the first two entries must be instances of
    ``xpd.CategoricalDtype`` and ``xpd.IntervalDtype`` respectively.
    """
    # The fixture provides a pair; only its second member is used here.
    _, proxy_frame = dataframe
    proxy_frame["b"] = xpd.IntervalIndex.from_arrays(
        proxy_frame["a"], proxy_frame["b"]
    )
    proxy_frame["a"] = xpd.Series([1, 1, 1, 2, 3], dtype="category")

    dtypes = proxy_frame.dtypes
    for dtype in dtypes:
        assert is_proxy_object(dtype)
    assert isinstance(dtypes.iloc[0], xpd.CategoricalDtype)
    assert isinstance(dtypes.iloc[1], xpd.IntervalDtype)


def test_iter_doesnot_raise(monkeypatch):
    """Iterate a Series with CUDF_PANDAS_FAIL_ON_FALLBACK set to "True".

    The test has no assertions; it passes when the iteration completes
    without raising.
    """
    series = xpd.Series([1, 2, 3])
    with monkeypatch.context() as scoped_patch:
        # NOTE(review): presumably this variable turns a fallback into an
        # error, so a clean loop means no fallback occurred -- confirm.
        scoped_patch.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")
        for _item in series:
            continue

0 comments on commit 48ca41f

Please sign in to comment.