Skip to content

Commit

Permalink
docs(python): Expand docstrings for to_numpy methods (#16394)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored May 22, 2024
1 parent d1cf113 commit b5a8a50
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 29 deletions.
76 changes: 59 additions & 17 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1498,13 +1498,23 @@ def to_numpy(
structured: bool = False, # noqa: FBT001
*,
order: IndexOrder = "fortran",
allow_copy: bool = True,
writable: bool = False,
allow_copy: bool = True,
use_pyarrow: bool | None = None,
) -> np.ndarray[Any, Any]:
"""
Convert this DataFrame to a NumPy ndarray.
This operation copies data only when necessary. The conversion is zero copy when
all of the following hold:
- The DataFrame is fully contiguous in memory, with all Series back-to-back and
all Series consisting of a single chunk.
- The data type is an integer or float.
- The DataFrame contains no null values.
- The `order` parameter is set to `fortran` (default).
- The `writable` parameter is set to `False` (default).
Parameters
----------
structured
Expand All @@ -1519,13 +1529,13 @@ def to_numpy(
However, the C-like order might be more appropriate to use for downstream
applications to prevent cloning data, e.g. when reshaping into a
one-dimensional array.
allow_copy
Allow memory to be copied to perform the conversion. If set to `False`,
causes conversions that are not zero-copy to fail.
writable
Ensure the resulting array is writable. This will force a copy of the data
if the array was created without copy, as the underlying Arrow data is
immutable.
allow_copy
Allow memory to be copied to perform the conversion. If set to `False`,
causes conversions that are not zero-copy to fail.
use_pyarrow
Use `pyarrow.Array.to_numpy
Expand All @@ -1538,6 +1548,48 @@ def to_numpy(
Examples
--------
Numeric data without nulls can be converted without copying data in some cases.
The resulting array will not be writable.
>>> df = pl.DataFrame({"a": [1, 2, 3]})
>>> arr = df.to_numpy()
>>> arr
array([[1],
[2],
[3]])
>>> arr.flags.writeable
False
Set `writable=True` to force a data copy to make the array writable.
>>> df.to_numpy(writable=True).flags.writeable
True
If the DataFrame contains different numeric data types, the resulting data type
will be the supertype. This requires data to be copied. Integer types with
nulls are cast to a float type with `nan` representing a null value.
>>> df = pl.DataFrame({"a": [1, 2, None], "b": [4.0, 5.0, 6.0]})
>>> df.to_numpy()
array([[ 1., 4.],
[ 2., 5.],
[nan, 6.]])
Set `allow_copy=False` to raise an error if data would be copied.
>>> df.to_numpy(allow_copy=False) # doctest: +SKIP
Traceback (most recent call last):
...
RuntimeError: copy not allowed: cannot convert to a NumPy array without copying data
Polars defaults to F-contiguous order. Use `order="c"` to force the resulting
array to be C-contiguous.
>>> df.to_numpy(order="c").flags.c_contiguous
True
DataFrames with mixed types will result in an array with an object dtype.
>>> df = pl.DataFrame(
... {
... "foo": [1, 2, 3],
Expand All @@ -1546,28 +1598,18 @@ def to_numpy(
... },
... schema_overrides={"foo": pl.UInt8, "bar": pl.Float32},
... )
Export to a standard 2D numpy array.
>>> df.to_numpy()
array([[1, 6.5, 'a'],
[2, 7.0, 'b'],
[3, 8.5, 'c']], dtype=object)
Export to a structured array, which can better-preserve individual
column data, such as name and dtype...
Set `structured=True` to convert to a structured array, which can better
preserve individual column data such as name and data type.
>>> df.to_numpy(structured=True)
array([(1, 6.5, 'a'), (2, 7. , 'b'), (3, 8.5, 'c')],
dtype=[('foo', 'u1'), ('bar', '<f4'), ('ham', '<U1')])
...optionally going on to view as a record array:
>>> import numpy as np
>>> df.to_numpy(structured=True).view(np.recarray)
rec.array([(1, 6.5, 'a'), (2, 7. , 'b'), (3, 8.5, 'c')],
dtype=[('foo', 'u1'), ('bar', '<f4'), ('ham', '<U1')])
"""
""" # noqa: W505
if use_pyarrow is not None:
issue_deprecation_warning(
"The `use_pyarrow` parameter for `DataFrame.to_numpy` is deprecated."
Expand Down
54 changes: 42 additions & 12 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4427,13 +4427,13 @@ def to_numpy(
"""
Convert this Series to a NumPy ndarray.
This operation may copy data, but is completely safe. Note that:
This operation copies data only when necessary. The conversion is zero copy when
all of the following hold:
- Data which is purely numeric AND without null values is not cloned
- Floating point `nan` values can be zero-copied
- Booleans cannot be zero-copied
To ensure that no data is copied, set `allow_copy=False`.
- The data type is an integer, float, `Datetime`, `Duration`, or `Array`.
- The Series contains no null values.
- The Series consists of a single chunk.
- The `writable` parameter is set to `False` (default).
Parameters
----------
Expand Down Expand Up @@ -4466,13 +4466,43 @@ def to_numpy(
Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
Numeric data without nulls can be converted without copying data.
The resulting array will not be writable.
>>> s = pl.Series([1, 2, 3], dtype=pl.Int8)
>>> arr = s.to_numpy()
>>> arr # doctest: +IGNORE_RESULT
array([1, 2, 3], dtype=int64)
>>> type(arr)
<class 'numpy.ndarray'>
"""
>>> arr
array([1, 2, 3], dtype=int8)
>>> arr.flags.writeable
False
Set `writable=True` to force a data copy to make the array writable.
>>> s.to_numpy(writable=True).flags.writeable
True
Integer Series containing nulls will be cast to a float type with `nan`
representing a null value. This requires data to be copied.
>>> s = pl.Series([1, 2, None], dtype=pl.UInt16)
>>> s.to_numpy()
array([ 1., 2., nan], dtype=float32)
Set `allow_copy=False` to raise an error if data would be copied.
>>> s.to_numpy(allow_copy=False) # doctest: +SKIP
Traceback (most recent call last):
...
RuntimeError: copy not allowed: cannot convert to a NumPy array without copying data
Series of data type `Array` or `Struct` will result in an array with more than
one dimension.
>>> s = pl.Series([[1, 2, 3], [4, 5, 6]], dtype=pl.Array(pl.Int64, 3))
>>> s.to_numpy()
array([[1, 2, 3],
[4, 5, 6]])
""" # noqa: W505
if zero_copy_only is not None:
issue_deprecation_warning(
"The `zero_copy_only` parameter for `Series.to_numpy` is deprecated."
Expand Down

0 comments on commit b5a8a50

Please sign in to comment.