Skip to content

Commit

Permalink
fix(python): clarify has_validity docstring and fix several cases w…
Browse files Browse the repository at this point in the history
…here we were incorrectly using the presence of a bitmask to infer the existence of `null` values
  • Loading branch information
alexander-beedie committed Sep 26, 2023
1 parent 27e32dc commit 0dbdcf3
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 9 deletions.
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2091,7 +2091,7 @@ def to_numpy(
a = s.to_numpy()
arrays.append(
a.astype(str, copy=False)
if tp == Utf8 and not s.has_validity()
if tp == Utf8 and not s.null_count()
else a
)

Expand Down
16 changes: 11 additions & 5 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,7 +1098,7 @@ def __array__(self, dtype: Any = None) -> np.ndarray[Any, Any]:
Ensures that `np.asarray(pl.Series(..))` works as expected, see
https://numpy.org/devdocs/user/basics.interoperability.html#the-array-method.
"""
if not dtype and self.dtype == Utf8 and not self.has_validity():
if not dtype and self.dtype == Utf8 and not self.null_count():
dtype = np.dtype("U")
if dtype:
return self.to_numpy().__array__(dtype)
Expand Down Expand Up @@ -3289,8 +3289,14 @@ def has_validity(self) -> bool:
"""
Return True if the Series has a validity bitmask.
If there is none, it means that there are no null values.
Use this to swiftly assert a Series does not have null values.
If there is no mask, it means that there are no ``null`` values.
Notes
-----
While the _absence_ of a validity bitmask guarantees that a Series does not
have ``null`` values, the converse is not true, e.g. the _presence_ of a
bitmask does not mean that there _are_ null values, as every bit of the
bitmask could be set (marking every value as valid).
"""
return self._s.has_validity()
Expand Down Expand Up @@ -4051,7 +4057,7 @@ def view(self, *, ignore_nulls: bool = False) -> SeriesView:
"""
if not ignore_nulls:
assert not self.has_validity()
assert not self.null_count()

from polars.series._numpy import SeriesView, _ptr_to_numpy

Expand Down Expand Up @@ -4149,7 +4155,7 @@ def raise_no_zero_copy() -> None:
# note: there is no native numpy "time" dtype
return np.array(self.to_list(), dtype="object")
else:
if not self.has_validity():
if not self.null_count():
if self.is_temporal():
np_array = convert_to_date(self.view(ignore_nulls=True))
elif self.is_numeric():
Expand Down
12 changes: 11 additions & 1 deletion py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,17 @@ def test_to_numpy(order: IndexOrder, f_contiguous: bool, c_contiguous: bool) ->
assert_array_equal(structured_array, expected_array)
assert structured_array.flags["F_CONTIGUOUS"]

# check string conversion; if no nulls can optimise as a fixed-width dtype
df = pl.DataFrame({"s": ["x", "y", None]})
assert_array_equal(
df.to_numpy(structured=True),
np.array([("x",), ("y",), (None,)], dtype=[("s", "O")]),
)
assert_array_equal(
df[:2].to_numpy(structured=True),
np.array([("x",), ("y",)], dtype=[("s", "<U1")]),
)


def test_to_numpy_structured() -> None:
# round-trip structured array: validate init/export
Expand All @@ -1336,7 +1347,6 @@ def test_to_numpy_structured() -> None:
]
),
)

df = pl.from_numpy(structured_array)
assert df.schema == {
"product": pl.Utf8,
Expand Down
10 changes: 8 additions & 2 deletions py-polars/tests/unit/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,9 +713,15 @@ def test_arrow() -> None:


def test_view() -> None:
a = pl.Series("a", [1.0, 2.0, 3.0])
a = pl.Series("a", [1.0, 2.5, 3.0])
assert isinstance(a.view(), np.ndarray)
assert np.all(a.view() == np.array([1, 2, 3]))
assert np.all(a.view() == np.array([1.0, 2.5, 3.0]))

b = pl.Series("b", [1, 2, None])
with pytest.raises(AssertionError):
b.view()

assert np.all(b[:2].view() == np.array([1, 2]))


def test_ufunc() -> None:
Expand Down

0 comments on commit 0dbdcf3

Please sign in to comment.