diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index c0ef8427cf15f..8c68710e5392e 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -2091,7 +2091,7 @@ def to_numpy( a = s.to_numpy() arrays.append( a.astype(str, copy=False) - if tp == Utf8 and not s.has_validity() + if tp == Utf8 and not s.null_count() else a ) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index ed14a8b673f5d..3279eb50c2bc2 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1098,7 +1098,7 @@ def __array__(self, dtype: Any = None) -> np.ndarray[Any, Any]: Ensures that `np.asarray(pl.Series(..))` works as expected, see https://numpy.org/devdocs/user/basics.interoperability.html#the-array-method. """ - if not dtype and self.dtype == Utf8 and not self.has_validity(): + if not dtype and self.dtype == Utf8 and not self.null_count(): dtype = np.dtype("U") if dtype: return self.to_numpy().__array__(dtype) @@ -3289,8 +3289,14 @@ def has_validity(self) -> bool: """ Return True if the Series has a validity bitmask. - If there is none, it means that there are no null values. - Use this to swiftly assert a Series does not have null values. + If there is no mask, it means that there are no ``null`` values. + + Notes + ----- + While the _absence_ of a validity bitmask guarantees that a Series does not + have ``null`` values, the converse is not true, eg: the _presence_ of a + bitmask does not mean that there _are_ null values, as every value of the + bitmask could be ``true``. 
""" return self._s.has_validity() @@ -4051,7 +4057,7 @@ def view(self, *, ignore_nulls: bool = False) -> SeriesView: """ if not ignore_nulls: - assert not self.has_validity() + assert not self.null_count() from polars.series._numpy import SeriesView, _ptr_to_numpy @@ -4149,7 +4155,7 @@ def raise_no_zero_copy() -> None: # note: there is no native numpy "time" dtype return np.array(self.to_list(), dtype="object") else: - if not self.has_validity(): + if not self.null_count(): if self.is_temporal(): np_array = convert_to_date(self.view(ignore_nulls=True)) elif self.is_numeric(): diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 605c36fb57915..086498239ba38 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -1318,6 +1318,17 @@ def test_to_numpy(order: IndexOrder, f_contiguous: bool, c_contiguous: bool) -> assert_array_equal(structured_array, expected_array) assert structured_array.flags["F_CONTIGUOUS"] + # check string conversion; if no nulls can optimise as a fixed-width dtype + df = pl.DataFrame({"s": ["x", "y", None]}) + assert_array_equal( + df.to_numpy(structured=True), + np.array([("x",), ("y",), (None,)], dtype=[("s", "O")]), + ) + assert_array_equal( + df[:2].to_numpy(structured=True), + np.array([("x",), ("y",)], dtype=[("s", " None: # round-trip structured array: validate init/export @@ -1336,7 +1347,6 @@ def test_to_numpy_structured() -> None: ] ), ) - df = pl.from_numpy(structured_array) assert df.schema == { "product": pl.Utf8, diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index 7f20a3e9da76f..99cfdb6f13913 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -713,9 +713,15 @@ def test_arrow() -> None: def test_view() -> None: - a = pl.Series("a", [1.0, 2.0, 3.0]) + a = pl.Series("a", [1.0, 2.5, 3.0]) assert isinstance(a.view(), np.ndarray) 
- assert np.all(a.view() == np.array([1, 2, 3])) + assert np.all(a.view() == np.array([1.0, 2.5, 3.0])) + + b = pl.Series("b", [1, 2, None]) + with pytest.raises(AssertionError): + b.view() + + assert np.all(b[:2].view() == np.array([1, 2])) def test_ufunc() -> None: