From 2296900c9ed14817a31d3eae5aac69e2a3a01e92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rodrigo=20Gir=C3=A3o=20Serr=C3=A3o?=
 <5621605+rodrigogiraoserrao@users.noreply.github.com>
Date: Wed, 13 Nov 2024 07:11:02 +0000
Subject: [PATCH 01/18] docs: Fix join API reference links (#19745)

---
 docs/source/user-guide/transformations/joins.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/user-guide/transformations/joins.md b/docs/source/user-guide/transformations/joins.md
index b135a45f53d3..5b55386b70f0 100644
--- a/docs/source/user-guide/transformations/joins.md
+++ b/docs/source/user-guide/transformations/joins.md
@@ -15,14 +15,14 @@ If you want to learn about joins in general and how to work with them in Polars,
 === ":fontawesome-brands-python: Python"
 
     [:material-api: `join`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join.html)
-    [:material-api: `join_where`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_asof.html)
-    [:material-api: `join_asof`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_where.html)
+    [:material-api: `join_where`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_where.html)
+    [:material-api: `join_asof`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_asof.html)
 
 === ":fontawesome-brands-rust: Rust"
 
     [:material-api: `join`](https://docs.pola.rs/api/rust/dev/polars/prelude/trait.DataFrameJoinOps.html#method.join)
     ([:material-flag-plus: semi_anti_join](/user-guide/installation/#feature-flags "Enable the feature flag semi_anti_join for semi and for anti joins"){.feature-flag} needed for some options.)
-    [:material-api: `join_asof_by`](https://docs.pola.rs/api/rust/dev/polars/prelude/trait.AsofJoin.html#method.join_asof)
+    [:material-api: `join_asof_by`](https://docs.pola.rs/api/rust/dev/polars/prelude/trait.AsofJoinBy.html#method.join_asof_by)
     [:material-flag-plus: Available on feature asof_join](/user-guide/installation/#feature-flags "To use this functionality enable the feature flag asof_join"){.feature-flag}
     [:material-api: `join_where`](https://docs.rs/polars/latest/polars/prelude/struct.JoinBuilder.html#method.join_where)
     [:material-flag-plus: Available on feature iejoin](/user-guide/installation/#feature-flags "To use this functionality enable the feature flag iejoin"){.feature-flag}

From 861481c6b46d320e3ddc889a2d3b59e05a9964d6 Mon Sep 17 00:00:00 2001
From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
Date: Wed, 13 Nov 2024 08:11:15 +0100
Subject: [PATCH 02/18] docs: Add `meta.is_column` to API docs (#19744)

---
 py-polars/docs/source/reference/expressions/meta.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/py-polars/docs/source/reference/expressions/meta.rst b/py-polars/docs/source/reference/expressions/meta.rst
index e70283c4c9b4..514067e0166f 100644
--- a/py-polars/docs/source/reference/expressions/meta.rst
+++ b/py-polars/docs/source/reference/expressions/meta.rst
@@ -11,6 +11,7 @@ The following methods are available under the `expr.meta` attribute.
 
     Expr.meta.eq
     Expr.meta.has_multiple_outputs
+    Expr.meta.is_column
     Expr.meta.is_column_selection
     Expr.meta.is_regex_projection
     Expr.meta.ne

From 6808bd8e83ff985da17f0b21382c7b9707578d62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rodrigo=20Gir=C3=A3o=20Serr=C3=A3o?=
 <5621605+rodrigogiraoserrao@users.noreply.github.com>
Date: Wed, 13 Nov 2024 07:11:42 +0000
Subject: [PATCH 03/18] docs: Fix formatting of nested list (#19746)

---
 docs/source/user-guide/expressions/index.md | 24 +++++++++++----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/docs/source/user-guide/expressions/index.md b/docs/source/user-guide/expressions/index.md
index 7e4b6f0a8b1a..b4442d6f4289 100644
--- a/docs/source/user-guide/expressions/index.md
+++ b/docs/source/user-guide/expressions/index.md
@@ -4,19 +4,21 @@ We [introduced the concept of “expressions” in a previous section](../concep
 In this section we will focus on exploring the types of expressions that Polars offers.
 Each section gives an overview of what they do and provides additional examples.
 
+<!-- dprint-ignore-start -->
 - Essentials:
-  - [Basic operations](basic-operations.md) – how to do basic operations on dataframe columns, like arithmetic calculations, comparisons, and other common, general-purpose operations
-  - [Expression expansion](expression-expansion.md) – what is expression expansion and how to use it
-  - [Casting](casting.md) – how to convert / cast values to different data types
+    - [Basic operations](basic-operations.md) – how to do basic operations on dataframe columns, like arithmetic calculations, comparisons, and other common, general-purpose operations
+    - [Expression expansion](expression-expansion.md) – what is expression expansion and how to use it
+    - [Casting](casting.md) – how to convert / cast values to different data types
 - How to work with specific types of data or data type namespaces:
-  - [Strings](strings.md) – how to work with strings and the namespace `str`
-  - [Lists and arrays](lists-and-arrays.md) – the differences between the data types `List` and `Array`, when to use them, and how to use them
-  - [Categorical data and enums](categorical-data-and-enums.md) – the differences between the data types `Categorical` and `Enum`, when to use them, and how to use them
-  - [Structs](structs.md) – when to use the data type `Struct` and how to use it
-  - [Missing data](missing-data.md) – how to work with missing data and how to fill missing data
+    - [Strings](strings.md) – how to work with strings and the namespace `str`
+    - [Lists and arrays](lists-and-arrays.md) – the differences between the data types `List` and `Array`, when to use them, and how to use them
+    - [Categorical data and enums](categorical-data-and-enums.md) – the differences between the data types `Categorical` and `Enum`, when to use them, and how to use them
+    - [Structs](structs.md) – when to use the data type `Struct` and how to use it
+    - [Missing data](missing-data.md) – how to work with missing data and how to fill missing data
 - Types of operations:
-  - [Aggregation](aggregation.md) – how to work with aggregating contexts like `group_by`
-  - [Window functions](window-functions.md) – how to apply window functions over columns in a dataframe
-  - [Folds](folds.md) – how to perform arbitrary computations horizontally across columns
+    - [Aggregation](aggregation.md) – how to work with aggregating contexts like `group_by`
+    - [Window functions](window-functions.md) – how to apply window functions over columns in a dataframe
+    - [Folds](folds.md) – how to perform arbitrary computations horizontally across columns
 - [User-defined Python functions](user-defined-python-functions.md) – how to apply user-defined Python functions to dataframe columns or to column values
 - [Numpy functions](numpy-functions.md) – how to use NumPy native functions on Polars dataframes and series
+<!-- dprint-ignore-end -->

From 37ae8e7c67514b4dfd895724aa3a387ceb321851 Mon Sep 17 00:00:00 2001
From: Alexander Beedie <alexander-beedie@users.noreply.github.com>
Date: Wed, 13 Nov 2024 11:12:35 +0400
Subject: [PATCH 04/18] fix(python): Address incorrect `selector & col`
 expansion (#19742)

---
 py-polars/polars/selectors.py          | 30 ++++++++++----------------
 py-polars/tests/unit/test_selectors.py | 10 +++++++--
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/py-polars/polars/selectors.py b/py-polars/polars/selectors.py
index 4cb11506b3f6..9d3cedb47e85 100644
--- a/py-polars/polars/selectors.py
+++ b/py-polars/polars/selectors.py
@@ -385,10 +385,6 @@ def __and__(self, other: Any) -> Expr: ...
     def __and__(self, other: Any) -> SelectorType | Expr:
         if is_column(other):
             colname = other.meta.output_name()
-            if self._attrs["name"] == "by_name" and (
-                params := self._attrs["params"]
-            ).get("require_all", True):
-                return by_name(*params["*names"], colname)
             other = by_name(colname)
         if is_selector(other):
             return _selector_proxy_(
@@ -399,6 +395,12 @@ def __and__(self, other: Any) -> SelectorType | Expr:
         else:
             return self.as_expr().__and__(other)
 
+    def __rand__(self, other: Any) -> Expr:
+        if is_column(other):
+            colname = other.meta.output_name()
+            return by_name(colname) & self
+        return self.as_expr().__rand__(other)
+
     @overload
     def __or__(self, other: SelectorType) -> SelectorType: ...
 
@@ -417,6 +419,11 @@ def __or__(self, other: Any) -> SelectorType | Expr:
         else:
             return self.as_expr().__or__(other)
 
+    def __ror__(self, other: Any) -> Expr:
+        if is_column(other):
+            other = by_name(other.meta.output_name())
+        return self.as_expr().__ror__(other)
+
     @overload
     def __xor__(self, other: SelectorType) -> SelectorType: ...
 
@@ -435,21 +442,6 @@ def __xor__(self, other: Any) -> SelectorType | Expr:
         else:
             return self.as_expr().__or__(other)
 
-    def __rand__(self, other: Any) -> Expr:
-        if is_column(other):
-            colname = other.meta.output_name()
-            if self._attrs["name"] == "by_name" and (
-                params := self._attrs["params"]
-            ).get("require_all", True):
-                return by_name(colname, *params["*names"])
-            other = by_name(colname)
-        return self.as_expr().__rand__(other)
-
-    def __ror__(self, other: Any) -> Expr:
-        if is_column(other):
-            other = by_name(other.meta.output_name())
-        return self.as_expr().__ror__(other)
-
     def __rxor__(self, other: Any) -> Expr:
         if is_column(other):
             other = by_name(other.meta.output_name())
diff --git a/py-polars/tests/unit/test_selectors.py b/py-polars/tests/unit/test_selectors.py
index dd2c415c9a13..f4e29e9194c6 100644
--- a/py-polars/tests/unit/test_selectors.py
+++ b/py-polars/tests/unit/test_selectors.py
@@ -182,11 +182,17 @@ def test_selector_by_name(df: pl.DataFrame) -> None:
 
     # check "by_name & col"
     for selector_expr, expected in (
-        (cs.by_name("abc", "cde") & pl.col("ghi"), ["abc", "cde", "ghi"]),
-        (pl.col("ghi") & cs.by_name("cde", "abc"), ["ghi", "cde", "abc"]),
+        (cs.by_name("abc", "cde") & pl.col("ghi"), []),
+        (cs.by_name("abc", "cde") & pl.col("cde"), ["cde"]),
+        (pl.col("cde") & cs.by_name("cde", "abc"), ["cde"]),
     ):
         assert df.select(selector_expr).columns == expected
 
+    # check "by_name & by_name"
+    assert df.select(
+        cs.by_name("abc", "cde", "def", "eee") & cs.by_name("cde", "eee", "fgg")
+    ).columns == ["cde", "eee"]
+
     # expected errors
     with pytest.raises(ColumnNotFoundError, match="xxx"):
         df.select(cs.by_name("xxx", "fgg", "!!!"))

From 87367e978ec62163d345aaca0b8c0b6bbbdae380 Mon Sep 17 00:00:00 2001
From: nameexhaustion <simonlin.rqmmw@slmail.me>
Date: Wed, 13 Nov 2024 18:48:24 +1100
Subject: [PATCH 05/18] fix: Fix incorrect lazy schema for aggregations
 (#19753)

---
 .github/workflows/test-coverage.yml          |   8 +-
 crates/polars-plan/src/plans/aexpr/schema.rs | 223 +++++++++++--------
 py-polars/tests/unit/test_schema.py          |  32 +++
 3 files changed, 169 insertions(+), 94 deletions(-)

diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml
index 232f79fb8947..531add1428e6 100644
--- a/.github/workflows/test-coverage.yml
+++ b/.github/workflows/test-coverage.yml
@@ -96,9 +96,13 @@ jobs:
         with:
           python-version: '3.12'
 
-      - name: Create virtual environment
+      - name: Install uv
         run: |
           curl -LsSf https://astral.sh/uv/install.sh | sh
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+
+      - name: Create virtual environment
+        run: |
           uv venv
           echo "$GITHUB_WORKSPACE/.venv/bin" >> $GITHUB_PATH
           echo "VIRTUAL_ENV=$GITHUB_WORKSPACE/.venv" >> $GITHUB_ENV
@@ -165,7 +169,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-        # Needed to fetch the Codecov config file
+      # Needed to fetch the Codecov config file
       - uses: actions/checkout@v4
 
       - name: Download coverage reports
diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs
index 7105855636c5..6c1b675b2bd8 100644
--- a/crates/polars-plan/src/plans/aexpr/schema.rs
+++ b/crates/polars-plan/src/plans/aexpr/schema.rs
@@ -32,50 +32,57 @@ impl AExpr {
         ctx: Context,
         arena: &Arena<AExpr>,
     ) -> PolarsResult<Field> {
-        // During aggregation a column that isn't aggregated gets an extra nesting level
-        //      col(foo: i64) -> list[i64]
-        // But not if we do an aggregation:
-        //      col(foo: i64).sum() -> i64
-        // The `nested` keeps track of the nesting we need to add.
-        let mut nested = matches!(ctx, Context::Aggregation) as u8;
-        let mut field = self.to_field_impl(schema, ctx, arena, &mut nested)?;
+        // Indicates whether we should auto-implode the result. This is initialized to true if we are
+        // in an aggregation context, so functions that return scalars should explicitly set this
+        // to false in `to_field_impl`.
+        let mut agg_list = matches!(ctx, Context::Aggregation);
+        let mut field = self.to_field_impl(schema, ctx, arena, &mut agg_list)?;
 
-        if nested >= 1 {
+        if agg_list {
             field.coerce(field.dtype().clone().implode());
         }
+
         Ok(field)
     }
 
     /// Get Field result of the expression. The schema is the input data.
+    ///
+    /// `agg_list` is taken as `&mut bool` because for some expressions it is determined by the
+    /// upper node (e.g. `alias`, `cast`).
     #[recursive]
     pub fn to_field_impl(
         &self,
         schema: &Schema,
         ctx: Context,
         arena: &Arena<AExpr>,
-        nested: &mut u8,
+        agg_list: &mut bool,
     ) -> PolarsResult<Field> {
         use AExpr::*;
         use DataType::*;
         match self {
             Len => {
-                *nested = 0;
+                *agg_list = false;
                 Ok(Field::new(PlSmallStr::from_static(LEN), IDX_DTYPE))
             },
             Window {
                 function, options, ..
             } => {
-                if let WindowType::Over(mapping) = options {
-                    *nested += matches!(mapping, WindowMapping::Join) as u8;
+                if let WindowType::Over(WindowMapping::Join) = options {
+                    // expr.over(..), defaults to agg-list unless explicitly unset
+                    // by the `to_field_impl` of the `expr`
+                    *agg_list = true;
                 }
+
                 let e = arena.get(*function);
-                e.to_field_impl(schema, ctx, arena, nested)
+                e.to_field_impl(schema, ctx, arena, agg_list)
             },
             Explode(expr) => {
                 // `Explode` is a "flatten" operation, which is not the same as returning a scalar.
                 // Namely, it should be auto-imploded in the aggregation context, so we don't update
-                // the `nested` state here.
-                let field = arena.get(*expr).to_field_impl(schema, ctx, arena, &mut 0)?;
+                // the `agg_list` state here.
+                let field = arena
+                    .get(*expr)
+                    .to_field_impl(schema, ctx, arena, &mut false)?;
 
                 if let List(inner) = field.dtype() {
                     Ok(Field::new(field.name().clone(), *inner.clone()))
@@ -87,14 +94,14 @@ impl AExpr {
                 name.clone(),
                 arena
                     .get(*expr)
-                    .to_field_impl(schema, ctx, arena, nested)?
+                    .to_field_impl(schema, ctx, arena, agg_list)?
                     .dtype,
             )),
             Column(name) => schema
                 .get_field(name)
                 .ok_or_else(|| PolarsError::ColumnNotFound(name.to_string().into())),
             Literal(sv) => {
-                *nested = 0;
+                *agg_list = false;
                 Ok(match sv {
                     LiteralValue::Series(s) => s.field().into_owned(),
                     _ => Field::new(sv.output_name().clone(), sv.get_datatype()),
@@ -116,35 +123,42 @@ impl AExpr {
                     | Operator::LogicalOr => {
                         let out_field;
                         let out_name = {
-                            out_field =
-                                arena.get(*left).to_field_impl(schema, ctx, arena, nested)?;
+                            out_field = arena
+                                .get(*left)
+                                .to_field_impl(schema, ctx, arena, agg_list)?;
                             out_field.name()
                         };
                         Field::new(out_name.clone(), Boolean)
                     },
                     Operator::TrueDivide => {
-                        return get_truediv_field(*left, *right, arena, ctx, schema, nested)
+                        return get_truediv_field(*left, *right, arena, ctx, schema, agg_list)
                     },
                     _ => {
-                        return get_arithmetic_field(*left, *right, arena, *op, ctx, schema, nested)
+                        return get_arithmetic_field(
+                            *left, *right, arena, *op, ctx, schema, agg_list,
+                        )
                     },
                 };
 
                 Ok(field)
             },
-            Sort { expr, .. } => arena.get(*expr).to_field_impl(schema, ctx, arena, nested),
+            Sort { expr, .. } => arena.get(*expr).to_field_impl(schema, ctx, arena, agg_list),
             Gather {
                 expr,
                 returns_scalar,
                 ..
             } => {
                 if *returns_scalar {
-                    *nested = nested.saturating_sub(1);
+                    *agg_list = false;
                 }
-                arena.get(*expr).to_field_impl(schema, ctx, arena, nested)
+                arena
+                    .get(*expr)
+                    .to_field_impl(schema, ctx, arena, &mut false)
             },
-            SortBy { expr, .. } => arena.get(*expr).to_field_impl(schema, ctx, arena, nested),
-            Filter { input, .. } => arena.get(*input).to_field_impl(schema, ctx, arena, nested),
+            SortBy { expr, .. } => arena.get(*expr).to_field_impl(schema, ctx, arena, agg_list),
+            Filter { input, .. } => arena
+                .get(*input)
+                .to_field_impl(schema, ctx, arena, agg_list),
             Agg(agg) => {
                 use IRAggExpr::*;
                 match agg {
@@ -152,13 +166,16 @@ impl AExpr {
                     | Min { input: expr, .. }
                     | First(expr)
                     | Last(expr) => {
-                        *nested = nested.saturating_sub(1);
-                        arena.get(*expr).to_field_impl(schema, ctx, arena, nested)
+                        *agg_list = false;
+                        arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)
                     },
                     Sum(expr) => {
-                        *nested = nested.saturating_sub(1);
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         let dt = match field.dtype() {
                             Boolean => Some(IDX_DTYPE),
                             UInt8 | Int8 | Int16 | UInt16 => Some(Int64),
@@ -170,9 +187,10 @@ impl AExpr {
                         Ok(field)
                     },
                     Median(expr) => {
-                        *nested = nested.saturating_sub(1);
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         match field.dtype {
                             Date => field.coerce(Datetime(TimeUnit::Milliseconds, None)),
                             _ => float_type(&mut field),
@@ -180,9 +198,10 @@ impl AExpr {
                         Ok(field)
                     },
                     Mean(expr) => {
-                        *nested = nested.saturating_sub(1);
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         match field.dtype {
                             Date => field.coerce(Datetime(TimeUnit::Milliseconds, None)),
                             _ => float_type(&mut field),
@@ -190,69 +209,80 @@ impl AExpr {
                         Ok(field)
                     },
                     Implode(expr) => {
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         field.coerce(DataType::List(field.dtype().clone().into()));
                         Ok(field)
                     },
                     Std(expr, _) => {
-                        *nested = nested.saturating_sub(1);
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         float_type(&mut field);
                         Ok(field)
                     },
                     Var(expr, _) => {
-                        *nested = nested.saturating_sub(1);
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         float_type(&mut field);
                         Ok(field)
                     },
                     NUnique(expr) => {
-                        *nested = 0;
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         field.coerce(IDX_DTYPE);
                         Ok(field)
                     },
                     Count(expr, _) => {
-                        *nested = 0;
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         field.coerce(IDX_DTYPE);
                         Ok(field)
                     },
                     AggGroups(expr) => {
-                        *nested = 1;
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = true;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         field.coerce(List(IDX_DTYPE.into()));
                         Ok(field)
                     },
                     Quantile { expr, .. } => {
-                        *nested = nested.saturating_sub(1);
-                        let mut field =
-                            arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let mut field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         float_type(&mut field);
                         Ok(field)
                     },
                     #[cfg(feature = "bitwise")]
                     Bitwise(expr, _) => {
-                        *nested = nested.saturating_sub(1);
-                        let field = arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                        *agg_list = false;
+                        let field = arena
+                            .get(*expr)
+                            .to_field_impl(schema, ctx, arena, &mut false)?;
                         // @Q? Do we need to coerce here?
                         Ok(field)
                     },
                 }
             },
             Cast { expr, dtype, .. } => {
-                let field = arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?;
+                let field = arena
+                    .get(*expr)
+                    .to_field_impl(schema, ctx, arena, agg_list)?;
                 Ok(Field::new(field.name().clone(), dtype.clone()))
             },
             Ternary { truthy, falsy, .. } => {
-                let mut nested_truthy = *nested;
-                let mut nested_falsy = *nested;
+                let mut agg_list_truthy = *agg_list;
+                let mut agg_list_falsy = *agg_list;
 
                 // During aggregation:
                 // left: col(foo):              list<T>         nesting: 1
@@ -261,11 +291,11 @@ impl AExpr {
                 let mut truthy =
                     arena
                         .get(*truthy)
-                        .to_field_impl(schema, ctx, arena, &mut nested_truthy)?;
+                        .to_field_impl(schema, ctx, arena, &mut agg_list_truthy)?;
                 let falsy =
                     arena
                         .get(*falsy)
-                        .to_field_impl(schema, ctx, arena, &mut nested_falsy)?;
+                        .to_field_impl(schema, ctx, arena, &mut agg_list_falsy)?;
 
                 let st = if let DataType::Null = *truthy.dtype() {
                     falsy.dtype().clone()
@@ -273,7 +303,7 @@ impl AExpr {
                     try_get_supertype(truthy.dtype(), falsy.dtype())?
                 };
 
-                *nested = std::cmp::max(nested_truthy, nested_falsy);
+                *agg_list = agg_list_truthy | agg_list_falsy;
 
                 truthy.coerce(st);
                 Ok(truthy)
@@ -284,14 +314,14 @@ impl AExpr {
                 options,
                 ..
             } => {
-                let fields = func_args_to_fields(input, ctx, schema, arena, nested)?;
+                let fields = func_args_to_fields(input, ctx, schema, arena, agg_list)?;
                 polars_ensure!(!fields.is_empty(), ComputeError: "expression: '{}' didn't get any inputs", options.fmt_str);
                 let out = output_type.get_field(schema, ctx, &fields)?;
 
                 if options.flags.contains(FunctionFlags::RETURNS_SCALAR) {
-                    *nested = 0;
+                    *agg_list = false;
                 } else if matches!(ctx, Context::Aggregation) {
-                    *nested += 1;
+                    *agg_list = true;
                 }
 
                 Ok(out)
@@ -301,19 +331,21 @@ impl AExpr {
                 input,
                 options,
             } => {
-                let fields = func_args_to_fields(input, ctx, schema, arena, nested)?;
+                let fields = func_args_to_fields(input, ctx, schema, arena, agg_list)?;
                 polars_ensure!(!fields.is_empty(), ComputeError: "expression: '{}' didn't get any inputs", function);
                 let out = function.get_field(schema, ctx, &fields)?;
 
                 if options.flags.contains(FunctionFlags::RETURNS_SCALAR) {
-                    *nested = 0;
+                    *agg_list = false;
                 } else if matches!(ctx, Context::Aggregation) {
-                    *nested += 1;
+                    *agg_list = true;
                 }
 
                 Ok(out)
             },
-            Slice { input, .. } => arena.get(*input).to_field_impl(schema, ctx, arena, nested),
+            Slice { input, .. } => arena
+                .get(*input)
+                .to_field_impl(schema, ctx, arena, agg_list),
         }
     }
 }
@@ -323,25 +355,28 @@ fn func_args_to_fields(
     ctx: Context,
     schema: &Schema,
     arena: &Arena<AExpr>,
-    nested: &mut u8,
+    agg_list: &mut bool,
 ) -> PolarsResult<Vec<Field>> {
-    let mut first = true;
     input
         .iter()
+        .enumerate()
         // Default context because `col()` would return a list in aggregation context
-        .map(|e| {
-            // Only mutate first nested as that is the dtype of the function.
-            let mut nested_tmp = *nested;
-            let nested = if first {
-                first = false;
-                &mut *nested
-            } else {
-                &mut nested_tmp
-            };
+        .map(|(i, e)| {
+            let tmp = &mut false;
 
             arena
                 .get(e.node())
-                .to_field_impl(schema, ctx, arena, nested)
+                .to_field_impl(
+                    schema,
+                    ctx,
+                    arena,
+                    if i == 0 {
+                        // Only mutate first agg_list as that is the dtype of the function.
+                        agg_list
+                    } else {
+                        tmp
+                    },
+                )
                 .map(|mut field| {
                     field.name = e.output_name().clone();
                     field
@@ -357,7 +392,7 @@ fn get_arithmetic_field(
     op: Operator,
     ctx: Context,
     schema: &Schema,
-    nested: &mut u8,
+    agg_list: &mut bool,
 ) -> PolarsResult<Field> {
     use DataType::*;
     let left_ae = arena.get(left);
@@ -371,11 +406,11 @@ fn get_arithmetic_field(
     // leading to quadratic behavior. # 4736
     //
     // further right_type is only determined when needed.
-    let mut left_field = left_ae.to_field_impl(schema, ctx, arena, nested)?;
+    let mut left_field = left_ae.to_field_impl(schema, ctx, arena, agg_list)?;
 
     let super_type = match op {
         Operator::Minus => {
-            let right_type = right_ae.to_field_impl(schema, ctx, arena, nested)?.dtype;
+            let right_type = right_ae.to_field_impl(schema, ctx, arena, agg_list)?.dtype;
             match (&left_field.dtype, &right_type) {
                 #[cfg(feature = "dtype-struct")]
                 (Struct(_), Struct(_)) => {
@@ -430,7 +465,7 @@ fn get_arithmetic_field(
             }
         },
         Operator::Plus => {
-            let right_type = right_ae.to_field_impl(schema, ctx, arena, nested)?.dtype;
+            let right_type = right_ae.to_field_impl(schema, ctx, arena, agg_list)?.dtype;
             match (&left_field.dtype, &right_type) {
                 (Duration(_), Datetime(_, _))
                 | (Datetime(_, _), Duration(_))
@@ -472,7 +507,7 @@ fn get_arithmetic_field(
             }
         },
         _ => {
-            let right_type = right_ae.to_field_impl(schema, ctx, arena, nested)?.dtype;
+            let right_type = right_ae.to_field_impl(schema, ctx, arena, agg_list)?.dtype;
 
             match (&left_field.dtype, &right_type) {
                 #[cfg(feature = "dtype-struct")]
@@ -558,10 +593,14 @@ fn get_truediv_field(
     arena: &Arena<AExpr>,
     ctx: Context,
     schema: &Schema,
-    nested: &mut u8,
+    agg_list: &mut bool,
 ) -> PolarsResult<Field> {
-    let mut left_field = arena.get(left).to_field_impl(schema, ctx, arena, nested)?;
-    let right_field = arena.get(right).to_field_impl(schema, ctx, arena, nested)?;
+    let mut left_field = arena
+        .get(left)
+        .to_field_impl(schema, ctx, arena, agg_list)?;
+    let right_field = arena
+        .get(right)
+        .to_field_impl(schema, ctx, arena, agg_list)?;
     use DataType::*;
 
     // TODO: Re-investigate this. A lot of "_" is being used on the RHS match because this code
diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py
index 78a277a3662f..a8f9e43d84c0 100644
--- a/py-polars/tests/unit/test_schema.py
+++ b/py-polars/tests/unit/test_schema.py
@@ -246,3 +246,35 @@ def test_lf_agg_lit_explode() -> None:
     schema = {"k": pl.Int64, "o": pl.List(pl.Int64)}
     assert q.collect_schema() == schema
     assert_frame_equal(q.collect(), pl.DataFrame({"k": 1, "o": [[1]]}, schema=schema))  # type: ignore[arg-type]
+
+
+@pytest.mark.parametrize("expr_op", [
+    "approx_n_unique", "arg_max", "arg_min", "bitwise_and", "bitwise_or",
+    "bitwise_xor", "count", "entropy", "first", "has_nulls", "implode", "kurtosis",
+    "last", "len", "lower_bound", "max", "mean", "median", "min", "n_unique", "nan_max",
+    "nan_min", "null_count", "product", "sample", "skew", "std", "sum", "upper_bound",
+    "var"
+])  # fmt: skip
+def test_lf_agg_auto_agg_list_19752(expr_op: str) -> None:
+    op = getattr(pl.Expr, expr_op)
+
+    lf = pl.LazyFrame({"a": 1, "b": 1})
+
+    q = lf.group_by("a").agg(pl.col("b").reverse().pipe(op))
+    assert q.collect_schema() == q.collect().collect_schema()
+
+    q = lf.group_by("a").agg(pl.col("b").shuffle().reverse().pipe(op))
+
+    assert q.collect_schema() == q.collect().collect_schema()
+
+
+@pytest.mark.parametrize(
+    "expr", [pl.col("b"), pl.col("b").sum(), pl.col("b").reverse()]
+)
+@pytest.mark.parametrize("mapping_strategy", ["explode", "join", "group_to_rows"])
+def test_lf_window_schema(expr: pl.Expr, mapping_strategy: str) -> None:
+    q = pl.LazyFrame({"a": 1, "b": 1}).select(
+        expr.over("a", mapping_strategy=mapping_strategy)  # type: ignore[arg-type]
+    )
+
+    assert q.collect_schema() == q.collect().collect_schema()

From 18786acd8d1eb68fc87982b07ce29ecbae0923f0 Mon Sep 17 00:00:00 2001
From: Itamar Turner-Trauring <itamar@itamarst.org>
Date: Wed, 13 Nov 2024 03:01:31 -0500
Subject: [PATCH 06/18] fix(python): Release GIL in Python APIs, part 1
 (#19705)

Co-authored-by: Itamar Turner-Trauring <itamar@pythonspeed.com>
---
 .../polars-python/src/series/aggregation.rs   | 118 +++++-----
 crates/polars-python/src/series/arithmetic.rs |  33 +--
 crates/polars-python/src/series/buffers.rs    |   9 +-
 crates/polars-python/src/series/comparison.rs |  79 ++++---
 .../polars-python/src/series/construction.rs  |  18 +-
 crates/polars-python/src/series/export.rs     |  14 +-
 crates/polars-python/src/series/general.rs    | 205 +++++++++++-------
 crates/polars-python/src/series/scatter.rs    |   5 +-
 8 files changed, 267 insertions(+), 214 deletions(-)

diff --git a/crates/polars-python/src/series/aggregation.rs b/crates/polars-python/src/series/aggregation.rs
index 5aa8ee16639e..c4fe8d3447ec 100644
--- a/crates/polars-python/src/series/aggregation.rs
+++ b/crates/polars-python/src/series/aggregation.rs
@@ -8,37 +8,39 @@ use crate::error::PyPolarsErr;
 
 #[pymethods]
 impl PySeries {
-    fn any(&self, ignore_nulls: bool) -> PyResult<Option<bool>> {
-        let s = self.series.bool().map_err(PyPolarsErr::from)?;
-        Ok(if ignore_nulls {
-            Some(s.any())
-        } else {
-            s.any_kleene()
+    fn any(&self, py: Python, ignore_nulls: bool) -> PyResult<Option<bool>> {
+        py.allow_threads(|| {
+            let s = self.series.bool().map_err(PyPolarsErr::from)?;
+            Ok(if ignore_nulls {
+                Some(s.any())
+            } else {
+                s.any_kleene()
+            })
         })
     }
 
-    fn all(&self, ignore_nulls: bool) -> PyResult<Option<bool>> {
-        let s = self.series.bool().map_err(PyPolarsErr::from)?;
-        Ok(if ignore_nulls {
-            Some(s.all())
-        } else {
-            s.all_kleene()
+    fn all(&self, py: Python, ignore_nulls: bool) -> PyResult<Option<bool>> {
+        py.allow_threads(|| {
+            let s = self.series.bool().map_err(PyPolarsErr::from)?;
+            Ok(if ignore_nulls {
+                Some(s.all())
+            } else {
+                s.all_kleene()
+            })
         })
     }
 
-    fn arg_max(&self) -> Option<usize> {
-        self.series.arg_max()
+    fn arg_max(&self, py: Python) -> Option<usize> {
+        py.allow_threads(|| self.series.arg_max())
     }
 
-    fn arg_min(&self) -> Option<usize> {
-        self.series.arg_min()
+    fn arg_min(&self, py: Python) -> Option<usize> {
+        py.allow_threads(|| self.series.arg_min())
     }
 
     fn max(&self, py: Python) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .max_reduce()
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.max_reduce().map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
@@ -47,49 +49,42 @@ impl PySeries {
     fn mean(&self, py: Python) -> PyResult<PyObject> {
         match self.series.dtype() {
             Boolean => Ok(Wrap(
-                self.series
-                    .cast(&DataType::UInt8)
-                    .unwrap()
-                    .mean_reduce()
+                py.allow_threads(|| self.series.cast(&DataType::UInt8).unwrap().mean_reduce())
                     .as_any_value(),
             )
             .into_py(py)),
             // For non-numeric output types we require mean_reduce.
-            dt if dt.is_temporal() => {
-                Ok(Wrap(self.series.mean_reduce().as_any_value()).into_py(py))
-            },
-            _ => Ok(self.series.mean().into_py(py)),
+            dt if dt.is_temporal() => Ok(Wrap(
+                py.allow_threads(|| self.series.mean_reduce())
+                    .as_any_value(),
+            )
+            .into_py(py)),
+            _ => Ok(py.allow_threads(|| self.series.mean()).into_py(py)),
         }
     }
 
     fn median(&self, py: Python) -> PyResult<PyObject> {
         match self.series.dtype() {
             Boolean => Ok(Wrap(
-                self.series
-                    .cast(&DataType::UInt8)
-                    .unwrap()
-                    .median_reduce()
+                py.allow_threads(|| self.series.cast(&DataType::UInt8).unwrap().median_reduce())
                     .map_err(PyPolarsErr::from)?
                     .as_any_value(),
             )
             .into_py(py)),
             // For non-numeric output types we require median_reduce.
             dt if dt.is_temporal() => Ok(Wrap(
-                self.series
-                    .median_reduce()
+                py.allow_threads(|| self.series.median_reduce())
                     .map_err(PyPolarsErr::from)?
                     .as_any_value(),
             )
             .into_py(py)),
-            _ => Ok(self.series.median().into_py(py)),
+            _ => Ok(py.allow_threads(|| self.series.median()).into_py(py)),
         }
     }
 
     fn min(&self, py: Python) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .min_reduce()
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.min_reduce().map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
@@ -97,26 +92,27 @@ impl PySeries {
 
     fn product(&self, py: Python) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .product()
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.product().map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
     }
 
-    fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileMethod>) -> PyResult<PyObject> {
-        let bind = self.series.quantile_reduce(quantile, interpolation.0);
+    fn quantile(
+        &self,
+        py: Python,
+        quantile: f64,
+        interpolation: Wrap<QuantileMethod>,
+    ) -> PyResult<PyObject> {
+        let bind = py.allow_threads(|| self.series.quantile_reduce(quantile, interpolation.0));
         let sc = bind.map_err(PyPolarsErr::from)?;
 
-        Ok(Python::with_gil(|py| Wrap(sc.as_any_value()).into_py(py)))
+        Ok(Wrap(sc.as_any_value()).into_py(py))
     }
 
     fn std(&self, py: Python, ddof: u8) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .std_reduce(ddof)
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.std_reduce(ddof).map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
@@ -124,9 +120,7 @@ impl PySeries {
 
     fn var(&self, py: Python, ddof: u8) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .var_reduce(ddof)
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.var_reduce(ddof).map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
@@ -134,37 +128,31 @@ impl PySeries {
 
     fn sum(&self, py: Python) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .sum_reduce()
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.sum_reduce().map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
     }
 
     fn first(&self, py: Python) -> PyObject {
-        Wrap(self.series.first().as_any_value()).into_py(py)
+        Wrap(py.allow_threads(|| self.series.first()).as_any_value()).into_py(py)
     }
 
     fn last(&self, py: Python) -> PyObject {
-        Wrap(self.series.last().as_any_value()).into_py(py)
+        Wrap(py.allow_threads(|| self.series.last()).as_any_value()).into_py(py)
     }
 
     #[cfg(feature = "approx_unique")]
     fn approx_n_unique(&self, py: Python) -> PyResult<PyObject> {
-        Ok(self
-            .series
-            .approx_n_unique()
-            .map_err(PyPolarsErr::from)?
+        Ok(py
+            .allow_threads(|| self.series.approx_n_unique().map_err(PyPolarsErr::from))?
             .into_py(py))
     }
 
     #[cfg(feature = "bitwise")]
     fn bitwise_and(&self, py: Python) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .and_reduce()
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.and_reduce().map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
@@ -173,9 +161,7 @@ impl PySeries {
     #[cfg(feature = "bitwise")]
     fn bitwise_or(&self, py: Python) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .or_reduce()
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.or_reduce().map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
@@ -184,9 +170,7 @@ impl PySeries {
     #[cfg(feature = "bitwise")]
     fn bitwise_xor(&self, py: Python) -> PyResult<PyObject> {
         Ok(Wrap(
-            self.series
-                .xor_reduce()
-                .map_err(PyPolarsErr::from)?
+            py.allow_threads(|| self.series.xor_reduce().map_err(PyPolarsErr::from))?
                 .as_any_value(),
         )
         .into_py(py))
diff --git a/crates/polars-python/src/series/arithmetic.rs b/crates/polars-python/src/series/arithmetic.rs
index c5483aced1e7..62edd00a7656 100644
--- a/crates/polars-python/src/series/arithmetic.rs
+++ b/crates/polars-python/src/series/arithmetic.rs
@@ -6,28 +6,33 @@ use crate::error::PyPolarsErr;
 
 #[pymethods]
 impl PySeries {
-    fn add(&self, other: &PySeries) -> PyResult<Self> {
-        Ok((&self.series + &other.series)
+    fn add(&self, py: Python, other: &PySeries) -> PyResult<Self> {
+        Ok(py
+            .allow_threads(|| &self.series + &other.series)
             .map(Into::into)
             .map_err(PyPolarsErr::from)?)
     }
-    fn sub(&self, other: &PySeries) -> PyResult<Self> {
-        Ok((&self.series - &other.series)
+    fn sub(&self, py: Python, other: &PySeries) -> PyResult<Self> {
+        Ok(py
+            .allow_threads(|| &self.series - &other.series)
             .map(Into::into)
             .map_err(PyPolarsErr::from)?)
     }
-    fn div(&self, other: &PySeries) -> PyResult<Self> {
-        Ok((&self.series / &other.series)
+    fn div(&self, py: Python, other: &PySeries) -> PyResult<Self> {
+        Ok(py
+            .allow_threads(|| &self.series / &other.series)
             .map(Into::into)
             .map_err(PyPolarsErr::from)?)
     }
-    fn mul(&self, other: &PySeries) -> PyResult<Self> {
-        Ok((&self.series * &other.series)
+    fn mul(&self, py: Python, other: &PySeries) -> PyResult<Self> {
+        Ok(py
+            .allow_threads(|| &self.series * &other.series)
             .map(Into::into)
             .map_err(PyPolarsErr::from)?)
     }
-    fn rem(&self, other: &PySeries) -> PyResult<Self> {
-        Ok((&self.series % &other.series)
+    fn rem(&self, py: Python, other: &PySeries) -> PyResult<Self> {
+        Ok(py
+            .allow_threads(|| &self.series % &other.series)
             .map(Into::into)
             .map_err(PyPolarsErr::from)?)
     }
@@ -37,8 +42,8 @@ macro_rules! impl_arithmetic {
     ($name:ident, $type:ty, $operand:tt) => {
         #[pymethods]
         impl PySeries {
-            fn $name(&self, other: $type) -> PyResult<Self> {
-                Ok((&self.series $operand other).into())
+            fn $name(&self, py: Python, other: $type) -> PyResult<Self> {
+                Ok(py.allow_threads(|| {&self.series $operand other}).into())
             }
         }
     };
@@ -103,8 +108,8 @@ macro_rules! impl_rhs_arithmetic {
     ($name:ident, $type:ty, $operand:ident) => {
         #[pymethods]
         impl PySeries {
-            fn $name(&self, other: $type) -> PyResult<Self> {
-                Ok(other.$operand(&self.series).into())
+            fn $name(&self, py: Python, other: $type) -> PyResult<Self> {
+                Ok(py.allow_threads(|| other.$operand(&self.series)).into())
             }
         }
     };
diff --git a/crates/polars-python/src/series/buffers.rs b/crates/polars-python/src/series/buffers.rs
index 939159220277..e3b9402d4d47 100644
--- a/crates/polars-python/src/series/buffers.rs
+++ b/crates/polars-python/src/series/buffers.rs
@@ -82,9 +82,9 @@ impl PySeries {
     }
 
     /// Return the underlying values, validity, and offsets buffers as Series.
-    fn _get_buffers(&self) -> PyResult<(Self, Option<Self>, Option<Self>)> {
+    fn _get_buffers(&self, py: Python) -> PyResult<(Self, Option<Self>, Option<Self>)> {
         let s = &self.series;
-        match s.dtype().to_physical() {
+        py.allow_threads(|| match s.dtype().to_physical() {
             dt if dt.is_numeric() => get_buffers_from_primitive(s),
             DataType::Boolean => get_buffers_from_primitive(s),
             DataType::String => get_buffers_from_string(s),
@@ -92,7 +92,7 @@ impl PySeries {
                 let msg = format!("`_get_buffers` not implemented for `dtype` {dt}");
                 Err(PyTypeError::new_err(msg))
             },
-        }
+        })
     }
 }
 
@@ -253,6 +253,7 @@ impl PySeries {
     #[staticmethod]
     #[pyo3(signature = (dtype, data, validity=None))]
     unsafe fn _from_buffers(
+        py: Python,
         dtype: Wrap<DataType>,
         data: Vec<PySeries>,
         validity: Option<PySeries>,
@@ -320,7 +321,7 @@ impl PySeries {
                     )),
                 };
                 let values = series_to_buffer::<UInt8Type>(values);
-                from_buffers_string_impl(values, validity, offsets)?
+                py.allow_threads(|| from_buffers_string_impl(values, validity, offsets))?
             },
             dt => {
                 let msg = format!("`_from_buffers` not implemented for `dtype` {dt}");
diff --git a/crates/polars-python/src/series/comparison.rs b/crates/polars-python/src/series/comparison.rs
index 7064edb7698a..2b7de37931f9 100644
--- a/crates/polars-python/src/series/comparison.rs
+++ b/crates/polars-python/src/series/comparison.rs
@@ -6,36 +6,45 @@ use crate::PySeries;
 
 #[pymethods]
 impl PySeries {
-    fn eq(&self, rhs: &PySeries) -> PyResult<Self> {
-        let s = self.series.equal(&rhs.series).map_err(PyPolarsErr::from)?;
+    fn eq(&self, py: Python, rhs: &PySeries) -> PyResult<Self> {
+        let s = py
+            .allow_threads(|| self.series.equal(&rhs.series))
+            .map_err(PyPolarsErr::from)?;
         Ok(s.into_series().into())
     }
 
-    fn neq(&self, rhs: &PySeries) -> PyResult<Self> {
-        let s = self
-            .series
-            .not_equal(&rhs.series)
+    fn neq(&self, py: Python, rhs: &PySeries) -> PyResult<Self> {
+        let s = py
+            .allow_threads(|| self.series.not_equal(&rhs.series))
             .map_err(PyPolarsErr::from)?;
         Ok(s.into_series().into())
     }
 
-    fn gt(&self, rhs: &PySeries) -> PyResult<Self> {
-        let s = self.series.gt(&rhs.series).map_err(PyPolarsErr::from)?;
+    fn gt(&self, py: Python, rhs: &PySeries) -> PyResult<Self> {
+        let s = py
+            .allow_threads(|| self.series.gt(&rhs.series))
+            .map_err(PyPolarsErr::from)?;
         Ok(s.into_series().into())
     }
 
-    fn gt_eq(&self, rhs: &PySeries) -> PyResult<Self> {
-        let s = self.series.gt_eq(&rhs.series).map_err(PyPolarsErr::from)?;
+    fn gt_eq(&self, py: Python, rhs: &PySeries) -> PyResult<Self> {
+        let s = py
+            .allow_threads(|| self.series.gt_eq(&rhs.series))
+            .map_err(PyPolarsErr::from)?;
         Ok(s.into_series().into())
     }
 
-    fn lt(&self, rhs: &PySeries) -> PyResult<Self> {
-        let s = self.series.lt(&rhs.series).map_err(PyPolarsErr::from)?;
+    fn lt(&self, py: Python, rhs: &PySeries) -> PyResult<Self> {
+        let s = py
+            .allow_threads(|| self.series.lt(&rhs.series))
+            .map_err(PyPolarsErr::from)?;
         Ok(s.into_series().into())
     }
 
-    fn lt_eq(&self, rhs: &PySeries) -> PyResult<Self> {
-        let s = self.series.lt_eq(&rhs.series).map_err(PyPolarsErr::from)?;
+    fn lt_eq(&self, py: Python, rhs: &PySeries) -> PyResult<Self> {
+        let s = py
+            .allow_threads(|| self.series.lt_eq(&rhs.series))
+            .map_err(PyPolarsErr::from)?;
         Ok(s.into_series().into())
     }
 }
@@ -44,8 +53,10 @@ macro_rules! impl_eq_num {
     ($name:ident, $type:ty) => {
         #[pymethods]
         impl PySeries {
-            fn $name(&self, rhs: $type) -> PyResult<Self> {
-                let s = self.series.equal(rhs).map_err(PyPolarsErr::from)?;
+            fn $name(&self, py: Python, rhs: $type) -> PyResult<Self> {
+                let s = py
+                    .allow_threads(|| self.series.equal(rhs))
+                    .map_err(PyPolarsErr::from)?;
                 Ok(s.into_series().into())
             }
         }
@@ -69,8 +80,10 @@ macro_rules! impl_neq_num {
         #[allow(clippy::nonstandard_macro_braces)]
         #[pymethods]
         impl PySeries {
-            fn $name(&self, rhs: $type) -> PyResult<Self> {
-                let s = self.series.not_equal(rhs).map_err(PyPolarsErr::from)?;
+            fn $name(&self, py: Python, rhs: $type) -> PyResult<Self> {
+                let s = py
+                    .allow_threads(|| self.series.not_equal(rhs))
+                    .map_err(PyPolarsErr::from)?;
                 Ok(s.into_series().into())
             }
         }
@@ -93,8 +106,10 @@ macro_rules! impl_gt_num {
     ($name:ident, $type:ty) => {
         #[pymethods]
         impl PySeries {
-            fn $name(&self, rhs: $type) -> PyResult<Self> {
-                let s = self.series.gt(rhs).map_err(PyPolarsErr::from)?;
+            fn $name(&self, py: Python, rhs: $type) -> PyResult<Self> {
+                let s = py
+                    .allow_threads(|| self.series.gt(rhs))
+                    .map_err(PyPolarsErr::from)?;
                 Ok(s.into_series().into())
             }
         }
@@ -117,8 +132,10 @@ macro_rules! impl_gt_eq_num {
     ($name:ident, $type:ty) => {
         #[pymethods]
         impl PySeries {
-            fn $name(&self, rhs: $type) -> PyResult<Self> {
-                let s = self.series.gt_eq(rhs).map_err(PyPolarsErr::from)?;
+            fn $name(&self, py: Python, rhs: $type) -> PyResult<Self> {
+                let s = py
+                    .allow_threads(|| self.series.gt_eq(rhs))
+                    .map_err(PyPolarsErr::from)?;
                 Ok(s.into_series().into())
             }
         }
@@ -142,8 +159,10 @@ macro_rules! impl_lt_num {
         #[allow(clippy::nonstandard_macro_braces)]
         #[pymethods]
         impl PySeries {
-            fn $name(&self, rhs: $type) -> PyResult<Self> {
-                let s = self.series.lt(rhs).map_err(PyPolarsErr::from)?;
+            fn $name(&self, py: Python, rhs: $type) -> PyResult<Self> {
+                let s = py
+                    .allow_threads(|| self.series.lt(rhs))
+                    .map_err(PyPolarsErr::from)?;
                 Ok(s.into_series().into())
             }
         }
@@ -166,8 +185,10 @@ macro_rules! impl_lt_eq_num {
     ($name:ident, $type:ty) => {
         #[pymethods]
         impl PySeries {
-            fn $name(&self, rhs: $type) -> PyResult<Self> {
-                let s = self.series.lt_eq(rhs).map_err(PyPolarsErr::from)?;
+            fn $name(&self, py: Python, rhs: $type) -> PyResult<Self> {
+                let s = py
+                    .allow_threads(|| self.series.lt_eq(rhs))
+                    .map_err(PyPolarsErr::from)?;
                 Ok(s.into_series().into())
             }
         }
@@ -226,12 +247,14 @@ macro_rules! impl_decimal {
     ($name:ident, $method:ident) => {
         #[pymethods]
         impl PySeries {
-            fn $name(&self, rhs: PyDecimal) -> PyResult<Self> {
+            fn $name(&self, py: Python, rhs: PyDecimal) -> PyResult<Self> {
                 let rhs = Series::new(
                     PlSmallStr::from_static("decimal"),
                     &[AnyValue::Decimal(rhs.0, rhs.1)],
                 );
-                let s = self.series.$method(&rhs).map_err(PyPolarsErr::from)?;
+                let s = py
+                    .allow_threads(|| self.series.$method(&rhs))
+                    .map_err(PyPolarsErr::from)?;
                 Ok(s.into_series().into())
             }
         }
diff --git a/crates/polars-python/src/series/construction.rs b/crates/polars-python/src/series/construction.rs
index 5935f1e7b0ce..e9dbdf264d8c 100644
--- a/crates/polars-python/src/series/construction.rs
+++ b/crates/polars-python/src/series/construction.rs
@@ -71,10 +71,11 @@ impl PySeries {
         if nan_is_null {
             let array = array.readonly();
             let vals = array.as_slice().unwrap();
-            let ca: Float32Chunked = vals
-                .iter()
-                .map(|&val| if f32::is_nan(val) { None } else { Some(val) })
-                .collect_trusted();
+            let ca: Float32Chunked = py.allow_threads(|| {
+                vals.iter()
+                    .map(|&val| if f32::is_nan(val) { None } else { Some(val) })
+                    .collect_trusted()
+            });
             ca.with_name(name.into()).into_series().into()
         } else {
             mmap_numpy_array(py, name, array)
@@ -86,10 +87,11 @@ impl PySeries {
         if nan_is_null {
             let array = array.readonly();
             let vals = array.as_slice().unwrap();
-            let ca: Float64Chunked = vals
-                .iter()
-                .map(|&val| if f64::is_nan(val) { None } else { Some(val) })
-                .collect_trusted();
+            let ca: Float64Chunked = py.allow_threads(|| {
+                vals.iter()
+                    .map(|&val| if f64::is_nan(val) { None } else { Some(val) })
+                    .collect_trusted()
+            });
             ca.with_name(name.into()).into_series().into()
         } else {
             mmap_numpy_array(py, name, array)
diff --git a/crates/polars-python/src/series/export.rs b/crates/polars-python/src/series/export.rs
index 886b6114427a..959b2dd47293 100644
--- a/crates/polars-python/src/series/export.rs
+++ b/crates/polars-python/src/series/export.rs
@@ -147,17 +147,11 @@ impl PySeries {
 
     /// Return the underlying Arrow array.
     #[allow(clippy::wrong_self_convention)]
-    fn to_arrow(&mut self, compat_level: PyCompatLevel) -> PyResult<PyObject> {
-        self.rechunk(true);
-        Python::with_gil(|py| {
-            let pyarrow = py.import_bound("pyarrow")?;
+    fn to_arrow(&mut self, py: Python, compat_level: PyCompatLevel) -> PyResult<PyObject> {
+        self.rechunk(py, true);
+        let pyarrow = py.import_bound("pyarrow")?;
 
-            interop::arrow::to_py::to_py_array(
-                self.series.to_arrow(0, compat_level.0),
-                py,
-                &pyarrow,
-            )
-        })
+        interop::arrow::to_py::to_py_array(self.series.to_arrow(0, compat_level.0), py, &pyarrow)
     }
 
     #[allow(unused_variables)]
diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs
index 7312995d7606..3134f5354f09 100644
--- a/crates/polars-python/src/series/general.rs
+++ b/crates/polars-python/src/series/general.rs
@@ -16,9 +16,9 @@ use crate::py_modules::POLARS;
 
 #[pymethods]
 impl PySeries {
-    fn struct_unnest(&self) -> PyResult<PyDataFrame> {
+    fn struct_unnest(&self, py: Python) -> PyResult<PyDataFrame> {
         let ca = self.series.struct_().map_err(PyPolarsErr::from)?;
-        let df: DataFrame = ca.clone().unnest();
+        let df: DataFrame = py.allow_threads(|| ca.clone().unnest());
         Ok(df.into())
     }
 
@@ -56,9 +56,9 @@ impl PySeries {
         Ok(ca.get_rev_map().is_local())
     }
 
-    pub fn cat_to_local(&self) -> PyResult<Self> {
+    pub fn cat_to_local(&self, py: Python) -> PyResult<Self> {
         let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
-        Ok(ca.to_local().into_series().into())
+        Ok(py.allow_threads(|| ca.to_local().into_series().into()))
     }
 
     fn estimated_size(&self) -> usize {
@@ -78,15 +78,14 @@ impl PySeries {
     }
 
     #[cfg(feature = "dtype-array")]
-    fn reshape(&self, dims: Vec<i64>) -> PyResult<Self> {
+    fn reshape(&self, py: Python, dims: Vec<i64>) -> PyResult<Self> {
         let dims = dims
             .into_iter()
             .map(ReshapeDimension::new)
             .collect::<Vec<_>>();
 
-        let out = self
-            .series
-            .reshape_array(&dims)
+        let out = py
+            .allow_threads(|| self.series.reshape_array(&dims))
             .map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
@@ -114,8 +113,8 @@ impl PySeries {
         }
     }
 
-    pub fn rechunk(&mut self, in_place: bool) -> Option<Self> {
-        let series = self.series.rechunk();
+    pub fn rechunk(&mut self, py: Python, in_place: bool) -> Option<Self> {
+        let series = py.allow_threads(|| self.series.rechunk());
         if in_place {
             self.series = series;
             None
@@ -167,16 +166,23 @@ impl PySeries {
         self.get_index(py, index)
     }
 
-    fn bitand(&self, other: &PySeries) -> PyResult<Self> {
-        let out = (&self.series & &other.series).map_err(PyPolarsErr::from)?;
+    fn bitand(&self, py: Python, other: &PySeries) -> PyResult<Self> {
+        let out = py
+            .allow_threads(|| &self.series & &other.series)
+            .map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
-    fn bitor(&self, other: &PySeries) -> PyResult<Self> {
-        let out = (&self.series | &other.series).map_err(PyPolarsErr::from)?;
+
+    fn bitor(&self, py: Python, other: &PySeries) -> PyResult<Self> {
+        let out = py
+            .allow_threads(|| &self.series | &other.series)
+            .map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
-    fn bitxor(&self, other: &PySeries) -> PyResult<Self> {
-        let out = (&self.series ^ &other.series).map_err(PyPolarsErr::from)?;
+    fn bitxor(&self, py: Python, other: &PySeries) -> PyResult<Self> {
+        let out = py
+            .allow_threads(|| &self.series ^ &other.series)
+            .map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
 
@@ -217,48 +223,58 @@ impl PySeries {
         Ok(())
     }
 
-    fn extend(&mut self, other: &PySeries) -> PyResult<()> {
-        self.series
-            .extend(&other.series)
+    fn extend(&mut self, py: Python, other: &PySeries) -> PyResult<()> {
+        py.allow_threads(|| self.series.extend(&other.series))
             .map_err(PyPolarsErr::from)?;
         Ok(())
     }
 
-    fn new_from_index(&self, index: usize, length: usize) -> PyResult<Self> {
+    fn new_from_index(&self, py: Python, index: usize, length: usize) -> PyResult<Self> {
         if index >= self.series.len() {
             Err(PyValueError::new_err("index is out of bounds"))
         } else {
-            Ok(self.series.new_from_index(index, length).into())
+            Ok(py.allow_threads(|| self.series.new_from_index(index, length).into()))
         }
     }
 
-    fn filter(&self, filter: &PySeries) -> PyResult<Self> {
+    fn filter(&self, py: Python, filter: &PySeries) -> PyResult<Self> {
         let filter_series = &filter.series;
         if let Ok(ca) = filter_series.bool() {
-            let series = self.series.filter(ca).map_err(PyPolarsErr::from)?;
+            let series = py
+                .allow_threads(|| self.series.filter(ca))
+                .map_err(PyPolarsErr::from)?;
             Ok(PySeries { series })
         } else {
             Err(PyRuntimeError::new_err("Expected a boolean mask"))
         }
     }
 
-    fn sort(&mut self, descending: bool, nulls_last: bool, multithreaded: bool) -> PyResult<Self> {
-        Ok(self
-            .series
-            .sort(
-                SortOptions::default()
-                    .with_order_descending(descending)
-                    .with_nulls_last(nulls_last)
-                    .with_multithreaded(multithreaded),
-            )
+    fn sort(
+        &mut self,
+        py: Python,
+        descending: bool,
+        nulls_last: bool,
+        multithreaded: bool,
+    ) -> PyResult<Self> {
+        Ok(py
+            .allow_threads(|| {
+                self.series.sort(
+                    SortOptions::default()
+                        .with_order_descending(descending)
+                        .with_nulls_last(nulls_last)
+                        .with_multithreaded(multithreaded),
+                )
+            })
             .map_err(PyPolarsErr::from)?
             .into())
     }
 
-    fn gather_with_series(&self, indices: &PySeries) -> PyResult<Self> {
-        let indices = indices.series.idx().map_err(PyPolarsErr::from)?;
-        let s = self.series.take(indices).map_err(PyPolarsErr::from)?;
-        Ok(s.into())
+    fn gather_with_series(&self, py: Python, indices: &PySeries) -> PyResult<Self> {
+        py.allow_threads(|| {
+            let indices = indices.series.idx().map_err(PyPolarsErr::from)?;
+            let s = self.series.take(indices).map_err(PyPolarsErr::from)?;
+            Ok(s.into())
+        })
     }
 
     fn null_count(&self) -> PyResult<usize> {
@@ -271,6 +287,7 @@ impl PySeries {
 
     fn equals(
         &self,
+        py: Python,
         other: &PySeries,
         check_dtypes: bool,
         check_names: bool,
@@ -283,9 +300,9 @@ impl PySeries {
             return false;
         }
         if null_equal {
-            self.series.equals_missing(&other.series)
+            py.allow_threads(|| self.series.equals_missing(&other.series))
         } else {
-            self.series.equals(&other.series)
+            py.allow_threads(|| self.series.equals(&other.series))
         }
     }
 
@@ -300,8 +317,10 @@ impl PySeries {
 
     /// Rechunk and return a pointer to the start of the Series.
     /// Only implemented for numeric types
-    fn as_single_ptr(&mut self) -> PyResult<usize> {
-        let ptr = self.series.as_single_ptr().map_err(PyPolarsErr::from)?;
+    fn as_single_ptr(&mut self, py: Python) -> PyResult<usize> {
+        let ptr = py
+            .allow_threads(|| self.series.as_single_ptr())
+            .map_err(PyPolarsErr::from)?;
         Ok(ptr)
     }
 
@@ -309,20 +328,23 @@ impl PySeries {
         self.series.clone().into()
     }
 
-    fn zip_with(&self, mask: &PySeries, other: &PySeries) -> PyResult<Self> {
+    fn zip_with(&self, py: Python, mask: &PySeries, other: &PySeries) -> PyResult<Self> {
         let mask = mask.series.bool().map_err(PyPolarsErr::from)?;
-        let s = self
-            .series
-            .zip_with(mask, &other.series)
+        let s = py
+            .allow_threads(|| self.series.zip_with(mask, &other.series))
             .map_err(PyPolarsErr::from)?;
         Ok(s.into())
     }
 
     #[pyo3(signature = (separator, drop_first=false))]
-    fn to_dummies(&self, separator: Option<&str>, drop_first: bool) -> PyResult<PyDataFrame> {
-        let df = self
-            .series
-            .to_dummies(separator, drop_first)
+    fn to_dummies(
+        &self,
+        py: Python,
+        separator: Option<&str>,
+        drop_first: bool,
+    ) -> PyResult<PyDataFrame> {
+        let df = py
+            .allow_threads(|| self.series.to_dummies(separator, drop_first))
             .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
@@ -332,18 +354,22 @@ impl PySeries {
         Some(ca.get_as_series(index)?.into())
     }
 
-    fn n_unique(&self) -> PyResult<usize> {
-        let n = self.series.n_unique().map_err(PyPolarsErr::from)?;
+    fn n_unique(&self, py: Python) -> PyResult<usize> {
+        let n = py
+            .allow_threads(|| self.series.n_unique())
+            .map_err(PyPolarsErr::from)?;
         Ok(n)
     }
 
-    fn floor(&self) -> PyResult<Self> {
-        let s = self.series.floor().map_err(PyPolarsErr::from)?;
+    fn floor(&self, py: Python) -> PyResult<Self> {
+        let s = py
+            .allow_threads(|| self.series.floor())
+            .map_err(PyPolarsErr::from)?;
         Ok(s.into())
     }
 
-    fn shrink_to_fit(&mut self) {
-        self.series.shrink_to_fit();
+    fn shrink_to_fit(&mut self, py: Python) {
+        py.allow_threads(|| self.series.shrink_to_fit());
     }
 
     fn dot(&self, other: &PySeries, py: Python) -> PyResult<PyObject> {
@@ -358,15 +384,11 @@ impl PySeries {
         }
 
         let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() {
-            (&self.series * &other.series)
-                .map_err(PyPolarsErr::from)?
-                .sum::<f64>()
+            py.allow_threads(|| (&self.series * &other.series)?.sum::<f64>())
                 .map_err(PyPolarsErr::from)?
                 .into()
         } else {
-            (&self.series * &other.series)
-                .map_err(PyPolarsErr::from)?
-                .sum::<i64>()
+            py.allow_threads(|| (&self.series * &other.series)?.sum::<i64>())
                 .map_err(PyPolarsErr::from)?
                 .into()
         };
@@ -413,20 +435,27 @@ impl PySeries {
         }
     }
 
-    fn skew(&self, bias: bool) -> PyResult<Option<f64>> {
-        let out = self.series.skew(bias).map_err(PyPolarsErr::from)?;
+    fn skew(&self, py: Python, bias: bool) -> PyResult<Option<f64>> {
+        let out = py
+            .allow_threads(|| self.series.skew(bias))
+            .map_err(PyPolarsErr::from)?;
         Ok(out)
     }
 
-    fn kurtosis(&self, fisher: bool, bias: bool) -> PyResult<Option<f64>> {
-        let out = self
-            .series
-            .kurtosis(fisher, bias)
+    fn kurtosis(&self, py: Python, fisher: bool, bias: bool) -> PyResult<Option<f64>> {
+        let out = py
+            .allow_threads(|| self.series.kurtosis(fisher, bias))
             .map_err(PyPolarsErr::from)?;
         Ok(out)
     }
 
-    fn cast(&self, dtype: Wrap<DataType>, strict: bool, wrap_numerical: bool) -> PyResult<Self> {
+    fn cast(
+        &self,
+        py: Python,
+        dtype: Wrap<DataType>,
+        strict: bool,
+        wrap_numerical: bool,
+    ) -> PyResult<Self> {
         let options = if wrap_numerical {
             CastOptions::Overflowing
         } else if strict {
@@ -436,7 +465,7 @@ impl PySeries {
         };
 
         let dtype = dtype.0;
-        let out = self.series.cast_with_options(&dtype, options);
+        let out = py.allow_threads(|| self.series.cast_with_options(&dtype, options));
         let out = out.map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
@@ -451,7 +480,7 @@ impl PySeries {
         })
     }
 
-    fn is_sorted(&self, descending: bool, nulls_last: bool) -> PyResult<bool> {
+    fn is_sorted(&self, py: Python, descending: bool, nulls_last: bool) -> PyResult<bool> {
         let options = SortOptions {
             descending,
             nulls_last,
@@ -459,31 +488,36 @@ impl PySeries {
             maintain_order: false,
             limit: None,
         };
-        Ok(self.series.is_sorted(options).map_err(PyPolarsErr::from)?)
+        Ok(py
+            .allow_threads(|| self.series.is_sorted(options))
+            .map_err(PyPolarsErr::from)?)
     }
 
     fn clear(&self) -> Self {
         self.series.clear().into()
     }
 
-    fn head(&self, n: usize) -> Self {
-        self.series.head(Some(n)).into()
+    fn head(&self, py: Python, n: usize) -> Self {
+        py.allow_threads(|| self.series.head(Some(n))).into()
     }
 
-    fn tail(&self, n: usize) -> Self {
-        self.series.tail(Some(n)).into()
+    fn tail(&self, py: Python, n: usize) -> Self {
+        py.allow_threads(|| self.series.tail(Some(n))).into()
     }
 
     fn value_counts(
         &self,
+        py: Python,
         sort: bool,
         parallel: bool,
         name: String,
         normalize: bool,
     ) -> PyResult<PyDataFrame> {
-        let out = self
-            .series
-            .value_counts(sort, parallel, name.into(), normalize)
+        let out = py
+            .allow_threads(|| {
+                self.series
+                    .value_counts(sort, parallel, name.into(), normalize)
+            })
             .map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
@@ -494,8 +528,10 @@ impl PySeries {
         self.series.slice(offset, length).into()
     }
 
-    pub fn not_(&self) -> PyResult<Self> {
-        let out = polars_ops::series::negate_bitwise(&self.series).map_err(PyPolarsErr::from)?;
+    pub fn not_(&self, py: Python) -> PyResult<Self> {
+        let out = py
+            .allow_threads(|| polars_ops::series::negate_bitwise(&self.series))
+            .map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
 }
@@ -516,8 +552,15 @@ macro_rules! impl_set_with_mask {
         #[pymethods]
         impl PySeries {
             #[pyo3(signature = (filter, value))]
-            fn $name(&self, filter: &PySeries, value: Option<$native>) -> PyResult<Self> {
-                let series = $name(&self.series, filter, value).map_err(PyPolarsErr::from)?;
+            fn $name(
+                &self,
+                py: Python,
+                filter: &PySeries,
+                value: Option<$native>,
+            ) -> PyResult<Self> {
+                let series = py
+                    .allow_threads(|| $name(&self.series, filter, value))
+                    .map_err(PyPolarsErr::from)?;
                 Ok(Self::new(series))
             }
         }
diff --git a/crates/polars-python/src/series/scatter.rs b/crates/polars-python/src/series/scatter.rs
index 97df60ef205b..798cd189a9b6 100644
--- a/crates/polars-python/src/series/scatter.rs
+++ b/crates/polars-python/src/series/scatter.rs
@@ -7,11 +7,12 @@ use crate::error::PyPolarsErr;
 
 #[pymethods]
 impl PySeries {
-    fn scatter(&mut self, idx: PySeries, values: PySeries) -> PyResult<()> {
+    fn scatter(&mut self, py: Python, idx: PySeries, values: PySeries) -> PyResult<()> {
         // we take the value because we want a ref count of 1 so that we can
         // have mutable access cheaply via _get_inner_mut().
         let s = std::mem::take(&mut self.series);
-        match scatter(s, &idx.series, &values.series) {
+        let result = py.allow_threads(|| scatter(s, &idx.series, &values.series));
+        match result {
             Ok(out) => {
                 self.series = out;
                 Ok(())

From 8cb78391619968b1f596938d9a734b20b58f544e Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <me@gburghoorn.com>
Date: Wed, 13 Nov 2024 10:08:32 +0100
Subject: [PATCH 07/18] feat: Add IPC source node for new streaming engine
 (#19454)

Co-authored-by: Orson Peters <orsonpeters@gmail.com>
---
 crates/polars-arrow/src/io/ipc/read/common.rs |  18 +-
 crates/polars-arrow/src/io/ipc/read/file.rs   |   2 +-
 crates/polars-arrow/src/io/ipc/read/mod.rs    |   1 +
 crates/polars-arrow/src/io/ipc/read/reader.rs |  90 ++-
 crates/polars-arrow/src/io/ipc/read/stream.rs |  17 +-
 crates/polars-arrow/src/record_batch.rs       |  22 +-
 crates/polars-core/src/frame/mod.rs           |  26 +
 .../polars-core/src/frame/upstream_traits.rs  |  28 +
 crates/polars-core/src/scalar/from.rs         |   3 +
 crates/polars-io/src/utils/other.rs           |   8 +-
 crates/polars-plan/src/plans/optimizer/mod.rs |   3 +-
 .../src/plans/optimizer/slice_pushdown_lp.rs  |  30 +-
 .../polars-stream/src/nodes/io_sources/ipc.rs | 557 ++++++++++++++++++
 .../polars-stream/src/nodes/io_sources/mod.rs |   1 +
 crates/polars-stream/src/nodes/mod.rs         |   1 +
 .../src/physical_plan/lower_ir.rs             |  67 ++-
 .../src/physical_plan/to_graph.rs             |  17 +
 py-polars/tests/unit/io/test_lazy_ipc.py      |   1 +
 18 files changed, 843 insertions(+), 49 deletions(-)
 create mode 100644 crates/polars-stream/src/nodes/io_sources/ipc.rs
 create mode 100644 crates/polars-stream/src/nodes/io_sources/mod.rs

diff --git a/crates/polars-arrow/src/io/ipc/read/common.rs b/crates/polars-arrow/src/io/ipc/read/common.rs
index 6b893c0e8ce3..0a1297bf1184 100644
--- a/crates/polars-arrow/src/io/ipc/read/common.rs
+++ b/crates/polars-arrow/src/io/ipc/read/common.rs
@@ -318,10 +318,14 @@ pub fn read_dictionary<R: Read + Seek>(
     Ok(())
 }
 
-pub fn prepare_projection(
-    schema: &ArrowSchema,
-    mut projection: Vec<usize>,
-) -> (Vec<usize>, PlHashMap<usize, usize>, ArrowSchema) {
+#[derive(Clone)]
+pub struct ProjectionInfo {
+    pub columns: Vec<usize>,
+    pub map: PlHashMap<usize, usize>,
+    pub schema: ArrowSchema,
+}
+
+pub fn prepare_projection(schema: &ArrowSchema, mut projection: Vec<usize>) -> ProjectionInfo {
     let schema = projection
         .iter()
         .map(|x| {
@@ -355,7 +359,11 @@ pub fn prepare_projection(
         }
     }
 
-    (projection, map, schema)
+    ProjectionInfo {
+        columns: projection,
+        map,
+        schema,
+    }
 }
 
 pub fn apply_projection(
diff --git a/crates/polars-arrow/src/io/ipc/read/file.rs b/crates/polars-arrow/src/io/ipc/read/file.rs
index a83e1b758d80..e75fae36730e 100644
--- a/crates/polars-arrow/src/io/ipc/read/file.rs
+++ b/crates/polars-arrow/src/io/ipc/read/file.rs
@@ -305,7 +305,7 @@ fn get_message_from_block_offset<'a, R: Read + Seek>(
         .map_err(|err| polars_err!(oos = OutOfSpecKind::InvalidFlatbufferMessage(err)))
 }
 
-fn get_message_from_block<'a, R: Read + Seek>(
+pub(super) fn get_message_from_block<'a, R: Read + Seek>(
     reader: &mut R,
     block: &arrow_format::ipc::Block,
     message_scratch: &'a mut Vec<u8>,
diff --git a/crates/polars-arrow/src/io/ipc/read/mod.rs b/crates/polars-arrow/src/io/ipc/read/mod.rs
index 88411f9b905f..f4430db7dea2 100644
--- a/crates/polars-arrow/src/io/ipc/read/mod.rs
+++ b/crates/polars-arrow/src/io/ipc/read/mod.rs
@@ -19,6 +19,7 @@ mod schema;
 mod stream;
 
 pub(crate) use common::first_dict_field;
+pub use common::{prepare_projection, ProjectionInfo};
 pub use error::OutOfSpecKind;
 pub use file::{
     deserialize_footer, get_row_count, read_batch, read_file_dictionaries, read_file_metadata,
diff --git a/crates/polars-arrow/src/io/ipc/read/reader.rs b/crates/polars-arrow/src/io/ipc/read/reader.rs
index 8369d2960233..e9523477fe39 100644
--- a/crates/polars-arrow/src/io/ipc/read/reader.rs
+++ b/crates/polars-arrow/src/io/ipc/read/reader.rs
@@ -1,9 +1,9 @@
 use std::io::{Read, Seek};
 
 use polars_error::PolarsResult;
-use polars_utils::aliases::PlHashMap;
 
 use super::common::*;
+use super::file::{get_message_from_block, get_record_batch};
 use super::{read_batch, read_file_dictionaries, Dictionaries, FileMetadata};
 use crate::array::Array;
 use crate::datatypes::ArrowSchema;
@@ -16,7 +16,7 @@ pub struct FileReader<R: Read + Seek> {
     // the dictionaries are going to be read
     dictionaries: Option<Dictionaries>,
     current_block: usize,
-    projection: Option<(Vec<usize>, PlHashMap<usize, usize>, ArrowSchema)>,
+    projection: Option<ProjectionInfo>,
     remaining: usize,
     data_scratch: Vec<u8>,
     message_scratch: Vec<u8>,
@@ -32,10 +32,29 @@ impl<R: Read + Seek> FileReader<R> {
         projection: Option<Vec<usize>>,
         limit: Option<usize>,
     ) -> Self {
-        let projection = projection.map(|projection| {
-            let (p, h, schema) = prepare_projection(&metadata.schema, projection);
-            (p, h, schema)
-        });
+        let projection =
+            projection.map(|projection| prepare_projection(&metadata.schema, projection));
+        Self {
+            reader,
+            metadata,
+            dictionaries: Default::default(),
+            projection,
+            remaining: limit.unwrap_or(usize::MAX),
+            current_block: 0,
+            data_scratch: Default::default(),
+            message_scratch: Default::default(),
+        }
+    }
+
+    /// Creates a new [`FileReader`] from an already prepared [`ProjectionInfo`].
+    /// Use `projection` to only take certain columns; it is used as given, so any
+    /// ordering requirement must already have been checked when it was prepared.
+    pub fn new_with_projection_info(
+        reader: R,
+        metadata: FileMetadata,
+        projection: Option<ProjectionInfo>,
+        limit: Option<usize>,
+    ) -> Self {
         Self {
             reader,
             metadata,
@@ -52,7 +71,7 @@ impl<R: Read + Seek> FileReader<R> {
     pub fn schema(&self) -> &ArrowSchema {
         self.projection
             .as_ref()
-            .map(|x| &x.2)
+            .map(|x| &x.schema)
             .unwrap_or(&self.metadata.schema)
     }
 
@@ -66,9 +85,23 @@ impl<R: Read + Seek> FileReader<R> {
         self.reader
     }
 
+    pub fn set_current_block(&mut self, idx: usize) {
+        self.current_block = idx;
+    }
+
+    pub fn get_current_block(&self) -> usize {
+        self.current_block
+    }
+
+    /// Take the projection info out of the reader, leaving `None` in its place.
+    /// It can be passed to [`FileReader::new_with_projection_info`] to avoid recomputing it.
+    pub fn take_projection_info(&mut self) -> Option<ProjectionInfo> {
+        std::mem::take(&mut self.projection)
+    }
+
     /// Get the inner memory scratches so they can be reused in a new writer.
     /// This can be utilized to save memory allocations for performance reasons.
-    pub fn get_scratches(&mut self) -> (Vec<u8>, Vec<u8>) {
+    pub fn take_scratches(&mut self) -> (Vec<u8>, Vec<u8>) {
         (
             std::mem::take(&mut self.data_scratch),
             std::mem::take(&mut self.message_scratch),
@@ -91,6 +124,43 @@ impl<R: Read + Seek> FileReader<R> {
         };
         Ok(())
     }
+
+    /// Skip over blocks until we have seen at most `offset` rows, returning how many rows we are
+    /// still to see.
+    ///
+    /// This will never go over the `offset`. Meaning that if the `offset < current_block.len()`,
+    /// the block will not be skipped.
+    pub fn skip_blocks_till_limit(&mut self, offset: u64) -> PolarsResult<u64> {
+        let mut remaining_offset = offset;
+
+        for (i, block) in self.metadata.blocks.iter().enumerate() {
+            let message =
+                get_message_from_block(&mut self.reader, block, &mut self.message_scratch)?;
+            let record_batch = get_record_batch(message)?;
+
+            let length = record_batch.length()?;
+            let length = length as u64;
+
+            if length > remaining_offset {
+                self.current_block = i;
+                return Ok(remaining_offset);
+            }
+
+            remaining_offset -= length;
+        }
+
+        self.current_block = self.metadata.blocks.len();
+        Ok(remaining_offset)
+    }
+
+    pub fn next_record_batch(
+        &mut self,
+    ) -> Option<PolarsResult<arrow_format::ipc::RecordBatchRef<'_>>> {
+        let block = self.metadata.blocks.get(self.current_block)?;
+        self.current_block += 1;
+        let message = get_message_from_block(&mut self.reader, block, &mut self.message_scratch);
+        Some(message.and_then(|m| get_record_batch(m)))
+    }
 }
 
 impl<R: Read + Seek> Iterator for FileReader<R> {
@@ -114,7 +184,7 @@ impl<R: Read + Seek> Iterator for FileReader<R> {
             &mut self.reader,
             self.dictionaries.as_ref().unwrap(),
             &self.metadata,
-            self.projection.as_ref().map(|x| x.0.as_ref()),
+            self.projection.as_ref().map(|x| x.columns.as_ref()),
             Some(self.remaining),
             block,
             &mut self.message_scratch,
@@ -122,7 +192,7 @@ impl<R: Read + Seek> Iterator for FileReader<R> {
         );
         self.remaining -= chunk.as_ref().map(|x| x.len()).unwrap_or_default();
 
-        let chunk = if let Some((_, map, _)) = &self.projection {
+        let chunk = if let Some(ProjectionInfo { map, .. }) = &self.projection {
             // re-order according to projection
             chunk.map(|chunk| apply_projection(chunk, map))
         } else {
diff --git a/crates/polars-arrow/src/io/ipc/read/stream.rs b/crates/polars-arrow/src/io/ipc/read/stream.rs
index 87241596cdbe..b2cfb727b385 100644
--- a/crates/polars-arrow/src/io/ipc/read/stream.rs
+++ b/crates/polars-arrow/src/io/ipc/read/stream.rs
@@ -2,7 +2,6 @@ use std::io::Read;
 
 use arrow_format::ipc::planus::ReadAsRoot;
 use polars_error::{polars_bail, polars_err, PolarsError, PolarsResult};
-use polars_utils::aliases::PlHashMap;
 
 use super::super::CONTINUATION_MARKER;
 use super::common::*;
@@ -93,7 +92,7 @@ fn read_next<R: Read>(
     dictionaries: &mut Dictionaries,
     message_buffer: &mut Vec<u8>,
     data_buffer: &mut Vec<u8>,
-    projection: &Option<(Vec<usize>, PlHashMap<usize, usize>, ArrowSchema)>,
+    projection: &Option<ProjectionInfo>,
     scratch: &mut Vec<u8>,
 ) -> PolarsResult<Option<StreamState>> {
     // determine metadata length
@@ -169,7 +168,7 @@ fn read_next<R: Read>(
                 batch,
                 &metadata.schema,
                 &metadata.ipc_schema,
-                projection.as_ref().map(|x| x.0.as_ref()),
+                projection.as_ref().map(|x| x.columns.as_ref()),
                 None,
                 dictionaries,
                 metadata.version,
@@ -179,7 +178,7 @@ fn read_next<R: Read>(
                 scratch,
             );
 
-            if let Some((_, map, _)) = projection {
+            if let Some(ProjectionInfo { map, .. }) = projection {
                 // re-order according to projection
                 chunk
                     .map(|chunk| apply_projection(chunk, map))
@@ -238,7 +237,7 @@ pub struct StreamReader<R: Read> {
     finished: bool,
     data_buffer: Vec<u8>,
     message_buffer: Vec<u8>,
-    projection: Option<(Vec<usize>, PlHashMap<usize, usize>, ArrowSchema)>,
+    projection: Option<ProjectionInfo>,
     scratch: Vec<u8>,
 }
 
@@ -249,10 +248,8 @@ impl<R: Read> StreamReader<R> {
     /// encounter a schema.
     /// To check if the reader is done, use `is_finished(self)`
     pub fn new(reader: R, metadata: StreamMetadata, projection: Option<Vec<usize>>) -> Self {
-        let projection = projection.map(|projection| {
-            let (p, h, schema) = prepare_projection(&metadata.schema, projection);
-            (p, h, schema)
-        });
+        let projection =
+            projection.map(|projection| prepare_projection(&metadata.schema, projection));
 
         Self {
             reader,
@@ -275,7 +272,7 @@ impl<R: Read> StreamReader<R> {
     pub fn schema(&self) -> &ArrowSchema {
         self.projection
             .as_ref()
-            .map(|x| &x.2)
+            .map(|x| &x.schema)
             .unwrap_or(&self.metadata.schema)
     }
 
diff --git a/crates/polars-arrow/src/record_batch.rs b/crates/polars-arrow/src/record_batch.rs
index f58d129831f1..2b0b8112ea9e 100644
--- a/crates/polars-arrow/src/record_batch.rs
+++ b/crates/polars-arrow/src/record_batch.rs
@@ -9,7 +9,7 @@ use crate::array::{Array, ArrayRef};
 /// the same length, [`RecordBatchT::len`].
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct RecordBatchT<A: AsRef<dyn Array>> {
-    length: usize,
+    height: usize,
     arrays: Vec<A>,
 }
 
@@ -29,14 +29,14 @@ impl<A: AsRef<dyn Array>> RecordBatchT<A> {
     ///
     /// # Error
     ///
-    /// I.f.f. the length does not match the length of any of the arrays
-    pub fn try_new(length: usize, arrays: Vec<A>) -> PolarsResult<Self> {
+    /// I.f.f. the height does not match the length of any of the arrays
+    pub fn try_new(height: usize, arrays: Vec<A>) -> PolarsResult<Self> {
         polars_ensure!(
-            arrays.iter().all(|arr| arr.as_ref().len() == length),
+            arrays.iter().all(|arr| arr.as_ref().len() == height),
             ComputeError: "RecordBatch requires all its arrays to have an equal number of rows",
         );
 
-        Ok(Self { length, arrays })
+        Ok(Self { height, arrays })
     }
 
     /// returns the [`Array`]s in [`RecordBatchT`]
@@ -51,7 +51,17 @@ impl<A: AsRef<dyn Array>> RecordBatchT<A> {
 
     /// returns the number of rows of every array
     pub fn len(&self) -> usize {
-        self.length
+        self.height
+    }
+
+    /// returns the number of rows of every array
+    pub fn height(&self) -> usize {
+        self.height
+    }
+
+    /// returns the number of arrays
+    pub fn width(&self) -> usize {
+        self.arrays.len()
     }
 
     /// returns whether the columns have any rows
diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs
index 7e2d7b050dcf..0d8fef7f4c4a 100644
--- a/crates/polars-core/src/frame/mod.rs
+++ b/crates/polars-core/src/frame/mod.rs
@@ -3,6 +3,7 @@
 use std::borrow::Cow;
 use std::{mem, ops};
 
+use polars_row::ArrayRef;
 use polars_utils::itertools::Itertools;
 use rayon::prelude::*;
 
@@ -3334,6 +3335,31 @@ impl DataFrame {
     pub(crate) fn infer_height(cols: &[Column]) -> usize {
         cols.first().map_or(0, Column::len)
     }
+
+    pub fn append_record_batch(&mut self, rb: RecordBatchT<ArrayRef>) -> PolarsResult<()> {
+        polars_ensure!(
+            rb.arrays().len() == self.width(),
+            InvalidOperation: "attempt to extend dataframe of width {} with record batch of width {}",
+            self.width(),
+            rb.arrays().len(),
+        );
+
+        if rb.height() == 0 {
+            return Ok(());
+        }
+
+        // SAFETY:
+        // - we don't adjust the names of the columns
+        // - each column gets appended the same number of rows, which is an invariant of
+        //   record_batch.
+        let columns = unsafe { self.get_columns_mut() };
+        for (col, arr) in columns.iter_mut().zip(rb.into_arrays()) {
+            let arr_series = Series::from_arrow_chunks(PlSmallStr::EMPTY, vec![arr])?.into_column();
+            col.append(&arr_series)?;
+        }
+
+        Ok(())
+    }
 }
 
 pub struct RecordBatchIter<'a> {
diff --git a/crates/polars-core/src/frame/upstream_traits.rs b/crates/polars-core/src/frame/upstream_traits.rs
index 38b346ace652..1392f87c052f 100644
--- a/crates/polars-core/src/frame/upstream_traits.rs
+++ b/crates/polars-core/src/frame/upstream_traits.rs
@@ -1,5 +1,7 @@
 use std::ops::{Index, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
 
+use arrow::record_batch::RecordBatchT;
+
 use crate::prelude::*;
 
 impl FromIterator<Series> for DataFrame {
@@ -22,6 +24,32 @@ impl FromIterator<Column> for DataFrame {
     }
 }
 
+impl TryExtend<RecordBatchT<Box<dyn Array>>> for DataFrame {
+    fn try_extend<I: IntoIterator<Item = RecordBatchT<Box<dyn Array>>>>(
+        &mut self,
+        iter: I,
+    ) -> PolarsResult<()> {
+        for record_batch in iter {
+            self.append_record_batch(record_batch)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl TryExtend<PolarsResult<RecordBatchT<Box<dyn Array>>>> for DataFrame {
+    fn try_extend<I: IntoIterator<Item = PolarsResult<RecordBatchT<Box<dyn Array>>>>>(
+        &mut self,
+        iter: I,
+    ) -> PolarsResult<()> {
+        for record_batch in iter {
+            self.append_record_batch(record_batch?)?;
+        }
+
+        Ok(())
+    }
+}
+
 impl Index<usize> for DataFrame {
     type Output = Column;
 
diff --git a/crates/polars-core/src/scalar/from.rs b/crates/polars-core/src/scalar/from.rs
index 3af8671dadd1..c104c2ea8573 100644
--- a/crates/polars-core/src/scalar/from.rs
+++ b/crates/polars-core/src/scalar/from.rs
@@ -1,3 +1,5 @@
+use polars_utils::pl_str::PlSmallStr;
+
 use super::{AnyValue, DataType, Scalar};
 
 macro_rules! impl_from {
@@ -25,4 +27,5 @@ impl_from! {
     (u64, UInt64, UInt64)
     (f32, Float32, Float32)
     (f64, Float64, Float64)
+    (PlSmallStr, StringOwned, String)
 }
diff --git a/crates/polars-io/src/utils/other.rs b/crates/polars-io/src/utils/other.rs
index 4e039124933f..f4ef629821a9 100644
--- a/crates/polars-io/src/utils/other.rs
+++ b/crates/polars-io/src/utils/other.rs
@@ -45,7 +45,7 @@ pub fn get_reader_bytes<R: Read + MmapBytesReader + ?Sized>(
     feature = "parquet",
     feature = "avro"
 ))]
-pub(crate) fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> ArrowSchema {
+pub fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> ArrowSchema {
     projection
         .iter()
         .map(|idx| schema.get_at_index(*idx).unwrap())
@@ -59,14 +59,14 @@ pub(crate) fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> Ar
     feature = "avro",
     feature = "parquet"
 ))]
-pub(crate) fn columns_to_projection(
-    columns: &[String],
+pub fn columns_to_projection<T: AsRef<str>>(
+    columns: &[T],
     schema: &ArrowSchema,
 ) -> PolarsResult<Vec<usize>> {
     let mut prj = Vec::with_capacity(columns.len());
 
     for column in columns {
-        let i = schema.try_index_of(column)?;
+        let i = schema.try_index_of(column.as_ref())?;
         prj.push(i);
     }
 
diff --git a/crates/polars-plan/src/plans/optimizer/mod.rs b/crates/polars-plan/src/plans/optimizer/mod.rs
index 70880ca78359..dc0d330d8b86 100644
--- a/crates/polars-plan/src/plans/optimizer/mod.rs
+++ b/crates/polars-plan/src/plans/optimizer/mod.rs
@@ -89,6 +89,7 @@ pub fn optimize(
     let simplify_expr = opt_state.contains(OptFlags::SIMPLIFY_EXPR);
     let slice_pushdown = opt_state.contains(OptFlags::SLICE_PUSHDOWN);
     let streaming = opt_state.contains(OptFlags::STREAMING);
+    let new_streaming = opt_state.contains(OptFlags::NEW_STREAMING);
     let fast_projection = opt_state.contains(OptFlags::FAST_PROJECTION);
 
     // Don't run optimizations that don't make sense on a single node.
@@ -181,7 +182,7 @@ pub fn optimize(
     }
 
     if slice_pushdown {
-        let slice_pushdown_opt = SlicePushDown::new(streaming);
+        let slice_pushdown_opt = SlicePushDown::new(streaming, new_streaming);
         let alp = lp_arena.take(lp_top);
         let alp = slice_pushdown_opt.optimize(alp, lp_arena, expr_arena)?;
 
diff --git a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs
index 9c2f8497fac8..a5ff806abae9 100644
--- a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs
+++ b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs
@@ -5,6 +5,7 @@ use crate::prelude::*;
 
 pub(super) struct SlicePushDown {
     streaming: bool,
+    new_streaming: bool,
     pub scratch: Vec<Node>,
 }
 
@@ -59,9 +60,10 @@ fn can_pushdown_slice_past_projections(exprs: &[ExprIR], arena: &Arena<AExpr>) -
 }
 
 impl SlicePushDown {
-    pub(super) fn new(streaming: bool) -> Self {
+    pub(super) fn new(streaming: bool, new_streaming: bool) -> Self {
         Self {
             streaming,
+            new_streaming,
             scratch: vec![],
         }
     }
@@ -211,6 +213,32 @@ impl SlicePushDown {
 
                 Ok(lp)
             },
+
+            #[cfg(feature = "ipc")]
+            (Scan {
+                sources,
+                file_info,
+                hive_parts,
+                output_schema,
+                mut file_options,
+                predicate,
+                scan_type: scan_type @ FileScan::Ipc { .. },
+            }, Some(state)) if self.new_streaming && predicate.is_none() =>  {
+                file_options.slice = Some((state.offset, state.len as usize));
+
+                let lp = Scan {
+                    sources,
+                    file_info,
+                    hive_parts,
+                    output_schema,
+                    scan_type,
+                    file_options,
+                    predicate,
+                };
+
+                Ok(lp)
+            },
+
             // TODO! we currently skip slice pushdown if there is a predicate.
             (Scan {
                 sources,
diff --git a/crates/polars-stream/src/nodes/io_sources/ipc.rs b/crates/polars-stream/src/nodes/io_sources/ipc.rs
new file mode 100644
index 000000000000..3a83c8e3132c
--- /dev/null
+++ b/crates/polars-stream/src/nodes/io_sources/ipc.rs
@@ -0,0 +1,557 @@
+use std::cmp::Reverse;
+use std::io::Cursor;
+use std::ops::Range;
+use std::sync::Arc;
+
+use polars_core::config;
+use polars_core::frame::DataFrame;
+use polars_core::prelude::{Column, DataType};
+use polars_core::scalar::Scalar;
+use polars_core::utils::arrow::array::TryExtend;
+use polars_core::utils::arrow::io::ipc::read::{
+    prepare_projection, read_file_metadata, FileMetadata, FileReader, ProjectionInfo,
+};
+use polars_error::{ErrString, PolarsError, PolarsResult};
+use polars_expr::prelude::PhysicalExpr;
+use polars_expr::state::ExecutionState;
+use polars_io::cloud::CloudOptions;
+use polars_io::ipc::IpcScanOptions;
+use polars_io::utils::columns_to_projection;
+use polars_io::RowIndex;
+use polars_plan::plans::hive::HivePartitions;
+use polars_plan::plans::{FileInfo, ScanSources};
+use polars_plan::prelude::FileScanOptions;
+use polars_utils::mmap::MemSlice;
+use polars_utils::pl_str::PlSmallStr;
+use polars_utils::priority::Priority;
+use polars_utils::IdxSize;
+
+use crate::async_primitives::distributor_channel::distributor_channel;
+use crate::async_primitives::linearizer::Linearizer;
+use crate::morsel::{get_ideal_morsel_size, SourceToken};
+use crate::nodes::{
+    ComputeNode, JoinHandle, Morsel, MorselSeq, PortState, TaskPriority, TaskScope,
+};
+use crate::pipe::{RecvPort, SendPort};
+use crate::{DEFAULT_DISTRIBUTOR_BUFFER_SIZE, DEFAULT_LINEARIZER_BUFFER_SIZE};
+
+const ROW_COUNT_OVERFLOW_ERR: PolarsError = PolarsError::ComputeError(ErrString::new_static(
+    "\
+IPC file produces more than 2^32 rows; \
+consider compiling with polars-bigidx feature (polars-u64-idx package on python)",
+));
+
+pub struct IpcSourceNode {
+    sources: ScanSources,
+
+    config: IpcSourceNodeConfig,
+    num_pipelines: usize,
+
+    /// Every phase we need to be able to continue from where we left off, so we save the state of
+    /// the Walker task.
+    state: IpcSourceNodeState,
+}
+
+pub struct IpcSourceNodeConfig {
+    row_index: Option<RowIndex>,
+    projection_info: Option<ProjectionInfo>,
+
+    rechunk: bool,
+    include_file_paths: Option<PlSmallStr>,
+
+    first_metadata: FileMetadata,
+}
+
+pub struct IpcSourceNodeState {
+    morsel_seq: u64,
+    row_idx_offset: IdxSize,
+
+    slice: Range<usize>,
+
+    source_idx: usize,
+    source: Option<Source>,
+}
+
+pub struct Source {
+    file_path: Option<Arc<str>>,
+
+    memslice: Arc<MemSlice>,
+    metadata: Arc<FileMetadata>,
+
+    block_offset: usize,
+}
+
+impl IpcSourceNode {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        sources: ScanSources,
+        _file_info: FileInfo,
+        _hive_parts: Option<Arc<Vec<HivePartitions>>>, // @TODO
+        predicate: Option<Arc<dyn PhysicalExpr>>,
+        options: IpcScanOptions,
+        _cloud_options: Option<CloudOptions>,
+        file_options: FileScanOptions,
+        mut first_metadata: Option<FileMetadata>,
+    ) -> PolarsResult<Self> {
+        // These should have all been removed during lower_ir
+        assert!(predicate.is_none());
+        assert!(!sources.is_empty());
+
+        let IpcScanOptions = options;
+
+        let FileScanOptions {
+            slice,
+            with_columns,
+            cache: _, // @TODO
+            row_index,
+            rechunk,
+            file_counter: _, // @TODO
+            hive_options: _, // @TODO
+            glob: _,         // @TODO
+            include_file_paths,
+            allow_missing_columns: _, // @TODO
+        } = file_options;
+
+        let first_metadata = match first_metadata.take() {
+            Some(md) => md,
+            None => {
+                let source = sources.iter().next().unwrap();
+                let source = source.to_memslice()?;
+                read_file_metadata(&mut std::io::Cursor::new(&*source))?
+            },
+        };
+
+        let projection = with_columns
+            .as_ref()
+            .map(|cols| columns_to_projection(cols, &first_metadata.schema))
+            .transpose()?;
+        let projection_info = projection
+            .as_ref()
+            .map(|p| prepare_projection(&first_metadata.schema, p.clone()));
+
+        let state = IpcSourceNodeState {
+            morsel_seq: 0,
+            row_idx_offset: row_index.as_ref().map_or(0, |ri| ri.offset),
+
+            // Always create a slice. If no slice was given, just make the biggest slice possible.
+            slice: slice.map_or(0..usize::MAX, |(offset, length)| {
+                let offset = offset as usize;
+                offset..offset + length
+            }),
+
+            source_idx: 0,
+            source: None,
+        };
+
+        Ok(IpcSourceNode {
+            sources,
+
+            config: IpcSourceNodeConfig {
+                row_index,
+                projection_info,
+
+                rechunk,
+                include_file_paths,
+
+                first_metadata,
+            },
+
+            num_pipelines: 0,
+
+            state,
+        })
+    }
+}
+
+/// Move `slice` forward by `n` and return the slice until then.
+fn slice_take(slice: &mut Range<usize>, n: usize) -> Range<usize> {
+    let offset = slice.start;
+    let length = slice.len();
+
+    assert!(offset < n);
+
+    let chunk_length = (n - offset).min(length);
+    let rng = offset..offset + chunk_length;
+    *slice = 0..length - chunk_length;
+
+    rng
+}
+
+fn get_max_morsel_size() -> usize {
+    std::env::var("POLARS_STREAMING_IPC_SOURCE_MAX_MORSEL_SIZE")
+        .map_or_else(
+            |_| get_ideal_morsel_size(),
+            |v| {
+                v.parse::<usize>().expect(
+                    "POLARS_STREAMING_IPC_SOURCE_MAX_MORSEL_SIZE does not contain valid size",
+                )
+            },
+        )
+        .max(1)
+}
+
+impl ComputeNode for IpcSourceNode {
+    fn name(&self) -> &str {
+        "ipc_source"
+    }
+
+    fn initialize(&mut self, num_pipelines: usize) {
+        self.num_pipelines = num_pipelines;
+    }
+
+    fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> {
+        assert!(recv.is_empty());
+        assert_eq!(send.len(), 1);
+
+        if self.state.slice.is_empty() || self.state.source_idx >= self.sources.len() {
+            send[0] = PortState::Done;
+        }
+
+        if send[0] != PortState::Done {
+            send[0] = PortState::Ready;
+        }
+
+        Ok(())
+    }
+
+    fn spawn<'env, 's>(
+        &'env mut self,
+        scope: &'s TaskScope<'s, 'env>,
+        recv_ports: &mut [Option<RecvPort<'_>>],
+        send_ports: &mut [Option<SendPort<'_>>],
+        _state: &'s ExecutionState,
+        join_handles: &mut Vec<JoinHandle<PolarsResult<()>>>,
+    ) {
+        assert!(recv_ports.is_empty());
+        assert_eq!(send_ports.len(), 1);
+
+        // Split size for morsels.
+        let max_morsel_size = get_max_morsel_size();
+        let source_token = SourceToken::new();
+
+        let num_pipelines = self.num_pipelines;
+        let config = &self.config;
+        let sources = &self.sources;
+        let state = &mut self.state;
+
+        /// Messages sent from Walker task to Decoder tasks.
+        struct BatchMessage {
+            memslice: Arc<MemSlice>,
+            metadata: Arc<FileMetadata>,
+            file_path: Option<Arc<str>>,
+            row_idx_offset: IdxSize,
+            slice: Range<usize>,
+            block_range: Range<usize>,
+            morsel_seq_base: u64,
+        }
+
+        // Walker task -> Decoder tasks.
+        let (mut batch_tx, batch_rxs) =
+            distributor_channel::<BatchMessage>(num_pipelines, DEFAULT_DISTRIBUTOR_BUFFER_SIZE);
+        // Decoder tasks -> Distributor task.
+        let (mut decoded_rx, decoded_tx) = Linearizer::<Priority<Reverse<MorselSeq>, Morsel>>::new(
+            num_pipelines,
+            DEFAULT_LINEARIZER_BUFFER_SIZE,
+        );
+        // Distributor task -> output.
+        let mut sender = send_ports[0].take().unwrap().serial();
+
+        // Distributor task.
+        //
+        // Shuffles morsels from `n` producers amongst `n` consumers.
+        //
+        // If record batches in the source IPC file are large, one decoder might produce many
+        // morsels at the same time. Meanwhile, other decoders might not produce anything.
+        // Therefore, we would like to distribute the output of a single decoder task over the
+        // available output pipelines.
+        join_handles.push(scope.spawn_task(TaskPriority::High, async move {
+            while let Some(morsel) = decoded_rx.get().await {
+                if sender.send(morsel.1).await.is_err() {
+                    break;
+                }
+            }
+            PolarsResult::Ok(())
+        }));
+
+        // Decoder tasks.
+        //
+        // Takes an IPC file and a number of blocks and decodes each block as a record batch.
+        // Then, all record batches are concatenated into a DataFrame. If the resulting DataFrame
+        // is too large, which happens when we have one very large block, the DataFrame is split
+        // into smaller pieces and spread among the pipelines.
+        let decoder_tasks = decoded_tx.into_iter().zip(batch_rxs)
+            .map(|(mut send, mut rx)| {
+                let source_token = source_token.clone();
+                scope.spawn_task(TaskPriority::Low, async move {
+                    // Amortize allocations.
+                    let mut data_scratch = Vec::new();
+                    let mut message_scratch = Vec::new();
+                    let mut projection_info = config.projection_info.clone();
+
+                    let schema = projection_info.as_ref().map_or(config.first_metadata.schema.as_ref(), |ProjectionInfo { schema, .. }| schema);
+                    let pl_schema = schema
+                        .iter()
+                        .map(|(n, f)| (n.clone(), DataType::from_arrow(&f.dtype, true)))
+                        .collect();
+
+                    while let Ok(m) = rx.recv().await {
+                        let BatchMessage {
+                            memslice: source,
+                            metadata,
+                            file_path,
+                            row_idx_offset,
+                            slice,
+                            morsel_seq_base,
+                            block_range,
+                        } = m;
+
+                        let mut reader = FileReader::new_with_projection_info(
+                            Cursor::new(source.as_ref()),
+                            metadata.as_ref().clone(),
+                            std::mem::take(&mut projection_info),
+                            None,
+                        );
+                        reader.set_current_block(block_range.start);
+                        reader.set_scratches((
+                            std::mem::take(&mut data_scratch),
+                            std::mem::take(&mut message_scratch),
+                        ));
+
+                        // Create the DataFrame with the appropriate schema and append all the record
+                        // batches to it. This will perform schema validation as well.
+                        let mut df = DataFrame::empty_with_schema(&pl_schema);
+                        df.try_extend(reader.by_ref().take(block_range.len()))?;
+
+                        df = df.slice(slice.start as i64, slice.len());
+
+                        if config.rechunk {
+                            df.rechunk_mut();
+                        }
+
+                        if let Some(RowIndex { name, offset: _ }) = &config.row_index {
+                            let offset = row_idx_offset + slice.start as IdxSize;
+                            df = df.with_row_index(name.clone(), Some(offset))?;
+                        }
+
+                        if let Some(col) = config.include_file_paths.as_ref() {
+                            let file_path = file_path.unwrap();
+                            let file_path = Scalar::from(PlSmallStr::from(file_path.as_ref()));
+                            df.with_column(Column::new_scalar(
+                                col.clone(),
+                                file_path,
+                                df.height(),
+                            ))?;
+                        }
+
+                        // If the block is very large, we want to split the block amongst the
+                        // pipelines. That will at least allow some parallelism.
+                        if df.height() > max_morsel_size && config::verbose() {
+                            eprintln!("IPC source encountered a (too) large record batch of {} rows. Splitting and continuing.", df.height());
+                        }
+                        for i in 0..df.height().div_ceil(max_morsel_size) {
+                            let morsel = df.slice((i * max_morsel_size) as i64, max_morsel_size);
+                            let seq = MorselSeq::new(morsel_seq_base + i as u64);
+                            let morsel = Morsel::new(
+                                morsel,
+                                seq,
+                                source_token.clone(),
+                            );
+                            if send.insert(Priority(Reverse(seq), morsel)).await.is_err() {
+                                break;
+                            }
+                        }
+
+                        (data_scratch, message_scratch) = reader.take_scratches();
+                        projection_info = reader.take_projection_info();
+                    }
+
+                    PolarsResult::Ok(())
+                })
+            })
+            .collect::<Vec<_>>();
+
+        // Walker task.
+        //
+        // Walks all the sources and supplies block ranges to the decoder tasks.
+        join_handles.push(scope.spawn_task(TaskPriority::Low, async move {
+            struct Batch {
+                row_idx_offset: IdxSize,
+                block_start: usize,
+                num_rows: usize,
+            }
+
+            // Batch completion parameters
+            let batch_size_limit = get_ideal_morsel_size();
+            let sliced_batch_size_limit = state.slice.len().div_ceil(num_pipelines);
+            let batch_block_limit = if sources.len() >= num_pipelines {
+                // If there are more files than decoder tasks, try to subdivide the files instead
+                // of the blocks.
+                usize::MAX
+            } else {
+                config.first_metadata.blocks.len().div_ceil(num_pipelines)
+            };
+
+            // Amortize allocations
+            let mut data_scratch = Vec::new();
+            let mut message_scratch = Vec::new();
+            let mut projection_info = config.projection_info.clone();
+
+            'source_loop: while !state.slice.is_empty() {
+                let source = match state.source {
+                    Some(ref mut source) => source,
+                    None => {
+                        let Some(source) = sources.get(state.source_idx) else {
+                            break;
+                        };
+
+                        let file_path: Option<Arc<str>> = config
+                            .include_file_paths
+                            .as_ref()
+                            .map(|_| source.to_include_path_name().into());
+                        let memslice = source.to_memslice()?;
+                        let metadata = if state.source_idx == 0 {
+                            config.first_metadata.clone()
+                        } else {
+                            read_file_metadata(&mut std::io::Cursor::new(memslice.as_ref()))?
+                        };
+
+                        state.source.insert(Source {
+                            file_path,
+                            memslice: Arc::new(memslice),
+                            metadata: Arc::new(metadata),
+                            block_offset: 0,
+                        })
+                    },
+                };
+
+                let mut reader = FileReader::new_with_projection_info(
+                    Cursor::new(source.memslice.as_ref()),
+                    source.metadata.as_ref().clone(),
+                    std::mem::take(&mut projection_info),
+                    None,
+                );
+                reader.set_current_block(source.block_offset);
+                reader.set_scratches((
+                    std::mem::take(&mut data_scratch),
+                    std::mem::take(&mut message_scratch),
+                ));
+
+                if state.slice.start > 0 {
+                    // Skip over all blocks that the slice would skip anyway.
+                    let new_offset = reader.skip_blocks_till_limit(state.slice.start as u64)?;
+
+                    state.row_idx_offset += (state.slice.start as u64 - new_offset) as IdxSize;
+                    state.slice = new_offset as usize..new_offset as usize + state.slice.len();
+
+                    // If we skip the entire file, don't even try to read from it.
+                    if reader.get_current_block() == reader.metadata().blocks.len() {
+                        (data_scratch, message_scratch) = reader.take_scratches();
+                        projection_info = reader.take_projection_info();
+                        state.source.take();
+                        state.source_idx += 1;
+                        continue;
+                    }
+                }
+
+                let mut batch = Batch {
+                    row_idx_offset: state.row_idx_offset,
+                    block_start: reader.get_current_block(),
+                    num_rows: 0,
+                };
+
+                // We don't yet want to commit these values to the state in case this batch gets
+                // cancelled.
+                let mut uncommitted_slice = state.slice.clone();
+                let mut uncommitted_row_idx_offset = state.row_idx_offset;
+                while !state.slice.is_empty() {
+                    let mut is_batch_complete = false;
+
+                    match reader.next_record_batch() {
+                        None if batch.num_rows == 0 => break,
+
+                        // If we have no more record batches available, we want to send what is
+                        // left.
+                        None => is_batch_complete = true,
+                        Some(record_batch) => {
+                            let rb_num_rows = record_batch?.length()? as usize;
+                            batch.num_rows += rb_num_rows;
+
+                            // We need to ensure that we are not overflowing the IdxSize maximum
+                            // capacity.
+                            let rb_num_rows = IdxSize::try_from(rb_num_rows)
+                                .map_err(|_| ROW_COUNT_OVERFLOW_ERR)?;
+                            uncommitted_row_idx_offset = uncommitted_row_idx_offset
+                                .checked_add(rb_num_rows)
+                                .ok_or(ROW_COUNT_OVERFLOW_ERR)?;
+                        },
+                    }
+
+                    let current_block = reader.get_current_block();
+
+                    // Subdivide into batches for large files.
+                    is_batch_complete |= batch.num_rows >= batch_size_limit;
+                    // Subdivide into batches if the file is sliced.
+                    is_batch_complete |= batch.num_rows >= sliced_batch_size_limit;
+                    // Subdivide into batches for small files.
+                    is_batch_complete |= current_block - batch.block_start >= batch_block_limit;
+
+                    // Batch blocks such that we send appropriately sized morsels. We guarantee a
+                    // lower bound here, but not an upper bound.
+                    if is_batch_complete {
+                        let batch_slice = slice_take(&mut uncommitted_slice, batch.num_rows);
+                        let batch_slice_len = batch_slice.len();
+                        let block_range = batch.block_start..current_block;
+
+                        let message = BatchMessage {
+                            memslice: source.memslice.clone(),
+                            metadata: source.metadata.clone(),
+                            file_path: source.file_path.clone(),
+                            row_idx_offset: batch.row_idx_offset,
+                            slice: batch_slice,
+                            morsel_seq_base: state.morsel_seq,
+                            block_range,
+                        };
+
+                        if source_token.stop_requested() {
+                            break 'source_loop;
+                        }
+
+                        if batch_tx.send(message).await.is_err() {
+                            // This should only happen if the receiver of the decoder
+                            // has broken off, meaning no further input will be needed.
+                            break 'source_loop;
+                        }
+
+                        // Commit the changes to the state.
+                        // Now we know that a decoder will process it.
+                        //
+                        // This might generate several morsels if the record batch is very large.
+                        state.morsel_seq += batch_slice_len.div_ceil(max_morsel_size) as u64;
+                        state.slice = uncommitted_slice.clone();
+                        state.row_idx_offset = uncommitted_row_idx_offset;
+                        source.block_offset = current_block;
+
+                        batch = Batch {
+                            row_idx_offset: state.row_idx_offset,
+                            block_start: current_block,
+                            num_rows: 0,
+                        };
+                    }
+                }
+
+                (data_scratch, message_scratch) = reader.take_scratches();
+                projection_info = reader.take_projection_info();
+
+                state.source.take();
+                state.source_idx += 1;
+            }
+
+            drop(batch_tx); // Inform decoder tasks to stop.
+            for decoder_task in decoder_tasks {
+                decoder_task.await?;
+            }
+
+            PolarsResult::Ok(())
+        }));
+    }
+}
diff --git a/crates/polars-stream/src/nodes/io_sources/mod.rs b/crates/polars-stream/src/nodes/io_sources/mod.rs
new file mode 100644
index 000000000000..ce14ad3b0f7a
--- /dev/null
+++ b/crates/polars-stream/src/nodes/io_sources/mod.rs
@@ -0,0 +1 @@
+pub mod ipc;
diff --git a/crates/polars-stream/src/nodes/mod.rs b/crates/polars-stream/src/nodes/mod.rs
index 936c0ceb3ada..effebe67c34b 100644
--- a/crates/polars-stream/src/nodes/mod.rs
+++ b/crates/polars-stream/src/nodes/mod.rs
@@ -5,6 +5,7 @@ pub mod in_memory_sink;
 pub mod in_memory_source;
 pub mod input_independent_select;
 pub mod io_sinks;
+pub mod io_sources;
 pub mod joins;
 pub mod map;
 pub mod multiplexer;
diff --git a/crates/polars-stream/src/physical_plan/lower_ir.rs b/crates/polars-stream/src/physical_plan/lower_ir.rs
index 95e3ae72224d..063c94081dbc 100644
--- a/crates/polars-stream/src/physical_plan/lower_ir.rs
+++ b/crates/polars-stream/src/physical_plan/lower_ir.rs
@@ -1,10 +1,11 @@
 use std::sync::Arc;
 
+use polars_core::frame::DataFrame;
 use polars_core::prelude::{InitHashMaps, PlHashMap, PlIndexMap};
 use polars_core::schema::Schema;
 use polars_error::{polars_ensure, PolarsResult};
 use polars_plan::plans::expr_ir::{ExprIR, OutputName};
-use polars_plan::plans::{AExpr, FunctionIR, IRAggExpr, IR};
+use polars_plan::plans::{AExpr, FileScan, FunctionIR, IRAggExpr, IR};
 use polars_plan::prelude::{FileType, SinkType};
 use polars_utils::arena::{Arena, Node};
 use polars_utils::itertools::Itertools;
@@ -314,23 +315,67 @@ pub fn lower_ir(
                 sources: scan_sources,
                 file_info,
                 hive_parts,
-                output_schema,
+                output_schema: scan_output_schema,
                 scan_type,
-                predicate,
+                mut predicate,
                 file_options,
             } = v.clone()
             else {
                 unreachable!();
             };
 
-            PhysNodeKind::FileScan {
-                scan_sources,
-                file_info,
-                hive_parts,
-                output_schema,
-                scan_type,
-                predicate,
-                file_options,
+            if scan_sources.is_empty() {
+                // If there are no sources, just provide an empty in-memory source with the right
+                // schema.
+                PhysNodeKind::InMemorySource {
+                    df: Arc::new(DataFrame::empty_with_schema(output_schema.as_ref())),
+                }
+            } else {
+                if matches!(scan_type, FileScan::Ipc { .. }) {
+                    // @TODO: All the things the IPC source does not support yet.
+                    if hive_parts.is_some()
+                        || scan_sources.is_cloud_url()
+                        || file_options.allow_missing_columns
+                        || file_options.slice.is_some_and(|(offset, _)| offset < 0)
+                    {
+                        todo!();
+                    }
+                }
+
+                // If the node itself would just filter on the whole output then there is no real
+                // reason to do it in the source node itself.
+                let do_filter_in_separate_node =
+                    predicate.is_some() && matches!(scan_type, FileScan::Ipc { .. });
+
+                if do_filter_in_separate_node {
+                    assert!(file_options.slice.is_none()); // Invariant of the scan
+                    let predicate = predicate.take().unwrap();
+
+                    let input = phys_sm.insert(PhysNode::new(
+                        output_schema.clone(),
+                        PhysNodeKind::FileScan {
+                            scan_sources,
+                            file_info,
+                            hive_parts,
+                            output_schema: scan_output_schema,
+                            scan_type,
+                            predicate: None,
+                            file_options,
+                        },
+                    ));
+
+                    PhysNodeKind::Filter { input, predicate }
+                } else {
+                    PhysNodeKind::FileScan {
+                        scan_sources,
+                        file_info,
+                        hive_parts,
+                        output_schema: scan_output_schema,
+                        scan_type,
+                        predicate,
+                        file_options,
+                    }
+                }
             }
         },
 
diff --git a/crates/polars-stream/src/physical_plan/to_graph.rs b/crates/polars-stream/src/physical_plan/to_graph.rs
index befa9c3a93b9..b701696972a9 100644
--- a/crates/polars-stream/src/physical_plan/to_graph.rs
+++ b/crates/polars-stream/src/physical_plan/to_graph.rs
@@ -367,6 +367,23 @@ fn to_graph_rec<'a>(
                             todo!()
                         }
                     },
+                    FileScan::Ipc {
+                        options,
+                        cloud_options,
+                        metadata: first_metadata,
+                    } => ctx.graph.add_node(
+                        nodes::io_sources::ipc::IpcSourceNode::new(
+                            scan_sources,
+                            file_info,
+                            hive_parts,
+                            predicate,
+                            options,
+                            cloud_options,
+                            file_options,
+                            first_metadata,
+                        )?,
+                        [],
+                    ),
                     _ => todo!(),
                 }
             }
diff --git a/py-polars/tests/unit/io/test_lazy_ipc.py b/py-polars/tests/unit/io/test_lazy_ipc.py
index 0d67b6b06f89..ec75d495ce8d 100644
--- a/py-polars/tests/unit/io/test_lazy_ipc.py
+++ b/py-polars/tests/unit/io/test_lazy_ipc.py
@@ -88,6 +88,7 @@ def test_ipc_list_arg(io_files_path: Path) -> None:
     assert df.row(0) == ("vegetables", 45, 0.5, 2)
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_scan_ipc_local_with_async(
     capfd: Any,
     monkeypatch: Any,

From 1a8735f6e26abcf76b43e90ab91f9d99b76f2fe9 Mon Sep 17 00:00:00 2001
From: Ritchie Vink <ritchie46@gmail.com>
Date: Wed, 13 Nov 2024 15:18:01 +0100
Subject: [PATCH 08/18] feat(rust): Add dylib (#19759)

---
 Cargo.lock                     | 539 +++++++++++++++++++++++++--------
 Cargo.toml                     |   1 +
 crates/Makefile                |   3 +-
 crates/polars-dylib/Cargo.toml |  25 ++
 crates/polars-dylib/README.md  |  16 +
 crates/polars-dylib/src/lib.rs |  15 +
 crates/polars/Cargo.toml       |   9 +
 7 files changed, 487 insertions(+), 121 deletions(-)
 create mode 100644 crates/polars-dylib/Cargo.toml
 create mode 100644 crates/polars-dylib/README.md
 create mode 100644 crates/polars-dylib/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index f79742adafb7..281f2845ccfc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -62,9 +62,9 @@ dependencies = [
 
 [[package]]
 name = "allocator-api2"
-version = "0.2.18"
+version = "0.2.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
+checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9"
 
 [[package]]
 name = "android-tzdata"
@@ -89,15 +89,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
 
 [[package]]
 name = "anstyle"
-version = "1.0.9"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56"
+checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
 
 [[package]]
 name = "anyhow"
-version = "1.0.92"
+version = "1.0.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74f37166d7d48a0284b99dd824694c26119c700b53bf0d1540cdb147dbdaaf13"
+checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775"
 
 [[package]]
 name = "apache-avro"
@@ -120,7 +120,7 @@ dependencies = [
  "snap",
  "strum",
  "strum_macros",
- "thiserror",
+ "thiserror 1.0.69",
  "typed-builder",
  "uuid",
 ]
@@ -206,7 +206,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -217,7 +217,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -265,9 +265,9 @@ dependencies = [
 
 [[package]]
 name = "aws-config"
-version = "1.5.9"
+version = "1.5.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d6448cfb224dd6a9b9ac734f58622dd0d4751f3589f3b777345745f46b2eb14"
+checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -368,9 +368,9 @@ dependencies = [
 
 [[package]]
 name = "aws-sdk-sso"
-version = "1.48.0"
+version = "1.49.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ded855583fa1d22e88fe39fd6062b062376e50a8211989e07cf5e38d52eb3453"
+checksum = "09677244a9da92172c8dc60109b4a9658597d4d298b188dd0018b6a66b410ca4"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -390,9 +390,9 @@ dependencies = [
 
 [[package]]
 name = "aws-sdk-ssooidc"
-version = "1.49.0"
+version = "1.50.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9177ea1192e6601ae16c7273385690d88a7ed386a00b74a6bc894d12103cd933"
+checksum = "81fea2f3a8bb3bd10932ae7ad59cc59f65f270fc9183a7e91f501dc5efbef7ee"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -412,9 +412,9 @@ dependencies = [
 
 [[package]]
 name = "aws-sdk-sts"
-version = "1.48.0"
+version = "1.49.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "823ef553cf36713c97453e2ddff1eb8f62be7f4523544e2a5db64caf80100f0a"
+checksum = "53dcf5e7d9bd1517b8b998e170e650047cea8a2b85fe1835abe3210713e541b7"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -574,9 +574,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-runtime-api"
-version = "1.7.2"
+version = "1.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e086682a53d3aa241192aa110fa8dfce98f2f5ac2ead0de84d41582c7e8fdb96"
+checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-types",
@@ -591,9 +591,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-types"
-version = "1.2.8"
+version = "1.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07c9cdc179e6afbf5d391ab08c85eac817b51c87e1892a5edb5f7bbdc64314b4"
+checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510"
 dependencies = [
  "base64-simd",
  "bytes",
@@ -794,7 +794,7 @@ checksum = "bcfcc3cd946cb52f0bbfdbbcfa2f4e24f75ebb6c0e1002f7c25904fada18b9ec"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -845,9 +845,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.1.31"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f"
+checksum = "1aeb932158bd710538c73702db6945cb68a8fb08c519e6e12706b94263b36db8"
 dependencies = [
  "jobserver",
  "libc",
@@ -1037,9 +1037,9 @@ dependencies = [
 
 [[package]]
 name = "cpufeatures"
-version = "0.2.14"
+version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0"
+checksum = "0ca741a962e1b0bff6d724a1a0958b686406e853bb14061f218562e1896f95e6"
 dependencies = [
  "libc",
 ]
@@ -1252,6 +1252,17 @@ dependencies = [
  "subtle",
 ]
 
+[[package]]
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+]
+
 [[package]]
 name = "doc-comment"
 version = "0.3.3"
@@ -1314,7 +1325,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1369,9 +1380,9 @@ checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55"
 
 [[package]]
 name = "fastrand"
-version = "2.1.1"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
+checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"
 
 [[package]]
 name = "ff"
@@ -1432,9 +1443,9 @@ dependencies = [
 
 [[package]]
 name = "fs4"
-version = "0.11.0"
+version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "adc91b3da7f1a7968b00f9f65a4971252f6a927d3cb9eec05d91cbeaff678f9a"
+checksum = "e871a4cfa68bb224863b53149d973df1ac8d1ed2fa1d1bfc37ac1bb65dd37207"
 dependencies = [
  "rustix",
  "windows-sys 0.52.0",
@@ -1496,7 +1507,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1663,9 +1674,9 @@ dependencies = [
 
 [[package]]
 name = "hashbrown"
-version = "0.15.0"
+version = "0.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
+checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3"
 dependencies = [
  "allocator-api2",
  "equivalent",
@@ -1916,14 +1927,143 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "icu_collections"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_locid_transform_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
+
+[[package]]
+name = "icu_normalizer"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "utf16_iter",
+ "utf8_iter",
+ "write16",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
+
+[[package]]
+name = "icu_properties"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_locid_transform",
+ "icu_properties_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
+
+[[package]]
+name = "icu_provider"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_provider_macros",
+ "stable_deref_trait",
+ "tinystr",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_provider_macros"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+]
+
 [[package]]
 name = "idna"
-version = "0.5.0"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
+checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
 dependencies = [
- "unicode-bidi",
- "unicode-normalization",
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
 ]
 
 [[package]]
@@ -1933,7 +2073,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "serde",
 ]
 
@@ -2053,9 +2193,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 
 [[package]]
 name = "libc"
-version = "0.2.161"
+version = "0.2.162"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1"
+checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398"
 
 [[package]]
 name = "libflate"
@@ -2143,6 +2283,12 @@ version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
 
+[[package]]
+name = "litemap"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704"
+
 [[package]]
 name = "lock_api"
 version = "0.4.12"
@@ -2165,7 +2311,7 @@ version = "0.12.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38"
 dependencies = [
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
 ]
 
 [[package]]
@@ -2754,7 +2900,7 @@ dependencies = [
  "flate2",
  "futures",
  "getrandom",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "hex",
  "indexmap",
  "itoa",
@@ -2826,7 +2972,7 @@ dependencies = [
  "comfy-table",
  "either",
  "hashbrown 0.14.5",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "indexmap",
  "ndarray",
  "num-traits",
@@ -2844,7 +2990,7 @@ dependencies = [
  "serde",
  "serde_json",
  "strum_macros",
- "thiserror",
+ "thiserror 1.0.69",
  "version_check",
  "xxhash-rust",
 ]
@@ -2863,6 +3009,20 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "polars-dylib"
+version = "0.44.2"
+dependencies = [
+ "polars",
+ "polars-arrow",
+ "polars-core",
+ "polars-expr",
+ "polars-lazy",
+ "polars-mem-engine",
+ "polars-plan",
+ "polars-python",
+]
+
 [[package]]
 name = "polars-error"
 version = "0.44.2"
@@ -2872,7 +3032,7 @@ dependencies = [
  "polars-arrow-format",
  "regex",
  "simdutf8",
- "thiserror",
+ "thiserror 1.0.69",
 ]
 
 [[package]]
@@ -2881,7 +3041,7 @@ version = "0.44.2"
 dependencies = [
  "ahash",
  "bitflags",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "num-traits",
  "once_cell",
  "polars-arrow",
@@ -2922,7 +3082,7 @@ dependencies = [
  "fs4",
  "futures",
  "glob",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "home",
  "itoa",
  "memchr",
@@ -2963,7 +3123,7 @@ dependencies = [
  "chrono",
  "chrono-tz",
  "fallible-streaming-iterator",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "indexmap",
  "itoa",
  "num-traits",
@@ -3036,7 +3196,7 @@ dependencies = [
  "chrono",
  "chrono-tz",
  "either",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "hex",
  "indexmap",
  "jsonpath_lib_polars_vendor",
@@ -3074,7 +3234,7 @@ dependencies = [
  "fallible-streaming-iterator",
  "flate2",
  "futures",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "lz4",
  "lz4_flex",
  "num-traits",
@@ -3110,7 +3270,7 @@ dependencies = [
  "crossbeam-queue",
  "enum_dispatch",
  "futures",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "num-traits",
  "polars-arrow",
  "polars-compute",
@@ -3140,7 +3300,7 @@ dependencies = [
  "ciborium",
  "either",
  "futures",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "libloading",
  "memmap2",
  "num-traits",
@@ -3196,7 +3356,7 @@ dependencies = [
  "pyo3",
  "recursive",
  "serde_json",
- "thiserror",
+ "thiserror 1.0.69",
  "version_check",
 ]
 
@@ -3297,7 +3457,7 @@ dependencies = [
  "bytemuck",
  "bytes",
  "compact_str",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "indexmap",
  "libc",
  "memmap2",
@@ -3385,9 +3545,9 @@ dependencies = [
 
 [[package]]
 name = "psm"
-version = "0.1.23"
+version = "0.1.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205"
+checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810"
 dependencies = [
  "cc",
 ]
@@ -3453,7 +3613,7 @@ dependencies = [
  "proc-macro2",
  "pyo3-macros-backend",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -3466,14 +3626,14 @@ dependencies = [
  "proc-macro2",
  "pyo3-build-config",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
 name = "quad-rand"
-version = "0.2.2"
+version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db"
+checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40"
 
 [[package]]
 name = "quick-xml"
@@ -3498,9 +3658,9 @@ dependencies = [
 
 [[package]]
 name = "quinn"
-version = "0.11.5"
+version = "0.11.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684"
+checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef"
 dependencies = [
  "bytes",
  "pin-project-lite",
@@ -3509,33 +3669,36 @@ dependencies = [
  "rustc-hash 2.0.0",
  "rustls 0.23.16",
  "socket2",
- "thiserror",
+ "thiserror 2.0.3",
  "tokio",
  "tracing",
 ]
 
 [[package]]
 name = "quinn-proto"
-version = "0.11.8"
+version = "0.11.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6"
+checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
 dependencies = [
  "bytes",
+ "getrandom",
  "rand",
  "ring",
  "rustc-hash 2.0.0",
  "rustls 0.23.16",
+ "rustls-pki-types",
  "slab",
- "thiserror",
+ "thiserror 2.0.3",
  "tinyvec",
  "tracing",
+ "web-time",
 ]
 
 [[package]]
 name = "quinn-udp"
-version = "0.5.6"
+version = "0.5.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780"
+checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da"
 dependencies = [
  "cfg_aliases",
  "libc",
@@ -3665,7 +3828,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
 dependencies = [
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -3694,7 +3857,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -3711,9 +3874,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.8"
+version = "0.4.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
+checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -3845,9 +4008,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.38"
+version = "0.38.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a"
+checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0"
 dependencies = [
  "bitflags",
  "errno",
@@ -3930,6 +4093,9 @@ name = "rustls-pki-types"
 version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b"
+dependencies = [
+ "web-time",
+]
 
 [[package]]
 name = "rustls-webpki"
@@ -4073,9 +4239,9 @@ dependencies = [
 
 [[package]]
 name = "security-framework-sys"
-version = "2.12.0"
+version = "2.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6"
+checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2"
 dependencies = [
  "core-foundation-sys",
  "libc",
@@ -4089,9 +4255,9 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
 
 [[package]]
 name = "serde"
-version = "1.0.214"
+version = "1.0.215"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5"
+checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f"
 dependencies = [
  "serde_derive",
 ]
@@ -4107,13 +4273,13 @@ dependencies = [
 
 [[package]]
 name = "serde_derive"
-version = "1.0.214"
+version = "1.0.215"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766"
+checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4304,6 +4470,12 @@ dependencies = [
  "log",
 ]
 
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
 [[package]]
 name = "stacker"
 version = "0.1.17"
@@ -4360,7 +4532,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4382,9 +4554,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.86"
+version = "2.0.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c"
+checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4400,6 +4572,17 @@ dependencies = [
  "futures-core",
 ]
 
+[[package]]
+name = "synstructure"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+]
+
 [[package]]
 name = "sysinfo"
 version = "0.32.0"
@@ -4427,9 +4610,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
 
 [[package]]
 name = "tempfile"
-version = "3.13.0"
+version = "3.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b"
+checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c"
 dependencies = [
  "cfg-if",
  "fastrand",
@@ -4440,22 +4623,42 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.66"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+dependencies = [
+ "thiserror-impl 1.0.69",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa"
+dependencies = [
+ "thiserror-impl 2.0.3",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d171f59dbaa811dbbb1aee1e73db92ec2b122911a48e1390dfe327a821ddede"
+checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
 dependencies = [
- "thiserror-impl",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.66"
+version = "2.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b08be0f17bd307950653ce45db00cd31200d82b624b36e181337d9c7d92765b5"
+checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4488,6 +4691,16 @@ dependencies = [
  "time-core",
 ]
 
+[[package]]
+name = "tinystr"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
 [[package]]
 name = "tinytemplate"
 version = "1.2.1"
@@ -4515,9 +4728,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.41.0"
+version = "1.41.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb"
+checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33"
 dependencies = [
  "backtrace",
  "bytes",
@@ -4538,7 +4751,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4601,7 +4814,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4646,7 +4859,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4661,27 +4874,12 @@ version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
 
-[[package]]
-name = "unicode-bidi"
-version = "0.3.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893"
-
 [[package]]
 name = "unicode-ident"
 version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
 
-[[package]]
-name = "unicode-normalization"
-version = "0.1.24"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
-dependencies = [
- "tinyvec",
-]
-
 [[package]]
 name = "unicode-reverse"
 version = "1.0.9"
@@ -4717,9 +4915,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
 
 [[package]]
 name = "url"
-version = "2.5.2"
+version = "2.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c"
+checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada"
 dependencies = [
  "form_urlencoded",
  "idna",
@@ -4732,6 +4930,18 @@ version = "2.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
 
+[[package]]
+name = "utf16_iter"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
 [[package]]
 name = "uuid"
 version = "1.11.0"
@@ -4813,7 +5023,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
  "wasm-bindgen-shared",
 ]
 
@@ -4847,7 +5057,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -4881,6 +5091,16 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "web-time"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "winapi"
 version = "0.3.9"
@@ -4951,7 +5171,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4962,7 +5182,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -5143,6 +5363,18 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "write16"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
+
+[[package]]
+name = "writeable"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
+
 [[package]]
 name = "x11rb"
 version = "0.13.1"
@@ -5172,6 +5404,30 @@ version = "0.8.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984"
 
+[[package]]
+name = "yoke"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5"
+dependencies = [
+ "serde",
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+ "synstructure",
+]
+
 [[package]]
 name = "zerocopy"
 version = "0.7.35"
@@ -5190,7 +5446,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.86",
+ "syn 2.0.87",
+]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+ "synstructure",
 ]
 
 [[package]]
@@ -5199,6 +5476,28 @@ version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
 
+[[package]]
+name = "zerovec"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+]
+
 [[package]]
 name = "zstd"
 version = "0.13.2"
diff --git a/Cargo.toml b/Cargo.toml
index 35595086f981..34502bb5e9ee 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -94,6 +94,7 @@ zstd = "0.13"
 polars = { version = "0.44.2", path = "crates/polars", default-features = false }
 polars-compute = { version = "0.44.2", path = "crates/polars-compute", default-features = false }
 polars-core = { version = "0.44.2", path = "crates/polars-core", default-features = false }
+polars-dylib = { version = "0.44.2", path = "crates/polars-dylib", default-features = false }
 polars-error = { version = "0.44.2", path = "crates/polars-error", default-features = false }
 polars-expr = { version = "0.44.2", path = "crates/polars-expr", default-features = false }
 polars-ffi = { version = "0.44.2", path = "crates/polars-ffi", default-features = false }
diff --git a/crates/Makefile b/crates/Makefile
index 28622ee061f5..6b3cf3372149 100644
--- a/crates/Makefile
+++ b/crates/Makefile
@@ -152,5 +152,6 @@ check-wasm:  ## Check wasm build without supported features
 		--exclude-features parquet            \
 		--exclude-features performant         \
 		--exclude-features streaming          \
-		--exclude-features http          	  \
+		--exclude-features http               \
+		--exclude-features full               \
 		--exclude-features test
diff --git a/crates/polars-dylib/Cargo.toml b/crates/polars-dylib/Cargo.toml
new file mode 100644
index 000000000000..5cc963f2d701
--- /dev/null
+++ b/crates/polars-dylib/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "polars-dylib"
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+homepage.workspace = true
+license.workspace = true
+repository.workspace = true
+
+[lib]
+crate-type = ["dylib", "rlib"]
+
+[dependencies]
+arrow = { workspace = true, optional = true, features = ["io_flight"] }
+polars = { workspace = true, features = ["full"] }
+polars-core = { workspace = true, optional = true }
+polars-expr = { workspace = true, optional = true }
+polars-lazy = { workspace = true, optional = true }
+polars-mem-engine = { workspace = true, optional = true }
+polars-plan = { workspace = true, optional = true }
+polars-python = { workspace = true, optional = true, default-features = true }
+
+[features]
+private = ["polars-plan", "arrow", "polars-core", "polars-lazy", "polars-expr", "polars-mem-engine"]
+python = ["polars-plan?/python", "polars-python", "polars-lazy?/python"]
diff --git a/crates/polars-dylib/README.md b/crates/polars-dylib/README.md
new file mode 100644
index 000000000000..3fd4b30de8f7
--- /dev/null
+++ b/crates/polars-dylib/README.md
@@ -0,0 +1,16 @@
+# Polars dynamic library
+
+```toml
+# Cargo.toml
+[workspace.dependencies.polars]
+package = "polars-dylib"
+```
+
+```toml
+# .cargo/config.toml
+[build]
+rustflags = [
+  "-C",
+  "prefer-dynamic",
+]
+```
diff --git a/crates/polars-dylib/src/lib.rs b/crates/polars-dylib/src/lib.rs
new file mode 100644
index 000000000000..907ce175aec8
--- /dev/null
+++ b/crates/polars-dylib/src/lib.rs
@@ -0,0 +1,15 @@
+#[cfg(feature = "private")]
+pub use arrow as _arrow;
+pub use polars::*;
+#[cfg(feature = "private")]
+pub use polars_core as _core;
+#[cfg(feature = "private")]
+pub use polars_expr as _expr;
+#[cfg(feature = "private")]
+pub use polars_lazy as _lazy;
+#[cfg(feature = "private")]
+pub use polars_mem_engine as _mem_engine;
+#[cfg(feature = "private")]
+pub use polars_plan as _plan;
+#[cfg(feature = "python")]
+pub use polars_python as _python;
diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml
index 9ff45610a3c7..7c054c21f59b 100644
--- a/crates/polars/Cargo.toml
+++ b/crates/polars/Cargo.toml
@@ -417,12 +417,21 @@ docs-selection = [
   "replace",
   "approx_unique",
   "unique_counts",
+  "polars_cloud",
+  "serde",
+  "ir_serde",
+  "cloud",
+  "async",
+  "cloud_write",
 ]
 
 bench = [
   "lazy",
 ]
 
+# All features except python
+full = ["docs-selection", "performant", "fmt"]
+
 [package.metadata.docs.rs]
 # all-features = true
 features = ["docs-selection"]

From 9f791007ae702cd1c63c36f702523df0626e3793 Mon Sep 17 00:00:00 2001
From: Ritchie Vink <ritchie46@gmail.com>
Date: Wed, 13 Nov 2024 19:33:34 +0100
Subject: [PATCH 09/18] Python Polars 1.13.1 (#19768)

---
 Cargo.lock           | 2 +-
 py-polars/Cargo.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 281f2845ccfc..d00cfa7ff0a6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3554,7 +3554,7 @@ dependencies = [
 
 [[package]]
 name = "py-polars"
-version = "1.13.0"
+version = "1.13.1"
 dependencies = [
  "jemallocator",
  "libc",
diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml
index d17218a3b6cd..a2ff3d9882da 100644
--- a/py-polars/Cargo.toml
+++ b/py-polars/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "py-polars"
-version = "1.13.0"
+version = "1.13.1"
 edition = "2021"
 
 [lib]

From 420c0d99b210816e89a01c041354d322424e2137 Mon Sep 17 00:00:00 2001
From: Alisa Petrova <60570090+sn0rkmaiden@users.noreply.github.com>
Date: Thu, 14 Nov 2024 09:55:35 +0300
Subject: [PATCH 10/18] fix(python): Fixed typo in file lazy.py (#19769)

---
 py-polars/polars/functions/lazy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py
index dfd3f607791c..30185cc6a586 100644
--- a/py-polars/polars/functions/lazy.py
+++ b/py-polars/polars/functions/lazy.py
@@ -1030,7 +1030,7 @@ def map_groups(
 
     The output for group `1` can be understood as follows:
 
-    - group `1` contains Series `'a': [1, 3]` and `'b': [4, 5]`
+    - group `1` contains Series `'a': [1, 3]` and `'b': [5, 6]`
     - applying the function to those lists of Series, one gets the output
       `[1 / 4 + 5, 3 / 4 + 6]`, i.e. `[5.25, 6.75]`
     """

From b7fce25bdeef0b0a7fddc708763568d25ba166b1 Mon Sep 17 00:00:00 2001
From: nameexhaustion <simonlin.rqmmw@slmail.me>
Date: Thu, 14 Nov 2024 21:28:16 +1100
Subject: [PATCH 11/18] fix: Fix incorrect lazy schema for explode on array
 columns (#19776)

---
 .../polars-plan/src/dsl/function_expr/array.rs |  6 +++++-
 crates/polars-plan/src/plans/aexpr/schema.rs   | 13 ++++++++-----
 .../simplify_expr/simplify_functions.rs        |  6 ------
 py-polars/tests/unit/operations/test_join.py   | 10 ++++++++++
 py-polars/tests/unit/test_schema.py            | 18 ++++++++++++++++++
 5 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/crates/polars-plan/src/dsl/function_expr/array.rs b/crates/polars-plan/src/dsl/function_expr/array.rs
index 2ecd016981e3..08333beb3893 100644
--- a/crates/polars-plan/src/dsl/function_expr/array.rs
+++ b/crates/polars-plan/src/dsl/function_expr/array.rs
@@ -132,7 +132,7 @@ impl From<ArrayFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
             #[cfg(feature = "array_count")]
             CountMatches => map_as_slice!(count_matches),
             Shift => map_as_slice!(shift),
-            Explode => unreachable!(),
+            Explode => map_as_slice!(explode),
         }
     }
 }
@@ -253,3 +253,7 @@ pub(super) fn shift(s: &[Column]) -> PolarsResult<Column> {
 
     ca.array_shift(n.as_materialized_series()).map(Column::from)
 }
+
+fn explode(c: &[Column]) -> PolarsResult<Column> {
+    c[0].explode()
+}
diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs
index 6c1b675b2bd8..6547c391eaae 100644
--- a/crates/polars-plan/src/plans/aexpr/schema.rs
+++ b/crates/polars-plan/src/plans/aexpr/schema.rs
@@ -84,11 +84,14 @@ impl AExpr {
                     .get(*expr)
                     .to_field_impl(schema, ctx, arena, &mut false)?;
 
-                if let List(inner) = field.dtype() {
-                    Ok(Field::new(field.name().clone(), *inner.clone()))
-                } else {
-                    Ok(field)
-                }
+                let field = match field.dtype() {
+                    List(inner) => Field::new(field.name().clone(), *inner.clone()),
+                    #[cfg(feature = "dtype-array")]
+                    Array(inner, ..) => Field::new(field.name().clone(), *inner.clone()),
+                    _ => field,
+                };
+
+                Ok(field)
             },
             Alias(expr, name) => Ok(Field::new(
                 name.clone(),
diff --git a/crates/polars-plan/src/plans/optimizer/simplify_expr/simplify_functions.rs b/crates/polars-plan/src/plans/optimizer/simplify_expr/simplify_functions.rs
index 03f274e5211a..2b5493c62e6b 100644
--- a/crates/polars-plan/src/plans/optimizer/simplify_expr/simplify_functions.rs
+++ b/crates/polars-plan/src/plans/optimizer/simplify_expr/simplify_functions.rs
@@ -7,12 +7,6 @@ pub(super) fn optimize_functions(
     expr_arena: &mut Arena<AExpr>,
 ) -> PolarsResult<Option<AExpr>> {
     let out = match function {
-        #[cfg(feature = "dtype-array")]
-        // arr.explode() -> explode()
-        FunctionExpr::ArrayExpr(ArrayFunction::Explode) => {
-            let input_node = input[0].node();
-            Some(AExpr::Explode(input_node))
-        },
         // is_null().any() -> null_count() > 0
         // is_not_null().any() ->  null_count() < len()
         // CORRECTNESS: we can ignore 'ignore_nulls' since is_null/is_not_null never produces NULLS
diff --git a/py-polars/tests/unit/operations/test_join.py b/py-polars/tests/unit/operations/test_join.py
index c65be5ad61c0..93395fafbdd5 100644
--- a/py-polars/tests/unit/operations/test_join.py
+++ b/py-polars/tests/unit/operations/test_join.py
@@ -1113,3 +1113,13 @@ def test_join_key_type_coercion_19597() -> None:
         left.join(
             right, left_on=pl.col("a") * 2, right_on=pl.col("a") * 2
         ).collect_schema()
+
+
+def test_array_explode_join_19763() -> None:
+    q = pl.LazyFrame().select(
+        pl.lit(pl.Series([[1], [2]], dtype=pl.Array(pl.Int64, 1))).explode().alias("k")
+    )
+
+    q = q.join(pl.LazyFrame({"k": [1, 2]}), on="k")
+
+    assert_frame_equal(q.collect().sort("k"), pl.DataFrame({"k": [1, 2]}))
diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py
index a8f9e43d84c0..43e8840458d3 100644
--- a/py-polars/tests/unit/test_schema.py
+++ b/py-polars/tests/unit/test_schema.py
@@ -278,3 +278,21 @@ def test_lf_window_schema(expr: pl.Expr, mapping_strategy: str) -> None:
     )
 
     assert q.collect_schema() == q.collect().collect_schema()
+
+
+def test_lf_explode_schema() -> None:
+    lf = pl.LazyFrame({"k": [1], "x": pl.Series([[1]], dtype=pl.Array(pl.Int64, 1))})
+
+    q = lf.select(pl.col("x").explode())
+    assert q.collect_schema() == {"x": pl.Int64}
+
+    q = lf.select(pl.col("x").arr.explode())
+    assert q.collect_schema() == {"x": pl.Int64}
+
+    lf = pl.LazyFrame({"k": [1], "x": pl.Series([[1]], dtype=pl.List(pl.Int64))})
+
+    q = lf.select(pl.col("x").explode())
+    assert q.collect_schema() == {"x": pl.Int64}
+
+    q = lf.select(pl.col("x").list.explode())
+    assert q.collect_schema() == {"x": pl.Int64}

From 9af7ccdb1b3b22f11a000d5003e15bc29af2a654 Mon Sep 17 00:00:00 2001
From: Itamar Turner-Trauring <itamar@itamarst.org>
Date: Thu, 14 Nov 2024 05:31:06 -0500
Subject: [PATCH 12/18] feat: A different approach to warning users of fork()
 issues with Polars (#19197)

Co-authored-by: Itamar Turner-Trauring <itamar@pythonspeed.com>
---
 py-polars/polars/__init__.py               | 28 +++++++++++++++++++
 py-polars/tests/unit/test_polars_import.py | 31 ++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/py-polars/polars/__init__.py b/py-polars/polars/__init__.py
index 83ea52acc822..eb33f23bf53f 100644
--- a/py-polars/polars/__init__.py
+++ b/py-polars/polars/__init__.py
@@ -429,3 +429,31 @@ def __getattr__(name: str) -> Any:
 
     msg = f"module {__name__!r} has no attribute {name!r}"
     raise AttributeError(msg)
+
+
+# fork() breaks the Polars thread pool, so warn users who might be doing this.
+def __install_postfork_hook() -> None:
+    message = """\
+Using fork() can cause Polars to deadlock in the child process.
+In addition, using fork() with Python in general is a recipe for mysterious
+deadlocks and crashes.
+
+The most likely reason you are seeing this error is because you are using the
+multiprocessing module on Linux, which uses fork() by default. This will be
+fixed in Python 3.14. Until then, you want to use the "spawn" context instead.
+
+See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.
+"""
+
+    def before_hook() -> None:
+        import warnings
+
+        warnings.warn(message, RuntimeWarning, stacklevel=2)
+
+    import os
+
+    if hasattr(os, "register_at_fork"):
+        os.register_at_fork(before=before_hook)
+
+
+__install_postfork_hook()
diff --git a/py-polars/tests/unit/test_polars_import.py b/py-polars/tests/unit/test_polars_import.py
index fa1779de3478..2686c094999b 100644
--- a/py-polars/tests/unit/test_polars_import.py
+++ b/py-polars/tests/unit/test_polars_import.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import compileall
+import multiprocessing
+import os
 import subprocess
 import sys
 from pathlib import Path
@@ -97,3 +99,32 @@ def test_polars_import() -> None:
             import_time_ms = polars_import_time // 1_000
             msg = f"Possible import speed regression; took {import_time_ms}ms\n{df_import}"
             raise AssertionError(msg)
+
+
+def run_in_child() -> int:
+    return 123
+
+
+@pytest.mark.skipif(not hasattr(os, "fork"), reason="Requires fork()")
+def test_fork_safety(recwarn: pytest.WarningsRecorder) -> None:
+    def get_num_fork_warnings() -> int:
+        fork_warnings = 0
+        for warning in recwarn:
+            if issubclass(warning.category, RuntimeWarning) and str(
+                warning.message
+            ).startswith("Using fork() can cause Polars"):
+                fork_warnings += 1
+        return fork_warnings
+
+    assert get_num_fork_warnings() == 0
+
+    # Using the forkserver and spawn contexts should not trigger our fork() warning:
+    for context in ["spawn", "forkserver"]:
+        with multiprocessing.get_context(context).Pool(1) as pool:
+            assert pool.apply(run_in_child) == 123
+    assert get_num_fork_warnings() == 0
+
+    # Using fork()-based multiprocessing should raise a warning:
+    with multiprocessing.get_context("fork").Pool(1) as pool:
+        assert pool.apply(run_in_child) == 123
+    assert get_num_fork_warnings() == 1

From 1e262ba431bfbc1f7000cf037a894a71d5b30aa7 Mon Sep 17 00:00:00 2001
From: nameexhaustion <simonlin.rqmmw@slmail.me>
Date: Thu, 14 Nov 2024 23:09:10 +1100
Subject: [PATCH 13/18] fix: Fix incorrect filter after right-join on LazyFrame
 (#19775)

---
 .../optimizer/predicate_pushdown/join.rs      | 33 ++++++++++---------
 py-polars/tests/unit/test_predicates.py       | 15 +++++++++
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/join.rs b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/join.rs
index b79de0e2b959..7e4710e709a1 100644
--- a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/join.rs
+++ b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/join.rs
@@ -66,22 +66,25 @@ fn should_block_join_specific(
     }
 }
 
+/// Returns a tuple indicating whether predicates should be blocked for either side based on the
+/// join type.
+///
+/// * `true` indicates that predicates must not be pushed to that side
 fn join_produces_null(how: &JoinType) -> LeftRight<bool> {
-    #[cfg(feature = "asof_join")]
-    {
-        match how {
-            JoinType::Left => LeftRight(false, true),
-            JoinType::Full { .. } | JoinType::Cross | JoinType::AsOf(_) => LeftRight(true, true),
-            _ => LeftRight(false, false),
-        }
-    }
-    #[cfg(not(feature = "asof_join"))]
-    {
-        match how {
-            JoinType::Left => LeftRight(false, true),
-            JoinType::Full { .. } | JoinType::Cross => LeftRight(true, true),
-            _ => LeftRight(false, false),
-        }
+    match how {
+        JoinType::Left => LeftRight(false, true),
+        JoinType::Right => LeftRight(true, false),
+
+        JoinType::Full { .. } => LeftRight(true, true),
+        JoinType::Cross => LeftRight(true, true),
+        #[cfg(feature = "asof_join")]
+        JoinType::AsOf(_) => LeftRight(true, true),
+
+        JoinType::Inner => LeftRight(false, false),
+        #[cfg(feature = "semi_anti_join")]
+        JoinType::Semi | JoinType::Anti => LeftRight(false, false),
+        #[cfg(feature = "iejoin")]
+        JoinType::IEJoin(..) => LeftRight(false, false),
     }
 }
 
diff --git a/py-polars/tests/unit/test_predicates.py b/py-polars/tests/unit/test_predicates.py
index e752bacdf81d..e8f0be927cb9 100644
--- a/py-polars/tests/unit/test_predicates.py
+++ b/py-polars/tests/unit/test_predicates.py
@@ -553,3 +553,18 @@ def test_predicate_pushdown_struct_unnest_19632() -> None:
         q.collect(),
         pl.DataFrame({"a": 1, "count": 1}, schema={"a": pl.Int64, "count": pl.UInt32}),
     )
+
+
+def test_predicate_pushdown_right_join_19772() -> None:
+    left = pl.LazyFrame({"k": [1], "v": [7]})
+    right = pl.LazyFrame({"k": [1, 2]})
+
+    q = left.join(right, on="k", how="right").filter(pl.col("v") == 7)
+
+    plan = q.explain()
+    assert plan.startswith("FILTER")
+
+    expect = pl.DataFrame({"v": 7, "k": 1})
+
+    assert_frame_equal(q.collect(no_optimization=True), expect)
+    assert_frame_equal(q.collect(), expect)

From 869d1b93b52bdde17bc8fede4d81c1aee4324a87 Mon Sep 17 00:00:00 2001
From: Itamar Turner-Trauring <itamar@itamarst.org>
Date: Thu, 14 Nov 2024 07:09:28 -0500
Subject: [PATCH 14/18] fix(python): Release the GIL in Python APIs, part 2 of
 2 (#19762)

Co-authored-by: Itamar Turner-Trauring <itamar@pythonspeed.com>
---
 crates/polars-python/src/cloud.rs             |   4 +-
 .../src/dataframe/construction.rs             |   4 +-
 crates/polars-python/src/dataframe/export.rs  |  28 +-
 crates/polars-python/src/dataframe/general.rs | 330 +++++++++++-------
 crates/polars-python/src/functions/range.rs   |   3 +-
 .../src/interop/arrow/to_rust.rs              |  26 +-
 .../src/interop/numpy/to_numpy_df.rs          |   1 +
 .../src/interop/numpy/to_numpy_series.rs      |   7 +-
 crates/polars-python/src/map/mod.rs           |  13 +-
 crates/polars-python/src/map/series.rs        |  16 +-
 10 files changed, 259 insertions(+), 173 deletions(-)

diff --git a/crates/polars-python/src/cloud.rs b/crates/polars-python/src/cloud.rs
index 39410a6fa7a1..19d4f6dfda07 100644
--- a/crates/polars-python/src/cloud.rs
+++ b/crates/polars-python/src/cloud.rs
@@ -49,9 +49,7 @@ pub fn _execute_ir_plan_with_gpu(ir_plan_ser: Vec<u8>, py: Python) -> PyResult<P
 
     // Execute the plan.
     let mut state = ExecutionState::new();
-    let df = physical_plan
-        .execute(&mut state)
-        .map_err(PyPolarsErr::from)?;
+    let df = py.allow_threads(|| physical_plan.execute(&mut state).map_err(PyPolarsErr::from))?;
 
     Ok(df.into())
 }
diff --git a/crates/polars-python/src/dataframe/construction.rs b/crates/polars-python/src/dataframe/construction.rs
index 1c753ac88e0e..8520caf220d9 100644
--- a/crates/polars-python/src/dataframe/construction.rs
+++ b/crates/polars-python/src/dataframe/construction.rs
@@ -52,8 +52,8 @@ impl PyDataFrame {
     }
 
     #[staticmethod]
-    pub fn from_arrow_record_batches(rb: Vec<Bound<PyAny>>) -> PyResult<Self> {
-        let df = interop::arrow::to_rust::to_rust_df(&rb)?;
+    pub fn from_arrow_record_batches(py: Python, rb: Vec<Bound<PyAny>>) -> PyResult<Self> {
+        let df = interop::arrow::to_rust::to_rust_df(py, &rb)?;
         Ok(Self::from(df))
     }
 }
diff --git a/crates/polars-python/src/dataframe/export.rs b/crates/polars-python/src/dataframe/export.rs
index b32ad3d6afb3..36037865feb2 100644
--- a/crates/polars-python/src/dataframe/export.rs
+++ b/crates/polars-python/src/dataframe/export.rs
@@ -79,19 +79,17 @@ impl PyDataFrame {
     }
 
     #[allow(clippy::wrong_self_convention)]
-    pub fn to_arrow(&mut self, compat_level: PyCompatLevel) -> PyResult<Vec<PyObject>> {
-        self.df.align_chunks_par();
-        Python::with_gil(|py| {
-            let pyarrow = py.import_bound("pyarrow")?;
-            let names = self.df.get_column_names_str();
+    pub fn to_arrow(&mut self, py: Python, compat_level: PyCompatLevel) -> PyResult<Vec<PyObject>> {
+        py.allow_threads(|| self.df.align_chunks_par());
+        let pyarrow = py.import_bound("pyarrow")?;
+        let names = self.df.get_column_names_str();
 
-            let rbs = self
-                .df
-                .iter_chunks(compat_level.0, true)
-                .map(|rb| interop::arrow::to_py::to_py_rb(&rb, &names, py, &pyarrow))
-                .collect::<PyResult<_>>()?;
-            Ok(rbs)
-        })
+        let rbs = self
+            .df
+            .iter_chunks(compat_level.0, true)
+            .map(|rb| interop::arrow::to_py::to_py_rb(&rb, &names, py, &pyarrow))
+            .collect::<PyResult<_>>()?;
+        Ok(rbs)
     }
 
     /// Create a `Vec` of PyArrow RecordBatch instances.
@@ -100,8 +98,8 @@ impl PyDataFrame {
     /// since those can't be converted correctly via PyArrow. The calling Python
     /// code should make sure these are not included.
     #[allow(clippy::wrong_self_convention)]
-    pub fn to_pandas(&mut self) -> PyResult<Vec<PyObject>> {
-        self.df.as_single_chunk_par();
+    pub fn to_pandas(&mut self, py: Python) -> PyResult<Vec<PyObject>> {
+        py.allow_threads(|| self.df.as_single_chunk_par());
         Python::with_gil(|py| {
             let pyarrow = py.import_bound("pyarrow")?;
             let names = self.df.get_column_names_str();
@@ -154,7 +152,7 @@ impl PyDataFrame {
         py: Python<'py>,
         requested_schema: Option<PyObject>,
     ) -> PyResult<Bound<'py, PyCapsule>> {
-        self.df.align_chunks_par();
+        py.allow_threads(|| self.df.align_chunks_par());
         dataframe_to_stream(&self.df, py)
     }
 }
diff --git a/crates/polars-python/src/dataframe/general.rs b/crates/polars-python/src/dataframe/general.rs
index 8f2321d103fd..e866e7db1004 100644
--- a/crates/polars-python/src/dataframe/general.rs
+++ b/crates/polars-python/src/dataframe/general.rs
@@ -45,67 +45,87 @@ impl PyDataFrame {
             .collect()
     }
 
-    pub fn add(&self, s: &PySeries) -> PyResult<Self> {
-        let df = (&self.df + &s.series).map_err(PyPolarsErr::from)?;
+    pub fn add(&self, py: Python, s: &PySeries) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df + &s.series)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn sub(&self, s: &PySeries) -> PyResult<Self> {
-        let df = (&self.df - &s.series).map_err(PyPolarsErr::from)?;
+    pub fn sub(&self, py: Python, s: &PySeries) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df - &s.series)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn div(&self, s: &PySeries) -> PyResult<Self> {
-        let df = (&self.df / &s.series).map_err(PyPolarsErr::from)?;
+    pub fn div(&self, py: Python, s: &PySeries) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df / &s.series)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn mul(&self, s: &PySeries) -> PyResult<Self> {
-        let df = (&self.df * &s.series).map_err(PyPolarsErr::from)?;
+    pub fn mul(&self, py: Python, s: &PySeries) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df * &s.series)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn rem(&self, s: &PySeries) -> PyResult<Self> {
-        let df = (&self.df % &s.series).map_err(PyPolarsErr::from)?;
+    pub fn rem(&self, py: Python, s: &PySeries) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df % &s.series)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn add_df(&self, s: &Self) -> PyResult<Self> {
-        let df = (&self.df + &s.df).map_err(PyPolarsErr::from)?;
+    pub fn add_df(&self, py: Python, s: &Self) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df + &s.df)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn sub_df(&self, s: &Self) -> PyResult<Self> {
-        let df = (&self.df - &s.df).map_err(PyPolarsErr::from)?;
+    pub fn sub_df(&self, py: Python, s: &Self) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df - &s.df)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn div_df(&self, s: &Self) -> PyResult<Self> {
-        let df = (&self.df / &s.df).map_err(PyPolarsErr::from)?;
+    pub fn div_df(&self, py: Python, s: &Self) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df / &s.df)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn mul_df(&self, s: &Self) -> PyResult<Self> {
-        let df = (&self.df * &s.df).map_err(PyPolarsErr::from)?;
+    pub fn mul_df(&self, py: Python, s: &Self) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df * &s.df)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn rem_df(&self, s: &Self) -> PyResult<Self> {
-        let df = (&self.df % &s.df).map_err(PyPolarsErr::from)?;
+    pub fn rem_df(&self, py: Python, s: &Self) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| &self.df % &s.df)
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
     #[pyo3(signature = (n, with_replacement, shuffle, seed=None))]
     pub fn sample_n(
         &self,
+        py: Python,
         n: &PySeries,
         with_replacement: bool,
         shuffle: bool,
         seed: Option<u64>,
     ) -> PyResult<Self> {
-        let df = self
-            .df
-            .sample_n(&n.series, with_replacement, shuffle, seed)
+        let df = py
+            .allow_threads(|| self.df.sample_n(&n.series, with_replacement, shuffle, seed))
             .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
@@ -113,14 +133,17 @@ impl PyDataFrame {
     #[pyo3(signature = (frac, with_replacement, shuffle, seed=None))]
     pub fn sample_frac(
         &self,
+        py: Python,
         frac: &PySeries,
         with_replacement: bool,
         shuffle: bool,
         seed: Option<u64>,
     ) -> PyResult<Self> {
-        let df = self
-            .df
-            .sample_frac(&frac.series, with_replacement, shuffle, seed)
+        let df = py
+            .allow_threads(|| {
+                self.df
+                    .sample_frac(&frac.series, with_replacement, shuffle, seed)
+            })
             .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
@@ -183,34 +206,41 @@ impl PyDataFrame {
         self.df.is_empty()
     }
 
-    pub fn hstack(&self, columns: Vec<PySeries>) -> PyResult<Self> {
+    pub fn hstack(&self, py: Python, columns: Vec<PySeries>) -> PyResult<Self> {
         let columns = columns.to_series();
         // @scalar-opt
         let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
-        let df = self.df.hstack(&columns).map_err(PyPolarsErr::from)?;
+        let df = py
+            .allow_threads(|| self.df.hstack(&columns))
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn hstack_mut(&mut self, columns: Vec<PySeries>) -> PyResult<()> {
+    pub fn hstack_mut(&mut self, py: Python, columns: Vec<PySeries>) -> PyResult<()> {
         let columns = columns.to_series();
         // @scalar-opt
         let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
-        self.df.hstack_mut(&columns).map_err(PyPolarsErr::from)?;
+        py.allow_threads(|| self.df.hstack_mut(&columns))
+            .map_err(PyPolarsErr::from)?;
         Ok(())
     }
 
-    pub fn vstack(&self, other: &PyDataFrame) -> PyResult<Self> {
-        let df = self.df.vstack(&other.df).map_err(PyPolarsErr::from)?;
+    pub fn vstack(&self, py: Python, other: &PyDataFrame) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| self.df.vstack(&other.df))
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn vstack_mut(&mut self, other: &PyDataFrame) -> PyResult<()> {
-        self.df.vstack_mut(&other.df).map_err(PyPolarsErr::from)?;
+    pub fn vstack_mut(&mut self, py: Python, other: &PyDataFrame) -> PyResult<()> {
+        py.allow_threads(|| self.df.vstack_mut(&other.df))
+            .map_err(PyPolarsErr::from)?;
         Ok(())
     }
 
-    pub fn extend(&mut self, other: &PyDataFrame) -> PyResult<()> {
-        self.df.extend(&other.df).map_err(PyPolarsErr::from)?;
+    pub fn extend(&mut self, py: Python, other: &PyDataFrame) -> PyResult<()> {
+        py.allow_threads(|| self.df.extend(&other.df))
+            .map_err(PyPolarsErr::from)?;
         Ok(())
     }
 
@@ -254,10 +284,9 @@ impl PyDataFrame {
         Ok(series)
     }
 
-    pub fn select(&self, columns: Vec<PyBackedStr>) -> PyResult<Self> {
-        let df = self
-            .df
-            .select(columns.iter().map(|x| &**x))
+    pub fn select(&self, py: Python, columns: Vec<PyBackedStr>) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| self.df.select(columns.iter().map(|x| &**x)))
             .map_err(PyPolarsErr::from)?;
         Ok(PyDataFrame::new(df))
     }
@@ -297,46 +326,55 @@ impl PyDataFrame {
     }
 
     #[pyo3(signature = (offset, length=None))]
-    pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
-        let df = self
-            .df
-            .slice(offset, length.unwrap_or_else(|| self.df.height()));
+    pub fn slice(&self, py: Python, offset: i64, length: Option<usize>) -> Self {
+        let df = py.allow_threads(|| {
+            self.df
+                .slice(offset, length.unwrap_or_else(|| self.df.height()))
+        });
         df.into()
     }
 
-    pub fn head(&self, n: usize) -> Self {
-        let df = self.df.head(Some(n));
+    pub fn head(&self, py: Python, n: usize) -> Self {
+        let df = py.allow_threads(|| self.df.head(Some(n)));
         PyDataFrame::new(df)
     }
 
-    pub fn tail(&self, n: usize) -> Self {
-        let df = self.df.tail(Some(n));
+    pub fn tail(&self, py: Python, n: usize) -> Self {
+        let df = py.allow_threads(|| self.df.tail(Some(n)));
         PyDataFrame::new(df)
     }
 
-    pub fn is_unique(&self) -> PyResult<PySeries> {
-        let mask = self.df.is_unique().map_err(PyPolarsErr::from)?;
+    pub fn is_unique(&self, py: Python) -> PyResult<PySeries> {
+        let mask = py
+            .allow_threads(|| self.df.is_unique())
+            .map_err(PyPolarsErr::from)?;
         Ok(mask.into_series().into())
     }
 
-    pub fn is_duplicated(&self) -> PyResult<PySeries> {
-        let mask = self.df.is_duplicated().map_err(PyPolarsErr::from)?;
+    pub fn is_duplicated(&self, py: Python) -> PyResult<PySeries> {
+        let mask = py
+            .allow_threads(|| self.df.is_duplicated())
+            .map_err(PyPolarsErr::from)?;
         Ok(mask.into_series().into())
     }
 
-    pub fn equals(&self, other: &PyDataFrame, null_equal: bool) -> bool {
+    pub fn equals(&self, py: Python, other: &PyDataFrame, null_equal: bool) -> bool {
         if null_equal {
-            self.df.equals_missing(&other.df)
+            py.allow_threads(|| self.df.equals_missing(&other.df))
         } else {
-            self.df.equals(&other.df)
+            py.allow_threads(|| self.df.equals(&other.df))
         }
     }
 
     #[pyo3(signature = (name, offset=None))]
-    pub fn with_row_index(&self, name: &str, offset: Option<IdxSize>) -> PyResult<Self> {
-        let df = self
-            .df
-            .with_row_index(name.into(), offset)
+    pub fn with_row_index(
+        &self,
+        py: Python,
+        name: &str,
+        offset: Option<IdxSize>,
+    ) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| self.df.with_row_index(name.into(), offset))
             .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
@@ -398,6 +436,7 @@ impl PyDataFrame {
     #[pyo3(signature = (on, index, value_name=None, variable_name=None))]
     pub fn unpivot(
         &self,
+        py: Python,
         on: Vec<PyBackedStr>,
         index: Vec<PyBackedStr>,
         value_name: Option<&str>,
@@ -411,7 +450,9 @@ impl PyDataFrame {
             variable_name: variable_name.map(|s| s.into()),
         };
 
-        let df = self.df.unpivot2(args).map_err(PyPolarsErr::from)?;
+        let df = py
+            .allow_threads(|| self.df.unpivot2(args))
+            .map_err(PyPolarsErr::from)?;
         Ok(PyDataFrame::new(df))
     }
 
@@ -419,6 +460,7 @@ impl PyDataFrame {
     #[pyo3(signature = (on, index, values, maintain_order, sort_columns, aggregate_expr, separator))]
     pub fn pivot_expr(
         &self,
+        py: Python,
         on: Vec<String>,
         index: Option<Vec<String>>,
         values: Option<Vec<String>>,
@@ -429,31 +471,38 @@ impl PyDataFrame {
     ) -> PyResult<Self> {
         let fun = if maintain_order { pivot_stable } else { pivot };
         let agg_expr = aggregate_expr.map(|expr| expr.inner);
-        let df = fun(
-            &self.df,
-            on,
-            index,
-            values,
-            sort_columns,
-            agg_expr,
-            separator,
-        )
-        .map_err(PyPolarsErr::from)?;
+        let df = py
+            .allow_threads(|| {
+                fun(
+                    &self.df,
+                    on,
+                    index,
+                    values,
+                    sort_columns,
+                    agg_expr,
+                    separator,
+                )
+            })
+            .map_err(PyPolarsErr::from)?;
         Ok(PyDataFrame::new(df))
     }
 
     pub fn partition_by(
         &self,
+        py: Python,
         by: Vec<String>,
         maintain_order: bool,
         include_key: bool,
     ) -> PyResult<Vec<Self>> {
-        let out = if maintain_order {
-            self.df.partition_by_stable(by, include_key)
-        } else {
-            self.df.partition_by(by, include_key)
-        }
-        .map_err(PyPolarsErr::from)?;
+        let out = py
+            .allow_threads(|| {
+                if maintain_order {
+                    self.df.partition_by_stable(by, include_key)
+                } else {
+                    self.df.partition_by(by, include_key)
+                }
+            })
+            .map_err(PyPolarsErr::from)?;
 
         // SAFETY: PyDataFrame is a repr(transparent) DataFrame.
         Ok(unsafe { std::mem::transmute::<Vec<DataFrame>, Vec<PyDataFrame>>(out) })
@@ -463,38 +512,40 @@ impl PyDataFrame {
         self.df.clone().lazy().into()
     }
 
-    pub fn max_horizontal(&self) -> PyResult<Option<PySeries>> {
-        let s = self.df.max_horizontal().map_err(PyPolarsErr::from)?;
+    pub fn max_horizontal(&self, py: Python) -> PyResult<Option<PySeries>> {
+        let s = py
+            .allow_threads(|| self.df.max_horizontal())
+            .map_err(PyPolarsErr::from)?;
         Ok(s.map(|s| s.take_materialized_series().into()))
     }
 
-    pub fn min_horizontal(&self) -> PyResult<Option<PySeries>> {
-        let s = self.df.min_horizontal().map_err(PyPolarsErr::from)?;
+    pub fn min_horizontal(&self, py: Python) -> PyResult<Option<PySeries>> {
+        let s = py
+            .allow_threads(|| self.df.min_horizontal())
+            .map_err(PyPolarsErr::from)?;
         Ok(s.map(|s| s.take_materialized_series().into()))
     }
 
-    pub fn sum_horizontal(&self, ignore_nulls: bool) -> PyResult<Option<PySeries>> {
+    pub fn sum_horizontal(&self, py: Python, ignore_nulls: bool) -> PyResult<Option<PySeries>> {
         let null_strategy = if ignore_nulls {
             NullStrategy::Ignore
         } else {
             NullStrategy::Propagate
         };
-        let s = self
-            .df
-            .sum_horizontal(null_strategy)
+        let s = py
+            .allow_threads(|| self.df.sum_horizontal(null_strategy))
             .map_err(PyPolarsErr::from)?;
         Ok(s.map(|s| s.into()))
     }
 
-    pub fn mean_horizontal(&self, ignore_nulls: bool) -> PyResult<Option<PySeries>> {
+    pub fn mean_horizontal(&self, py: Python, ignore_nulls: bool) -> PyResult<Option<PySeries>> {
         let null_strategy = if ignore_nulls {
             NullStrategy::Ignore
         } else {
             NullStrategy::Propagate
         };
-        let s = self
-            .df
-            .mean_horizontal(null_strategy)
+        let s = py
+            .allow_threads(|| self.df.mean_horizontal(null_strategy))
             .map_err(PyPolarsErr::from)?;
         Ok(s.map(|s| s.into()))
     }
@@ -502,24 +553,26 @@ impl PyDataFrame {
     #[pyo3(signature = (columns, separator, drop_first=false))]
     pub fn to_dummies(
         &self,
+        py: Python,
         columns: Option<Vec<String>>,
         separator: Option<&str>,
         drop_first: bool,
     ) -> PyResult<Self> {
-        let df = match columns {
-            Some(cols) => self.df.columns_to_dummies(
-                cols.iter().map(|x| x as &str).collect(),
-                separator,
-                drop_first,
-            ),
-            None => self.df.to_dummies(separator, drop_first),
-        }
-        .map_err(PyPolarsErr::from)?;
+        let df = py
+            .allow_threads(|| match columns {
+                Some(cols) => self.df.columns_to_dummies(
+                    cols.iter().map(|x| x as &str).collect(),
+                    separator,
+                    drop_first,
+                ),
+                None => self.df.to_dummies(separator, drop_first),
+            })
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn null_count(&self) -> Self {
-        let df = self.df.null_count();
+    pub fn null_count(&self, py: Python) -> Self {
+        let df = py.allow_threads(|| self.df.null_count());
         df.into()
     }
 
@@ -555,19 +608,29 @@ impl PyDataFrame {
         })
     }
 
-    pub fn shrink_to_fit(&mut self) {
-        self.df.shrink_to_fit();
+    pub fn shrink_to_fit(&mut self, py: Python) {
+        py.allow_threads(|| self.df.shrink_to_fit());
     }
 
-    pub fn hash_rows(&mut self, k0: u64, k1: u64, k2: u64, k3: u64) -> PyResult<PySeries> {
+    pub fn hash_rows(
+        &mut self,
+        py: Python,
+        k0: u64,
+        k1: u64,
+        k2: u64,
+        k3: u64,
+    ) -> PyResult<PySeries> {
         let hb = PlRandomState::with_seeds(k0, k1, k2, k3);
-        let hash = self.df.hash_rows(Some(hb)).map_err(PyPolarsErr::from)?;
+        let hash = py
+            .allow_threads(|| self.df.hash_rows(Some(hb)))
+            .map_err(PyPolarsErr::from)?;
         Ok(hash.into_series().into())
     }
 
     #[pyo3(signature = (keep_names_as, column_names))]
     pub fn transpose(
         &mut self,
+        py: Python,
         keep_names_as: Option<&str>,
         column_names: &Bound<PyAny>,
     ) -> PyResult<Self> {
@@ -578,54 +641,61 @@ impl PyDataFrame {
         } else {
             None
         };
-        Ok(self
-            .df
-            .transpose(keep_names_as, new_col_names)
+        Ok(py
+            .allow_threads(|| self.df.transpose(keep_names_as, new_col_names))
             .map_err(PyPolarsErr::from)?
             .into())
     }
+
     pub fn upsample(
         &self,
+        py: Python,
         by: Vec<String>,
         index_column: &str,
         every: &str,
         stable: bool,
     ) -> PyResult<Self> {
         let every = Duration::try_parse(every).map_err(PyPolarsErr::from)?;
-        let out = if stable {
-            self.df.upsample_stable(by, index_column, every)
-        } else {
-            self.df.upsample(by, index_column, every)
-        };
+        let out = py.allow_threads(|| {
+            if stable {
+                self.df.upsample_stable(by, index_column, every)
+            } else {
+                self.df.upsample(by, index_column, every)
+            }
+        });
         let out = out.map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
 
-    pub fn to_struct(&self, name: &str, invalid_indices: Vec<usize>) -> PySeries {
-        let ca = self.df.clone().into_struct(name.into());
-
-        if !invalid_indices.is_empty() {
-            let mut validity = MutableBitmap::with_capacity(ca.len());
-            validity.extend_constant(ca.len(), true);
-            for i in invalid_indices {
-                validity.set(i, false);
+    pub fn to_struct(&self, py: Python, name: &str, invalid_indices: Vec<usize>) -> PySeries {
+        py.allow_threads(|| {
+            let ca = self.df.clone().into_struct(name.into());
+
+            if !invalid_indices.is_empty() {
+                let mut validity = MutableBitmap::with_capacity(ca.len());
+                validity.extend_constant(ca.len(), true);
+                for i in invalid_indices {
+                    validity.set(i, false);
+                }
+                let ca = ca.rechunk();
+                ca.with_outer_validity(Some(validity.freeze()))
+                    .into_series()
+                    .into()
+            } else {
+                ca.into_series().into()
             }
-            let ca = ca.rechunk();
-            ca.with_outer_validity(Some(validity.freeze()))
-                .into_series()
-                .into()
-        } else {
-            ca.into_series().into()
-        }
+        })
     }
 
-    pub fn unnest(&self, columns: Vec<String>) -> PyResult<Self> {
-        let df = self.df.unnest(columns).map_err(PyPolarsErr::from)?;
+    pub fn unnest(&self, py: Python, columns: Vec<String>) -> PyResult<Self> {
+        let df = py
+            .allow_threads(|| self.df.unnest(columns))
+            .map_err(PyPolarsErr::from)?;
         Ok(df.into())
     }
 
-    pub fn clear(&self) -> Self {
-        self.df.clear().into()
+    pub fn clear(&self, py: Python) -> Self {
+        py.allow_threads(|| self.df.clear()).into()
     }
 
     #[allow(clippy::wrong_self_convention)]
diff --git a/crates/polars-python/src/functions/range.rs b/crates/polars-python/src/functions/range.rs
index b6eae4400dd8..11ff3864fdfa 100644
--- a/crates/polars-python/src/functions/range.rs
+++ b/crates/polars-python/src/functions/range.rs
@@ -17,6 +17,7 @@ pub fn int_range(start: PyExpr, end: PyExpr, step: i64, dtype: Wrap<DataType>) -
 /// Eager version of `int_range` to avoid overhead from the expression engine.
 #[pyfunction]
 pub fn eager_int_range(
+    py: Python,
     lower: &Bound<'_, PyAny>,
     upper: &Bound<'_, PyAny>,
     step: &Bound<'_, PyAny>,
@@ -34,7 +35,7 @@ pub fn eager_int_range(
         let start_v: <$T as PolarsNumericType>::Native = lower.extract()?;
         let end_v: <$T as PolarsNumericType>::Native = upper.extract()?;
         let step: i64 = step.extract()?;
-        new_int_range::<$T>(start_v, end_v, step, PlSmallStr::from_static("literal"))
+        py.allow_threads(|| new_int_range::<$T>(start_v, end_v, step, PlSmallStr::from_static("literal")))
     });
 
     let s = ret.map_err(PyPolarsErr::from)?;
diff --git a/crates/polars-python/src/interop/arrow/to_rust.rs b/crates/polars-python/src/interop/arrow/to_rust.rs
index 1add88c96fd8..ee741c4279cc 100644
--- a/crates/polars-python/src/interop/arrow/to_rust.rs
+++ b/crates/polars-python/src/interop/arrow/to_rust.rs
@@ -46,7 +46,7 @@ pub fn array_to_rust(obj: &Bound<PyAny>) -> PyResult<ArrayRef> {
     }
 }
 
-pub fn to_rust_df(rb: &[Bound<PyAny>]) -> PyResult<DataFrame> {
+pub fn to_rust_df(py: Python, rb: &[Bound<PyAny>]) -> PyResult<DataFrame> {
     let schema = rb
         .first()
         .ok_or_else(|| PyPolarsErr::Other("empty table".into()))?
@@ -79,17 +79,19 @@ pub fn to_rust_df(rb: &[Bound<PyAny>]) -> PyResult<DataFrame> {
             // for instance string -> large-utf8
             // dict encoded to categorical
             let columns = if run_parallel {
-                POOL.install(|| {
-                    columns
-                        .into_par_iter()
-                        .enumerate()
-                        .map(|(i, arr)| {
-                            let s = Series::try_from((names[i].clone(), arr))
-                                .map_err(PyPolarsErr::from)?
-                                .into_column();
-                            Ok(s)
-                        })
-                        .collect::<PyResult<Vec<_>>>()
+                py.allow_threads(|| {
+                    POOL.install(|| {
+                        columns
+                            .into_par_iter()
+                            .enumerate()
+                            .map(|(i, arr)| {
+                                let s = Series::try_from((names[i].clone(), arr))
+                                    .map_err(PyPolarsErr::from)?
+                                    .into_column();
+                                Ok(s)
+                            })
+                            .collect::<PyResult<Vec<_>>>()
+                    })
                 })
             } else {
                 columns
diff --git a/crates/polars-python/src/interop/numpy/to_numpy_df.rs b/crates/polars-python/src/interop/numpy/to_numpy_df.rs
index c14753bdc7a3..887d218f5fe0 100644
--- a/crates/polars-python/src/interop/numpy/to_numpy_df.rs
+++ b/crates/polars-python/src/interop/numpy/to_numpy_df.rs
@@ -251,6 +251,7 @@ fn try_df_to_numpy_numeric_supertype(
     };
     Some(np_array)
 }
+
 fn df_columns_to_numpy(
     py: Python,
     df: &DataFrame,
diff --git a/crates/polars-python/src/interop/numpy/to_numpy_series.rs b/crates/polars-python/src/interop/numpy/to_numpy_series.rs
index 12f71e2a551d..e2a6c439caad 100644
--- a/crates/polars-python/src/interop/numpy/to_numpy_series.rs
+++ b/crates/polars-python/src/interop/numpy/to_numpy_series.rs
@@ -85,20 +85,21 @@ fn try_series_to_numpy_view(
     if !allow_nulls && series_contains_null(s) {
         return None;
     }
-    let (s_owned, writable_flag) = handle_chunks(s, allow_rechunk)?;
+    let (s_owned, writable_flag) = handle_chunks(py, s, allow_rechunk)?;
 
     let array = series_to_numpy_view_recursive(py, s_owned, writable_flag);
     Some((array, writable_flag))
 }
+
 /// Rechunk the Series if required.
 ///
 /// NumPy arrays are always contiguous, so we may have to rechunk before creating a view.
 /// If we do so, we can flag the resulting array as writable.
-fn handle_chunks(s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {
+fn handle_chunks(py: Python, s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {
     let is_chunked = s.n_chunks() > 1;
     match (is_chunked, allow_rechunk) {
         (true, false) => None,
-        (true, true) => Some((s.rechunk(), true)),
+        (true, true) => Some((py.allow_threads(|| s.rechunk()), true)),
         (false, _) => Some((s.clone(), false)),
     }
 }
diff --git a/crates/polars-python/src/map/mod.rs b/crates/polars-python/src/map/mod.rs
index 3bf96f91e631..9ffc74961302 100644
--- a/crates/polars-python/src/map/mod.rs
+++ b/crates/polars-python/src/map/mod.rs
@@ -32,6 +32,7 @@ impl PyArrowPrimitiveType for Float32Type {}
 impl PyArrowPrimitiveType for Float64Type {}
 
 fn iterator_to_struct<'a>(
+    py: Python,
     it: impl Iterator<Item = Option<Bound<'a, PyAny>>>,
     init_null_count: usize,
     first_value: AnyValue<'a>,
@@ -115,11 +116,13 @@ fn iterator_to_struct<'a>(
         }
     }
 
-    let fields = POOL.install(|| {
-        field_names_ordered
-            .par_iter()
-            .map(|name| Series::new(name.clone(), struct_fields.get(name).unwrap()))
-            .collect::<Vec<_>>()
+    let fields = py.allow_threads(|| {
+        POOL.install(|| {
+            field_names_ordered
+                .par_iter()
+                .map(|name| Series::new(name.clone(), struct_fields.get(name).unwrap()))
+                .collect::<Vec<_>>()
+        })
     });
 
     Ok(
diff --git a/crates/polars-python/src/map/series.rs b/crates/polars-python/src/map/series.rs
index cb731e7c03f8..16d6212b8d1e 100644
--- a/crates/polars-python/src/map/series.rs
+++ b/crates/polars-python/src/map/series.rs
@@ -271,6 +271,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
                 .skip(init_null_count + skip)
                 .map(|val| call_lambda(py, lambda, val).ok());
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -283,6 +284,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
                 .skip(init_null_count + skip)
                 .map(|opt_val| opt_val.and_then(|val| call_lambda(py, lambda, val).ok()));
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -576,6 +578,7 @@ where
                 .skip(init_null_count + skip)
                 .map(|val| call_lambda(py, lambda, val).ok());
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -588,6 +591,7 @@ where
                 .skip(init_null_count + skip)
                 .map(|opt_val| opt_val.and_then(|val| call_lambda(py, lambda, val).ok()));
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -874,6 +878,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
                 .skip(init_null_count + skip)
                 .map(|val| call_lambda(py, lambda, val).ok());
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -886,6 +891,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
                 .skip(init_null_count + skip)
                 .map(|opt_val| opt_val.and_then(|val| call_lambda(py, lambda, val).ok()));
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -1221,6 +1227,7 @@ impl<'a> ApplyLambda<'a> for ListChunked {
                     call_lambda(py, lambda, python_series_wrapper).ok()
                 });
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -1245,6 +1252,7 @@ impl<'a> ApplyLambda<'a> for ListChunked {
                     })
                 });
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -1648,6 +1656,7 @@ impl<'a> ApplyLambda<'a> for ArrayChunked {
                     call_lambda(py, lambda, python_series_wrapper).ok()
                 });
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -1672,6 +1681,7 @@ impl<'a> ApplyLambda<'a> for ArrayChunked {
                     })
                 });
             iterator_to_struct(
+                py,
                 it,
                 init_null_count,
                 first_value,
@@ -2042,7 +2052,7 @@ impl<'a> ApplyLambda<'a> for ObjectChunked<ObjectValue> {
 
     fn apply_into_struct(
         &'a self,
-        _py: Python,
+        py: Python,
         lambda: &Bound<'a, PyAny>,
         init_null_count: usize,
         first_value: AnyValue<'a>,
@@ -2056,6 +2066,7 @@ impl<'a> ApplyLambda<'a> for ObjectChunked<ObjectValue> {
                 Some(out)
             });
         iterator_to_struct(
+            py,
             it,
             init_null_count,
             first_value,
@@ -2329,7 +2340,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
 
     fn apply_into_struct(
         &'a self,
-        _py: Python,
+        py: Python,
         lambda: &Bound<'a, PyAny>,
         init_null_count: usize,
         first_value: AnyValue<'a>,
@@ -2340,6 +2351,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
             Some(out)
         });
         iterator_to_struct(
+            py,
             it,
             init_null_count,
             first_value,

From 058491f60bb7cc4b51dc0429abc724512ff78878 Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <me@gburghoorn.com>
Date: Thu, 14 Nov 2024 13:45:09 +0100
Subject: [PATCH 15/18] refactor: Migrate polars-expr AggregationContext to use
 `Column` (#19736)

---
 crates/polars-core/src/frame/column/mod.rs    | 154 +++++++++++++++---
 .../src/frame/column/partitioned.rs           |   2 +-
 crates/polars-core/src/frame/column/scalar.rs |   5 +
 crates/polars-core/src/frame/column/series.rs |  71 ++++++++
 crates/polars-core/src/frame/mod.rs           |  14 +-
 crates/polars-core/src/scalar/mod.rs          |   9 +
 .../src/expressions/aggregation.rs            | 116 +++++++------
 crates/polars-expr/src/expressions/alias.rs   |  12 +-
 crates/polars-expr/src/expressions/apply.rs   |  76 ++++-----
 crates/polars-expr/src/expressions/binary.rs  |  30 ++--
 crates/polars-expr/src/expressions/cast.rs    |  10 +-
 crates/polars-expr/src/expressions/column.rs  |  45 ++---
 crates/polars-expr/src/expressions/count.rs   |   4 +-
 crates/polars-expr/src/expressions/filter.rs  |   2 +-
 crates/polars-expr/src/expressions/gather.rs  |  28 ++--
 .../polars-expr/src/expressions/group_iter.rs |  28 ++--
 crates/polars-expr/src/expressions/literal.rs |   5 +-
 crates/polars-expr/src/expressions/mod.rs     | 130 +++++++--------
 crates/polars-expr/src/expressions/slice.rs   |  16 +-
 crates/polars-expr/src/expressions/sort.rs    |   2 +-
 crates/polars-expr/src/expressions/sortby.rs  |  20 ++-
 crates/polars-expr/src/expressions/ternary.rs |  36 ++--
 crates/polars-expr/src/expressions/window.rs  |  22 +--
 crates/polars-lazy/src/dsl/list.rs            |   2 +-
 crates/polars-lazy/src/frame/pivot.rs         |   2 +-
 .../polars-mem-engine/src/executors/filter.rs |  20 ++-
 .../src/executors/group_by.rs                 |   2 +-
 .../polars-mem-engine/src/executors/stack.rs  |  14 +-
 crates/polars-ops/src/series/ops/index.rs     |  11 +-
 29 files changed, 531 insertions(+), 357 deletions(-)
 create mode 100644 crates/polars-core/src/frame/column/series.rs

diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs
index d2eec86c1b15..cea56a2e87b7 100644
--- a/crates/polars-core/src/frame/column/mod.rs
+++ b/crates/polars-core/src/frame/column/mod.rs
@@ -1,5 +1,7 @@
 use std::borrow::Cow;
 
+use arrow::bitmap::MutableBitmap;
+use arrow::trusted_len::TrustMyLength;
 use num_traits::{Num, NumCast};
 use polars_error::PolarsResult;
 use polars_utils::index::check_bounds;
@@ -8,6 +10,7 @@ pub use scalar::ScalarColumn;
 
 use self::gather::check_bounds_ca;
 use self::partitioned::PartitionedColumn;
+use self::series::SeriesColumn;
 use crate::chunked_array::cast::CastOptions;
 use crate::chunked_array::metadata::{MetadataFlags, MetadataTrait};
 use crate::datatypes::ReshapeDimension;
@@ -20,6 +23,7 @@ mod arithmetic;
 mod compare;
 mod partitioned;
 mod scalar;
+mod series;
 
 /// A column within a [`DataFrame`].
 ///
@@ -35,7 +39,7 @@ mod scalar;
 #[cfg_attr(feature = "serde", serde(from = "Series"))]
 #[cfg_attr(feature = "serde", serde(into = "_SerdeSeries"))]
 pub enum Column {
-    Series(Series),
+    Series(SeriesColumn),
     Partitioned(PartitionedColumn),
     Scalar(ScalarColumn),
 }
@@ -47,12 +51,13 @@ pub trait IntoColumn: Sized {
 
 impl Column {
     #[inline]
+    #[track_caller]
     pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
     where
         Phantom: ?Sized,
         Series: NamedFrom<T, Phantom>,
     {
-        Self::Series(NamedFrom::new(name, values))
+        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
     }
 
     #[inline]
@@ -95,7 +100,7 @@ impl Column {
                     PartitionedColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                 )
                 .take_materialized_series();
-                *self = Column::Series(series);
+                *self = Column::Series(series.into());
                 let Column::Series(s) = self else {
                     unreachable!();
                 };
@@ -107,7 +112,7 @@ impl Column {
                     ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                 )
                 .take_materialized_series();
-                *self = Column::Series(series);
+                *self = Column::Series(series.into());
                 let Column::Series(s) = self else {
                     unreachable!();
                 };
@@ -121,7 +126,7 @@ impl Column {
     #[inline]
     pub fn take_materialized_series(self) -> Series {
         match self {
-            Column::Series(s) => s,
+            Column::Series(s) => s.take(),
             Column::Partitioned(s) => s.take_materialized_series(),
             Column::Scalar(s) => s.take_materialized_series(),
         }
@@ -586,13 +591,86 @@ impl Column {
         }
     }
 
+    /// General implementation for aggregation where a non-missing scalar would map to itself.
+    #[inline(always)]
+    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
+    fn agg_with_unit_scalar(
+        &self,
+        groups: &GroupsProxy,
+        series_agg: impl Fn(&Series, &GroupsProxy) -> Series,
+    ) -> Column {
+        match self {
+            Column::Series(s) => series_agg(s, groups).into_column(),
+            // @partition-opt
+            Column::Partitioned(s) => series_agg(s.as_materialized_series(), groups).into_column(),
+            Column::Scalar(s) => {
+                if s.is_empty() {
+                    return self.clone();
+                }
+
+                // We utilize the aggregation on Series to see:
+                // 1. the output datatype of the aggregation
+                // 2. whether this aggregation is even defined
+                let series_aggregation = series_agg(
+                    &s.as_single_value_series(),
+                    &GroupsProxy::Slice {
+                        // @NOTE: this group is always valid since s is non-empty.
+                        groups: vec![[0, 1]],
+                        rolling: false,
+                    },
+                );
+
+                // If the aggregation is not defined, just return all nulls.
+                if series_aggregation.has_nulls() {
+                    return Self::new_scalar(
+                        series_aggregation.name().clone(),
+                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
+                        groups.len(),
+                    );
+                }
+
+                let mut scalar_col = s.resize(groups.len());
+                // The aggregation might change the type (e.g. mean changes int -> float), so we do
+                // a cast here to the output type.
+                if series_aggregation.dtype() != s.dtype() {
+                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
+                }
+
+                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
+                    // Fast path: no empty groups. keep the scalar intact.
+                    return scalar_col.into_column();
+                };
+
+                // All empty groups produce a *missing* or `null` value.
+                let mut validity = MutableBitmap::with_capacity(groups.len());
+                validity.extend_constant(first_empty_idx, true);
+                // SAFETY: We trust the length of this iterator.
+                let iter = unsafe {
+                    TrustMyLength::new(
+                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
+                        groups.len() - first_empty_idx,
+                    )
+                };
+                validity.extend_from_trusted_len_iter(iter);
+                let validity = validity.freeze();
+
+                let mut s = scalar_col.take_materialized_series().rechunk();
+                // SAFETY: We perform a compute_len afterwards.
+                let chunks = unsafe { s.chunks_mut() };
+                chunks[0] = chunks[0].with_validity(Some(validity)); // returns a new array
+                s.compute_len();
+
+                s.into_column()
+            },
+        }
+    }
+
     /// # Safety
     ///
     /// Does no bounds checks, groups must be correct.
     #[cfg(feature = "algorithm_group_by")]
     pub unsafe fn agg_min(&self, groups: &GroupsProxy) -> Self {
-        // @scalar-opt
-        unsafe { self.as_materialized_series().agg_min(groups) }.into()
+        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_min(g) })
     }
 
     /// # Safety
@@ -600,8 +678,7 @@ impl Column {
     /// Does no bounds checks, groups must be correct.
     #[cfg(feature = "algorithm_group_by")]
     pub unsafe fn agg_max(&self, groups: &GroupsProxy) -> Self {
-        // @scalar-opt
-        unsafe { self.as_materialized_series().agg_max(groups) }.into()
+        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_max(g) })
     }
 
     /// # Safety
@@ -609,8 +686,7 @@ impl Column {
     /// Does no bounds checks, groups must be correct.
     #[cfg(feature = "algorithm_group_by")]
     pub unsafe fn agg_mean(&self, groups: &GroupsProxy) -> Self {
-        // @scalar-opt
-        unsafe { self.as_materialized_series().agg_mean(groups) }.into()
+        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_mean(g) })
     }
 
     /// # Safety
@@ -627,8 +703,7 @@ impl Column {
     /// Does no bounds checks, groups must be correct.
     #[cfg(feature = "algorithm_group_by")]
     pub unsafe fn agg_first(&self, groups: &GroupsProxy) -> Self {
-        // @scalar-opt
-        unsafe { self.as_materialized_series().agg_first(groups) }.into()
+        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first(g) })
     }
 
     /// # Safety
@@ -636,8 +711,7 @@ impl Column {
     /// Does no bounds checks, groups must be correct.
     #[cfg(feature = "algorithm_group_by")]
     pub unsafe fn agg_last(&self, groups: &GroupsProxy) -> Self {
-        // @scalar-opt
-        unsafe { self.as_materialized_series().agg_last(groups) }.into()
+        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last(g) })
     }
 
     /// # Safety
@@ -672,8 +746,7 @@ impl Column {
     /// Does no bounds checks, groups must be correct.
     #[cfg(feature = "algorithm_group_by")]
     pub unsafe fn agg_median(&self, groups: &GroupsProxy) -> Self {
-        // @scalar-opt
-        unsafe { self.as_materialized_series().agg_median(groups) }.into()
+        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_median(g) })
     }
 
     /// # Safety
@@ -689,7 +762,7 @@ impl Column {
     ///
     /// Does no bounds checks, groups must be correct.
     #[cfg(feature = "algorithm_group_by")]
-    pub(crate) unsafe fn agg_std(&self, groups: &GroupsProxy, ddof: u8) -> Self {
+    pub unsafe fn agg_std(&self, groups: &GroupsProxy, ddof: u8) -> Self {
         // @scalar-opt
         unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
     }
@@ -713,6 +786,30 @@ impl Column {
         unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
     }
 
+    /// Bitwise-AND aggregation per group, routed through `agg_with_unit_scalar`.
+    ///
+    /// NOTE(review): safe `fn`, yet no bounds checks are done; `groups` must be correct.
+    #[cfg(feature = "bitwise")]
+    pub fn agg_and(&self, groups: &GroupsProxy) -> Self {
+        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_and(g) })
+    }
+    /// Bitwise-OR aggregation per group, routed through `agg_with_unit_scalar`.
+    ///
+    /// NOTE(review): safe `fn`, yet no bounds checks are done; `groups` must be correct.
+    #[cfg(feature = "bitwise")]
+    pub fn agg_or(&self, groups: &GroupsProxy) -> Self {
+        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_or(g) })
+    }
+    /// Bitwise-XOR aggregation per group, computed on the materialized series.
+    ///
+    /// NOTE(review): safe `fn`, yet no bounds checks are done; `groups` must be correct.
+    #[cfg(feature = "bitwise")]
+    pub fn agg_xor(&self, groups: &GroupsProxy) -> Self {
+        // @partition-opt
+        // @scalar-opt
+        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
+    }
+
     pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
         Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
     }
@@ -877,6 +974,13 @@ impl Column {
         }
     }
 
+    /// Packs every element into a list.
+    pub fn as_list(&self) -> ListChunked {
+        // @scalar-opt
+        // @partition-opt
+        self.as_materialized_series().as_list()
+    }
+
     pub fn is_sorted_flag(&self) -> IsSorted {
         // @scalar-opt
         self.as_materialized_series().is_sorted_flag()
@@ -1105,19 +1209,25 @@ impl Column {
 
     pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
         match (self, other) {
-            (Column::Series(lhs), Column::Series(rhs)) => lhs.try_add_owned(rhs).map(Column::from),
+            (Column::Series(lhs), Column::Series(rhs)) => {
+                lhs.take().try_add_owned(rhs.take()).map(Column::from)
+            },
             (lhs, rhs) => lhs + rhs,
         }
     }
     pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
         match (self, other) {
-            (Column::Series(lhs), Column::Series(rhs)) => lhs.try_sub_owned(rhs).map(Column::from),
+            (Column::Series(lhs), Column::Series(rhs)) => {
+                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
+            },
             (lhs, rhs) => lhs - rhs,
         }
     }
     pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
         match (self, other) {
-            (Column::Series(lhs), Column::Series(rhs)) => lhs.try_mul_owned(rhs).map(Column::from),
+            (Column::Series(lhs), Column::Series(rhs)) => {
+                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
+            },
             (lhs, rhs) => lhs * rhs,
         }
     }
@@ -1443,7 +1553,7 @@ impl From<Series> for Column {
             return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
         }
 
-        Self::Series(series)
+        Self::Series(SeriesColumn::new(series))
     }
 }
 
diff --git a/crates/polars-core/src/frame/column/partitioned.rs b/crates/polars-core/src/frame/column/partitioned.rs
index 16d4e9538634..93471c662d72 100644
--- a/crates/polars-core/src/frame/column/partitioned.rs
+++ b/crates/polars-core/src/frame/column/partitioned.rs
@@ -124,7 +124,7 @@ impl PartitionedColumn {
 
     fn _to_series(name: PlSmallStr, values: &Series, ends: &[IdxSize]) -> Series {
         let dtype = values.dtype();
-        let mut column = Column::Series(Series::new_empty(name, dtype));
+        let mut column = Column::Series(Series::new_empty(name, dtype).into());
 
         let mut prev_offset = 0;
         for (i, &offset) in ends.iter().enumerate() {
diff --git a/crates/polars-core/src/frame/column/scalar.rs b/crates/polars-core/src/frame/column/scalar.rs
index e3d8105362c4..c08a9e3cfee0 100644
--- a/crates/polars-core/src/frame/column/scalar.rs
+++ b/crates/polars-core/src/frame/column/scalar.rs
@@ -284,6 +284,11 @@ impl ScalarColumn {
         self.scalar.update(AnyValue::Null);
         self
     }
+
+    pub fn map_scalar(&mut self, map_scalar: impl Fn(Scalar) -> Scalar) {
+        self.scalar = map_scalar(std::mem::take(&mut self.scalar));
+        self.materialized.take();
+    }
 }
 
 impl IntoColumn for ScalarColumn {
diff --git a/crates/polars-core/src/frame/column/series.rs b/crates/polars-core/src/frame/column/series.rs
new file mode 100644
index 000000000000..c7f79906ea0d
--- /dev/null
+++ b/crates/polars-core/src/frame/column/series.rs
@@ -0,0 +1,71 @@
+use std::ops::{Deref, DerefMut};
+
+use super::Series;
+
+/// A very thin [`Series`] wrapper: the [`Column`](super::Column)ized version of a [`Series`].
+///
+/// In debug builds with `POLARS_TRACK_SERIES_MATERIALIZATION=1` set, this records a backtrace
+/// of where it was created, so that materialization problems can be tracked down.
+#[derive(Debug, Clone)]
+pub struct SeriesColumn {
+    inner: Series,
+
+    #[cfg(debug_assertions)]
+    materialized_at: Option<std::sync::Arc<std::backtrace::Backtrace>>,
+}
+
+impl SeriesColumn {
+    #[track_caller]
+    pub fn new(series: Series) -> Self {
+        Self {
+            inner: series,
+
+            #[cfg(debug_assertions)]
+            materialized_at: if std::env::var("POLARS_TRACK_SERIES_MATERIALIZATION").as_deref()
+                == Ok("1")
+            {
+                Some(std::sync::Arc::new(
+                    std::backtrace::Backtrace::force_capture(),
+                ))
+            } else {
+                None
+            },
+        }
+    }
+
+    pub fn materialized_at(&self) -> Option<&std::backtrace::Backtrace> {
+        #[cfg(debug_assertions)]
+        {
+            self.materialized_at.as_ref().map(|v| v.as_ref())
+        }
+
+        #[cfg(not(debug_assertions))]
+        None
+    }
+
+    pub fn take(self) -> Series {
+        self.inner
+    }
+}
+
+impl From<Series> for SeriesColumn {
+    #[track_caller]
+    #[inline(always)]
+    fn from(value: Series) -> Self {
+        Self::new(value)
+    }
+}
+
+impl Deref for SeriesColumn {
+    type Target = Series;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl DerefMut for SeriesColumn {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs
index 0d8fef7f4c4a..6fed5c25071c 100644
--- a/crates/polars-core/src/frame/mod.rs
+++ b/crates/polars-core/src/frame/mod.rs
@@ -538,7 +538,7 @@ impl DataFrame {
         // Don't parallelize this. Memory overhead
         for s in &mut self.columns {
             if let Column::Series(s) = s {
-                *s = s.rechunk();
+                *s = s.rechunk().into();
             }
         }
         self
@@ -2085,6 +2085,8 @@ impl DataFrame {
         let mut max_value_ca =
             StringChunkedBuilder::new(PlSmallStr::from_static("max_value"), num_columns);
         let mut distinct_count_ca: Vec<Option<IdxSize>> = Vec::with_capacity(num_columns);
+        let mut materialized_at_ca =
+            StringChunkedBuilder::new(PlSmallStr::from_static("materialized_at"), num_columns);
 
         for col in &self.columns {
             let metadata = col.get_metadata();
@@ -2099,10 +2101,10 @@ impl DataFrame {
                     )
                 });
 
-            let repr = match col {
-                Column::Series(_) => "series",
-                Column::Partitioned(_) => "partitioned",
-                Column::Scalar(_) => "scalar",
+            let (repr, materialized_at) = match col {
+                Column::Series(s) => ("series", s.materialized_at()),
+                Column::Partitioned(_) => ("partitioned", None),
+                Column::Scalar(_) => ("scalar", None),
             };
             let sorted_asc = flags.contains(MetadataFlags::SORTED_ASC);
             let sorted_dsc = flags.contains(MetadataFlags::SORTED_DSC);
@@ -2116,6 +2118,7 @@ impl DataFrame {
             min_value_ca.append_option(min_value.map(|v| v.as_any_value().to_string()));
             max_value_ca.append_option(max_value.map(|v| v.as_any_value().to_string()));
             distinct_count_ca.push(distinct_count);
+            materialized_at_ca.append_option(materialized_at.map(|v| format!("{v:#?}")));
         }
 
         unsafe {
@@ -2134,6 +2137,7 @@ impl DataFrame {
                         &distinct_count_ca[..],
                     )
                     .into_column(),
+                    materialized_at_ca.finish().into_column(),
                 ],
             )
         }
diff --git a/crates/polars-core/src/scalar/mod.rs b/crates/polars-core/src/scalar/mod.rs
index 3e456837e534..7487603ff998 100644
--- a/crates/polars-core/src/scalar/mod.rs
+++ b/crates/polars-core/src/scalar/mod.rs
@@ -15,6 +15,15 @@ pub struct Scalar {
     value: AnyValue<'static>,
 }
 
+impl Default for Scalar {
+    fn default() -> Self {
+        Self {
+            dtype: DataType::Null,
+            value: AnyValue::Null,
+        }
+    }
+}
+
 impl Scalar {
     #[inline(always)]
     pub fn new(dtype: DataType, value: AnyValue<'static>) -> Self {
diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs
index fb691d746715..883598789622 100644
--- a/crates/polars-expr/src/expressions/aggregation.rs
+++ b/crates/polars-expr/src/expressions/aggregation.rs
@@ -206,7 +206,7 @@ impl PhysicalExpr for AggregationExpr {
     ) -> PolarsResult<AggregationContext<'a>> {
         let mut ac = self.input.evaluate_on_groups(df, groups, state)?;
         // don't change names by aggregations as is done in polars-core
-        let keep_name = ac.series().name().clone();
+        let keep_name = ac.get_values().name().clone();
         polars_ensure!(!matches!(ac.agg_state(), AggState::Literal(_)), ComputeError: "cannot aggregate a literal");
 
         if let AggregatedScalar(_) = ac.agg_state() {
@@ -223,37 +223,37 @@ impl PhysicalExpr for AggregationExpr {
         let out = unsafe {
             match self.agg_type.groupby {
                 GroupByMethod::Min => {
-                    let (s, groups) = ac.get_final_aggregation();
-                    let agg_s = s.agg_min(&groups);
-                    AggregatedScalar(agg_s.with_name(keep_name))
+                    let (c, groups) = ac.get_final_aggregation();
+                    let agg_c = c.agg_min(&groups);
+                    AggregatedScalar(agg_c.with_name(keep_name))
                 },
                 GroupByMethod::Max => {
-                    let (s, groups) = ac.get_final_aggregation();
-                    let agg_s = s.agg_max(&groups);
-                    AggregatedScalar(agg_s.with_name(keep_name))
+                    let (c, groups) = ac.get_final_aggregation();
+                    let agg_c = c.agg_max(&groups);
+                    AggregatedScalar(agg_c.with_name(keep_name))
                 },
                 GroupByMethod::Median => {
-                    let (s, groups) = ac.get_final_aggregation();
-                    let agg_s = s.agg_median(&groups);
-                    AggregatedScalar(agg_s.with_name(keep_name))
+                    let (c, groups) = ac.get_final_aggregation();
+                    let agg_c = c.agg_median(&groups);
+                    AggregatedScalar(agg_c.with_name(keep_name))
                 },
                 GroupByMethod::Mean => {
-                    let (s, groups) = ac.get_final_aggregation();
-                    let agg_s = s.agg_mean(&groups);
-                    AggregatedScalar(agg_s.with_name(keep_name))
+                    let (c, groups) = ac.get_final_aggregation();
+                    let agg_c = c.agg_mean(&groups);
+                    AggregatedScalar(agg_c.with_name(keep_name))
                 },
                 GroupByMethod::Sum => {
-                    let (s, groups) = ac.get_final_aggregation();
-                    let agg_s = s.agg_sum(&groups);
-                    AggregatedScalar(agg_s.with_name(keep_name))
+                    let (c, groups) = ac.get_final_aggregation();
+                    let agg_c = c.agg_sum(&groups);
+                    AggregatedScalar(agg_c.with_name(keep_name))
                 },
                 GroupByMethod::Count { include_nulls } => {
-                    if include_nulls || ac.series().null_count() == 0 {
+                    if include_nulls || ac.get_values().null_count() == 0 {
                         // a few fast paths that prevent materializing new groups
                         match ac.update_groups {
                             UpdateGroups::WithSeriesLen => {
                                 let list = ac
-                                    .series()
+                                    .get_values()
                                     .list()
                                     .expect("impl error, should be a list at this point");
 
@@ -288,7 +288,7 @@ impl PhysicalExpr for AggregationExpr {
                                     },
                                 };
                                 s.rename(keep_name);
-                                AggregatedScalar(s.into_series())
+                                AggregatedScalar(s.into_column())
                             },
                             UpdateGroups::WithGroupsLen => {
                                 // no need to update the groups
@@ -296,20 +296,20 @@ impl PhysicalExpr for AggregationExpr {
                                 // not the correct order
                                 let mut ca = ac.groups.group_count();
                                 ca.rename(keep_name);
-                                AggregatedScalar(ca.into_series())
+                                AggregatedScalar(ca.into_column())
                             },
                             // materialize groups
                             _ => {
                                 let mut ca = ac.groups().group_count();
                                 ca.rename(keep_name);
-                                AggregatedScalar(ca.into_series())
+                                AggregatedScalar(ca.into_column())
                             },
                         }
                     } else {
                         // TODO: optimize this/and write somewhere else.
                         match ac.agg_state() {
                             AggState::Literal(s) | AggState::AggregatedScalar(s) => {
-                                AggregatedScalar(Series::new(
+                                AggregatedScalar(Column::new(
                                     keep_name,
                                     [(s.len() as IdxSize - s.null_count() as IdxSize)],
                                 ))
@@ -323,7 +323,7 @@ impl PhysicalExpr for AggregationExpr {
                                             .map(|s| s.len() as IdxSize - s.null_count() as IdxSize)
                                     })
                                     .collect();
-                                AggregatedScalar(out.into_series().with_name(keep_name))
+                                AggregatedScalar(out.into_column().with_name(keep_name))
                             },
                             AggState::NotAggregated(s) => {
                                 let s = s.clone();
@@ -334,7 +334,9 @@ impl PhysicalExpr for AggregationExpr {
                                     match groups.as_ref() {
                                         GroupsProxy::Idx(idx) => {
                                             let s = s.rechunk();
-                                            let array = &s.chunks()[0];
+                                            // @scalar-opt
+                                            // @partition-opt
+                                            let array = &s.as_materialized_series().chunks()[0];
                                             let validity = array.validity().unwrap();
                                             idx.iter()
                                                 .map(|(_, g)| {
@@ -365,7 +367,7 @@ impl PhysicalExpr for AggregationExpr {
                                         },
                                     }
                                 };
-                                AggregatedScalar(out.into_series())
+                                AggregatedScalar(out.into_column())
                             },
                         }
                     }
@@ -392,10 +394,10 @@ impl PhysicalExpr for AggregationExpr {
                     //
                     // if it is not, we traverse the groups and create
                     // a list per group.
-                    let s = match ac.agg_state() {
+                    let c = match ac.agg_state() {
                         // mean agg:
                         // -> f64 -> list<f64>
-                        AggState::AggregatedScalar(s) => s
+                        AggState::AggregatedScalar(c) => c
                             .reshape_list(&[
                                 ReshapeDimension::Infer,
                                 ReshapeDimension::new_dimension(1),
@@ -403,25 +405,25 @@ impl PhysicalExpr for AggregationExpr {
                             .unwrap(),
                         _ => {
                             let agg = ac.aggregated();
-                            agg.as_list().into_series()
+                            agg.as_list().into_column()
                         },
                     };
-                    AggregatedList(s.with_name(keep_name))
+                    AggregatedList(c.with_name(keep_name))
                 },
                 GroupByMethod::Groups => {
                     let mut column: ListChunked = ac.groups().as_list_chunked();
                     column.rename(keep_name);
-                    AggregatedScalar(column.into_series())
+                    AggregatedScalar(column.into_column())
                 },
                 GroupByMethod::Std(ddof) => {
-                    let (s, groups) = ac.get_final_aggregation();
-                    let agg_s = s.agg_std(&groups, ddof);
-                    AggregatedScalar(agg_s.with_name(keep_name))
+                    let (c, groups) = ac.get_final_aggregation();
+                    let agg_c = c.agg_std(&groups, ddof);
+                    AggregatedScalar(agg_c.with_name(keep_name))
                 },
                 GroupByMethod::Var(ddof) => {
-                    let (s, groups) = ac.get_final_aggregation();
-                    let agg_s = s.agg_var(&groups, ddof);
-                    AggregatedScalar(agg_s.with_name(keep_name))
+                    let (c, groups) = ac.get_final_aggregation();
+                    let agg_c = c.agg_var(&groups, ddof);
+                    AggregatedScalar(agg_c.with_name(keep_name))
                 },
                 GroupByMethod::Quantile(_, _) => {
                     // implemented explicitly in AggQuantile struct
@@ -429,24 +431,28 @@ impl PhysicalExpr for AggregationExpr {
                 },
                 #[cfg(feature = "bitwise")]
                 GroupByMethod::Bitwise(f) => {
-                    let (s, groups) = ac.get_final_aggregation();
-                    let agg_s = match f {
-                        GroupByBitwiseMethod::And => s.agg_and(&groups),
-                        GroupByBitwiseMethod::Or => s.agg_or(&groups),
-                        GroupByBitwiseMethod::Xor => s.agg_xor(&groups),
+                    let (c, groups) = ac.get_final_aggregation();
+                    let agg_c = match f {
+                        GroupByBitwiseMethod::And => c.agg_and(&groups),
+                        GroupByBitwiseMethod::Or => c.agg_or(&groups),
+                        GroupByBitwiseMethod::Xor => c.agg_xor(&groups),
                     };
-                    AggregatedScalar(agg_s.with_name(keep_name))
+                    AggregatedScalar(agg_c.with_name(keep_name))
                 },
                 GroupByMethod::NanMin => {
                     #[cfg(feature = "propagate_nans")]
                     {
-                        let (s, groups) = ac.get_final_aggregation();
-                        let agg_s = if s.dtype().is_float() {
-                            nan_propagating_aggregate::group_agg_nan_min_s(&s, &groups)
+                        let (c, groups) = ac.get_final_aggregation();
+                        let agg_c = if c.dtype().is_float() {
+                            nan_propagating_aggregate::group_agg_nan_min_s(
+                                c.as_materialized_series(),
+                                &groups,
+                            )
+                            .into_column()
                         } else {
-                            s.agg_min(&groups)
+                            c.agg_min(&groups)
                         };
-                        AggregatedScalar(agg_s.with_name(keep_name))
+                        AggregatedScalar(agg_c.with_name(keep_name))
                     }
                     #[cfg(not(feature = "propagate_nans"))]
                     {
@@ -456,13 +462,17 @@ impl PhysicalExpr for AggregationExpr {
                 GroupByMethod::NanMax => {
                     #[cfg(feature = "propagate_nans")]
                     {
-                        let (s, groups) = ac.get_final_aggregation();
-                        let agg_s = if s.dtype().is_float() {
-                            nan_propagating_aggregate::group_agg_nan_max_s(&s, &groups)
+                        let (c, groups) = ac.get_final_aggregation();
+                        let agg_c = if c.dtype().is_float() {
+                            nan_propagating_aggregate::group_agg_nan_max_s(
+                                c.as_materialized_series(),
+                                &groups,
+                            )
+                            .into_column()
                         } else {
-                            s.agg_max(&groups)
+                            c.agg_max(&groups)
                         };
-                        AggregatedScalar(agg_s.with_name(keep_name))
+                        AggregatedScalar(agg_c.with_name(keep_name))
                     }
                     #[cfg(not(feature = "propagate_nans"))]
                     {
@@ -757,7 +767,7 @@ impl PhysicalExpr for AggQuantileExpr {
     ) -> PolarsResult<AggregationContext<'a>> {
         let mut ac = self.input.evaluate_on_groups(df, groups, state)?;
         // don't change names by aggregations as is done in polars-core
-        let keep_name = ac.series().name().clone();
+        let keep_name = ac.get_values().name().clone();
 
         let quantile = self.get_quantile(df, state)?;
 
diff --git a/crates/polars-expr/src/expressions/alias.rs b/crates/polars-expr/src/expressions/alias.rs
index f2065289e1ae..131d2ca2f16c 100644
--- a/crates/polars-expr/src/expressions/alias.rs
+++ b/crates/polars-expr/src/expressions/alias.rs
@@ -48,17 +48,13 @@ impl PhysicalExpr for AliasExpr {
         state: &ExecutionState,
     ) -> PolarsResult<AggregationContext<'a>> {
         let mut ac = self.physical_expr.evaluate_on_groups(df, groups, state)?;
-        let s = ac.take();
-        let s = self.finish(s.into());
+        let c = ac.take();
+        let c = self.finish(c);
 
         if ac.is_literal() {
-            ac.with_literal(s.take_materialized_series());
+            ac.with_literal(c);
         } else {
-            ac.with_series(
-                s.take_materialized_series(),
-                ac.is_aggregated(),
-                Some(&self.expr),
-            )?;
+            ac.with_values(c, ac.is_aggregated(), Some(&self.expr))?;
         }
         Ok(ac)
     }
diff --git a/crates/polars-expr/src/expressions/apply.rs b/crates/polars-expr/src/expressions/apply.rs
index f8e2619c4153..c03511a64734 100644
--- a/crates/polars-expr/src/expressions/apply.rs
+++ b/crates/polars-expr/src/expressions/apply.rs
@@ -92,11 +92,11 @@ impl ApplyExpr {
         let all_unit_len = all_unit_length(&ca);
         if all_unit_len && self.function_returns_scalar {
             ac.with_agg_state(AggState::AggregatedScalar(
-                ca.explode().unwrap().into_series(),
+                ca.explode().unwrap().into_column(),
             ));
             ac.with_update_groups(UpdateGroups::No);
         } else {
-            ac.with_series(ca.into_series(), true, Some(&self.expr))?;
+            ac.with_values(ca.into_column(), true, Some(&self.expr))?;
             ac.with_update_groups(UpdateGroups::WithSeriesLen);
         }
 
@@ -120,7 +120,7 @@ impl ApplyExpr {
         &self,
         mut ac: AggregationContext<'a>,
     ) -> PolarsResult<AggregationContext<'a>> {
-        let s = ac.series();
+        let s = ac.get_values();
 
         polars_ensure!(
             !matches!(ac.agg_state(), AggState::AggregatedScalar(_)),
@@ -131,7 +131,7 @@ impl ApplyExpr {
         let name = s.name().clone();
         let agg = ac.aggregated();
         // Collection of empty list leads to a null dtype. See: #3687.
-        if agg.len() == 0 {
+        if agg.is_empty() {
             // Create input for the function to determine the output dtype, see #3946.
             let agg = agg.list().unwrap();
             let input_dtype = agg.inner_dtype();
@@ -199,35 +199,28 @@ impl ApplyExpr {
         &self,
         mut ac: AggregationContext<'a>,
     ) -> PolarsResult<AggregationContext<'a>> {
-        let (s, aggregated) = match ac.agg_state() {
-            AggState::AggregatedList(s) => {
-                let ca = s.list().unwrap();
+        let (c, aggregated) = match ac.agg_state() {
+            AggState::AggregatedList(c) => {
+                let ca = c.list().unwrap();
                 let out = ca.apply_to_inner(&|s| {
-                    self.eval_and_flatten(&mut [s.into()])
-                        .map(|c| c.as_materialized_series().clone())
+                    Ok(self
+                        .eval_and_flatten(&mut [s.into_column()])?
+                        .take_materialized_series())
                 })?;
-                (out.into_series(), true)
+                (out.into_column(), true)
             },
-            AggState::NotAggregated(s) => {
-                let (out, aggregated) = (
-                    self.eval_and_flatten(&mut [s.clone().into()])?
-                        .as_materialized_series()
-                        .clone(),
-                    false,
-                );
-                check_map_output_len(s.len(), out.len(), &self.expr)?;
+            AggState::NotAggregated(c) => {
+                let (out, aggregated) = (self.eval_and_flatten(&mut [c.clone()])?, false);
+                check_map_output_len(c.len(), out.len(), &self.expr)?;
                 (out, aggregated)
             },
             agg_state => {
-                ac.with_agg_state(agg_state.try_map(|s| {
-                    self.eval_and_flatten(&mut [s.clone().into()])
-                        .map(|c| c.as_materialized_series().clone())
-                })?);
+                ac.with_agg_state(agg_state.try_map(|s| self.eval_and_flatten(&mut [s.clone()]))?);
                 return Ok(ac);
             },
         };
 
-        ac.with_series_and_args(s, aggregated, Some(&self.expr), true)?;
+        ac.with_values_and_args(c, aggregated, Some(&self.expr), true)?;
         Ok(ac)
     }
     fn apply_multiple_group_aware<'a>(
@@ -385,11 +378,8 @@ impl PhysicalExpr for ApplyExpr {
 
             match self.collect_groups {
                 ApplyOptions::ApplyList => {
-                    let s = self
-                        .eval_and_flatten(&mut [ac.aggregated().into()])?
-                        .as_materialized_series()
-                        .clone();
-                    ac.with_series(s, true, Some(&self.expr))?;
+                    let c = self.eval_and_flatten(&mut [ac.aggregated()])?;
+                    ac.with_values(c, true, Some(&self.expr))?;
                     Ok(ac)
                 },
                 ApplyOptions::GroupWise => self.apply_single_group_aware(ac),
@@ -400,18 +390,12 @@ impl PhysicalExpr for ApplyExpr {
 
             match self.collect_groups {
                 ApplyOptions::ApplyList => {
-                    let mut s = acs
-                        .iter_mut()
-                        .map(|ac| ac.aggregated().into())
-                        .collect::<Vec<_>>();
-                    let s = self
-                        .eval_and_flatten(&mut s)?
-                        .as_materialized_series()
-                        .clone();
+                    let mut c = acs.iter_mut().map(|ac| ac.aggregated()).collect::<Vec<_>>();
+                    let c = self.eval_and_flatten(&mut c)?;
                     // take the first aggregation context that as that is the input series
                     let mut ac = acs.swap_remove(0);
                     ac.with_update_groups(UpdateGroups::WithGroupsLen);
-                    ac.with_series(s, true, Some(&self.expr))?;
+                    ac.with_values(c, true, Some(&self.expr))?;
                     Ok(ac)
                 },
                 ApplyOptions::GroupWise => self.apply_multiple_group_aware(acs, df),
@@ -487,7 +471,7 @@ fn apply_multiple_elementwise<'a>(
 
             let other = acs[1..]
                 .iter()
-                .map(|ac| ac.flat_naive().into_owned().into())
+                .map(|ac| ac.flat_naive().into_owned())
                 .collect::<Vec<_>>();
 
             let out = ca.apply_to_inner(&|s| {
@@ -501,14 +485,14 @@ fn apply_multiple_elementwise<'a>(
                     .clone())
             })?;
             let mut ac = acs.swap_remove(0);
-            ac.with_series(out.into_series(), true, None)?;
+            ac.with_values(out.into_column(), true, None)?;
             Ok(ac)
         },
         first_as => {
             let check_lengths = check_lengths && !matches!(first_as, AggState::Literal(_));
             let aggregated = acs.iter().all(|ac| ac.is_aggregated() | ac.is_literal())
                 && acs.iter().any(|ac| ac.is_aggregated());
-            let mut s = acs
+            let mut c = acs
                 .iter_mut()
                 .enumerate()
                 .map(|(i, ac)| {
@@ -523,19 +507,15 @@ fn apply_multiple_elementwise<'a>(
                 .map(Column::from)
                 .collect::<Vec<_>>();
 
-            let input_len = s[0].len();
-            let s = function
-                .call_udf(&mut s)?
-                .unwrap()
-                .as_materialized_series()
-                .clone();
+            let input_len = c[0].len();
+            let c = function.call_udf(&mut c)?.unwrap();
             if check_lengths {
-                check_map_output_len(input_len, s.len(), expr)?;
+                check_map_output_len(input_len, c.len(), expr)?;
             }
 
             // Take the first aggregation context that as that is the input series.
             let mut ac = acs.swap_remove(0);
-            ac.with_series_and_args(s, aggregated, None, true)?;
+            ac.with_values_and_args(c, aggregated, None, true)?;
             Ok(ac)
         },
     }
diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs
index 10f217844ab1..0976afc5e608 100644
--- a/crates/polars-expr/src/expressions/binary.rs
+++ b/crates/polars-expr/src/expressions/binary.rs
@@ -121,14 +121,14 @@ impl BinaryExpr {
         aggregated: bool,
     ) -> PolarsResult<AggregationContext<'a>> {
         // We want to be able to mutate in place, so we take the lhs to make sure that we drop.
-        let lhs = ac_l.series().clone();
-        let rhs = ac_r.series().clone();
+        let lhs = ac_l.get_values().clone();
+        let rhs = ac_r.get_values().clone();
 
         // Drop lhs so that we might operate in place.
         drop(ac_l.take());
 
-        let out = apply_operator_owned(lhs.into_column(), rhs.into_column(), self.op)?;
-        ac_l.with_series(out.take_materialized_series(), aggregated, Some(&self.expr))?;
+        let out = apply_operator_owned(lhs, rhs, self.op)?;
+        ac_l.with_values(out, aggregated, Some(&self.expr))?;
         Ok(ac_l)
     }
 
@@ -137,20 +137,20 @@ impl BinaryExpr {
         mut ac_l: AggregationContext<'a>,
         mut ac_r: AggregationContext<'a>,
     ) -> PolarsResult<AggregationContext<'a>> {
-        let name = ac_l.series().name().clone();
+        let name = ac_l.get_values().name().clone();
         ac_l.groups();
         ac_r.groups();
         polars_ensure!(ac_l.groups.len() == ac_r.groups.len(), ComputeError: "lhs and rhs should have same group length");
-        let left_s = ac_l.series().rechunk().into_column();
-        let right_s = ac_r.series().rechunk().into_column();
-        let res_s = apply_operator(&left_s, &right_s, self.op)?;
+        let left_c = ac_l.get_values().rechunk().into_column();
+        let right_c = ac_r.get_values().rechunk().into_column();
+        let res_c = apply_operator(&left_c, &right_c, self.op)?;
         ac_l.with_update_groups(UpdateGroups::WithSeriesLen);
-        let res_s = if res_s.len() == 1 {
-            res_s.new_from_index(0, ac_l.groups.len())
+        let res_s = if res_c.len() == 1 {
+            res_c.new_from_index(0, ac_l.groups.len())
         } else {
-            ListChunked::full(name, res_s.as_materialized_series(), ac_l.groups.len()).into_column()
+            ListChunked::full(name, res_c.as_materialized_series(), ac_l.groups.len()).into_column()
         };
-        ac_l.with_series(res_s.take_materialized_series(), true, Some(&self.expr))?;
+        ac_l.with_values(res_s, true, Some(&self.expr))?;
         Ok(ac_l)
     }
 
@@ -159,7 +159,7 @@ impl BinaryExpr {
         mut ac_l: AggregationContext<'a>,
         mut ac_r: AggregationContext<'a>,
     ) -> PolarsResult<AggregationContext<'a>> {
-        let name = ac_l.series().name().clone();
+        let name = ac_l.get_values().name().clone();
         let ca = ac_l
             .iter_groups(false)
             .zip(ac_r.iter_groups(false))
@@ -175,7 +175,7 @@ impl BinaryExpr {
             .with_name(name);
 
         ac_l.with_update_groups(UpdateGroups::WithSeriesLen);
-        ac_l.with_agg_state(AggState::AggregatedList(ca.into_series()));
+        ac_l.with_agg_state(AggState::AggregatedList(ca.into_column()));
         Ok(ac_l)
     }
 }
@@ -260,7 +260,7 @@ impl PhysicalExpr for BinaryExpr {
                     apply_operator(&lhs.into_column(), &rhs.get_inner().into_column(), self.op)
                         .map(|c| c.take_materialized_series())
                 })?;
-                ac_l.with_series(out.into_series(), true, Some(&self.expr))?;
+                ac_l.with_values(out.into_column(), true, Some(&self.expr))?;
                 Ok(ac_l)
             },
             _ => self.apply_group_aware(ac_l, ac_r),
diff --git a/crates/polars-expr/src/expressions/cast.rs b/crates/polars-expr/src/expressions/cast.rs
index 95f0c9eebee5..1bc230ceab8f 100644
--- a/crates/polars-expr/src/expressions/cast.rs
+++ b/crates/polars-expr/src/expressions/cast.rs
@@ -59,14 +59,14 @@ impl PhysicalExpr for CastExpr {
                     self.finish(&s.into_column())
                         .map(|c| c.take_materialized_series())
                 })?;
-                ac.with_series(casted.into_series(), true, None)?;
+                ac.with_values(casted.into_column(), true, None)?;
             },
             AggState::AggregatedScalar(s) => {
                 let s = self.finish(&s.clone().into_column())?;
                 if ac.is_literal() {
-                    ac.with_literal(s.take_materialized_series());
+                    ac.with_literal(s);
                 } else {
-                    ac.with_series(s.take_materialized_series(), true, None)?;
+                    ac.with_values(s, true, None)?;
                 }
             },
             _ => {
@@ -77,9 +77,9 @@ impl PhysicalExpr for CastExpr {
                 let s = self.finish(&s.as_ref().clone().into_column())?;
 
                 if ac.is_literal() {
-                    ac.with_literal(s.take_materialized_series());
+                    ac.with_literal(s);
                 } else {
-                    ac.with_series(s.take_materialized_series(), false, None)?;
+                    ac.with_values(s, false, None)?;
                 }
             },
         }
diff --git a/crates/polars-expr/src/expressions/column.rs b/crates/polars-expr/src/expressions/column.rs
index 2142d22df6d9..99b5ba9fe262 100644
--- a/crates/polars-expr/src/expressions/column.rs
+++ b/crates/polars-expr/src/expressions/column.rs
@@ -21,9 +21,9 @@ impl ColumnExpr {
 impl ColumnExpr {
     fn check_external_context(
         &self,
-        out: PolarsResult<Series>,
+        out: PolarsResult<Column>,
         state: &ExecutionState,
-    ) -> PolarsResult<Series> {
+    ) -> PolarsResult<Column> {
         match out {
             Ok(col) => Ok(col),
             Err(e) => {
@@ -33,7 +33,7 @@ impl ColumnExpr {
                     for df in state.ext_contexts.as_ref() {
                         let out = df.column(&self.name);
                         if out.is_ok() {
-                            return out.map(Column::as_materialized_series).cloned();
+                            return out.cloned();
                         }
                     }
                     Err(e)
@@ -44,12 +44,12 @@ impl ColumnExpr {
 
     fn process_by_idx(
         &self,
-        out: &Series,
+        out: &Column,
         _state: &ExecutionState,
         _schema: &Schema,
         df: &DataFrame,
         check_state_schema: bool,
-    ) -> PolarsResult<Series> {
+    ) -> PolarsResult<Column> {
         if out.name() != &*self.name {
             if check_state_schema {
                 if let Some(schema) = _state.get_schema() {
@@ -75,9 +75,7 @@ impl ColumnExpr {
             // in release we fallback to linear search
             #[allow(unreachable_code)]
             {
-                df.column(&self.name)
-                    .map(Column::as_materialized_series)
-                    .cloned()
+                df.column(&self.name).cloned()
             }
         } else {
             Ok(out.clone())
@@ -88,7 +86,7 @@ impl ColumnExpr {
         df: &DataFrame,
         _state: &ExecutionState,
         _panic_during_test: bool,
-    ) -> PolarsResult<Series> {
+    ) -> PolarsResult<Column> {
         #[cfg(feature = "panic_on_schema")]
         {
             if _panic_during_test
@@ -100,9 +98,7 @@ impl ColumnExpr {
         }
         // in release we fallback to linear search
         #[allow(unreachable_code)]
-        df.column(&self.name)
-            .map(Column::as_materialized_series)
-            .cloned()
+        df.column(&self.name).cloned()
     }
 
     fn process_from_state_schema(
@@ -110,19 +106,17 @@ impl ColumnExpr {
         df: &DataFrame,
         state: &ExecutionState,
         schema: &Schema,
-    ) -> PolarsResult<Series> {
+    ) -> PolarsResult<Column> {
         match schema.get_full(&self.name) {
             None => self.process_by_linear_search(df, state, true),
             Some((idx, _, _)) => match df.get_columns().get(idx) {
-                Some(out) => {
-                    self.process_by_idx(out.as_materialized_series(), state, schema, df, false)
-                },
+                Some(out) => self.process_by_idx(out, state, schema, df, false),
                 None => self.process_by_linear_search(df, state, true),
             },
         }
     }
 
-    fn process_cse(&self, df: &DataFrame, schema: &Schema) -> PolarsResult<Series> {
+    fn process_cse(&self, df: &DataFrame, schema: &Schema) -> PolarsResult<Column> {
         // The CSE columns are added on the rhs.
         let offset = schema.len();
         let columns = &df.get_columns()[offset..];
@@ -131,7 +125,6 @@ impl ColumnExpr {
             .iter()
             .find(|s| s.name() == &self.name)
             .unwrap()
-            .as_materialized_series()
             .clone())
     }
 }
@@ -146,13 +139,7 @@ impl PhysicalExpr for ColumnExpr {
                 // check if the schema was correct
                 // if not do O(n) search
                 match df.get_columns().get(idx) {
-                    Some(out) => self.process_by_idx(
-                        out.as_materialized_series(),
-                        state,
-                        &self.schema,
-                        df,
-                        true,
-                    ),
+                    Some(out) => self.process_by_idx(out, state, &self.schema, df, true),
                     None => {
                         // partitioned group_by special case
                         if let Some(schema) = state.get_schema() {
@@ -183,12 +170,8 @@ impl PhysicalExpr for ColumnExpr {
         groups: &'a GroupsProxy,
         state: &ExecutionState,
     ) -> PolarsResult<AggregationContext<'a>> {
-        let s = self.evaluate(df, state)?;
-        Ok(AggregationContext::new(
-            s.take_materialized_series(),
-            Cow::Borrowed(groups),
-            false,
-        ))
+        let c = self.evaluate(df, state)?;
+        Ok(AggregationContext::new(c, Cow::Borrowed(groups), false))
     }
 
     fn as_partitioned_aggregator(&self) -> Option<&dyn PartitionedAggregation> {
diff --git a/crates/polars-expr/src/expressions/count.rs b/crates/polars-expr/src/expressions/count.rs
index 6102caf5a354..db25f0d9e73b 100644
--- a/crates/polars-expr/src/expressions/count.rs
+++ b/crates/polars-expr/src/expressions/count.rs
@@ -32,8 +32,8 @@ impl PhysicalExpr for CountExpr {
         _state: &ExecutionState,
     ) -> PolarsResult<AggregationContext<'a>> {
         let ca = groups.group_count().with_name(PlSmallStr::from_static(LEN));
-        let s = ca.into_series();
-        Ok(AggregationContext::new(s, Cow::Borrowed(groups), true))
+        let c = ca.into_column();
+        Ok(AggregationContext::new(c, Cow::Borrowed(groups), true))
     }
 
     fn to_field(&self, _input_schema: &Schema) -> PolarsResult<Field> {
diff --git a/crates/polars-expr/src/expressions/filter.rs b/crates/polars-expr/src/expressions/filter.rs
index 6f847a7fa8ed..f2b1383059ee 100644
--- a/crates/polars-expr/src/expressions/filter.rs
+++ b/crates/polars-expr/src/expressions/filter.rs
@@ -73,7 +73,7 @@ impl PhysicalExpr for FilterExpr {
                         .with_name(s.name().clone())
                 }
             };
-            ac_s.with_series(out.into_series(), true, Some(&self.expr))?;
+            ac_s.with_values(out.into_column(), true, Some(&self.expr))?;
             ac_s.update_groups = WithSeriesLen;
             Ok(ac_s)
         } else {
diff --git a/crates/polars-expr/src/expressions/gather.rs b/crates/polars-expr/src/expressions/gather.rs
index 19a0e35ff315..5c0ccae4f2bc 100644
--- a/crates/polars-expr/src/expressions/gather.rs
+++ b/crates/polars-expr/src/expressions/gather.rs
@@ -2,7 +2,7 @@ use arrow::legacy::utils::CustomIterTools;
 use polars_core::chunked_array::builder::get_list_builder;
 use polars_core::prelude::*;
 use polars_core::utils::NoNull;
-use polars_ops::prelude::{convert_to_unsigned_index, is_positive_idx_uncertain};
+use polars_ops::prelude::{convert_to_unsigned_index, is_positive_idx_uncertain_col};
 
 use super::*;
 use crate::expressions::{AggState, AggregationContext, PhysicalExpr, UpdateGroups};
@@ -33,14 +33,14 @@ impl PhysicalExpr for GatherExpr {
         let mut ac = self.phys_expr.evaluate_on_groups(df, groups, state)?;
         let mut idx = self.idx.evaluate_on_groups(df, groups, state)?;
 
-        let s_idx = idx.series();
-        match s_idx.dtype() {
+        let c_idx = idx.get_values();
+        match c_idx.dtype() {
             DataType::List(inner) => {
                 polars_ensure!(inner.is_integer(), InvalidOperation: "expected numeric dtype as index, got {:?}", inner)
             },
             dt if dt.is_integer() => {
                 // Unsigned integers will fall through and will use faster paths.
-                if !is_positive_idx_uncertain(s_idx) {
+                if !is_positive_idx_uncertain_col(c_idx) {
                     return self.process_negative_indices_agg(ac, idx, groups);
                 }
             },
@@ -80,10 +80,10 @@ impl PhysicalExpr for GatherExpr {
                 .map(|(s, idx)| Some(s?.as_ref().take(idx?.as_ref().idx().unwrap())))
                 .map(|opt_res| opt_res.transpose())
                 .collect::<PolarsResult<ListChunked>>()?
-                .with_name(ac.series().name().clone())
+                .with_name(ac.get_values().name().clone())
         };
 
-        ac.with_series(taken.into_series(), true, Some(&self.expr))?;
+        ac.with_values(taken.into_column(), true, Some(&self.expr))?;
         ac.with_update_groups(UpdateGroups::WithSeriesLen);
         Ok(ac)
     }
@@ -162,10 +162,10 @@ impl GatherExpr {
             let taken = if self.returns_scalar {
                 taken
             } else {
-                taken.as_list().into_series()
+                taken.as_list().into_column()
             };
 
-            ac.with_series(taken, true, Some(&self.expr))?;
+            ac.with_values(taken, true, Some(&self.expr))?;
             Ok(ac)
         } else {
             self.gather_aggregated_expensive(ac, idx)
@@ -183,7 +183,7 @@ impl GatherExpr {
             .unwrap()
             .try_apply_amortized(|s| s.as_ref().take(idx))?;
 
-        ac.with_series(out.into_series(), true, Some(&self.expr))?;
+        ac.with_values(out.into_column(), true, Some(&self.expr))?;
         ac.with_update_groups(UpdateGroups::WithGroupsLen);
         Ok(ac)
     }
@@ -228,10 +228,10 @@ impl GatherExpr {
                     let taken = if self.returns_scalar {
                         taken
                     } else {
-                        taken.as_list().into_series()
+                        taken.as_list().into_column()
                     };
 
-                    ac.with_series(taken, true, Some(&self.expr))?;
+                    ac.with_values(taken, true, Some(&self.expr))?;
                     ac.with_update_groups(UpdateGroups::WithGroupsLen);
                     Ok(ac)
                 },
@@ -249,9 +249,9 @@ impl GatherExpr {
     ) -> PolarsResult<AggregationContext<'b>> {
         let mut builder = get_list_builder(
             &ac.dtype(),
-            idx.series().len(),
+            idx.get_values().len(),
             groups.len(),
-            ac.series().name().clone(),
+            ac.get_values().name().clone(),
         );
 
         let iter = ac.iter_groups(false).zip(idx.iter_groups(false));
@@ -265,7 +265,7 @@ impl GatherExpr {
                 _ => builder.append_null(),
             };
         }
-        let out = builder.finish().into_series();
+        let out = builder.finish().into_column();
         ac.with_agg_state(AggState::AggregatedList(out));
         Ok(ac)
     }
diff --git a/crates/polars-expr/src/expressions/group_iter.rs b/crates/polars-expr/src/expressions/group_iter.rs
index b42851e49d2a..31a694fe4a86 100644
--- a/crates/polars-expr/src/expressions/group_iter.rs
+++ b/crates/polars-expr/src/expressions/group_iter.rs
@@ -12,45 +12,45 @@ impl AggregationContext<'_> {
         match self.agg_state() {
             AggState::Literal(_) => {
                 self.groups();
-                let s = self.series().rechunk();
+                let c = self.get_values().rechunk();
                 let name = if keep_names {
-                    s.name().clone()
+                    c.name().clone()
                 } else {
                     PlSmallStr::EMPTY
                 };
                 // SAFETY: dtype is correct
                 unsafe {
                     Box::new(LitIter::new(
-                        s.array_ref(0).clone(),
+                        c.as_materialized_series().array_ref(0).clone(),
                         self.groups.len(),
-                        s._dtype(),
+                        c.dtype(),
                         name,
                     ))
                 }
             },
             AggState::AggregatedScalar(_) => {
                 self.groups();
-                let s = self.series();
+                let c = self.get_values();
                 let name = if keep_names {
-                    s.name().clone()
+                    c.name().clone()
                 } else {
                     PlSmallStr::EMPTY
                 };
                 // SAFETY: dtype is correct
                 unsafe {
                     Box::new(FlatIter::new(
-                        s.chunks(),
+                        c.as_materialized_series().chunks(),
                         self.groups.len(),
-                        s.dtype(),
+                        c.dtype(),
                         name,
                     ))
                 }
             },
             AggState::AggregatedList(_) => {
-                let s = self.series();
-                let list = s.list().unwrap();
+                let c = self.get_values();
+                let list = c.list().unwrap();
                 let name = if keep_names {
-                    s.name().clone()
+                    c.name().clone()
                 } else {
                     PlSmallStr::EMPTY
                 };
@@ -59,10 +59,10 @@ impl AggregationContext<'_> {
             AggState::NotAggregated(_) => {
                 // we don't take the owned series as we want a reference
                 let _ = self.aggregated();
-                let s = self.series();
-                let list = s.list().unwrap();
+                let c = self.get_values();
+                let list = c.list().unwrap();
                 let name = if keep_names {
-                    s.name().clone()
+                    c.name().clone()
                 } else {
                     PlSmallStr::EMPTY
                 };
diff --git a/crates/polars-expr/src/expressions/literal.rs b/crates/polars-expr/src/expressions/literal.rs
index 0ab9ad9872b3..e2ea2f6d0f90 100644
--- a/crates/polars-expr/src/expressions/literal.rs
+++ b/crates/polars-expr/src/expressions/literal.rs
@@ -139,10 +139,7 @@ impl PhysicalExpr for LiteralExpr {
         state: &ExecutionState,
     ) -> PolarsResult<AggregationContext<'a>> {
         let s = self.evaluate(df, state)?;
-        Ok(AggregationContext::from_literal(
-            s.take_materialized_series(),
-            Cow::Borrowed(groups),
-        ))
+        Ok(AggregationContext::from_literal(s, Cow::Borrowed(groups)))
     }
 
     fn as_partitioned_aggregator(&self) -> Option<&dyn PartitionedAggregation> {
diff --git a/crates/polars-expr/src/expressions/mod.rs b/crates/polars-expr/src/expressions/mod.rs
index 277afddb41f2..70963dde7eec 100644
--- a/crates/polars-expr/src/expressions/mod.rs
+++ b/crates/polars-expr/src/expressions/mod.rs
@@ -48,28 +48,28 @@ use crate::state::ExecutionState;
 
 #[derive(Clone, Debug)]
 pub enum AggState {
-    /// Already aggregated: `.agg_list(group_tuples`) is called
+    /// Already aggregated: `.agg_list(group_tuples)` is called
     /// and produced a `Series` of dtype `List`
-    AggregatedList(Series),
+    AggregatedList(Column),
     /// Already aggregated: `.agg` is called on an aggregation
     /// that produces a scalar.
     /// think of `sum`, `mean`, `variance` like aggregations.
-    AggregatedScalar(Series),
+    AggregatedScalar(Column),
     /// Not yet aggregated: `agg_list` still has to be called.
-    NotAggregated(Series),
-    Literal(Series),
+    NotAggregated(Column),
+    Literal(Column),
 }
 
 impl AggState {
     fn try_map<F>(&self, func: F) -> PolarsResult<Self>
     where
-        F: FnOnce(&Series) -> PolarsResult<Series>,
+        F: FnOnce(&Column) -> PolarsResult<Column>,
     {
         Ok(match self {
-            AggState::AggregatedList(s) => AggState::AggregatedList(func(s)?),
-            AggState::AggregatedScalar(s) => AggState::AggregatedScalar(func(s)?),
-            AggState::Literal(s) => AggState::Literal(func(s)?),
-            AggState::NotAggregated(s) => AggState::NotAggregated(func(s)?),
+            AggState::AggregatedList(c) => AggState::AggregatedList(func(c)?),
+            AggState::AggregatedScalar(c) => AggState::AggregatedScalar(func(c)?),
+            AggState::Literal(c) => AggState::Literal(func(c)?),
+            AggState::NotAggregated(c) => AggState::NotAggregated(func(c)?),
         })
     }
 }
@@ -152,14 +152,14 @@ impl<'a> AggregationContext<'a> {
                 self.update_groups = UpdateGroups::No;
             },
             UpdateGroups::WithSeriesLen => {
-                let s = self.series().clone();
-                self.det_groups_from_list(&s);
+                let s = self.get_values().clone();
+                self.det_groups_from_list(s.as_materialized_series());
             },
         }
         &self.groups
     }
 
-    pub(crate) fn series(&self) -> &Series {
+    pub(crate) fn get_values(&self) -> &Column {
         match &self.state {
             AggState::NotAggregated(s)
             | AggState::AggregatedScalar(s)
@@ -191,20 +191,20 @@ impl<'a> AggregationContext<'a> {
     /// - `aggregated` sets if the Series is a list due to aggregation (could also be a list because its
     ///   the columns dtype)
     fn new(
-        series: Series,
+        column: Column,
         groups: Cow<'a, GroupsProxy>,
         aggregated: bool,
     ) -> AggregationContext<'a> {
-        let series = match (aggregated, series.dtype()) {
+        let series = match (aggregated, column.dtype()) {
             (true, &DataType::List(_)) => {
-                assert_eq!(series.len(), groups.len());
-                AggState::AggregatedList(series)
+                assert_eq!(column.len(), groups.len());
+                AggState::AggregatedList(column)
             },
             (true, _) => {
-                assert_eq!(series.len(), groups.len());
-                AggState::AggregatedScalar(series)
+                assert_eq!(column.len(), groups.len());
+                AggState::AggregatedScalar(column)
             },
-            _ => AggState::NotAggregated(series),
+            _ => AggState::NotAggregated(column),
         };
 
         Self {
@@ -230,7 +230,7 @@ impl<'a> AggregationContext<'a> {
         }
     }
 
-    fn from_literal(lit: Series, groups: Cow<'a, GroupsProxy>) -> AggregationContext<'a> {
+    fn from_literal(lit: Column, groups: Cow<'a, GroupsProxy>) -> AggregationContext<'a> {
         Self {
             state: AggState::Literal(lit),
             groups,
@@ -283,7 +283,7 @@ impl<'a> AggregationContext<'a> {
             },
             _ => {
                 let groups = {
-                    self.series()
+                    self.get_values()
                         .list()
                         .expect("impl error, should be a list at this point")
                         .amortized_iter()
@@ -312,27 +312,27 @@ impl<'a> AggregationContext<'a> {
     /// # Arguments
     /// - `aggregated` sets if the Series is a list due to aggregation (could also be a list because its
     ///   the columns dtype)
-    pub(crate) fn with_series(
+    pub(crate) fn with_values(
         &mut self,
-        series: Series,
+        column: Column,
         aggregated: bool,
         expr: Option<&Expr>,
     ) -> PolarsResult<&mut Self> {
-        self.with_series_and_args(series, aggregated, expr, false)
+        self.with_values_and_args(column, aggregated, expr, false)
     }
 
-    pub(crate) fn with_series_and_args(
+    pub(crate) fn with_values_and_args(
         &mut self,
-        series: Series,
+        column: Column,
         aggregated: bool,
         expr: Option<&Expr>,
         // if the applied function was a `map` instead of an `apply`
         // this will keep functions applied over literals as literals: F(lit) = lit
         mapped: bool,
     ) -> PolarsResult<&mut Self> {
-        self.state = match (aggregated, series.dtype()) {
+        self.state = match (aggregated, column.dtype()) {
             (true, &DataType::List(_)) => {
-                if series.len() != self.groups.len() {
+                if column.len() != self.groups.len() {
                     let fmt_expr = if let Some(e) = expr {
                         format!("'{e:?}' ")
                     } else {
@@ -342,30 +342,30 @@ impl<'a> AggregationContext<'a> {
                         ComputeError:
                         "aggregation expression '{}' produced a different number of elements: {} \
                         than the number of groups: {} (this is likely invalid)",
-                        fmt_expr, series.len(), self.groups.len(),
+                        fmt_expr, column.len(), self.groups.len(),
                     );
                 }
-                AggState::AggregatedList(series)
+                AggState::AggregatedList(column)
             },
-            (true, _) => AggState::AggregatedScalar(series),
+            (true, _) => AggState::AggregatedScalar(column),
             _ => {
                 match self.state {
                     // already aggregated to sum, min even this series was flattened it never could
                     // retrieve the length before grouping, so it stays  in this state.
-                    AggState::AggregatedScalar(_) => AggState::AggregatedScalar(series),
+                    AggState::AggregatedScalar(_) => AggState::AggregatedScalar(column),
                     // applying a function on a literal, keeps the literal state
-                    AggState::Literal(_) if series.len() == 1 && mapped => {
-                        AggState::Literal(series)
+                    AggState::Literal(_) if column.len() == 1 && mapped => {
+                        AggState::Literal(column)
                     },
-                    _ => AggState::NotAggregated(series),
+                    _ => AggState::NotAggregated(column),
                 }
             },
         };
         Ok(self)
     }
 
-    pub(crate) fn with_literal(&mut self, series: Series) -> &mut Self {
-        self.state = AggState::Literal(series);
+    pub(crate) fn with_literal(&mut self, column: Column) -> &mut Self {
+        self.state = AggState::Literal(column);
         self
     }
 
@@ -373,7 +373,7 @@ impl<'a> AggregationContext<'a> {
     pub(crate) fn with_groups(&mut self, groups: GroupsProxy) -> &mut Self {
         if let AggState::AggregatedList(_) = self.agg_state() {
             // In case of new groups, a series always needs to be flattened
-            self.with_series(self.flat_naive().into_owned(), false, None)
+            self.with_values(self.flat_naive().into_owned(), false, None)
                 .unwrap();
         }
         self.groups = Cow::Owned(groups);
@@ -383,7 +383,7 @@ impl<'a> AggregationContext<'a> {
     }
 
     /// Get the aggregated version of the series.
-    pub fn aggregated(&mut self) -> Series {
+    pub fn aggregated(&mut self) -> Column {
         // we clone, because we only want to call `self.groups()` if needed.
         // self groups may instantiate new groups and thus can be expensive.
         match self.state.clone() {
@@ -409,7 +409,7 @@ impl<'a> AggregationContext<'a> {
                 self.update_groups = UpdateGroups::WithGroupsLen;
                 out
             },
-            AggState::AggregatedList(s) | AggState::AggregatedScalar(s) => s,
+            AggState::AggregatedList(c) | AggState::AggregatedScalar(c) => c,
             AggState::Literal(s) => {
                 self.groups();
                 let rows = self.groups.len();
@@ -421,21 +421,21 @@ impl<'a> AggregationContext<'a> {
                     ])
                     .unwrap();
                 self.state = AggState::AggregatedList(out.clone());
                 out
             },
         }
     }
 
     /// Get the final aggregated version of the series.
-    pub fn finalize(&mut self) -> Series {
+    pub fn finalize(&mut self) -> Column {
         // we clone, because we only want to call `self.groups()` if needed.
         // self groups may instantiate new groups and thus can be expensive.
         match &self.state {
-            AggState::Literal(s) => {
-                let s = s.clone();
+            AggState::Literal(c) => {
+                let c = c.clone();
                 self.groups();
                 let rows = self.groups.len();
-                s.new_from_index(0, rows)
+                c.new_from_index(0, rows)
             },
             _ => self.aggregated(),
         }
@@ -452,15 +452,15 @@ impl<'a> AggregationContext<'a> {
         }
     }
 
-    pub fn get_final_aggregation(mut self) -> (Series, Cow<'a, GroupsProxy>) {
+    pub fn get_final_aggregation(mut self) -> (Column, Cow<'a, GroupsProxy>) {
         let _ = self.groups();
         let groups = self.groups;
         match self.state {
-            AggState::NotAggregated(s) => (s, groups),
-            AggState::AggregatedScalar(s) => (s, groups),
-            AggState::Literal(s) => (s, groups),
-            AggState::AggregatedList(s) => {
-                let flattened = s.explode().unwrap();
+            AggState::NotAggregated(c) => (c, groups),
+            AggState::AggregatedScalar(c) => (c, groups),
+            AggState::Literal(c) => (c, groups),
+            AggState::AggregatedList(c) => {
+                let flattened = c.explode().unwrap();
                 let groups = groups.into_owned();
                 // unroll the possible flattened state
                 // say we have groups with overlapping windows:
@@ -496,10 +496,10 @@ impl<'a> AggregationContext<'a> {
     /// Note that we call it naive, because if a previous expr
     /// has filtered or sorted this, this information is in the
     /// group tuples not the flattened series.
-    pub(crate) fn flat_naive(&self) -> Cow<'_, Series> {
+    pub(crate) fn flat_naive(&self) -> Cow<'_, Column> {
         match &self.state {
-            AggState::NotAggregated(s) => Cow::Borrowed(s),
-            AggState::AggregatedList(s) => {
+            AggState::NotAggregated(c) => Cow::Borrowed(c),
+            AggState::AggregatedList(c) => {
                 #[cfg(debug_assertions)]
                 {
                     // panic so we find cases where we accidentally explode overlapping groups
@@ -509,22 +509,22 @@ impl<'a> AggregationContext<'a> {
                     }
                 }
 
-                Cow::Owned(s.explode().unwrap())
+                Cow::Owned(c.explode().unwrap())
             },
-            AggState::AggregatedScalar(s) => Cow::Borrowed(s),
-            AggState::Literal(s) => Cow::Borrowed(s),
+            AggState::AggregatedScalar(c) => Cow::Borrowed(c),
+            AggState::Literal(c) => Cow::Borrowed(c),
         }
     }
 
     /// Take the series.
-    pub(crate) fn take(&mut self) -> Series {
-        let s = match &mut self.state {
-            AggState::NotAggregated(s)
-            | AggState::AggregatedScalar(s)
-            | AggState::AggregatedList(s) => s,
-            AggState::Literal(s) => s,
+    pub(crate) fn take(&mut self) -> Column {
+        let c = match &mut self.state {
+            AggState::NotAggregated(c)
+            | AggState::AggregatedScalar(c)
+            | AggState::AggregatedList(c) => c,
+            AggState::Literal(c) => c,
         };
-        std::mem::take(s)
+        std::mem::take(c)
     }
 }
 
diff --git a/crates/polars-expr/src/expressions/slice.rs b/crates/polars-expr/src/expressions/slice.rs
index 2b805edd1bb0..0c2688d7999a 100644
--- a/crates/polars-expr/src/expressions/slice.rs
+++ b/crates/polars-expr/src/expressions/slice.rs
@@ -1,5 +1,5 @@
 use polars_core::prelude::*;
-use polars_core::utils::{slice_offsets, Container, CustomIterTools};
+use polars_core::utils::{slice_offsets, CustomIterTools};
 use polars_core::POOL;
 use rayon::prelude::*;
 use AnyValue::Null;
@@ -14,7 +14,7 @@ pub struct SliceExpr {
     pub(crate) expr: Expr,
 }
 
-fn extract_offset(offset: &Series, expr: &Expr) -> PolarsResult<i64> {
+fn extract_offset(offset: &Column, expr: &Expr) -> PolarsResult<i64> {
     polars_ensure!(
         offset.len() <= 1, expr = expr, ComputeError:
         "invalid argument to slice; expected an offset literal, got series of length {}",
@@ -25,7 +25,7 @@ fn extract_offset(offset: &Series, expr: &Expr) -> PolarsResult<i64> {
     )
 }
 
-fn extract_length(length: &Series, expr: &Expr) -> PolarsResult<usize> {
+fn extract_length(length: &Column, expr: &Expr) -> PolarsResult<usize> {
     polars_ensure!(
         length.len() <= 1, expr = expr, ComputeError:
         "invalid argument to slice; expected a length literal, got series of length {}",
@@ -39,11 +39,11 @@ fn extract_length(length: &Series, expr: &Expr) -> PolarsResult<usize> {
     }
 }
 
-fn extract_args(offset: &Series, length: &Series, expr: &Expr) -> PolarsResult<(i64, usize)> {
+fn extract_args(offset: &Column, length: &Column, expr: &Expr) -> PolarsResult<(i64, usize)> {
     Ok((extract_offset(offset, expr)?, extract_length(length, expr)?))
 }
 
-fn check_argument(arg: &Series, groups: &GroupsProxy, name: &str, expr: &Expr) -> PolarsResult<()> {
+fn check_argument(arg: &Column, groups: &GroupsProxy, name: &str, expr: &Expr) -> PolarsResult<()> {
     polars_ensure!(
         !matches!(arg.dtype(), DataType::List(_)), expr = expr, ComputeError:
         "invalid slice argument: cannot use an array as {} argument", name,
@@ -92,11 +92,7 @@ impl PhysicalExpr for SliceExpr {
         let offset = &results[0];
         let length = &results[1];
         let series = &results[2];
-        let (offset, length) = extract_args(
-            offset.as_materialized_series(),
-            length.as_materialized_series(),
-            &self.expr,
-        )?;
+        let (offset, length) = extract_args(offset, length, &self.expr)?;
 
         Ok(series.slice(offset, length))
     }
diff --git a/crates/polars-expr/src/expressions/sort.rs b/crates/polars-expr/src/expressions/sort.rs
index be9fe57e29ce..df816f9b48e7 100644
--- a/crates/polars-expr/src/expressions/sort.rs
+++ b/crates/polars-expr/src/expressions/sort.rs
@@ -63,7 +63,7 @@ impl PhysicalExpr for SortExpr {
             AggState::AggregatedList(s) => {
                 let ca = s.list().unwrap();
                 let out = ca.lst_sort(self.options)?;
-                ac.with_series(out.into_series(), true, Some(&self.expr))?;
+                ac.with_values(out.into_column(), true, Some(&self.expr))?;
             },
             _ => {
                 let series = ac.flat_naive().into_owned();
diff --git a/crates/polars-expr/src/expressions/sortby.rs b/crates/polars-expr/src/expressions/sortby.rs
index fad081cb49ed..ed34ed6414cd 100644
--- a/crates/polars-expr/src/expressions/sortby.rs
+++ b/crates/polars-expr/src/expressions/sortby.rs
@@ -133,8 +133,8 @@ fn sort_by_groups_no_match_single<'a>(
             })
             .collect_ca_with_dtype(PlSmallStr::EMPTY, dtype)
     });
-    let s = ca?.with_name(s_in.name().clone()).into_series();
-    ac_in.with_series(s, true, Some(expr))?;
+    let c = ca?.with_name(s_in.name().clone()).into_column();
+    ac_in.with_values(c, true, Some(expr))?;
     Ok(ac_in)
 }
 
@@ -281,12 +281,16 @@ impl PhysicalExpr for SortByExpr {
             .collect::<PolarsResult<Vec<_>>>()?;
         let mut sort_by_s = ac_sort_by
             .iter()
-            .map(|s| {
-                let s = s.flat_naive();
-                match s.dtype() {
+            .map(|c| {
+                let c = c.flat_naive();
+                match c.dtype() {
                     #[cfg(feature = "dtype-categorical")]
-                    DataType::Categorical(_, _) | DataType::Enum(_, _) => s.into_owned(),
-                    _ => s.to_physical_repr().into_owned(),
+                    DataType::Categorical(_, _) | DataType::Enum(_, _) => {
+                        c.as_materialized_series().clone()
+                    },
+                    // @scalar-opt
+                    // @partition-opt
+                    _ => c.to_physical_repr().take_materialized_series(),
                 }
             })
             .collect::<Vec<_>>();
@@ -363,7 +367,7 @@ impl PhysicalExpr for SortByExpr {
         // group_by operation - we must ensure that we are as well.
         if ordered_by_group_operation {
             let s = ac_in.aggregated();
-            ac_in.with_series(s.explode().unwrap(), false, None)?;
+            ac_in.with_values(s.explode().unwrap(), false, None)?;
         }
 
         ac_in.with_groups(groups);
diff --git a/crates/polars-expr/src/expressions/ternary.rs b/crates/polars-expr/src/expressions/ternary.rs
index 2d1035c22eb7..bbd0c5f7d936 100644
--- a/crates/polars-expr/src/expressions/ternary.rs
+++ b/crates/polars-expr/src/expressions/ternary.rs
@@ -56,13 +56,13 @@ fn finish_as_iters<'a>(
             .transpose()
         })
         .collect::<PolarsResult<ListChunked>>()?
-        .with_name(ac_truthy.series().name().clone());
+        .with_name(ac_truthy.get_values().name().clone());
 
     // Aggregation leaves only a single chunk.
     let arr = ca.downcast_iter().next().unwrap();
     let list_vals_len = arr.values().len();
 
-    let mut out = ca.into_series();
+    let mut out = ca.into_column();
     if ac_truthy.arity_should_explode() && ac_falsy.arity_should_explode() && ac_mask.arity_should_explode() &&
         // Exploded list should be equal to groups length.
         list_vals_len == ac_truthy.groups.len()
@@ -70,7 +70,7 @@ fn finish_as_iters<'a>(
         out = out.explode()?
     }
 
-    ac_truthy.with_series(out, true, None)?;
+    ac_truthy.with_values(out, true, None)?;
     Ok(ac_truthy)
 }
 
@@ -168,8 +168,8 @@ impl PhysicalExpr for TernaryExpr {
             }
 
             let out = ac_truthy
-                .series()
-                .zip_with(ac_mask.series().bool()?, ac_falsy.series())?;
+                .get_values()
+                .zip_with(ac_mask.get_values().bool()?, ac_falsy.get_values())?;
 
             for ac in [&ac_mask, &ac_truthy, &ac_falsy].into_iter() {
                 if matches!(ac.agg_state(), NotAggregated(_)) {
@@ -257,21 +257,21 @@ impl PhysicalExpr for TernaryExpr {
                 }
 
                 let truthy = if let AggregatedList(s) = ac_truthy.agg_state() {
-                    s.list().unwrap().get_inner()
+                    s.list().unwrap().get_inner().into_column()
                 } else {
-                    ac_truthy.series().clone()
+                    ac_truthy.get_values().clone()
                 };
 
                 let falsy = if let AggregatedList(s) = ac_falsy.agg_state() {
-                    s.list().unwrap().get_inner()
+                    s.list().unwrap().get_inner().into_column()
                 } else {
-                    ac_falsy.series().clone()
+                    ac_falsy.get_values().clone()
                 };
 
                 let mask = if let AggregatedList(s) = ac_mask.agg_state() {
-                    s.list().unwrap().get_inner()
+                    s.list().unwrap().get_inner().into_column()
                 } else {
-                    ac_mask.series().clone()
+                    ac_mask.get_values().clone()
                 };
 
                 let out = truthy.zip_with(mask.bool()?, &falsy)?;
@@ -280,8 +280,10 @@ impl PhysicalExpr for TernaryExpr {
                 // offsets buffer of the result, so we construct the result
                 // ListChunked directly from the 2.
                 let out = out.rechunk();
-                let values = out.array_ref(0);
-                let offsets = ac_target.series().list().unwrap().offsets()?;
+                // @scalar-opt
+                // @partition-opt
+                let values = out.as_materialized_series().array_ref(0);
+                let offsets = ac_target.get_values().list().unwrap().offsets()?;
                 let inner_type = out.dtype();
                 let dtype = LargeListArray::default_datatype(values.dtype().clone());
 
@@ -291,11 +293,11 @@ impl PhysicalExpr for TernaryExpr {
                 let mut out = ListChunked::with_chunk(truthy.name().clone(), out);
                 unsafe { out.to_logical(inner_type.clone()) };
 
-                if ac_target.series().list().unwrap()._can_fast_explode() {
+                if ac_target.get_values().list().unwrap()._can_fast_explode() {
                     out.set_fast_explode();
                 };
 
-                let out = out.into_series();
+                let out = out.into_column();
 
                 AggregatedList(out)
             },
@@ -305,8 +307,8 @@ impl PhysicalExpr for TernaryExpr {
                 }
 
                 let out = ac_truthy
-                    .series()
-                    .zip_with(ac_mask.series().bool()?, ac_falsy.series())?;
+                    .get_values()
+                    .zip_with(ac_mask.get_values().bool()?, ac_falsy.get_values())?;
                 AggregatedScalar(out)
             },
             _ => {
diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs
index e15a301f68b4..bbb9a1cface1 100644
--- a/crates/polars-expr/src/expressions/window.rs
+++ b/crates/polars-expr/src/expressions/window.rs
@@ -45,13 +45,13 @@ enum MapStrategy {
 impl WindowExpr {
     fn map_list_agg_by_arg_sort(
         &self,
-        out_column: Series,
-        flattened: Series,
+        out_column: Column,
+        flattened: Column,
         mut ac: AggregationContext,
         gb: GroupBy,
         state: &ExecutionState,
         cache_key: &str,
-    ) -> PolarsResult<Series> {
+    ) -> PolarsResult<Column> {
         // idx (new-idx, original-idx)
         let mut idx_mapping = Vec::with_capacity(out_column.len());
 
@@ -124,14 +124,14 @@ impl WindowExpr {
     fn map_by_arg_sort(
         &self,
         df: &DataFrame,
-        out_column: Series,
-        flattened: Series,
+        out_column: Column,
+        flattened: Column,
         mut ac: AggregationContext,
         group_by_columns: &[Column],
         gb: GroupBy,
         state: &ExecutionState,
         cache_key: &str,
-    ) -> PolarsResult<Series> {
+    ) -> PolarsResult<Column> {
         // we use an arg_sort to map the values back
 
         // This is a bit more complicated because the final group tuples may differ from the original
@@ -656,7 +656,7 @@ impl PhysicalExpr for WindowExpr {
     }
 }
 
-fn materialize_column(join_opt_ids: &ChunkJoinOptIds, out_column: &Series) -> Series {
+fn materialize_column(join_opt_ids: &ChunkJoinOptIds, out_column: &Column) -> Column {
     {
         use arrow::Either;
         use polars_ops::chunked_array::TakeChunked;
@@ -680,11 +680,11 @@ fn cache_gb(gb: GroupBy, state: &ExecutionState, cache_key: &str) {
 
 /// Simple reducing aggregation can be set by the groups
 fn set_by_groups(
-    s: &Series,
+    s: &Column,
     groups: &GroupsProxy,
     len: usize,
     update_groups: bool,
-) -> Option<Series> {
+) -> Option<Column> {
     if update_groups {
         return None;
     }
@@ -697,7 +697,9 @@ fn set_by_groups(
                 Some(set_numeric($ca, groups, len))
             }};
         }
-        downcast_as_macro_arg_physical!(&s, dispatch).map(|s| s.cast(dtype).unwrap())
+        downcast_as_macro_arg_physical!(&s, dispatch)
+            .map(|s| s.cast(dtype).unwrap())
+            .map(Column::from)
     } else {
         None
     }
diff --git a/crates/polars-lazy/src/dsl/list.rs b/crates/polars-lazy/src/dsl/list.rs
index c706ee9b6957..d23d99b90e5c 100644
--- a/crates/polars-lazy/src/dsl/list.rs
+++ b/crates/polars-lazy/src/dsl/list.rs
@@ -138,7 +138,7 @@ fn run_on_group_by_engine(
     let out = match ac.agg_state() {
         AggState::AggregatedScalar(_) => {
             let out = ac.aggregated();
-            out.as_list().into_series()
+            out.as_list().into_column()
         },
         _ => ac.aggregated(),
     };
diff --git a/crates/polars-lazy/src/frame/pivot.rs b/crates/polars-lazy/src/frame/pivot.rs
index 4d89eebef010..70eed4d8f58c 100644
--- a/crates/polars-lazy/src/frame/pivot.rs
+++ b/crates/polars-lazy/src/frame/pivot.rs
@@ -29,7 +29,7 @@ impl PhysicalAggExpr for PivotExpr {
         )?;
         phys_expr
             .evaluate_on_groups(df, groups, &state)
-            .map(|mut ac| ac.aggregated())
+            .map(|mut ac| ac.aggregated().take_materialized_series())
     }
 
     fn root_name(&self) -> PolarsResult<&PlSmallStr> {
diff --git a/crates/polars-mem-engine/src/executors/filter.rs b/crates/polars-mem-engine/src/executors/filter.rs
index 417a7ecf766e..a47e9b6f5ed9 100644
--- a/crates/polars-mem-engine/src/executors/filter.rs
+++ b/crates/polars-mem-engine/src/executors/filter.rs
@@ -10,10 +10,10 @@ pub struct FilterExec {
     streamable: bool,
 }
 
-fn series_to_mask(s: &Series) -> PolarsResult<&BooleanChunked> {
-    s.bool().map_err(|_| {
+fn column_to_mask(c: &Column) -> PolarsResult<&BooleanChunked> {
+    c.bool().map_err(|_| {
         polars_err!(
-            ComputeError: "filter predicate must be of type `Boolean`, got `{}`", s.dtype()
+            ComputeError: "filter predicate must be of type `Boolean`, got `{}`", c.dtype()
         )
     })
 }
@@ -41,11 +41,14 @@ impl FilterExec {
         if self.has_window {
             state.insert_has_window_function_flag()
         }
-        let s = self.predicate.evaluate(&df, state)?;
+        let c = self.predicate.evaluate(&df, state)?;
         if self.has_window {
             state.clear_window_expr_cache()
         }
-        df.filter(series_to_mask(s.as_materialized_series())?)
+
+        // @scalar-opt
+        // @partition-opt
+        df.filter(column_to_mask(&c)?)
     }
 
     fn execute_chunks(
@@ -54,8 +57,11 @@ impl FilterExec {
         state: &ExecutionState,
     ) -> PolarsResult<DataFrame> {
         let iter = chunks.into_par_iter().map(|df| {
-            let s = self.predicate.evaluate(&df, state)?;
-            df.filter(series_to_mask(s.as_materialized_series())?)
+            let c = self.predicate.evaluate(&df, state)?;
+
+            // @scalar-opt
+            // @partition-opt
+            df.filter(column_to_mask(&c)?)
         });
         let df = POOL.install(|| iter.collect::<PolarsResult<Vec<_>>>())?;
         Ok(accumulate_dataframes_vertical_unchecked(df))
diff --git a/crates/polars-mem-engine/src/executors/group_by.rs b/crates/polars-mem-engine/src/executors/group_by.rs
index 1ae612f64d67..437b7fb574aa 100644
--- a/crates/polars-mem-engine/src/executors/group_by.rs
+++ b/crates/polars-mem-engine/src/executors/group_by.rs
@@ -7,7 +7,7 @@ pub(super) fn evaluate_aggs(
     aggs: &[Arc<dyn PhysicalExpr>],
     groups: &GroupsProxy,
     state: &ExecutionState,
-) -> PolarsResult<Vec<Series>> {
+) -> PolarsResult<Vec<Column>> {
     POOL.install(|| {
         aggs.par_iter()
             .map(|expr| {
diff --git a/crates/polars-mem-engine/src/executors/stack.rs b/crates/polars-mem-engine/src/executors/stack.rs
index a93d4fc72d89..0b2dbfd01da3 100644
--- a/crates/polars-mem-engine/src/executors/stack.rs
+++ b/crates/polars-mem-engine/src/executors/stack.rs
@@ -38,12 +38,7 @@ impl StackExec {
                     self.options.run_parallel,
                 )?;
                 // We don't have to do a broadcast check as cse is not allowed to hit this.
-                df._add_series(
-                    res.into_iter()
-                        .map(|c| c.take_materialized_series())
-                        .collect(),
-                    schema,
-                )?;
+                df._add_columns(res.into_iter().collect(), schema)?;
                 Ok(df)
             });
 
@@ -100,12 +95,7 @@ impl StackExec {
                         }
                     }
                 }
-                df._add_series(
-                    res.into_iter()
-                        .map(|v| v.take_materialized_series())
-                        .collect(),
-                    schema,
-                )?;
+                df._add_columns(res.into_iter().collect(), schema)?;
             }
             df
         };
diff --git a/crates/polars-ops/src/series/ops/index.rs b/crates/polars-ops/src/series/ops/index.rs
index 51811cf0c319..b56f499895ff 100644
--- a/crates/polars-ops/src/series/ops/index.rs
+++ b/crates/polars-ops/src/series/ops/index.rs
@@ -1,7 +1,9 @@
 use num_traits::{Signed, Zero};
 use polars_core::error::{polars_ensure, PolarsResult};
 use polars_core::prelude::arity::unary_elementwise_values;
-use polars_core::prelude::{ChunkedArray, DataType, IdxCa, PolarsIntegerType, Series, IDX_DTYPE};
+use polars_core::prelude::{
+    ChunkedArray, Column, DataType, IdxCa, PolarsIntegerType, Series, IDX_DTYPE,
+};
 use polars_utils::index::ToIdx;
 
 fn convert<T>(ca: &ChunkedArray<T>, target_len: usize) -> PolarsResult<IdxCa>
@@ -97,3 +99,10 @@ pub fn is_positive_idx_uncertain(s: &Series) -> bool {
         _ => unreachable!(),
     }
 }
+
+/// [`Column`] wrapper for [`is_positive_idx_uncertain`]; may give false negatives (nulls ignored).
+pub fn is_positive_idx_uncertain_col(c: &Column) -> bool {
+    // @scalar-opt
+    // @partition-opt
+    is_positive_idx_uncertain(c.as_materialized_series())
+}

From 85b8de22a1823459a38b794791903724472e4a35 Mon Sep 17 00:00:00 2001
From: Alexander Beedie <alexander-beedie@users.noreply.github.com>
Date: Thu, 14 Nov 2024 16:59:56 +0400
Subject: [PATCH 16/18] feat: Add an `is_literal` method to expression `meta`
 namespace (#19773)

---
 crates/polars-plan/src/dsl/meta.rs            | 18 +++++++++--
 crates/polars-python/src/expr/meta.rs         |  4 +++
 .../source/reference/expressions/meta.rst     |  1 +
 py-polars/polars/expr/meta.py                 | 31 +++++++++++++++++--
 .../unit/operations/namespaces/test_meta.py   | 31 ++++++++++++++++++-
 5 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/crates/polars-plan/src/dsl/meta.rs b/crates/polars-plan/src/dsl/meta.rs
index b23c5181cc13..76a881f08ed1 100644
--- a/crates/polars-plan/src/dsl/meta.rs
+++ b/crates/polars-plan/src/dsl/meta.rs
@@ -83,9 +83,21 @@ impl MetaNameSpace {
             | Expr::IndexColumn(_)
             | Expr::Selector(_)
             | Expr::Wildcard => true,
-            Expr::Alias(_, _) | Expr::KeepName(_) | Expr::RenameAlias { .. } if allow_aliasing => {
-                true
-            },
+            Expr::Alias(_, _) | Expr::KeepName(_) | Expr::RenameAlias { .. } => allow_aliasing,
+            _ => false,
+        })
+    }
+
+    /// Indicate if this expression represents a literal value (optionally aliased).
+    pub fn is_literal(&self, allow_aliasing: bool) -> bool {
+        self.0.into_iter().all(|e| match e {
+            Expr::Literal(_) => true,
+            Expr::Alias(_, _) => allow_aliasing,
+            Expr::Cast {
+                expr,
+                dtype: DataType::Datetime(_, _),
+                options: CastOptions::Strict,
+            } if matches!(&**expr, Expr::Literal(LiteralValue::DateTime(_, _, _))) => true,
             _ => false,
         })
     }
diff --git a/crates/polars-python/src/expr/meta.rs b/crates/polars-python/src/expr/meta.rs
index d0e3a8b3e1df..891d37d26afa 100644
--- a/crates/polars-python/src/expr/meta.rs
+++ b/crates/polars-python/src/expr/meta.rs
@@ -58,6 +58,10 @@ impl PyExpr {
             .is_column_selection(allow_aliasing)
     }
 
+    fn meta_is_literal(&self, allow_aliasing: bool) -> bool {
+        self.inner.clone().meta().is_literal(allow_aliasing)
+    }
+
     fn _meta_selector_add(&self, other: PyExpr) -> PyResult<PyExpr> {
         let out = self
             .inner
diff --git a/py-polars/docs/source/reference/expressions/meta.rst b/py-polars/docs/source/reference/expressions/meta.rst
index 514067e0166f..6e4428381a34 100644
--- a/py-polars/docs/source/reference/expressions/meta.rst
+++ b/py-polars/docs/source/reference/expressions/meta.rst
@@ -13,6 +13,7 @@ The following methods are available under the `expr.meta` attribute.
     Expr.meta.has_multiple_outputs
     Expr.meta.is_column
     Expr.meta.is_column_selection
+    Expr.meta.is_literal
     Expr.meta.is_regex_projection
     Expr.meta.ne
     Expr.meta.output_name
diff --git a/py-polars/polars/expr/meta.py b/py-polars/polars/expr/meta.py
index e6ebc6f40944..d949f7583d14 100644
--- a/py-polars/polars/expr/meta.py
+++ b/py-polars/polars/expr/meta.py
@@ -108,7 +108,7 @@ def is_column_selection(self, *, allow_aliasing: bool = False) -> bool:
         """
         Indicate if this expression only selects columns (optionally with aliasing).
 
-        This can include bare columns, column matches by regex or dtype, selectors
+        This can include bare columns, columns matched by regex or dtype, selectors
         and exclude ops, and (optionally) column/expression aliasing.
 
         .. versionadded:: 0.20.30
@@ -116,7 +116,7 @@ def is_column_selection(self, *, allow_aliasing: bool = False) -> bool:
         Parameters
         ----------
         allow_aliasing
-            If False (default), any aliasing is not considered pure column selection.
+            If False (default), any aliasing is not considered to be column selection.
             Set True to allow for column selection that also includes aliasing.
 
         Examples
@@ -142,6 +142,33 @@ def is_column_selection(self, *, allow_aliasing: bool = False) -> bool:
         """
         return self._pyexpr.meta_is_column_selection(allow_aliasing)
 
+    def is_literal(self, *, allow_aliasing: bool = False) -> bool:
+        """
+        Indicate if this expression is a literal value (optionally aliased).
+
+        .. versionadded:: 1.14
+
+        Parameters
+        ----------
+        allow_aliasing
+            If False (default), only a bare literal will match.
+            Set True to also allow for aliased literals.
+
+        Examples
+        --------
+        >>> from datetime import datetime
+        >>> e = pl.lit(123)
+        >>> e.meta.is_literal()
+        True
+        >>> e = pl.lit(987.654321).alias("foo")
+        >>> e.meta.is_literal()
+        False
+        >>> e = pl.lit(datetime.now()).alias("bar")
+        >>> e.meta.is_literal(allow_aliasing=True)
+        True
+        """
+        return self._pyexpr.meta_is_literal(allow_aliasing)
+
     @overload
     def output_name(self, *, raise_if_undetermined: Literal[True] = True) -> str: ...
 
diff --git a/py-polars/tests/unit/operations/namespaces/test_meta.py b/py-polars/tests/unit/operations/namespaces/test_meta.py
index 38835244557e..5a0c253fbfed 100644
--- a/py-polars/tests/unit/operations/namespaces/test_meta.py
+++ b/py-polars/tests/unit/operations/namespaces/test_meta.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from datetime import date, datetime, time, timedelta
+from typing import TYPE_CHECKING, Any
 
 import pytest
 
@@ -123,6 +124,34 @@ def test_is_column_selection(
         assert not expr.meta.is_column_selection()
 
 
+@pytest.mark.parametrize(
+    "value",
+    [
+        None,
+        1234,
+        567.89,
+        float("inf"),
+        date.today(),
+        datetime.now(),
+        time(10, 30, 45),
+        timedelta(hours=-24),
+        ["x", "y", "z"],
+        pl.Series([None, None]),
+        [[10, 20], [30, 40]],
+        "this is the way",
+    ],
+)
+def test_is_literal(value: Any) -> None:
+    e = pl.lit(value)
+    assert e.meta.is_literal()
+
+    e = pl.lit(value).alias("foo")
+    assert not e.meta.is_literal()
+
+    e = pl.lit(value).alias("foo")
+    assert e.meta.is_literal(allow_aliasing=True)
+
+
 def test_meta_is_regex_projection() -> None:
     e = pl.col("^.*$").name.suffix("_foo")
     assert e.meta.is_regex_projection()

From 97c82d0e45488681a784e0742574c903b4fc2b9d Mon Sep 17 00:00:00 2001
From: nameexhaustion <simonlin.rqmmw@slmail.me>
Date: Fri, 15 Nov 2024 00:07:06 +1100
Subject: [PATCH 17/18] fix: Fix scanning google cloud with service account
 credentials file (#19782)

---
 .../polars/io/cloud/credential_provider.py    | 28 +++++++++++++++++--
 py-polars/polars/meta/versions.py             |  2 ++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/py-polars/polars/io/cloud/credential_provider.py b/py-polars/polars/io/cloud/credential_provider.py
index 26e8ebf6826e..69f2bdbdf67a 100644
--- a/py-polars/polars/io/cloud/credential_provider.py
+++ b/py-polars/polars/io/cloud/credential_provider.py
@@ -150,8 +150,21 @@ class CredentialProviderGCP(CredentialProvider):
             at any point without it being considered a breaking change.
     """
 
-    def __init__(self) -> None:
-        """Initialize a credential provider for Google Cloud (GCP)."""
+    def __init__(
+        self,
+        *,
+        scopes: Any | None = None,
+        request: Any | None = None,
+        quota_project_id: Any | None = None,
+        default_scopes: Any | None = None,
+    ) -> None:
+        """
+        Initialize a credential provider for Google Cloud (GCP).
+
+        Parameters
+        ----------
+        All keyword arguments are passed on to `google.auth.default()`.
+        """
         msg = "`CredentialProviderAWS` functionality is considered unstable"
         issue_unstable_warning(msg)
 
@@ -168,7 +181,16 @@ def __init__(self) -> None:
         #
         # So we just bypass it with a __dict__[] (because ruff complains about
         # getattr) :|
-        creds, _ = google.auth.__dict__["default"]()
+        creds, _ = google.auth.__dict__["default"](
+            scopes=(
+                scopes
+                if scopes is not None
+                else ["https://www.googleapis.com/auth/cloud-platform"]
+            ),
+            request=request,
+            quota_project_id=quota_project_id,
+            default_scopes=default_scopes,
+        )
         self.creds = creds
 
     def __call__(self) -> CredentialProviderFunctionReturn:
diff --git a/py-polars/polars/meta/versions.py b/py-polars/polars/meta/versions.py
index 425f01d91a85..f9f631dac5fd 100644
--- a/py-polars/polars/meta/versions.py
+++ b/py-polars/polars/meta/versions.py
@@ -68,12 +68,14 @@ def _get_dependency_list() -> list[str]:
     return [
         "adbc_driver_manager",
         "altair",
+        "boto3",
         "cloudpickle",
         "connectorx",
         "deltalake",
         "fastexcel",
         "fsspec",
         "gevent",
+        "google.auth",
         "great_tables",
         "matplotlib",
         "nest_asyncio",

From 5f11dd958947d7880a0c092b3c94a4c630a32f20 Mon Sep 17 00:00:00 2001
From: Ritchie Vink <ritchie46@gmail.com>
Date: Thu, 14 Nov 2024 14:26:16 +0100
Subject: [PATCH 18/18] fix(python): Respect schema_overrides in batched csv
 reader (#19755)

---
 crates/polars-python/src/batched_csv.rs   | 11 +++++------
 py-polars/polars/io/csv/batched_reader.py |  2 +-
 py-polars/tests/unit/io/test_csv.py       | 10 ++++++++++
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/crates/polars-python/src/batched_csv.rs b/crates/polars-python/src/batched_csv.rs
index d6a2353c6438..a406d7b6ddf3 100644
--- a/crates/polars-python/src/batched_csv.rs
+++ b/crates/polars-python/src/batched_csv.rs
@@ -23,7 +23,7 @@ impl PyBatchedCsv {
     #[staticmethod]
     #[pyo3(signature = (
         infer_schema_length, chunk_size, has_header, ignore_errors, n_rows, skip_rows,
-        projection, separator, rechunk, columns, encoding, n_threads, path, overwrite_dtype,
+        projection, separator, rechunk, columns, encoding, n_threads, path, schema_overrides,
         overwrite_dtype_slice, low_memory, comment_prefix, quote_char, null_values,
         missing_utf8_is_empty_string, try_parse_dates, skip_rows_after_header, row_index,
         eol_char, raise_if_empty, truncate_ragged_lines, decimal_comma)
@@ -42,7 +42,7 @@ impl PyBatchedCsv {
         encoding: Wrap<CsvEncoding>,
         n_threads: Option<usize>,
         path: PathBuf,
-        overwrite_dtype: Option<Vec<(PyBackedStr, Wrap<DataType>)>>,
+        schema_overrides: Option<Vec<(PyBackedStr, Wrap<DataType>)>>,
         overwrite_dtype_slice: Option<Vec<Wrap<DataType>>>,
         low_memory: bool,
         comment_prefix: Option<&str>,
@@ -73,7 +73,7 @@ impl PyBatchedCsv {
             None
         };
 
-        let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
+        let schema_overrides = schema_overrides.map(|overwrite_dtype| {
             overwrite_dtype
                 .iter()
                 .map(|(name, dtype)| {
@@ -105,6 +105,7 @@ impl PyBatchedCsv {
             .with_n_threads(n_threads)
             .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
             .with_low_memory(low_memory)
+            .with_schema_overwrite(schema_overrides.map(Arc::new))
             .with_skip_rows_after_header(skip_rows_after_header)
             .with_row_index(row_index)
             .with_raise_if_empty(raise_if_empty)
@@ -123,9 +124,7 @@ impl PyBatchedCsv {
             )
             .into_reader_with_file_handle(reader);
 
-        let reader = reader
-            .batched(overwrite_dtype.map(Arc::new))
-            .map_err(PyPolarsErr::from)?;
+        let reader = reader.batched(None).map_err(PyPolarsErr::from)?;
 
         Ok(PyBatchedCsv {
             reader: Mutex::new(reader),
diff --git a/py-polars/polars/io/csv/batched_reader.py b/py-polars/polars/io/csv/batched_reader.py
index e0384f03dde2..4207f476ee5c 100644
--- a/py-polars/polars/io/csv/batched_reader.py
+++ b/py-polars/polars/io/csv/batched_reader.py
@@ -89,7 +89,7 @@ def __init__(
             encoding=encoding,
             n_threads=n_threads,
             path=path,
-            overwrite_dtype=dtype_list,
+            schema_overrides=dtype_list,
             overwrite_dtype_slice=dtype_slice,
             low_memory=low_memory,
             comment_prefix=comment_prefix,
diff --git a/py-polars/tests/unit/io/test_csv.py b/py-polars/tests/unit/io/test_csv.py
index 226a4e31e2b9..628dc7587387 100644
--- a/py-polars/tests/unit/io/test_csv.py
+++ b/py-polars/tests/unit/io/test_csv.py
@@ -2346,3 +2346,13 @@ def test_csv_read_time_dtype_overwrite(tmp_path: Path) -> None:
         ),
         df,
     )
+
+
+def test_batched_csv_schema_overrides(io_files_path: Path) -> None:
+    foods = io_files_path / "foods1.csv"
+    batched = pl.read_csv_batched(foods, schema_overrides={"calories": pl.String})
+    res = batched.next_batches(1)
+    assert res is not None
+    b = res[0]
+    assert b["calories"].dtype == pl.String
+    assert b.width == 4