diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index 232f79fb8947..531add1428e6 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -96,9 +96,13 @@ jobs: with: python-version: '3.12' - - name: Create virtual environment + - name: Install uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + + - name: Create virtual environment + run: | uv venv echo "$GITHUB_WORKSPACE/.venv/bin" >> $GITHUB_PATH echo "VIRTUAL_ENV=$GITHUB_WORKSPACE/.venv" >> $GITHUB_ENV @@ -165,7 +169,7 @@ jobs: runs-on: ubuntu-latest steps: - # Needed to fetch the Codecov config file + # Needed to fetch the Codecov config file - uses: actions/checkout@v4 - name: Download coverage reports diff --git a/Cargo.lock b/Cargo.lock index 5176bd831139..d00cfa7ff0a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,9 +62,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" [[package]] name = "android-tzdata" @@ -89,15 +89,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anyhow" -version = "1.0.92" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f37166d7d48a0284b99dd824694c26119c700b53bf0d1540cdb147dbdaaf13" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "apache-avro" @@ -120,7 +120,7 @@ dependencies = [ "snap", "strum", "strum_macros", - "thiserror", + "thiserror 1.0.69", "typed-builder", "uuid", ] @@ -206,7 +206,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -217,7 +217,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -265,9 +265,9 @@ dependencies = [ [[package]] name = "aws-config" -version = "1.5.9" +version = "1.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d6448cfb224dd6a9b9ac734f58622dd0d4751f3589f3b777345745f46b2eb14" +checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924" dependencies = [ "aws-credential-types", "aws-runtime", @@ -368,9 +368,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ded855583fa1d22e88fe39fd6062b062376e50a8211989e07cf5e38d52eb3453" +checksum = "09677244a9da92172c8dc60109b4a9658597d4d298b188dd0018b6a66b410ca4" dependencies = [ "aws-credential-types", "aws-runtime", @@ -390,9 +390,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.49.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9177ea1192e6601ae16c7273385690d88a7ed386a00b74a6bc894d12103cd933" +checksum = 
"81fea2f3a8bb3bd10932ae7ad59cc59f65f270fc9183a7e91f501dc5efbef7ee" dependencies = [ "aws-credential-types", "aws-runtime", @@ -412,9 +412,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "823ef553cf36713c97453e2ddff1eb8f62be7f4523544e2a5db64caf80100f0a" +checksum = "53dcf5e7d9bd1517b8b998e170e650047cea8a2b85fe1835abe3210713e541b7" dependencies = [ "aws-credential-types", "aws-runtime", @@ -574,9 +574,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e086682a53d3aa241192aa110fa8dfce98f2f5ac2ead0de84d41582c7e8fdb96" +checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -591,9 +591,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.8" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07c9cdc179e6afbf5d391ab08c85eac817b51c87e1892a5edb5f7bbdc64314b4" +checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510" dependencies = [ "base64-simd", "bytes", @@ -794,7 +794,7 @@ checksum = "bcfcc3cd946cb52f0bbfdbbcfa2f4e24f75ebb6c0e1002f7c25904fada18b9ec" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -845,9 +845,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.31" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +checksum = "1aeb932158bd710538c73702db6945cb68a8fb08c519e6e12706b94263b36db8" dependencies = [ "jobserver", "libc", @@ -1037,9 +1037,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "0ca741a962e1b0bff6d724a1a0958b686406e853bb14061f218562e1896f95e6" dependencies = [ "libc", ] @@ -1252,6 +1252,17 @@ dependencies = [ "subtle", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -1314,7 +1325,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -1369,9 +1380,9 @@ checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" [[package]] name = "fastrand" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "ff" @@ -1432,9 +1443,9 @@ dependencies = [ [[package]] name = "fs4" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc91b3da7f1a7968b00f9f65a4971252f6a927d3cb9eec05d91cbeaff678f9a" +checksum = "e871a4cfa68bb224863b53149d973df1ac8d1ed2fa1d1bfc37ac1bb65dd37207" dependencies = [ "rustix", "windows-sys 0.52.0", @@ -1496,7 +1507,7 @@ checksum = 
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -1663,9 +1674,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" dependencies = [ "allocator-api2", "equivalent", @@ -1916,14 +1927,143 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "idna" -version = 
"0.5.0" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", ] [[package]] @@ -1933,7 +2073,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "serde", ] @@ -2053,9 +2193,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.161" +version = "0.2.162" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" [[package]] name = "libflate" @@ -2143,6 +2283,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -2165,7 +2311,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.0", + "hashbrown 0.15.1", ] [[package]] @@ -2754,7 +2900,7 @@ dependencies = [ "flate2", "futures", "getrandom", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "hex", "indexmap", "itoa", @@ -2826,7 +2972,7 @@ dependencies = [ "comfy-table", "either", "hashbrown 0.14.5", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "indexmap", "ndarray", "num-traits", @@ -2844,7 +2990,7 @@ dependencies = [ "serde", "serde_json", "strum_macros", - "thiserror", + "thiserror 1.0.69", "version_check", "xxhash-rust", ] @@ -2863,6 +3009,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "polars-dylib" +version = "0.44.2" +dependencies = [ + "polars", + "polars-arrow", + "polars-core", + "polars-expr", + "polars-lazy", + "polars-mem-engine", + "polars-plan", + "polars-python", +] + [[package]] name = "polars-error" version = "0.44.2" @@ -2872,7 +3032,7 @@ dependencies = [ "polars-arrow-format", "regex", "simdutf8", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2881,7 +3041,7 @@ version = "0.44.2" dependencies = [ "ahash", "bitflags", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "num-traits", "once_cell", "polars-arrow", @@ -2922,7 +3082,7 @@ dependencies = [ "fs4", "futures", "glob", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "home", "itoa", "memchr", @@ -2963,7 +3123,7 @@ dependencies = [ "chrono", "chrono-tz", "fallible-streaming-iterator", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "indexmap", "itoa", "num-traits", @@ -3036,7 +3196,7 @@ dependencies = [ "chrono", "chrono-tz", "either", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "hex", "indexmap", "jsonpath_lib_polars_vendor", @@ 
-3074,7 +3234,7 @@ dependencies = [ "fallible-streaming-iterator", "flate2", "futures", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "lz4", "lz4_flex", "num-traits", @@ -3110,7 +3270,7 @@ dependencies = [ "crossbeam-queue", "enum_dispatch", "futures", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "num-traits", "polars-arrow", "polars-compute", @@ -3140,7 +3300,7 @@ dependencies = [ "ciborium", "either", "futures", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "libloading", "memmap2", "num-traits", @@ -3196,7 +3356,7 @@ dependencies = [ "pyo3", "recursive", "serde_json", - "thiserror", + "thiserror 1.0.69", "version_check", ] @@ -3257,6 +3417,7 @@ dependencies = [ "polars-expr", "polars-io", "polars-mem-engine", + "polars-ops", "polars-parquet", "polars-plan", "polars-utils", @@ -3296,7 +3457,7 @@ dependencies = [ "bytemuck", "bytes", "compact_str", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "indexmap", "libc", "memmap2", @@ -3384,16 +3545,16 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" dependencies = [ "cc", ] [[package]] name = "py-polars" -version = "1.12.0" +version = "1.13.1" dependencies = [ "jemallocator", "libc", @@ -3452,7 +3613,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -3465,14 +3626,14 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] name = "quad-rand" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" @@ -3497,9 +3658,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" dependencies = [ "bytes", "pin-project-lite", @@ -3508,33 +3669,36 @@ dependencies = [ "rustc-hash 2.0.0", "rustls 0.23.16", "socket2", - "thiserror", + "thiserror 2.0.3", "tokio", "tracing", ] [[package]] name = "quinn-proto" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", + "getrandom", "rand", "ring", "rustc-hash 2.0.0", "rustls 0.23.16", + "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.3", "tinyvec", "tracing", + "web-time", ] [[package]] name = "quinn-udp" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" +checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" dependencies = [ "cfg_aliases", "libc", @@ -3664,7 +3828,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.86", + "syn 2.0.87", ] 
[[package]] @@ -3693,7 +3857,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -3710,9 +3874,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -3844,9 +4008,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.38" +version = "0.38.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" +checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" dependencies = [ "bitflags", "errno", @@ -3929,6 +4093,9 @@ name = "rustls-pki-types" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +dependencies = [ + "web-time", +] [[package]] name = "rustls-webpki" @@ -4072,9 +4239,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -4088,9 +4255,9 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] @@ -4106,13 +4273,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -4303,6 +4470,12 @@ dependencies = [ "log", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "stacker" version = "0.1.17" @@ -4359,7 +4532,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -4381,9 +4554,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.86" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -4399,6 +4572,17 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "sysinfo" version = "0.32.0" @@ -4426,9 +4610,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", @@ -4439,22 +4623,42 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.66" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d171f59dbaa811dbbb1aee1e73db92ec2b122911a48e1390dfe327a821ddede" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "thiserror-impl", + "proc-macro2", + "quote", + "syn 2.0.87", ] [[package]] name = "thiserror-impl" -version = "1.0.66" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b08be0f17bd307950653ce45db00cd31200d82b624b36e181337d9c7d92765b5" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -4487,6 +4691,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -4514,9 +4728,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.0" +version = "1.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" dependencies = [ "backtrace", "bytes", @@ -4537,7 +4751,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -4600,7 +4814,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -4645,7 +4859,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -4660,27 +4874,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" -[[package]] -name = "unicode-bidi" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" - [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-reverse" version = "1.0.9" @@ -4716,9 +4915,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ "form_urlencoded", "idna", @@ -4731,6 +4930,18 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "1.11.0" @@ -4812,7 +5023,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -4846,7 +5057,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4880,6 +5091,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" @@ -4950,7 +5171,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -4961,7 +5182,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -5142,6 +5363,18 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "x11rb" version = "0.13.1" @@ -5171,6 +5404,30 @@ version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" +[[package]] +name = "yoke" 
+version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -5189,7 +5446,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", ] [[package]] @@ -5198,6 +5476,28 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zstd" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index 35595086f981..34502bb5e9ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,6 +94,7 @@ zstd = "0.13" polars = { version = "0.44.2", path = "crates/polars", default-features = false } polars-compute = { version = "0.44.2", path = "crates/polars-compute", default-features = false } polars-core = { version = "0.44.2", path = "crates/polars-core", default-features = false } +polars-dylib = { version = "0.44.2", path = "crates/polars-dyn", default-features = false } polars-error = { version = "0.44.2", path = "crates/polars-error", default-features = false } polars-expr = { version = "0.44.2", path = "crates/polars-expr", default-features = false } polars-ffi = { version = "0.44.2", path = "crates/polars-ffi", default-features = false } diff --git a/crates/Makefile b/crates/Makefile index 28622ee061f5..6b3cf3372149 100644 --- a/crates/Makefile +++ b/crates/Makefile @@ -152,5 +152,6 @@ check-wasm: ## Check wasm build without supported features --exclude-features parquet \ --exclude-features performant \ --exclude-features streaming \ - --exclude-features http \ + --exclude-features http \ + --exclude-features full \ --exclude-features test diff --git a/crates/polars-arrow/src/io/ipc/read/common.rs b/crates/polars-arrow/src/io/ipc/read/common.rs index 6b893c0e8ce3..0a1297bf1184 100644 --- a/crates/polars-arrow/src/io/ipc/read/common.rs +++ b/crates/polars-arrow/src/io/ipc/read/common.rs @@ -318,10 +318,14 @@ pub fn read_dictionary( 
     Ok(())
 }
 
-pub fn prepare_projection(
-    schema: &ArrowSchema,
-    mut projection: Vec<usize>,
-) -> (Vec<usize>, PlHashMap<usize, usize>, ArrowSchema) {
+#[derive(Clone)]
+pub struct ProjectionInfo {
+    pub columns: Vec<usize>,
+    pub map: PlHashMap<usize, usize>,
+    pub schema: ArrowSchema,
+}
+
+pub fn prepare_projection(schema: &ArrowSchema, mut projection: Vec<usize>) -> ProjectionInfo {
     let schema = projection
         .iter()
         .map(|x| {
@@ -355,7 +359,11 @@ pub fn prepare_projection(
         }
     }
 
-    (projection, map, schema)
+    ProjectionInfo {
+        columns: projection,
+        map,
+        schema,
+    }
 }
 
 pub fn apply_projection(
diff --git a/crates/polars-arrow/src/io/ipc/read/file.rs b/crates/polars-arrow/src/io/ipc/read/file.rs
index a83e1b758d80..e75fae36730e 100644
--- a/crates/polars-arrow/src/io/ipc/read/file.rs
+++ b/crates/polars-arrow/src/io/ipc/read/file.rs
@@ -305,7 +305,7 @@ fn get_message_from_block_offset<'a, R: Read + Seek>(
         .map_err(|err| polars_err!(oos = OutOfSpecKind::InvalidFlatbufferMessage(err)))
 }
 
-fn get_message_from_block<'a, R: Read + Seek>(
+pub(super) fn get_message_from_block<'a, R: Read + Seek>(
     reader: &mut R,
     block: &arrow_format::ipc::Block,
     message_scratch: &'a mut Vec<u8>,
diff --git a/crates/polars-arrow/src/io/ipc/read/mod.rs b/crates/polars-arrow/src/io/ipc/read/mod.rs
index 88411f9b905f..f4430db7dea2 100644
--- a/crates/polars-arrow/src/io/ipc/read/mod.rs
+++ b/crates/polars-arrow/src/io/ipc/read/mod.rs
@@ -19,6 +19,7 @@ mod schema;
 mod stream;
 
 pub(crate) use common::first_dict_field;
+pub use common::{prepare_projection, ProjectionInfo};
 pub use error::OutOfSpecKind;
 pub use file::{
     deserialize_footer, get_row_count, read_batch, read_file_dictionaries, read_file_metadata,
diff --git a/crates/polars-arrow/src/io/ipc/read/reader.rs b/crates/polars-arrow/src/io/ipc/read/reader.rs
index 8369d2960233..e9523477fe39 100644
--- a/crates/polars-arrow/src/io/ipc/read/reader.rs
+++ b/crates/polars-arrow/src/io/ipc/read/reader.rs
@@ -1,9 +1,9 @@
 use std::io::{Read, Seek};
 
 use polars_error::PolarsResult;
-use polars_utils::aliases::PlHashMap;
 
 use super::common::*;
+use super::file::{get_message_from_block, get_record_batch};
 use super::{read_batch, read_file_dictionaries, Dictionaries, FileMetadata};
 use crate::array::Array;
 use crate::datatypes::ArrowSchema;
@@ -16,7 +16,7 @@ pub struct FileReader<R: Read + Seek> {
     // the dictionaries are going to be read
     dictionaries: Option<Dictionaries>,
     current_block: usize,
-    projection: Option<(Vec<usize>, PlHashMap<usize, usize>, ArrowSchema)>,
+    projection: Option<ProjectionInfo>,
     remaining: usize,
     data_scratch: Vec<u8>,
     message_scratch: Vec<u8>,
@@ -32,10 +32,29 @@ impl<R: Read + Seek> FileReader<R> {
         projection: Option<Vec<usize>>,
         limit: Option<usize>,
     ) -> Self {
-        let projection = projection.map(|projection| {
-            let (p, h, schema) = prepare_projection(&metadata.schema, projection);
-            (p, h, schema)
-        });
+        let projection =
+            projection.map(|projection| prepare_projection(&metadata.schema, projection));
+        Self {
+            reader,
+            metadata,
+            dictionaries: Default::default(),
+            projection,
+            remaining: limit.unwrap_or(usize::MAX),
+            current_block: 0,
+            data_scratch: Default::default(),
+            message_scratch: Default::default(),
+        }
+    }
+
+    /// Creates a new [`FileReader`]. Use `projection` to only take certain columns.
+    /// # Panic
+    /// Panics iff the projection is not in increasing order (e.g. neither `[1, 0]` nor `[0, 1, 1]` is valid)
+    pub fn new_with_projection_info(
+        reader: R,
+        metadata: FileMetadata,
+        projection: Option<ProjectionInfo>,
+        limit: Option<usize>,
+    ) -> Self {
         Self {
             reader,
             metadata,
@@ -52,7 +71,7 @@ impl<R: Read + Seek> FileReader<R> {
     pub fn schema(&self) -> &ArrowSchema {
         self.projection
             .as_ref()
-            .map(|x| &x.2)
+            .map(|x| &x.schema)
             .unwrap_or(&self.metadata.schema)
     }
 
@@ -66,9 +85,23 @@ impl<R: Read + Seek> FileReader<R> {
         self.reader
     }
 
+    pub fn set_current_block(&mut self, idx: usize) {
+        self.current_block = idx;
+    }
+
+    pub fn get_current_block(&self) -> usize {
+        self.current_block
+    }
+
+    /// Take the projection information out of the reader so it can be
+    /// reused in a new reader.
+    pub fn take_projection_info(&mut self) -> Option<ProjectionInfo> {
+        std::mem::take(&mut self.projection)
+    }
+
     /// Get the inner memory scratches so they can be reused in a new writer.
     /// This can be utilized to save memory allocations for performance reasons.
-    pub fn get_scratches(&mut self) -> (Vec<u8>, Vec<u8>) {
+    pub fn take_scratches(&mut self) -> (Vec<u8>, Vec<u8>) {
         (
             std::mem::take(&mut self.data_scratch),
             std::mem::take(&mut self.message_scratch),
@@ -91,6 +124,43 @@ impl<R: Read + Seek> FileReader<R> {
         };
         Ok(())
     }
+
+    /// Skip over blocks until we have seen at most `offset` rows, returning how many rows we
+    /// still have to skip.
+    ///
+    /// This will never go over the `offset`, meaning that if `offset < current_block.len()`,
+    /// the block will not be skipped.
+    pub fn skip_blocks_till_limit(&mut self, offset: u64) -> PolarsResult<u64> {
+        let mut remaining_offset = offset;
+
+        for (i, block) in self.metadata.blocks.iter().enumerate() {
+            let message =
+                get_message_from_block(&mut self.reader, block, &mut self.message_scratch)?;
+            let record_batch = get_record_batch(message)?;
+
+            let length = record_batch.length()?;
+            let length = length as u64;
+
+            if length > remaining_offset {
+                self.current_block = i;
+                return Ok(remaining_offset);
+            }
+
+            remaining_offset -= length;
+        }
+
+        self.current_block = self.metadata.blocks.len();
+        Ok(remaining_offset)
+    }
+
+    pub fn next_record_batch(
+        &mut self,
+    ) -> Option<PolarsResult<arrow_format::ipc::RecordBatchRef<'_>>> {
+        let block = self.metadata.blocks.get(self.current_block)?;
+        self.current_block += 1;
+        let message = get_message_from_block(&mut self.reader, block, &mut self.message_scratch);
+        Some(message.and_then(|m| get_record_batch(m)))
+    }
 }
 
 impl<R: Read + Seek> Iterator for FileReader<R> {
@@ -114,7 +184,7 @@ impl<R: Read + Seek> Iterator for FileReader<R> {
                 &mut self.reader,
                 self.dictionaries.as_ref().unwrap(),
                 &self.metadata,
-                self.projection.as_ref().map(|x| x.0.as_ref()),
+                self.projection.as_ref().map(|x| x.columns.as_ref()),
                 Some(self.remaining),
                 block,
                 &mut self.message_scratch,
             );
             self.remaining -= chunk.as_ref().map(|x| x.len()).unwrap_or_default();
 
-            let chunk = if let Some((_, map, _)) = &self.projection {
+            let chunk = if let Some(ProjectionInfo { map, .. }) = &self.projection {
                 // re-order according to projection
                 chunk.map(|chunk| apply_projection(chunk, map))
             } else {
diff --git a/crates/polars-arrow/src/io/ipc/read/stream.rs b/crates/polars-arrow/src/io/ipc/read/stream.rs
index 87241596cdbe..b2cfb727b385 100644
--- a/crates/polars-arrow/src/io/ipc/read/stream.rs
+++ b/crates/polars-arrow/src/io/ipc/read/stream.rs
@@ -2,7 +2,6 @@ use std::io::Read;
 
 use arrow_format::ipc::planus::ReadAsRoot;
 use polars_error::{polars_bail, polars_err, PolarsError, PolarsResult};
-use polars_utils::aliases::PlHashMap;
 
 use super::super::CONTINUATION_MARKER;
 use super::common::*;
@@ -93,7 +92,7 @@ fn read_next<R: Read>(
     dictionaries: &mut Dictionaries,
     message_buffer: &mut Vec<u8>,
     data_buffer: &mut Vec<u8>,
-    projection: &Option<(Vec<usize>, PlHashMap<usize, usize>, ArrowSchema)>,
+    projection: &Option<ProjectionInfo>,
     scratch: &mut Vec<u8>,
 ) -> PolarsResult<Option<StreamState>> {
     // determine metadata length
@@ -169,7 +168,7 @@ fn read_next<R: Read>(
             batch,
             &metadata.schema,
             &metadata.ipc_schema,
-            projection.as_ref().map(|x| x.0.as_ref()),
+            projection.as_ref().map(|x| x.columns.as_ref()),
             None,
             dictionaries,
             metadata.version,
@@ -179,7 +178,7 @@ fn read_next<R: Read>(
            scratch,
         );
 
-        if let Some((_, map, _)) = projection {
+        if let Some(ProjectionInfo { map, .. }) = projection {
             // re-order according to projection
             chunk
                 .map(|chunk| apply_projection(chunk, map))
@@ -238,7 +237,7 @@ pub struct StreamReader<R: Read> {
     finished: bool,
     data_buffer: Vec<u8>,
     message_buffer: Vec<u8>,
-    projection: Option<(Vec<usize>, PlHashMap<usize, usize>, ArrowSchema)>,
+    projection: Option<ProjectionInfo>,
     scratch: Vec<u8>,
 }
 
@@ -249,10 +248,8 @@ impl<R: Read> StreamReader<R> {
     /// encounter a schema.
     /// To check if the reader is done, use `is_finished(self)`
    pub fn new(reader: R, metadata: StreamMetadata, projection: Option<Vec<usize>>) -> Self {
-        let projection = projection.map(|projection| {
-            let (p, h, schema) = prepare_projection(&metadata.schema, projection);
-            (p, h, schema)
-        });
+        let projection =
+            projection.map(|projection| prepare_projection(&metadata.schema, projection));
 
         Self {
             reader,
@@ -275,7 +272,7 @@ impl<R: Read> StreamReader<R> {
     pub fn schema(&self) -> &ArrowSchema {
         self.projection
             .as_ref()
-            .map(|x| &x.2)
+            .map(|x| &x.schema)
             .unwrap_or(&self.metadata.schema)
     }
 
diff --git a/crates/polars-arrow/src/record_batch.rs b/crates/polars-arrow/src/record_batch.rs
index f58d129831f1..2b0b8112ea9e 100644
--- a/crates/polars-arrow/src/record_batch.rs
+++ b/crates/polars-arrow/src/record_batch.rs
@@ -9,7 +9,7 @@ use crate::array::{Array, ArrayRef};
 /// the same length, [`RecordBatchT::len`].
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct RecordBatchT<A: AsRef<dyn Array>> {
-    length: usize,
+    height: usize,
     arrays: Vec<A>,
 }
 
@@ -29,14 +29,14 @@ impl<A: AsRef<dyn Array>> RecordBatchT<A> {
     ///
     /// # Error
     ///
-    /// I.f.f. the length does not match the length of any of the arrays
-    pub fn try_new(length: usize, arrays: Vec<A>) -> PolarsResult<Self> {
+    /// I.f.f. the height does not match the length of any of the arrays
+    pub fn try_new(height: usize, arrays: Vec<A>) -> PolarsResult<Self> {
         polars_ensure!(
-            arrays.iter().all(|arr| arr.as_ref().len() == length),
+            arrays.iter().all(|arr| arr.as_ref().len() == height),
             ComputeError: "RecordBatch requires all its arrays to have an equal number of rows",
         );
 
-        Ok(Self { length, arrays })
+        Ok(Self { height, arrays })
     }
 
     /// returns the [`Array`]s in [`RecordBatchT`]
@@ -51,7 +51,17 @@ impl<A: AsRef<dyn Array>> RecordBatchT<A> {
     /// returns the number of rows of every array
     pub fn len(&self) -> usize {
-        self.length
+        self.height
+    }
+
+    /// returns the number of rows of every array
+    pub fn height(&self) -> usize {
+        self.height
+    }
+
+    /// returns the number of arrays
+    pub fn width(&self) -> usize {
+        self.arrays.len()
     }
 
     /// returns whether the columns have any rows
diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_bottom_k.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_bottom_k.rs
index 7f257f23f59e..7787ef28076f 100644
--- a/crates/polars-core/src/chunked_array/ops/sort/arg_bottom_k.rs
+++ b/crates/polars-core/src/chunked_array/ops/sort/arg_bottom_k.rs
@@ -39,6 +39,11 @@ pub fn _arg_bottom_k(
     _broadcast_bools(by_column.len(), &mut sort_options.descending);
     _broadcast_bools(by_column.len(), &mut sort_options.nulls_last);
 
+    // Don't go into row encoding.
+    if by_column.len() == 1 && sort_options.limit.is_some() && !sort_options.maintain_order {
+        return Ok(NoNull::new(by_column[0].arg_sort((&*sort_options).into())));
+    }
+
     let encoded = _get_rows_encoded(
         by_column,
         &sort_options.descending,
diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_sort.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_sort.rs
index ca34d37318a7..4f9a1ff9e9b3 100644
--- a/crates/polars-core/src/chunked_array/ops/sort/arg_sort.rs
+++ b/crates/polars-core/src/chunked_array/ops/sort/arg_sort.rs
@@ -18,7 +18,7 @@ pub(super) fn arg_sort(
     iters: I,
     options: SortOptions,
     null_count: usize,
-    len: usize,
+    mut len: usize,
 ) -> IdxCa
 where
     I: IntoIterator<Item = J>,
@@ -49,14 +49,46 @@ where
         vals.extend(iter);
     }
 
-    sort_impl(vals.as_mut_slice(), options);
+    let vals = if let Some((limit, desc)) = options.limit {
+        let limit = limit as usize;
+        // Overwrite output len.
+        len = limit;
+        let out = if limit >= vals.len() {
+            vals.as_mut_slice()
+        } else if desc {
+            let (lower, _el, _upper) = vals
+                .as_mut_slice()
+                .select_nth_unstable_by(limit, |a, b| b.1.tot_cmp(&a.1));
+            lower
+        } else {
+            let (lower, _el, _upper) = vals
+                .as_mut_slice()
+                .select_nth_unstable_by(limit, |a, b| a.1.tot_cmp(&b.1));
+            lower
+        };
+
+        sort_impl(out, options);
+        out
+    } else {
+        sort_impl(vals.as_mut_slice(), options);
+        vals.as_slice()
+    };
 
-    let iter = vals.into_iter().map(|(idx, _v)| idx);
+    let iter = vals.iter().map(|(idx, _v)| idx).copied();
     let idx = if nulls_last {
         let mut idx = Vec::with_capacity(len);
         idx.extend(iter);
-        idx.extend(nulls_idx);
+
+        let nulls_idx = if options.limit.is_some() {
+            &nulls_idx[..len - idx.len()]
+        } else {
+            &nulls_idx
+        };
+        idx.extend_from_slice(nulls_idx);
         idx
+    } else if options.limit.is_some() {
+        nulls_idx.extend(iter.take(len - nulls_idx.len()));
+        nulls_idx
     } else {
         let ptr = nulls_idx.as_ptr() as usize;
         nulls_idx.extend(iter);
@@ -90,9 +122,29 @@ where
         }));
     }
 
-    sort_impl(vals.as_mut_slice(), options);
+    let vals = if let Some((limit, desc)) = options.limit {
+        let limit = limit as usize;
+        let out = if limit >= vals.len() {
+            vals.as_mut_slice()
+        } else if desc {
+            let (lower, _el, _upper) = vals
+                .as_mut_slice()
+                .select_nth_unstable_by(limit, |a, b| b.1.tot_cmp(&a.1));
+            lower
+        } else {
+            let (lower, _el, _upper) = vals
+                .as_mut_slice()
+                .select_nth_unstable_by(limit, |a, b| a.1.tot_cmp(&b.1));
+            lower
+        };
+        sort_impl(out, options);
+        out
+    } else {
+        sort_impl(vals.as_mut_slice(), options);
+        vals.as_slice()
+    };
 
-    let iter = vals.into_iter().map(|(idx, _v)| idx);
+    let iter = vals.iter().map(|(idx, _v)| idx).copied();
     let idx: Vec<_> = iter.collect_trusted();
 
     ChunkedArray::with_chunk(name, IdxArr::from_data_default(Buffer::from(idx), None))
diff --git a/crates/polars-core/src/chunked_array/ops/sort/categorical.rs b/crates/polars-core/src/chunked_array/ops/sort/categorical.rs
index 5dd71a7b1eb8..c89e0790f251 100644
--- a/crates/polars-core/src/chunked_array/ops/sort/categorical.rs
+++ b/crates/polars-core/src/chunked_array/ops/sort/categorical.rs
@@ -53,6 +53,7 @@ impl CategoricalChunked {
             descending,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         })
     }
 
diff --git a/crates/polars-core/src/chunked_array/ops/sort/mod.rs b/crates/polars-core/src/chunked_array/ops/sort/mod.rs
index 727f2ace15a8..add7e8b696a4 100644
--- a/crates/polars-core/src/chunked_array/ops/sort/mod.rs
+++ b/crates/polars-core/src/chunked_array/ops/sort/mod.rs
@@ -335,6 +335,7 @@ impl ChunkSort<StringType> for StringChunked {
             nulls_last: false,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         })
     }
 
@@ -406,6 +407,7 @@ impl ChunkSort<BinaryType> for BinaryChunked {
             nulls_last: false,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         })
     }
 
@@ -536,6 +538,7 @@ impl ChunkSort<BinaryOffsetType> for BinaryOffsetChunked {
             nulls_last: false,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         })
     }
 
@@ -672,6 +675,7 @@ impl ChunkSort<BooleanType> for BooleanChunked {
             nulls_last: false,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         })
     }
 
@@ -797,6 +801,7 @@ mod test {
             nulls_last: false,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         });
         assert_eq!(
             Vec::from(&out),
@@ -816,6 +821,7 @@ mod test {
            nulls_last: true,
            multithreaded: true,
            maintain_order: false,
+           limit: None,
         });
         assert_eq!(
             Vec::from(&out),
@@ -925,6 +931,7 @@ mod test {
             nulls_last: false,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         });
         let expected = &[None, None, Some("a"), Some("b"), Some("c")];
Some("b"), Some("c")]; assert_eq!(Vec::from(&out), expected); @@ -934,6 +941,7 @@ mod test { nulls_last: false, multithreaded: true, maintain_order: false, + limit: None, }); let expected = &[None, None, Some("c"), Some("b"), Some("a")]; @@ -944,6 +952,7 @@ mod test { nulls_last: true, multithreaded: true, maintain_order: false, + limit: None, }); let expected = &[Some("a"), Some("b"), Some("c"), None, None]; assert_eq!(Vec::from(&out), expected); @@ -953,6 +962,7 @@ mod test { nulls_last: true, multithreaded: true, maintain_order: false, + limit: None, }); let expected = &[Some("c"), Some("b"), Some("a"), None, None]; assert_eq!(Vec::from(&out), expected); diff --git a/crates/polars-core/src/chunked_array/ops/sort/options.rs b/crates/polars-core/src/chunked_array/ops/sort/options.rs index 046d0b251b04..95bff0b1b47a 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/options.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/options.rs @@ -41,6 +41,10 @@ pub struct SortOptions { /// If true maintain the order of equal elements. /// Default `false`. pub maintain_order: bool, + /// Limit a sort output, this is for optimization purposes and might be ignored. + /// - Len + /// - Descending + pub limit: Option<(IdxSize, bool)>, } /// Sort options for multi-series sorting. @@ -96,6 +100,10 @@ pub struct SortMultipleOptions { pub multithreaded: bool, /// Whether maintain the order of equal elements. Default `false`. pub maintain_order: bool, + /// Limit a sort output, this is for optimization purposes and might be ignored. + /// - Len + /// - Descending + pub limit: Option<(IdxSize, bool)>, } impl Default for SortOptions { @@ -105,6 +113,7 @@ impl Default for SortOptions { nulls_last: false, multithreaded: true, maintain_order: false, + limit: None, } } } @@ -116,6 +125,7 @@ impl Default for SortMultipleOptions { nulls_last: vec![false], multithreaded: true, maintain_order: false, + limit: None, } } } @@ -224,6 +234,7 @@ impl From<&SortOptions> for SortMultipleOptions { nulls_last: vec![value.nulls_last], multithreaded: value.multithreaded, maintain_order: value.maintain_order, + limit: value.limit, } } } @@ -235,6 +246,7 @@ impl From<&SortMultipleOptions> for SortOptions { nulls_last: value.nulls_last.first().copied().unwrap_or(false), multithreaded: value.multithreaded, maintain_order: value.maintain_order, + limit: value.limit, } } } diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 30d96649762f..f4c87ce0ad22 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -572,6 +572,52 @@ impl DataType { } } + /// Try to get the maximum value for this datatype. + pub fn max(&self) -> PolarsResult { + use DataType::*; + let v = match self { + #[cfg(feature = "dtype-i8")] + Int8 => Scalar::from(i8::MAX), + #[cfg(feature = "dtype-i16")] + Int16 => Scalar::from(i16::MAX), + Int32 => Scalar::from(i32::MAX), + Int64 => Scalar::from(i64::MAX), + #[cfg(feature = "dtype-u8")] + UInt8 => Scalar::from(u8::MAX), + #[cfg(feature = "dtype-u16")] + UInt16 => Scalar::from(u16::MAX), + UInt32 => Scalar::from(u32::MAX), + UInt64 => Scalar::from(u64::MAX), + Float32 => Scalar::from(f32::INFINITY), + Float64 => Scalar::from(f64::INFINITY), + dt => polars_bail!(ComputeError: "cannot determine upper bound for dtype `{}`", dt), + }; + Ok(v) + } + + /// Try to get the minimum value for this datatype. 
+    pub fn min(&self) -> PolarsResult<Scalar> {
+        use DataType::*;
+        let v = match self {
+            #[cfg(feature = "dtype-i8")]
+            Int8 => Scalar::from(i8::MIN),
+            #[cfg(feature = "dtype-i16")]
+            Int16 => Scalar::from(i16::MIN),
+            Int32 => Scalar::from(i32::MIN),
+            Int64 => Scalar::from(i64::MIN),
+            #[cfg(feature = "dtype-u8")]
+            UInt8 => Scalar::from(u8::MIN),
+            #[cfg(feature = "dtype-u16")]
+            UInt16 => Scalar::from(u16::MIN),
+            UInt32 => Scalar::from(u32::MIN),
+            UInt64 => Scalar::from(u64::MIN),
+            Float32 => Scalar::from(f32::NEG_INFINITY),
+            Float64 => Scalar::from(f64::NEG_INFINITY),
+            dt => polars_bail!(ComputeError: "cannot determine lower bound for dtype `{}`", dt),
+        };
+        Ok(v)
+    }
+
     /// Convert to an Arrow data type.
     #[inline]
     pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType {
diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs
index e3b969a81756..0d8fef7f4c4a 100644
--- a/crates/polars-core/src/frame/mod.rs
+++ b/crates/polars-core/src/frame/mod.rs
@@ -3,6 +3,7 @@
 use std::borrow::Cow;
 use std::{mem, ops};
 
+use polars_row::ArrayRef;
 use polars_utils::itertools::Itertools;
 use rayon::prelude::*;
 
@@ -1989,6 +1990,12 @@ impl DataFrame {
             return Ok(out);
         }
         if let Some((0, k)) = slice {
+            let desc = if sort_options.descending.len() == 1 {
+                sort_options.descending[0]
+            } else {
+                false
+            };
+            sort_options.limit = Some((k as IdxSize, desc));
             return self.bottom_k_impl(k, by_column, sort_options);
         }
 
@@ -2012,6 +2019,7 @@ impl DataFrame {
                 nulls_last: sort_options.nulls_last[0],
                 multithreaded: sort_options.multithreaded,
                 maintain_order: sort_options.maintain_order,
+                limit: sort_options.limit,
             };
             // fast path for a frame with a single series
             // no need to compute the sort indices and then take by these indices
@@ -3327,6 +3335,31 @@ impl DataFrame {
     pub(crate) fn infer_height(cols: &[Column]) -> usize {
         cols.first().map_or(0, Column::len)
     }
+
+    pub fn append_record_batch(&mut self, rb: RecordBatchT<ArrayRef>) -> PolarsResult<()> {
+        polars_ensure!(
+            rb.arrays().len() == self.width(),
+            InvalidOperation: "attempt to extend dataframe of width {} with record batch of width {}",
+            self.width(),
+            rb.arrays().len(),
+        );
+
+        if rb.height() == 0 {
+            return Ok(());
+        }
+
+        // SAFETY:
+        // - we don't adjust the names of the columns
+        // - each column gets appended the same number of rows, which is an invariant of
+        //   record_batch.
+        let columns = unsafe { self.get_columns_mut() };
+        for (col, arr) in columns.iter_mut().zip(rb.into_arrays()) {
+            let arr_series = Series::from_arrow_chunks(PlSmallStr::EMPTY, vec![arr])?.into_column();
+            col.append(&arr_series)?;
+        }
+
+        Ok(())
+    }
 }
 
 pub struct RecordBatchIter<'a> {
diff --git a/crates/polars-core/src/frame/upstream_traits.rs b/crates/polars-core/src/frame/upstream_traits.rs
index 38b346ace652..1392f87c052f 100644
--- a/crates/polars-core/src/frame/upstream_traits.rs
+++ b/crates/polars-core/src/frame/upstream_traits.rs
@@ -1,5 +1,7 @@
 use std::ops::{Index, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
 
+use arrow::record_batch::RecordBatchT;
+
 use crate::prelude::*;
 
 impl FromIterator<Series> for DataFrame {
@@ -22,6 +24,32 @@ impl FromIterator<Series> for DataFrame {
     }
 }
 
+impl TryExtend<RecordBatchT<ArrayRef>> for DataFrame {
+    fn try_extend<I: IntoIterator<Item = RecordBatchT<ArrayRef>>>(
+        &mut self,
+        iter: I,
+    ) -> PolarsResult<()> {
+        for record_batch in iter {
+            self.append_record_batch(record_batch)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl TryExtend<PolarsResult<RecordBatchT<ArrayRef>>> for DataFrame {
+    fn try_extend<I: IntoIterator<Item = PolarsResult<RecordBatchT<ArrayRef>>>>(
+        &mut self,
+        iter: I,
+    ) -> PolarsResult<()> {
+        for record_batch in iter {
+            self.append_record_batch(record_batch?)?;
+        }
+
+        Ok(())
+    }
+}
+
 impl Index<usize> for DataFrame {
     type Output = Column;
 
diff --git a/crates/polars-core/src/scalar/from.rs b/crates/polars-core/src/scalar/from.rs
index 3af8671dadd1..c104c2ea8573 100644
--- a/crates/polars-core/src/scalar/from.rs
+++ b/crates/polars-core/src/scalar/from.rs
@@ -1,3 +1,5 @@
+use polars_utils::pl_str::PlSmallStr;
+
 use super::{AnyValue, DataType, Scalar};
 
 macro_rules! impl_from {
@@ -25,4 +27,5 @@ impl_from! {
     (u64, UInt64, UInt64)
     (f32, Float32, Float32)
     (f64, Float64, Float64)
+    (PlSmallStr, StringOwned, String)
 }
diff --git a/crates/polars-dylib/Cargo.toml b/crates/polars-dylib/Cargo.toml
new file mode 100644
index 000000000000..5cc963f2d701
--- /dev/null
+++ b/crates/polars-dylib/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "polars-dylib"
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+homepage.workspace = true
+license.workspace = true
+repository.workspace = true
+
+[lib]
+crate-type = ["dylib", "rlib"]
+
+[dependencies]
+arrow = { workspace = true, optional = true, features = ["io_flight"] }
+polars = { workspace = true, features = ["full"] }
+polars-core = { workspace = true, optional = true }
+polars-expr = { workspace = true, optional = true }
+polars-lazy = { workspace = true, optional = true }
+polars-mem-engine = { workspace = true, optional = true }
+polars-plan = { workspace = true, optional = true }
+polars-python = { workspace = true, optional = true, default-features = true }
+
+[features]
+private = ["polars-plan", "arrow", "polars-core", "polars-lazy", "polars-expr", "polars-mem-engine"]
+python = ["polars-plan?/python", "polars-python", "polars-lazy?/python"]
diff --git a/crates/polars-dylib/README.md b/crates/polars-dylib/README.md
new file mode 100644
index 000000000000..3fd4b30de8f7
--- /dev/null
+++ b/crates/polars-dylib/README.md
@@ -0,0 +1,16 @@
+# Polars dynamic library
+
+```toml
+# Cargo.toml
+[workspace.dependencies.polars]
+package = "polars-dylib"
+```
+
+```toml
+# .cargo/config.toml
+[build]
+rustflags = [
+    "-C",
+    "prefer-dynamic",
+]
+```
diff --git a/crates/polars-dylib/src/lib.rs b/crates/polars-dylib/src/lib.rs
new file mode 100644
index 000000000000..907ce175aec8
--- /dev/null
+++ b/crates/polars-dylib/src/lib.rs
@@ -0,0 +1,15 @@
+#[cfg(feature = "private")]
+pub use arrow as _arrow;
+pub use polars::*;
+#[cfg(feature = "private")]
+pub use polars_core as _core;
+#[cfg(feature = "private")]
+pub use polars_expr as _expr;
+#[cfg(feature = "private")]
+pub use polars_lazy as _lazy;
+#[cfg(feature = "private")]
+pub use polars_mem_engine as _mem_engine;
+#[cfg(feature = "private")]
+pub use polars_plan as _plan;
+#[cfg(feature = "python")]
+pub use polars_python as _python;
diff --git a/crates/polars-expr/src/expressions/sortby.rs b/crates/polars-expr/src/expressions/sortby.rs
index 1624d7c9bcd6..fad081cb49ed 100644
--- a/crates/polars-expr/src/expressions/sortby.rs
+++ b/crates/polars-expr/src/expressions/sortby.rs
@@ -160,6 +160,7 @@ fn sort_by_groups_multiple_by(
         nulls_last: nulls_last.to_owned(),
         multithreaded,
         maintain_order,
+        limit: None,
     };
 
     let sorted_idx = groups[0]
@@ -180,6 +181,7 @@ fn sort_by_groups_multiple_by(
         nulls_last: nulls_last.to_owned(),
         multithreaded,
         maintain_order,
+        limit: None,
     };
     let sorted_idx = groups[0]
         .as_materialized_series()
diff --git a/crates/polars-io/src/cloud/polars_object_store.rs b/crates/polars-io/src/cloud/polars_object_store.rs
index 9738e0cbdbe4..084408e8bc41 100644
--- a/crates/polars-io/src/cloud/polars_object_store.rs
+++ b/crates/polars-io/src/cloud/polars_object_store.rs
@@ -2,14 +2,16 @@ use std::ops::Range;
 use std::sync::Arc;
 
 use bytes::Bytes;
-use futures::StreamExt;
+use futures::{StreamExt, TryStreamExt};
 use object_store::path::Path;
 use object_store::{ObjectMeta, ObjectStore};
-use polars_error::{to_compute_err, PolarsResult};
+use polars_core::prelude::{InitHashMaps, PlHashMap};
+use polars_error::{to_compute_err, PolarsError, PolarsResult};
 use tokio::io::AsyncWriteExt;
 
 use crate::pl_async::{
-    self, tune_with_concurrency_budget, with_concurrency_budget, MAX_BUDGET_PER_REQUEST,
+    self, get_concurrency_limit, get_download_chunk_size, tune_with_concurrency_budget,
+    with_concurrency_budget, MAX_BUDGET_PER_REQUEST,
 };
 
 /// Polars specific wrapper for `Arc<dyn ObjectStore>` that limits the number of
@@ -23,63 +25,184 @@ impl PolarsObjectStore {
         Self(store)
     }
 
-    pub async fn get(&self, path: &Path) -> PolarsResult<Bytes> {
-        tune_with_concurrency_budget(1, || async {
-            self.0
-                .get(path)
-                .await
-                .map_err(to_compute_err)?
-                .bytes()
-                .await
-                .map_err(to_compute_err)
-        })
-        .await
+    /// Returns a buffered stream that downloads concurrently up to the concurrency limit.
+    fn get_buffered_ranges_stream<'a, T: Iterator<Item = Range<usize>>>(
+        &'a self,
+        path: &'a Path,
+        ranges: T,
+    ) -> impl StreamExt<Item = PolarsResult<Bytes>>
+    + TryStreamExt<Ok = Bytes, Error = PolarsError, Item = PolarsResult<Bytes>>
+    + use<'a, T> {
+        futures::stream::iter(
+            ranges
+                .map(|range| async { self.0.get_range(path, range).await.map_err(to_compute_err) }),
+        )
+        // Add a limit locally as this gets run inside a single `tune_with_concurrency_budget`.
+        .buffered(get_concurrency_limit() as usize)
+    }
 
     pub async fn get_range(&self, path: &Path, range: Range<usize>) -> PolarsResult<Bytes> {
-        tune_with_concurrency_budget(1, || self.0.get_range(path, range))
-            .await
-            .map_err(to_compute_err)
+        let parts = split_range(range.clone());
+
+        if parts.len() == 1 {
+            tune_with_concurrency_budget(1, || self.0.get_range(path, range))
+                .await
+                .map_err(to_compute_err)
+        } else {
+            let parts = tune_with_concurrency_budget(
+                parts.len().clamp(0, MAX_BUDGET_PER_REQUEST) as u32,
+                || {
+                    self.get_buffered_ranges_stream(path, parts)
+                        .try_collect::<Vec<Bytes>>()
+                },
+            )
+            .await?;
+
+            let mut combined = Vec::with_capacity(range.len());
+
+            for part in parts {
+                combined.extend_from_slice(&part)
+            }
+
+            assert_eq!(combined.len(), range.len());
+
+            PolarsResult::Ok(Bytes::from(combined))
+        }
     }
 
-    pub async fn get_ranges(
+    /// Fetch byte ranges into a HashMap keyed by the range start. This will mutably sort the
+    /// `ranges` slice for coalescing.
+    ///
+    /// # Panics
+    /// Panics if the same range start is used by more than 1 range.
+    pub async fn get_ranges_sort<
+        K: TryFrom<usize, Error: std::fmt::Debug> + std::hash::Hash + Eq,
+        T: From<Bytes>,
+    >(
         &self,
         path: &Path,
-        ranges: &[Range<usize>],
-    ) -> PolarsResult<Vec<Bytes>> {
+        ranges: &mut [Range<usize>],
+    ) -> PolarsResult<PlHashMap<K, T>> {
+        if ranges.is_empty() {
+            return Ok(Default::default());
+        }
+
+        let mut out = PlHashMap::with_capacity(ranges.len());
+
+        ranges.sort_unstable_by_key(|x| x.start);
+
+        let (merged_ranges, merged_ends): (Vec<_>, Vec<_>) = merge_ranges(ranges).unzip();
+
+        let mut stream = self.get_buffered_ranges_stream(path, merged_ranges.iter().cloned());
+
         tune_with_concurrency_budget(
-            (ranges.len() as u32).clamp(0, MAX_BUDGET_PER_REQUEST as u32),
-            || self.0.get_ranges(path, ranges),
+            merged_ranges.len().clamp(0, MAX_BUDGET_PER_REQUEST) as u32,
+            || async {
+                let mut len = 0;
+                let mut current_offset = 0;
+                let mut ends_iter = merged_ends.iter();
+
+                let mut splitted_parts = vec![];
+
+                while let Some(bytes) = stream.try_next().await? {
+                    len += bytes.len();
+                    let end = *ends_iter.next().unwrap();
+
+                    if end == 0 {
+                        splitted_parts.push(bytes);
+                        continue;
+                    }
+
+                    let full_range = ranges[current_offset..end]
+                        .iter()
+                        .cloned()
+                        .reduce(|l, r| l.start.min(r.start)..l.end.max(r.end))
+                        .unwrap();
+
+                    let bytes = if splitted_parts.is_empty() {
+                        bytes
+                    } else {
+                        let mut out = Vec::with_capacity(full_range.len());
+
+                        for x in splitted_parts.drain(..) {
+                            out.extend_from_slice(&x);
+                        }
+
+                        out.extend_from_slice(&bytes);
+                        Bytes::from(out)
+                    };
+
+                    assert_eq!(bytes.len(), full_range.len());
+
+                    for range in &ranges[current_offset..end] {
+                        let v = out.insert(
+                            K::try_from(range.start).unwrap(),
+                            T::from(bytes.slice(
+                                range.start - full_range.start..range.end - full_range.start,
+                            )),
+                        );
+
+                        assert!(v.is_none()); // duplicate range start
+                    }
+
+                    current_offset = end;
+                }
+
+                assert!(splitted_parts.is_empty());
+
+                PolarsResult::Ok(pl_async::Size::from(len as u64))
+            },
         )
-        .await
-        .map_err(to_compute_err)
+        .await?;
+
+        Ok(out)
     }
 
-    pub async fn download<F: tokio::io::AsyncWrite + std::marker::Unpin>(
-        &self,
-        path: &Path,
-        file: &mut F,
-    ) -> PolarsResult<()> {
-        tune_with_concurrency_budget(1, || async {
-            let mut stream = self
-                .0
-                .get(path)
-                .await
-                .map_err(to_compute_err)?
-                .into_stream();
-
-            let mut len = 0;
-            while let Some(bytes) = stream.next().await {
-                let bytes = bytes.map_err(to_compute_err)?;
-                len += bytes.len();
-                file.write_all(bytes.as_ref())
+    pub async fn download(&self, path: &Path, file: &mut tokio::fs::File) -> PolarsResult<()> {
+        let opt_size = self.head(path).await.ok().map(|x| x.size);
+        let parts = opt_size.map(|x| split_range(0..x)).filter(|x| x.len() > 1);
+
+        if let Some(parts) = parts {
+            tune_with_concurrency_budget(
+                parts.len().clamp(0, MAX_BUDGET_PER_REQUEST) as u32,
+                || async {
+                    let mut stream = self.get_buffered_ranges_stream(path, parts);
+                    let mut len = 0;
+                    while let Some(bytes) = stream.try_next().await? {
+                        len += bytes.len();
+                        file.write_all(&bytes).await.map_err(to_compute_err)?;
+                    }
+
+                    assert_eq!(len, opt_size.unwrap());
+
+                    PolarsResult::Ok(pl_async::Size::from(len as u64))
+                },
+            )
+            .await?
+        } else {
+            tune_with_concurrency_budget(1, || async {
+                let mut stream = self
+                    .0
+                    .get(path)
                     .await
-                    .map_err(to_compute_err)?;
-            }
+                    .map_err(to_compute_err)?
+                    .into_stream();
+
+                let mut len = 0;
+                while let Some(bytes) = stream.try_next().await? {
+                    len += bytes.len();
+                    file.write_all(&bytes).await.map_err(to_compute_err)?;
+                }
+
+                PolarsResult::Ok(pl_async::Size::from(len as u64))
+            })
+            .await?
+        };
+
+        // Dropping is delayed for tokio async files so we need to explicitly
+        // flush here (https://github.com/tokio-rs/tokio/issues/2307#issuecomment-596336451).
+        file.sync_all().await.map_err(PolarsError::from)?;
 
-            PolarsResult::Ok(pl_async::Size::from(len as u64))
-        })
-        .await?;
         Ok(())
     }
 
@@ -113,3 +236,229 @@ impl PolarsObjectStore {
             .map_err(to_compute_err)
     }
 }
+
+/// Splits a single range into multiple smaller ranges, which can be downloaded concurrently for
+/// much higher throughput.
+fn split_range(range: Range<usize>) -> impl ExactSizeIterator<Item = Range<usize>> {
+    let chunk_size = get_download_chunk_size();
+
+    // Calculate n_parts such that we are as close as possible to the `chunk_size`.
+    let n_parts = [
+        (range.len().div_ceil(chunk_size)).max(1),
+        (range.len() / chunk_size).max(1),
+    ]
+    .into_iter()
+    .min_by_key(|x| (range.len() / *x).abs_diff(chunk_size))
+    .unwrap();
+
+    let chunk_size = (range.len() / n_parts).max(1);
+
+    assert_eq!(n_parts, (range.len() / chunk_size).max(1));
+    let bytes_rem = range.len() % chunk_size;
+
+    (0..n_parts).map(move |part_no| {
+        let (start, end) = if part_no == 0 {
+            // Download remainder length in the first chunk since it starts downloading first.
+            let end = range.start + chunk_size + bytes_rem;
+            let end = if end > range.end { range.end } else { end };
+            (range.start, end)
+        } else {
+            let start = bytes_rem + range.start + part_no * chunk_size;
+            (start, start + chunk_size)
+        };
+
+        start..end
+    })
+}
+
+/// Note: For optimal performance, `ranges` should be sorted. More generally,
+/// ranges placed next to each other should also be close in range value.
+///
+/// # Returns
+/// `[(range1, end1), (range2, end2)]`, where:
+/// * `range1` contains bytes for the ranges from `ranges[0..end1]`
+/// * `range2` contains bytes for the ranges from `ranges[end1..end2]`
+/// * etc..
+///
+/// Note that if an end value is 0, it means the range is a splitted part and should be combined.
+fn merge_ranges(ranges: &[Range<usize>]) -> impl Iterator<Item = (Range<usize>, usize)> + '_ {
+    let chunk_size = get_download_chunk_size();
+
+    let mut current_merged_range = ranges.first().map_or(0..0, Clone::clone);
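+    // A neighbouring range is absorbed into `current_merged_range` when it overlaps,
+    // or when the gap to it is at most ~12.5% of the larger of the bytes gathered so
+    // far and the incoming range (clamped to 1..=8 MiB) and merging keeps the merged
+    // length at least as close to `chunk_size`; otherwise the current range is flushed.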
+    // Number of fetched bytes excluding excess.
+    let mut current_n_bytes = current_merged_range.len();
+
+    (0..ranges.len())
+        .filter_map(move |current_idx| {
+            let current_idx = 1 + current_idx;
+
+            if current_idx == ranges.len() {
+                // No more items - flush current state.
+                Some((current_merged_range.clone(), current_idx))
+            } else {
+                let range = ranges[current_idx].clone();
+
+                let new_merged = current_merged_range.start.min(range.start)
+                    ..current_merged_range.end.max(range.end);
+
+                // E.g.:
+                // |--------|
+                //  oo          // range1
+                //       oo     // range2
+                //    ^^^       // distance = 3, is_overlapping = false
+                // E.g.:
+                // |--------|
+                //  ooooo       // range1
+                //    ooooo     // range2
+                //    ^^        // distance = 2, is_overlapping = true
+                let (distance, is_overlapping) = {
+                    let l = current_merged_range.end.min(range.end);
+                    let r = current_merged_range.start.max(range.start);
+
+                    (r.abs_diff(l), r < l)
+                };
+
+                let should_merge = is_overlapping || {
+                    let leq_current_len_dist_to_chunk_size = new_merged.len().abs_diff(chunk_size)
+                        <= current_merged_range.len().abs_diff(chunk_size);
+                    let gap_tolerance =
+                        (current_n_bytes.max(range.len()) / 8).clamp(1024 * 1024, 8 * 1024 * 1024);
+
+                    leq_current_len_dist_to_chunk_size && distance <= gap_tolerance
+                };
+
+                if should_merge {
+                    // Merge to existing range
+                    current_merged_range = new_merged;
+                    current_n_bytes += if is_overlapping {
+                        range.len() - distance
+                    } else {
+                        range.len()
+                    };
+                    None
+                } else {
+                    let out = (current_merged_range.clone(), current_idx);
+                    current_merged_range = range;
+                    current_n_bytes = current_merged_range.len();
+                    Some(out)
+                }
+            }
+        })
+        .flat_map(|x| {
+            // Split large individual ranges within the list of ranges.
+            let (range, end) = x;
+            let split = split_range(range.clone());
+            let len = split.len();
+
+            split
+                .enumerate()
+                .map(move |(i, range)| (range, if 1 + i == len { end } else { 0 }))
+        })
+}
+
+#[cfg(test)]
+mod tests {
+
+    #[test]
+    fn test_split_range() {
+        use super::{get_download_chunk_size, split_range};
+
+        let chunk_size = get_download_chunk_size();
+
+        assert_eq!(chunk_size, 64 * 1024 * 1024);
+
+        #[allow(clippy::single_range_in_vec_init)]
+        {
+            // Round-trip empty ranges.
+            assert_eq!(split_range(0..0).collect::<Vec<_>>(), [0..0]);
+            assert_eq!(split_range(3..3).collect::<Vec<_>>(), [3..3]);
+        }
+
+        // Threshold to start splitting to 2 ranges
+        //
+        // n - chunk_size == chunk_size - n / 2
+        // n + n / 2 == 2 * chunk_size
+        // 3 * n == 4 * chunk_size
+        // n = 4 * chunk_size / 3
+        let n = 4 * chunk_size / 3;
+
+        #[allow(clippy::single_range_in_vec_init)]
+        {
+            assert_eq!(split_range(0..n).collect::<Vec<_>>(), [0..89478485]);
+        }
+
+        assert_eq!(
+            split_range(0..n + 1).collect::<Vec<_>>(),
+            [0..44739243, 44739243..89478486]
+        );
+
+        // Threshold to start splitting to 3 ranges
+        //
+        // n / 2 - chunk_size == chunk_size - n / 3
+        // n / 2 + n / 3 == 2 * chunk_size
+        // 5 * n == 12 * chunk_size
+        // n == 12 * chunk_size / 5
+        let n = 12 * chunk_size / 5;
+
+        assert_eq!(
+            split_range(0..n).collect::<Vec<_>>(),
+            [0..80530637, 80530637..161061273]
+        );
+
+        assert_eq!(
+            split_range(0..n + 1).collect::<Vec<_>>(),
+            [0..53687092, 53687092..107374183, 107374183..161061274]
+        );
+    }
+
+    #[test]
+    fn test_merge_ranges() {
+        use super::{get_download_chunk_size, merge_ranges};
+
+        let chunk_size = get_download_chunk_size();
+
+        assert_eq!(chunk_size, 64 * 1024 * 1024);
+
+        // Round-trip empty slice
+        assert_eq!(merge_ranges(&[]).collect::<Vec<_>>(), []);
+
+        // We have 1 tiny request followed by 1 huge request. They are combined as it reduces the
+        // `abs_diff()` to the `chunk_size`, but afterwards they are split to 2 evenly sized
+        // requests.
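+        // (With 64 MiB chunks: 0..1 and 1..133169152 first merge into 0..133169152,
+        // which is then split into two 66584576-byte halves; the `0` end-marker on
+        // the first half tells `get_ranges_sort` to recombine it with the next part.)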
+        assert_eq!(
+            merge_ranges(&[0..1, 1..127 * 1024 * 1024]).collect::<Vec<_>>(),
+            [(0..66584576, 0), (66584576..133169152, 2)]
+        );
+
+        // <= 1MiB gap, merge
+        assert_eq!(
+            merge_ranges(&[0..1, 1024 * 1024 + 1..1024 * 1024 + 2]).collect::<Vec<_>>(),
+            [(0..1048578, 2)]
+        );
+
+        // > 1MiB gap, do not merge
+        assert_eq!(
+            merge_ranges(&[0..1, 1024 * 1024 + 2..1024 * 1024 + 3]).collect::<Vec<_>>(),
+            [(0..1, 1), (1048578..1048579, 2)]
+        );
+
+        // <= 12.5% gap, merge
+        assert_eq!(
+            merge_ranges(&[0..8, 10..11]).collect::<Vec<_>>(),
+            [(0..11, 2)]
+        );
+
+        // <= 12.5% gap relative to RHS, merge
+        assert_eq!(
+            merge_ranges(&[0..1, 3..11]).collect::<Vec<_>>(),
+            [(0..11, 2)]
+        );
+
+        // Overlapping range, merge
+        assert_eq!(
+            merge_ranges(&[0..80 * 1024 * 1024, 10 * 1024 * 1024..70 * 1024 * 1024])
+                .collect::<Vec<_>>(),
+            [(0..80 * 1024 * 1024, 2)]
+        );
+    }
+}
diff --git a/crates/polars-io/src/file_cache/file_fetcher.rs b/crates/polars-io/src/file_cache/file_fetcher.rs
index bd16dff7fda4..3d712ba955fc 100644
--- a/crates/polars-io/src/file_cache/file_fetcher.rs
+++ b/crates/polars-io/src/file_cache/file_fetcher.rs
@@ -116,12 +116,7 @@ impl FileFetcher for CloudFileFetcher {
                 .await
                 .map_err(PolarsError::from)?;
 
-            self.object_store.download(&self.cloud_path, file).await?;
-            // Dropping is delayed for tokio async files so we need to explicitly
-            // flush here (https://github.com/tokio-rs/tokio/issues/2307#issuecomment-596336451).
-            file.sync_all().await.map_err(PolarsError::from)?;
-            PolarsResult::Ok(())
-        })?;
-        Ok(())
+            self.object_store.download(&self.cloud_path, file).await
+        })
     }
 }
diff --git a/crates/polars-io/src/parquet/read/async_impl.rs b/crates/polars-io/src/parquet/read/async_impl.rs
index da50364855da..053aad67464a 100644
--- a/crates/polars-io/src/parquet/read/async_impl.rs
+++ b/crates/polars-io/src/parquet/read/async_impl.rs
@@ -21,7 +21,7 @@ use crate::parquet::metadata::FileMetadataRef;
 use crate::pl_async::get_runtime;
 use crate::predicates::PhysicalIoExpr;
 
-type DownloadedRowGroup = Vec<(u64, Bytes)>;
+type DownloadedRowGroup = PlHashMap<u64, Bytes>;
 type QueuePayload = (usize, DownloadedRowGroup);
 type QueueSend = Arc<tokio::sync::mpsc::Sender<PolarsResult<QueuePayload>>>;
 
@@ -49,14 +49,8 @@ impl ParquetObjectStore {
         })
     }
 
-    async fn get_range(&self, start: usize, length: usize) -> PolarsResult<Bytes> {
-        self.store
-            .get_range(&self.path, start..start + length)
-            .await
-    }
-
-    async fn get_ranges(&self, ranges: &[Range<usize>]) -> PolarsResult<Vec<Bytes>> {
-        self.store.get_ranges(&self.path, ranges).await
+    async fn get_ranges(&self, ranges: &mut [Range<usize>]) -> PolarsResult<PlHashMap<u64, Bytes>> {
+        self.store.get_ranges_sort(&self.path, ranges).await
     }
 
     /// Initialize the length property of the object, unless it has already been fetched.
@@ -194,16 +188,10 @@ async fn download_projection(
         }
     });
 
-    let result = async_reader.get_ranges(&ranges).await.map(|bytes| {
-        (
-            rg_index,
-            bytes
-                .into_iter()
-                .zip(offsets)
-                .map(|(bytes, offset)| (offset, bytes))
-                .collect::<Vec<_>>(),
-        )
-    });
+    let result = async_reader
+        .get_ranges(&mut ranges)
+        .await
+        .map(|bytes_map| (rg_index, bytes_map));
 
     sender.send(result).await.is_ok()
 }
@@ -217,33 +205,20 @@ async fn download_row_group(
         return true;
     }
 
-    let full_byte_range = rg.full_byte_range();
-    let full_byte_range = full_byte_range.start as usize..full_byte_range.end as usize;
-
-    let result = async_reader
-        .get_range(
-            full_byte_range.start,
-            full_byte_range.end - full_byte_range.start,
+    let mut ranges = rg
+        .byte_ranges_iter()
+        .map(|x| x.start as usize..x.end as usize)
+        .collect::<Vec<_>>();
+
+    sender
+        .send(
+            async_reader
+                .get_ranges(&mut ranges)
+                .await
+                .map(|bytes_map| (rg_index, bytes_map)),
         )
         .await
-        .map(|bytes| {
-            (
-                rg_index,
-                rg.byte_ranges_iter()
-                    .map(|range| {
-                        (
-                            range.start,
-                            bytes.slice(
-                                range.start as usize - full_byte_range.start
-                                    ..range.end as usize - full_byte_range.start,
-                            ),
-                        )
-                    })
-                    .collect::<DownloadedRowGroup>(),
-            )
-        });
-
-    sender.send(result).await.is_ok()
+        .is_ok()
 }
 
 pub struct FetchRowGroupsFromObjectStore {
diff --git a/crates/polars-io/src/pl_async.rs b/crates/polars-io/src/pl_async.rs
index cc43a908cda3..4c95c96f7733 100644
--- a/crates/polars-io/src/pl_async.rs
+++ b/crates/polars-io/src/pl_async.rs
@@ -4,7 +4,7 @@ use std::ops::Deref;
 use std::sync::atomic::{AtomicBool, AtomicU64, AtomicU8, Ordering};
 
 use once_cell::sync::Lazy;
-use polars_core::config::verbose;
+use polars_core::config::{self, verbose};
 use polars_core::POOL;
 use tokio::runtime::{Builder, Runtime};
 use tokio::sync::Semaphore;
@@ -12,6 +12,25 @@ use tokio::sync::Semaphore;
 static CONCURRENCY_BUDGET: std::sync::OnceLock<(Semaphore, u32)> = std::sync::OnceLock::new();
 pub(super) const MAX_BUDGET_PER_REQUEST: usize = 10;
 
+/// Used to determine chunks when splitting large ranges, or combining small
+/// ranges.
+pub(super) static DOWNLOAD_CHUNK_SIZE: Lazy<usize> = Lazy::new(|| {
+    let v: usize = std::env::var("POLARS_DOWNLOAD_CHUNK_SIZE")
+        .as_deref()
+        .map(|x| x.parse().expect("integer"))
+        .unwrap_or(64 * 1024 * 1024);
+
+    if config::verbose() {
+        eprintln!("async download_chunk_size: {}", v)
+    }
+
+    v
+});
+
+pub(super) fn get_download_chunk_size() -> usize {
+    *DOWNLOAD_CHUNK_SIZE
+}
+
 pub trait GetSize {
     fn size(&self) -> u64;
 }
@@ -158,6 +177,10 @@ fn get_semaphore() -> &'static (Semaphore, u32) {
     })
 }
 
+pub(crate) fn get_concurrency_limit() -> u32 {
+    get_semaphore().1
+}
+
 pub async fn tune_with_concurrency_budget<F, Fut>(requested_budget: u32, callable: F) -> Fut::Output
 where
     F: FnOnce() -> Fut,
diff --git a/crates/polars-io/src/utils/byte_source.rs b/crates/polars-io/src/utils/byte_source.rs
index e2dd3e876c2a..af37d32b36da 100644
--- a/crates/polars-io/src/utils/byte_source.rs
+++ b/crates/polars-io/src/utils/byte_source.rs
@@ -1,6 +1,7 @@
 use std::ops::Range;
 use std::sync::Arc;
 
+use polars_core::prelude::PlHashMap;
 use polars_error::PolarsResult;
 use polars_utils::_limit_path_len_io_err;
 use polars_utils::mmap::MemSlice;
@@ -16,7 +17,11 @@ pub trait ByteSource: Send + Sync {
     /// # Panics
     /// Panics if `range` is not in bounds.
     async fn get_range(&self, range: Range<usize>) -> PolarsResult<MemSlice>;
-    async fn get_ranges(&self, ranges: &[Range<usize>]) -> PolarsResult<Vec<MemSlice>>;
+    /// Note: This will mutably sort ranges for coalescing.
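+    /// Returns a map from each requested range's start offset to its bytes.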
+    async fn get_ranges(
+        &self,
+        ranges: &mut [Range<usize>],
+    ) -> PolarsResult<PlHashMap<usize, MemSlice>>;
 }
 
 /// Byte source backed by a `MemSlice`, which can potentially be memory-mapped.
@@ -49,11 +54,14 @@ impl ByteSource for MemSliceByteSource {
         Ok(out)
     }
 
-    async fn get_ranges(&self, ranges: &[Range<usize>]) -> PolarsResult<Vec<MemSlice>> {
+    async fn get_ranges(
+        &self,
+        ranges: &mut [Range<usize>],
+    ) -> PolarsResult<PlHashMap<usize, MemSlice>> {
         Ok(ranges
             .iter()
-            .map(|x| self.0.slice(x.clone()))
-            .collect::<Vec<_>>())
+            .map(|x| (x.start, self.0.slice(x.clone())))
+            .collect())
     }
 }
 
@@ -88,9 +96,11 @@ impl ByteSource for ObjectStoreByteSource {
         Ok(mem_slice)
     }
 
-    async fn get_ranges(&self, ranges: &[Range<usize>]) -> PolarsResult<Vec<MemSlice>> {
-        let ranges = self.store.get_ranges(&self.path, ranges).await?;
-        Ok(ranges.into_iter().map(MemSlice::from_bytes).collect())
+    async fn get_ranges(
+        &self,
+        ranges: &mut [Range<usize>],
+    ) -> PolarsResult<PlHashMap<usize, MemSlice>> {
+        self.store.get_ranges_sort(&self.path, ranges).await
     }
 }
 
@@ -130,7 +140,10 @@ impl ByteSource for DynByteSource {
         }
     }
 
-    async fn get_ranges(&self, ranges: &[Range<usize>]) -> PolarsResult<Vec<MemSlice>> {
+    async fn get_ranges(
+        &self,
+        ranges: &mut [Range<usize>],
+    ) -> PolarsResult<PlHashMap<usize, MemSlice>> {
         match self {
             Self::MemSlice(v) => v.get_ranges(ranges).await,
             Self::Cloud(v) => v.get_ranges(ranges).await,
diff --git a/crates/polars-io/src/utils/other.rs b/crates/polars-io/src/utils/other.rs
index 4e039124933f..f4ef629821a9 100644
--- a/crates/polars-io/src/utils/other.rs
+++ b/crates/polars-io/src/utils/other.rs
@@ -45,7 +45,7 @@ pub fn get_reader_bytes(
     feature = "parquet",
     feature = "avro"
 ))]
-pub(crate) fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> ArrowSchema {
+pub fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> ArrowSchema {
     projection
         .iter()
         .map(|idx| schema.get_at_index(*idx).unwrap())
@@ -59,14 +59,14 @@ pub(crate) fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> Ar
     feature = "avro",
     feature = "parquet"
 ))]
-pub(crate) fn columns_to_projection(
-    columns: &[String],
+pub fn columns_to_projection<T: AsRef<str>>(
+    columns: &[T],
     schema: &ArrowSchema,
 ) -> PolarsResult<Vec<usize>> {
     let mut prj = Vec::with_capacity(columns.len());
 
     for column in columns {
-        let i = schema.try_index_of(column)?;
+        let i = schema.try_index_of(column.as_ref())?;
         prj.push(i);
     }
 
diff --git a/crates/polars-lazy/src/tests/aggregations.rs b/crates/polars-lazy/src/tests/aggregations.rs
index 6b2d8cb05da0..2ab337ef51e9 100644
--- a/crates/polars-lazy/src/tests/aggregations.rs
+++ b/crates/polars-lazy/src/tests/aggregations.rs
@@ -450,6 +450,7 @@ fn take_aggregations() -> PolarsResult<()> {
                     nulls_last: false,
                     multithreaded: true,
                     maintain_order: false,
+                    limit: None,
                 })
                 .head(Some(2)),
         )
@@ -489,6 +490,7 @@ fn test_take_consistency() -> PolarsResult<()> {
             nulls_last: false,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         })
         .get(lit(0))])
        .collect()?;
@@ -507,6 +509,7 @@ fn test_take_consistency() -> PolarsResult<()> {
             nulls_last: false,
             multithreaded: true,
             maintain_order: false,
+            limit: None,
         })
         .get(lit(0))])
         .collect()?;
@@ -526,6 +529,7 @@ fn test_take_consistency() -> PolarsResult<()> {
                 nulls_last: false,
                 multithreaded: true,
                 maintain_order: false,
+                limit: None,
             })
             .get(lit(0))
             .alias("1"),
@@ -537,6 +541,7 @@ fn test_take_consistency() -> PolarsResult<()> {
                     nulls_last: false,
                     multithreaded: true,
                     maintain_order: false,
+                    limit: None,
                 })
                 .get(lit(0)),
         )
diff --git a/crates/polars-lazy/src/tests/queries.rs b/crates/polars-lazy/src/tests/queries.rs
index 95cbf586be67..0c4e518b5042 100644
--- a/crates/polars-lazy/src/tests/queries.rs
+++ 
b/crates/polars-lazy/src/tests/queries.rs @@ -1666,6 +1666,7 @@ fn test_single_group_result() -> PolarsResult<()> { nulls_last: false, multithreaded: true, maintain_order: false, + limit: None, }) .over([col("a")])]) .collect()?; diff --git a/crates/polars-ops/src/chunked_array/top_k.rs b/crates/polars-ops/src/chunked_array/top_k.rs index 9caf861b6cd9..ef37e267c10f 100644 --- a/crates/polars-ops/src/chunked_array/top_k.rs +++ b/crates/polars-ops/src/chunked_array/top_k.rs @@ -285,6 +285,7 @@ fn top_k_by_impl( nulls_last: vec![true; by.len()], multithreaded, maintain_order: false, + limit: None, }; let idx = _arg_bottom_k(k, by, &mut sort_options)?; diff --git a/crates/polars-ops/src/frame/join/args.rs b/crates/polars-ops/src/frame/join/args.rs index d34c37e7ff67..def36b76a677 100644 --- a/crates/polars-ops/src/frame/join/args.rs +++ b/crates/polars-ops/src/frame/join/args.rs @@ -237,6 +237,7 @@ impl JoinValidation { s_left: &Series, s_right: &Series, build_shortest_table: bool, + join_nulls: bool, ) -> PolarsResult<()> { // In default, probe is the left series. // @@ -253,7 +254,13 @@ impl JoinValidation { // Only check the `build` side. // The other side use `validate_build` to check ManyToMany | ManyToOne => true, - OneToMany | OneToOne => probe.n_unique()? == probe.len(), + OneToMany | OneToOne => { + if !join_nulls && probe.null_count() > 0 { + probe.n_unique()? - 1 == probe.len() - probe.null_count() + } else { + probe.n_unique()? == probe.len() + } + }, }; polars_ensure!(valid, ComputeError: "join keys did not fulfill {} validation", self); Ok(()) diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs index f79e8759d9e8..7c365210b208 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs @@ -20,7 +20,7 @@ pub trait SeriesJoin: SeriesSealed + Sized { ) -> PolarsResult { let s_self = self.as_series(); let (lhs, rhs) = (s_self.to_physical_repr(), other.to_physical_repr()); - validate.validate_probe(&lhs, &rhs, false)?; + validate.validate_probe(&lhs, &rhs, false, join_nulls)?; let lhs_dtype = lhs.dtype(); let rhs_dtype = rhs.dtype(); @@ -35,7 +35,8 @@ pub trait SeriesJoin: SeriesSealed + Sized { let (lhs, rhs, _, _) = prepare_binary::(lhs, rhs, false); let lhs = lhs.iter().map(|v| v.as_slice()).collect::>(); let rhs = rhs.iter().map(|v| v.as_slice()).collect::>(); - hash_join_tuples_left(lhs, rhs, None, None, validate, join_nulls) + let build_null_count = other.null_count(); + hash_join_tuples_left(lhs, rhs, None, None, validate, join_nulls, build_null_count) }, T::BinaryOffset => { let lhs = lhs.binary_offset().unwrap(); @@ -44,7 +45,8 @@ pub trait SeriesJoin: SeriesSealed + Sized { // Take slices so that vecs are not copied let lhs = lhs.iter().map(|k| k.as_slice()).collect::>(); let rhs = rhs.iter().map(|k| k.as_slice()).collect::>(); - hash_join_tuples_left(lhs, rhs, None, None, validate, join_nulls) + let build_null_count = other.null_count(); + hash_join_tuples_left(lhs, rhs, None, None, validate, join_nulls, build_null_count) }, x if x.is_float() => { with_match_physical_float_polars_type!(lhs.dtype(), |$T| { @@ -168,7 +170,7 @@ pub trait SeriesJoin: SeriesSealed + Sized { ) -> PolarsResult<(InnerJoinIds, bool)> { let s_self = self.as_series(); let (lhs, rhs) = (s_self.to_physical_repr(), other.to_physical_repr()); - validate.validate_probe(&lhs, &rhs, true)?; + validate.validate_probe(&lhs, 
&rhs, true, join_nulls)?; let lhs_dtype = lhs.dtype(); let rhs_dtype = rhs.dtype(); @@ -184,8 +186,20 @@ pub trait SeriesJoin: SeriesSealed + Sized { // Take slices so that vecs are not copied let lhs = lhs.iter().map(|k| k.as_slice()).collect::>(); let rhs = rhs.iter().map(|k| k.as_slice()).collect::>(); + let build_null_count = if swapped { + s_self.null_count() + } else { + other.null_count() + }; Ok(( - hash_join_tuples_inner(lhs, rhs, swapped, validate, join_nulls)?, + hash_join_tuples_inner( + lhs, + rhs, + swapped, + validate, + join_nulls, + build_null_count, + )?, !swapped, )) }, @@ -196,8 +210,20 @@ pub trait SeriesJoin: SeriesSealed + Sized { // Take slices so that vecs are not copied let lhs = lhs.iter().map(|k| k.as_slice()).collect::>(); let rhs = rhs.iter().map(|k| k.as_slice()).collect::>(); + let build_null_count = if swapped { + s_self.null_count() + } else { + other.null_count() + }; Ok(( - hash_join_tuples_inner(lhs, rhs, swapped, validate, join_nulls)?, + hash_join_tuples_inner( + lhs, + rhs, + swapped, + validate, + join_nulls, + build_null_count, + )?, !swapped, )) }, @@ -244,7 +270,7 @@ pub trait SeriesJoin: SeriesSealed + Sized { ) -> PolarsResult<(PrimitiveArray, PrimitiveArray)> { let s_self = self.as_series(); let (lhs, rhs) = (s_self.to_physical_repr(), other.to_physical_repr()); - validate.validate_probe(&lhs, &rhs, true)?; + validate.validate_probe(&lhs, &rhs, true, join_nulls)?; let lhs_dtype = lhs.dtype(); let rhs_dtype = rhs.dtype(); @@ -352,20 +378,38 @@ where .map(|arr| arr.as_slice().unwrap()) .collect::>(); Ok(( - hash_join_tuples_inner(splitted_a, splitted_b, swapped, validate, join_nulls)?, + hash_join_tuples_inner( + splitted_a, splitted_b, swapped, validate, join_nulls, 0, + )?, !swapped, )) } else { Ok(( - hash_join_tuples_inner(splitted_a, splitted_b, swapped, validate, join_nulls)?, + hash_join_tuples_inner( + splitted_a, splitted_b, swapped, validate, join_nulls, 0, + )?, !swapped, )) } }, - _ => Ok(( - hash_join_tuples_inner(splitted_a, splitted_b, swapped, validate, join_nulls)?, - !swapped, - )), + _ => { + let build_null_count = if swapped { + left.null_count() + } else { + right.null_count() + }; + Ok(( + hash_join_tuples_inner( + splitted_a, + splitted_b, + swapped, + validate, + join_nulls, + build_null_count, + )?, + !swapped, + )) + }, } } @@ -430,7 +474,7 @@ where (0, 0, 1, 1) => { let keys_a = chunks_as_slices(&splitted_a); let keys_b = chunks_as_slices(&splitted_b); - hash_join_tuples_left(keys_a, keys_b, None, None, validate, join_nulls) + hash_join_tuples_left(keys_a, keys_b, None, None, validate, join_nulls, 0) }, (0, 0, _, _) => { let keys_a = chunks_as_slices(&splitted_a); @@ -445,6 +489,7 @@ where mapping_right.as_deref(), validate, join_nulls, + 0, ) }, _ => { @@ -452,6 +497,7 @@ where let keys_b = get_arrays(&splitted_b); let (mapping_left, mapping_right) = create_mappings(left.chunks(), right.chunks(), left.len(), right.len()); + let build_null_count = right.null_count(); hash_join_tuples_left( keys_a, keys_b, @@ -459,6 +505,7 @@ where mapping_right.as_deref(), validate, join_nulls, + build_null_count, ) }, } diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_inner.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_inner.rs index f01c99529aea..aeca8bb32546 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_inner.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_inner.rs @@ -44,6 +44,8 @@ pub(super) fn hash_join_tuples_inner( swapped: bool, validate: JoinValidation, 
join_nulls: bool, + // Null count is required for join validation + build_null_count: usize, ) -> PolarsResult<(Vec, Vec)> where I: IntoIterator + Send + Sync + Clone, @@ -53,10 +55,13 @@ where // NOTE: see the left join for more elaborate comments // first we hash one relation let hash_tbls = if validate.needs_checks() { - let expected_size = build + let mut expected_size = build .iter() .map(|v| v.clone().into_iter().size_hint().1.unwrap()) .sum(); + if !join_nulls { + expected_size -= build_null_count; + } let hash_tbls = build_tables(build, join_nulls); let build_size = hash_tbls.iter().map(|m| m.len()).sum(); validate.validate_build(build_size, expected_size, swapped)?; diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_left.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_left.rs index 91c4f0cd1008..b23d9de1776f 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_left.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_left.rs @@ -112,6 +112,8 @@ pub(super) fn hash_join_tuples_left( chunk_mapping_right: Option<&[ChunkId]>, validate: JoinValidation, join_nulls: bool, + // We should know the number of nulls to avoid extra calculation + build_null_count: usize, ) -> PolarsResult where I: IntoIterator, @@ -123,7 +125,10 @@ where let build = build.into_iter().map(|i| i.into_iter()).collect::>(); // first we hash one relation let hash_tbls = if validate.needs_checks() { - let expected_size = build.iter().map(|v| v.size_hint().1.unwrap()).sum(); + let mut expected_size = build.iter().map(|v| v.size_hint().1.unwrap()).sum(); + if !join_nulls { + expected_size -= build_null_count; + } let hash_tbls = build_tables(build, join_nulls); let build_size = hash_tbls.iter().map(|m| m.len()).sum(); validate.validate_build(build_size, expected_size, false)?; diff --git a/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs b/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs index fce2f2bf6cf0..95cde8387733 100644 --- a/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs +++ b/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs @@ -225,6 +225,7 @@ pub(crate) fn _sort_or_hash_inner( nulls_last: false, multithreaded: true, maintain_order: false, + limit: None, }); let s_right = unsafe { s_right.take_unchecked(&sort_idx) }; let ids = par_sorted_merge_inner_no_nulls(s_left, &s_right); @@ -252,6 +253,7 @@ pub(crate) fn _sort_or_hash_inner( nulls_last: false, multithreaded: true, maintain_order: false, + limit: None, }); let s_left = unsafe { s_left.take_unchecked(&sort_idx) }; let ids = par_sorted_merge_inner_no_nulls(&s_left, s_right); @@ -323,6 +325,7 @@ pub(crate) fn sort_or_hash_left( nulls_last: false, multithreaded: true, maintain_order: false, + limit: None, }); let s_right = unsafe { s_right.take_unchecked(&sort_idx) }; diff --git a/crates/polars-pipe/src/executors/sinks/sort/source.rs b/crates/polars-pipe/src/executors/sinks/sort/source.rs index 1c1fa2984a0e..6f544e8e6ef1 100644 --- a/crates/polars-pipe/src/executors/sinks/sort/source.rs +++ b/crates/polars-pipe/src/executors/sinks/sort/source.rs @@ -101,6 +101,7 @@ impl SortSource { nulls_last: self.nulls_last, multithreaded: true, maintain_order: false, + limit: None, }, ), Some((offset, len)) => { @@ -119,6 +120,7 @@ impl SortSource { nulls_last: self.nulls_last, multithreaded: true, maintain_order: false, + limit: None, }, ); *len = len.saturating_sub(df_len); diff --git a/crates/polars-plan/src/dsl/function_expr/bounds.rs 
b/crates/polars-plan/src/dsl/function_expr/bounds.rs index 77c8a6f3ef5f..ae0f36a0956e 100644 --- a/crates/polars-plan/src/dsl/function_expr/bounds.rs +++ b/crates/polars-plan/src/dsl/function_expr/bounds.rs @@ -2,50 +2,12 @@ use super::*; pub(super) fn upper_bound(s: &Column) -> PolarsResult { let name = s.name().clone(); - use DataType::*; - let s = match s.dtype().to_physical() { - #[cfg(feature = "dtype-i8")] - Int8 => Column::new_scalar(name, Scalar::from(i8::MAX), 1), - #[cfg(feature = "dtype-i16")] - Int16 => Column::new_scalar(name, Scalar::from(i16::MAX), 1), - Int32 => Column::new_scalar(name, Scalar::from(i32::MAX), 1), - Int64 => Column::new_scalar(name, Scalar::from(i64::MAX), 1), - #[cfg(feature = "dtype-u8")] - UInt8 => Column::new_scalar(name, Scalar::from(u8::MAX), 1), - #[cfg(feature = "dtype-u16")] - UInt16 => Column::new_scalar(name, Scalar::from(u16::MAX), 1), - UInt32 => Column::new_scalar(name, Scalar::from(u32::MAX), 1), - UInt64 => Column::new_scalar(name, Scalar::from(u64::MAX), 1), - Float32 => Column::new_scalar(name, Scalar::from(f32::INFINITY), 1), - Float64 => Column::new_scalar(name, Scalar::from(f64::INFINITY), 1), - dt => polars_bail!( - ComputeError: "cannot determine upper bound for dtype `{}`", dt, - ), - }; - Ok(s) + let scalar = s.dtype().to_physical().max()?; + Ok(Column::new_scalar(name, scalar, 1)) } pub(super) fn lower_bound(s: &Column) -> PolarsResult { let name = s.name().clone(); - use DataType::*; - let s = match s.dtype().to_physical() { - #[cfg(feature = "dtype-i8")] - Int8 => Column::new_scalar(name, Scalar::from(i8::MIN), 1), - #[cfg(feature = "dtype-i16")] - Int16 => Column::new_scalar(name, Scalar::from(i16::MIN), 1), - Int32 => Column::new_scalar(name, Scalar::from(i32::MIN), 1), - Int64 => Column::new_scalar(name, Scalar::from(i64::MIN), 1), - #[cfg(feature = "dtype-u8")] - UInt8 => Column::new_scalar(name, Scalar::from(u8::MIN), 1), - #[cfg(feature = "dtype-u16")] - UInt16 => Column::new_scalar(name, Scalar::from(u16::MIN), 1), - UInt32 => Column::new_scalar(name, Scalar::from(u32::MIN), 1), - UInt64 => Column::new_scalar(name, Scalar::from(u64::MIN), 1), - Float32 => Column::new_scalar(name, Scalar::from(f32::NEG_INFINITY), 1), - Float64 => Column::new_scalar(name, Scalar::from(f64::NEG_INFINITY), 1), - dt => polars_bail!( - ComputeError: "cannot determine lower bound for dtype `{}`", dt, - ), - }; - Ok(s) + let scalar = s.dtype().to_physical().min()?; + Ok(Column::new_scalar(name, scalar, 1)) } diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index 7105855636c5..6c1b675b2bd8 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -32,50 +32,57 @@ impl AExpr { ctx: Context, arena: &Arena, ) -> PolarsResult { - // During aggregation a column that isn't aggregated gets an extra nesting level - // col(foo: i64) -> list[i64] - // But not if we do an aggregation: - // col(foo: i64).sum() -> i64 - // The `nested` keeps track of the nesting we need to add. - let mut nested = matches!(ctx, Context::Aggregation) as u8; - let mut field = self.to_field_impl(schema, ctx, arena, &mut nested)?; + // Indicates whether we should auto-implode the result. This is initialized to true if we are + // in an aggregation context, so functions that return scalars should explicitly set this + // to false in `to_field_impl`. 
+ let mut agg_list = matches!(ctx, Context::Aggregation); + let mut field = self.to_field_impl(schema, ctx, arena, &mut agg_list)?; - if nested >= 1 { + if agg_list { field.coerce(field.dtype().clone().implode()); } + Ok(field) } /// Get Field result of the expression. The schema is the input data. + /// + /// This is taken as `&mut bool` as for some expressions this is determined by the upper node + /// (e.g. `alias`, `cast`). #[recursive] pub fn to_field_impl( &self, schema: &Schema, ctx: Context, arena: &Arena, - nested: &mut u8, + agg_list: &mut bool, ) -> PolarsResult { use AExpr::*; use DataType::*; match self { Len => { - *nested = 0; + *agg_list = false; Ok(Field::new(PlSmallStr::from_static(LEN), IDX_DTYPE)) }, Window { function, options, .. } => { - if let WindowType::Over(mapping) = options { - *nested += matches!(mapping, WindowMapping::Join) as u8; + if let WindowType::Over(WindowMapping::Join) = options { + // expr.over(..), defaults to agg-list unless explicitly unset + // by the `to_field_impl` of the `expr` + *agg_list = true; } + let e = arena.get(*function); - e.to_field_impl(schema, ctx, arena, nested) + e.to_field_impl(schema, ctx, arena, agg_list) }, Explode(expr) => { // `Explode` is a "flatten" operation, which is not the same as returning a scalar. // Namely, it should be auto-imploded in the aggregation context, so we don't update - // the `nested` state here. - let field = arena.get(*expr).to_field_impl(schema, ctx, arena, &mut 0)?; + // the `agg_list` state here. + let field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; if let List(inner) = field.dtype() { Ok(Field::new(field.name().clone(), *inner.clone())) @@ -87,14 +94,14 @@ impl AExpr { name.clone(), arena .get(*expr) - .to_field_impl(schema, ctx, arena, nested)? + .to_field_impl(schema, ctx, arena, agg_list)? .dtype, )), Column(name) => schema .get_field(name) .ok_or_else(|| PolarsError::ColumnNotFound(name.to_string().into())), Literal(sv) => { - *nested = 0; + *agg_list = false; Ok(match sv { LiteralValue::Series(s) => s.field().into_owned(), _ => Field::new(sv.output_name().clone(), sv.get_datatype()), @@ -116,35 +123,42 @@ impl AExpr { | Operator::LogicalOr => { let out_field; let out_name = { - out_field = - arena.get(*left).to_field_impl(schema, ctx, arena, nested)?; + out_field = arena + .get(*left) + .to_field_impl(schema, ctx, arena, agg_list)?; out_field.name() }; Field::new(out_name.clone(), Boolean) }, Operator::TrueDivide => { - return get_truediv_field(*left, *right, arena, ctx, schema, nested) + return get_truediv_field(*left, *right, arena, ctx, schema, agg_list) }, _ => { - return get_arithmetic_field(*left, *right, arena, *op, ctx, schema, nested) + return get_arithmetic_field( + *left, *right, arena, *op, ctx, schema, agg_list, + ) }, }; Ok(field) }, - Sort { expr, .. } => arena.get(*expr).to_field_impl(schema, ctx, arena, nested), + Sort { expr, .. } => arena.get(*expr).to_field_impl(schema, ctx, arena, agg_list), Gather { expr, returns_scalar, .. } => { if *returns_scalar { - *nested = nested.saturating_sub(1); + *agg_list = false; } - arena.get(*expr).to_field_impl(schema, ctx, arena, nested) + arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false) }, - SortBy { expr, .. } => arena.get(*expr).to_field_impl(schema, ctx, arena, nested), - Filter { input, .. } => arena.get(*input).to_field_impl(schema, ctx, arena, nested), + SortBy { expr, .. } => arena.get(*expr).to_field_impl(schema, ctx, arena, agg_list), + Filter { input, .. 
} => arena + .get(*input) + .to_field_impl(schema, ctx, arena, agg_list), Agg(agg) => { use IRAggExpr::*; match agg { @@ -152,13 +166,16 @@ impl AExpr { | Min { input: expr, .. } | First(expr) | Last(expr) => { - *nested = nested.saturating_sub(1); - arena.get(*expr).to_field_impl(schema, ctx, arena, nested) + *agg_list = false; + arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false) }, Sum(expr) => { - *nested = nested.saturating_sub(1); - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; let dt = match field.dtype() { Boolean => Some(IDX_DTYPE), UInt8 | Int8 | Int16 | UInt16 => Some(Int64), @@ -170,9 +187,10 @@ impl AExpr { Ok(field) }, Median(expr) => { - *nested = nested.saturating_sub(1); - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; match field.dtype { Date => field.coerce(Datetime(TimeUnit::Milliseconds, None)), _ => float_type(&mut field), @@ -180,9 +198,10 @@ impl AExpr { Ok(field) }, Mean(expr) => { - *nested = nested.saturating_sub(1); - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; match field.dtype { Date => field.coerce(Datetime(TimeUnit::Milliseconds, None)), _ => float_type(&mut field), @@ -190,69 +209,80 @@ impl AExpr { Ok(field) }, Implode(expr) => { - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; field.coerce(DataType::List(field.dtype().clone().into())); Ok(field) }, Std(expr, _) => { - *nested = nested.saturating_sub(1); - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; float_type(&mut field); Ok(field) }, Var(expr, _) => { - *nested = nested.saturating_sub(1); - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; float_type(&mut field); Ok(field) }, NUnique(expr) => { - *nested = 0; - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; field.coerce(IDX_DTYPE); Ok(field) }, Count(expr, _) => { - *nested = 0; - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; field.coerce(IDX_DTYPE); Ok(field) }, AggGroups(expr) => { - *nested = 1; - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = true; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; field.coerce(List(IDX_DTYPE.into())); Ok(field) }, Quantile { expr, .. 
} => { - *nested = nested.saturating_sub(1); - let mut field = - arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let mut field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; float_type(&mut field); Ok(field) }, #[cfg(feature = "bitwise")] Bitwise(expr, _) => { - *nested = nested.saturating_sub(1); - let field = arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + *agg_list = false; + let field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, &mut false)?; // @Q? Do we need to coerce here? Ok(field) }, } }, Cast { expr, dtype, .. } => { - let field = arena.get(*expr).to_field_impl(schema, ctx, arena, nested)?; + let field = arena + .get(*expr) + .to_field_impl(schema, ctx, arena, agg_list)?; Ok(Field::new(field.name().clone(), dtype.clone())) }, Ternary { truthy, falsy, .. } => { - let mut nested_truthy = *nested; - let mut nested_falsy = *nested; + let mut agg_list_truthy = *agg_list; + let mut agg_list_falsy = *agg_list; // During aggregation: // left: col(foo): list nesting: 1 @@ -261,11 +291,11 @@ impl AExpr { let mut truthy = arena .get(*truthy) - .to_field_impl(schema, ctx, arena, &mut nested_truthy)?; + .to_field_impl(schema, ctx, arena, &mut agg_list_truthy)?; let falsy = arena .get(*falsy) - .to_field_impl(schema, ctx, arena, &mut nested_falsy)?; + .to_field_impl(schema, ctx, arena, &mut agg_list_falsy)?; let st = if let DataType::Null = *truthy.dtype() { falsy.dtype().clone() @@ -273,7 +303,7 @@ impl AExpr { try_get_supertype(truthy.dtype(), falsy.dtype())? }; - *nested = std::cmp::max(nested_truthy, nested_falsy); + *agg_list = agg_list_truthy | agg_list_falsy; truthy.coerce(st); Ok(truthy) @@ -284,14 +314,14 @@ impl AExpr { options, .. } => { - let fields = func_args_to_fields(input, ctx, schema, arena, nested)?; + let fields = func_args_to_fields(input, ctx, schema, arena, agg_list)?; polars_ensure!(!fields.is_empty(), ComputeError: "expression: '{}' didn't get any inputs", options.fmt_str); let out = output_type.get_field(schema, ctx, &fields)?; if options.flags.contains(FunctionFlags::RETURNS_SCALAR) { - *nested = 0; + *agg_list = false; } else if matches!(ctx, Context::Aggregation) { - *nested += 1; + *agg_list = true; } Ok(out) @@ -301,19 +331,21 @@ impl AExpr { input, options, } => { - let fields = func_args_to_fields(input, ctx, schema, arena, nested)?; + let fields = func_args_to_fields(input, ctx, schema, arena, agg_list)?; polars_ensure!(!fields.is_empty(), ComputeError: "expression: '{}' didn't get any inputs", function); let out = function.get_field(schema, ctx, &fields)?; if options.flags.contains(FunctionFlags::RETURNS_SCALAR) { - *nested = 0; + *agg_list = false; } else if matches!(ctx, Context::Aggregation) { - *nested += 1; + *agg_list = true; } Ok(out) }, - Slice { input, .. } => arena.get(*input).to_field_impl(schema, ctx, arena, nested), + Slice { input, .. } => arena + .get(*input) + .to_field_impl(schema, ctx, arena, agg_list), } } } @@ -323,25 +355,28 @@ fn func_args_to_fields( ctx: Context, schema: &Schema, arena: &Arena, - nested: &mut u8, + agg_list: &mut bool, ) -> PolarsResult> { - let mut first = true; input .iter() + .enumerate() // Default context because `col()` would return a list in aggregation context - .map(|e| { - // Only mutate first nested as that is the dtype of the function. 
- let mut nested_tmp = *nested; - let nested = if first { - first = false; - &mut *nested - } else { - &mut nested_tmp - }; + .map(|(i, e)| { + let tmp = &mut false; arena .get(e.node()) - .to_field_impl(schema, ctx, arena, nested) + .to_field_impl( + schema, + ctx, + arena, + if i == 0 { + // Only mutate first agg_list as that is the dtype of the function. + agg_list + } else { + tmp + }, + ) .map(|mut field| { field.name = e.output_name().clone(); field @@ -357,7 +392,7 @@ fn get_arithmetic_field( op: Operator, ctx: Context, schema: &Schema, - nested: &mut u8, + agg_list: &mut bool, ) -> PolarsResult { use DataType::*; let left_ae = arena.get(left); @@ -371,11 +406,11 @@ fn get_arithmetic_field( // leading to quadratic behavior. # 4736 // // further right_type is only determined when needed. - let mut left_field = left_ae.to_field_impl(schema, ctx, arena, nested)?; + let mut left_field = left_ae.to_field_impl(schema, ctx, arena, agg_list)?; let super_type = match op { Operator::Minus => { - let right_type = right_ae.to_field_impl(schema, ctx, arena, nested)?.dtype; + let right_type = right_ae.to_field_impl(schema, ctx, arena, agg_list)?.dtype; match (&left_field.dtype, &right_type) { #[cfg(feature = "dtype-struct")] (Struct(_), Struct(_)) => { @@ -430,7 +465,7 @@ fn get_arithmetic_field( } }, Operator::Plus => { - let right_type = right_ae.to_field_impl(schema, ctx, arena, nested)?.dtype; + let right_type = right_ae.to_field_impl(schema, ctx, arena, agg_list)?.dtype; match (&left_field.dtype, &right_type) { (Duration(_), Datetime(_, _)) | (Datetime(_, _), Duration(_)) @@ -472,7 +507,7 @@ fn get_arithmetic_field( } }, _ => { - let right_type = right_ae.to_field_impl(schema, ctx, arena, nested)?.dtype; + let right_type = right_ae.to_field_impl(schema, ctx, arena, agg_list)?.dtype; match (&left_field.dtype, &right_type) { #[cfg(feature = "dtype-struct")] @@ -558,10 +593,14 @@ fn get_truediv_field( arena: &Arena, ctx: Context, schema: &Schema, - nested: &mut u8, + agg_list: &mut bool, ) -> PolarsResult { - let mut left_field = arena.get(left).to_field_impl(schema, ctx, arena, nested)?; - let right_field = arena.get(right).to_field_impl(schema, ctx, arena, nested)?; + let mut left_field = arena + .get(left) + .to_field_impl(schema, ctx, arena, agg_list)?; + let right_field = arena + .get(right) + .to_field_impl(schema, ctx, arena, agg_list)?; use DataType::*; // TODO: Re-investigate this. A lot of "_" is being used on the RHS match because this code diff --git a/crates/polars-plan/src/plans/optimizer/mod.rs b/crates/polars-plan/src/plans/optimizer/mod.rs index 70880ca78359..dc0d330d8b86 100644 --- a/crates/polars-plan/src/plans/optimizer/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/mod.rs @@ -89,6 +89,7 @@ pub fn optimize( let simplify_expr = opt_state.contains(OptFlags::SIMPLIFY_EXPR); let slice_pushdown = opt_state.contains(OptFlags::SLICE_PUSHDOWN); let streaming = opt_state.contains(OptFlags::STREAMING); + let new_streaming = opt_state.contains(OptFlags::NEW_STREAMING); let fast_projection = opt_state.contains(OptFlags::FAST_PROJECTION); // Don't run optimizations that don't make sense on a single node. 
@@ -181,7 +182,7 @@ pub fn optimize( } if slice_pushdown { - let slice_pushdown_opt = SlicePushDown::new(streaming); + let slice_pushdown_opt = SlicePushDown::new(streaming, new_streaming); let alp = lp_arena.take(lp_top); let alp = slice_pushdown_opt.optimize(alp, lp_arena, expr_arena)?; diff --git a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs index 9c2f8497fac8..a5ff806abae9 100644 --- a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs +++ b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs @@ -5,6 +5,7 @@ use crate::prelude::*; pub(super) struct SlicePushDown { streaming: bool, + new_streaming: bool, pub scratch: Vec, } @@ -59,9 +60,10 @@ fn can_pushdown_slice_past_projections(exprs: &[ExprIR], arena: &Arena) - } impl SlicePushDown { - pub(super) fn new(streaming: bool) -> Self { + pub(super) fn new(streaming: bool, new_streaming: bool) -> Self { Self { streaming, + new_streaming, scratch: vec![], } } @@ -211,6 +213,32 @@ impl SlicePushDown { Ok(lp) }, + + #[cfg(feature = "ipc")] + (Scan { + sources, + file_info, + hive_parts, + output_schema, + mut file_options, + predicate, + scan_type: scan_type @ FileScan::Ipc { .. }, + }, Some(state)) if self.new_streaming && predicate.is_none() => { + file_options.slice = Some((state.offset, state.len as usize)); + + let lp = Scan { + sources, + file_info, + hive_parts, + output_schema, + scan_type, + file_options, + predicate, + }; + + Ok(lp) + }, + // TODO! we currently skip slice pushdown if there is a predicate. (Scan { sources, diff --git a/crates/polars-python/src/datatypes.rs b/crates/polars-python/src/datatypes.rs index a31a2301f866..ea7686a29ec6 100644 --- a/crates/polars-python/src/datatypes.rs +++ b/crates/polars-python/src/datatypes.rs @@ -1,10 +1,12 @@ use polars::prelude::*; use polars_core::utils::arrow::array::Utf8ViewArray; +use polars_lazy::dsl; use pyo3::prelude::*; +use crate::error::PyPolarsErr; #[cfg(feature = "object")] use crate::object::OBJECT_NAME; -use crate::Wrap; +use crate::{PyExpr, Wrap}; // Don't change the order of these! 
#[repr(u8)] @@ -117,3 +119,15 @@ impl<'py> FromPyObject<'py> for PyDataType { Ok(dt.0.into()) } } + +#[pyfunction] +pub fn _get_dtype_max(dt: Wrap) -> PyResult { + let v = dt.0.max().map_err(PyPolarsErr::from)?; + Ok(dsl::lit(v).into()) +} + +#[pyfunction] +pub fn _get_dtype_min(dt: Wrap) -> PyResult { + let v = dt.0.min().map_err(PyPolarsErr::from)?; + Ok(dsl::lit(v).into()) +} diff --git a/crates/polars-python/src/expr/general.rs b/crates/polars-python/src/expr/general.rs index 7125388e88cd..fe5fdafdbbb8 100644 --- a/crates/polars-python/src/expr/general.rs +++ b/crates/polars-python/src/expr/general.rs @@ -260,6 +260,7 @@ impl PyExpr { nulls_last, multithreaded: true, maintain_order: false, + limit: None, }) .into() } @@ -272,6 +273,7 @@ impl PyExpr { nulls_last, multithreaded: true, maintain_order: false, + limit: None, }) .into() } @@ -349,6 +351,7 @@ impl PyExpr { nulls_last, multithreaded, maintain_order, + limit: None, }, ) .into() diff --git a/crates/polars-python/src/functions/lazy.rs b/crates/polars-python/src/functions/lazy.rs index d3ebb376d10f..1c4e738ea69c 100644 --- a/crates/polars-python/src/functions/lazy.rs +++ b/crates/polars-python/src/functions/lazy.rs @@ -75,6 +75,7 @@ pub fn arg_sort_by( nulls_last, multithreaded, maintain_order, + limit: None, }, ) .into() diff --git a/crates/polars-python/src/lazyframe/general.rs b/crates/polars-python/src/lazyframe/general.rs index fd89884ece82..f9fb740d4cae 100644 --- a/crates/polars-python/src/lazyframe/general.rs +++ b/crates/polars-python/src/lazyframe/general.rs @@ -539,6 +539,7 @@ impl PyLazyFrame { nulls_last: vec![nulls_last], multithreaded, maintain_order, + limit: None, }, ) .into() @@ -561,6 +562,7 @@ impl PyLazyFrame { nulls_last, maintain_order, multithreaded, + limit: None, }, ) .into() diff --git a/crates/polars-python/src/series/aggregation.rs b/crates/polars-python/src/series/aggregation.rs index 5aa8ee16639e..c4fe8d3447ec 100644 --- a/crates/polars-python/src/series/aggregation.rs +++ b/crates/polars-python/src/series/aggregation.rs @@ -8,37 +8,39 @@ use crate::error::PyPolarsErr; #[pymethods] impl PySeries { - fn any(&self, ignore_nulls: bool) -> PyResult> { - let s = self.series.bool().map_err(PyPolarsErr::from)?; - Ok(if ignore_nulls { - Some(s.any()) - } else { - s.any_kleene() + fn any(&self, py: Python, ignore_nulls: bool) -> PyResult> { + py.allow_threads(|| { + let s = self.series.bool().map_err(PyPolarsErr::from)?; + Ok(if ignore_nulls { + Some(s.any()) + } else { + s.any_kleene() + }) }) } - fn all(&self, ignore_nulls: bool) -> PyResult> { - let s = self.series.bool().map_err(PyPolarsErr::from)?; - Ok(if ignore_nulls { - Some(s.all()) - } else { - s.all_kleene() + fn all(&self, py: Python, ignore_nulls: bool) -> PyResult> { + py.allow_threads(|| { + let s = self.series.bool().map_err(PyPolarsErr::from)?; + Ok(if ignore_nulls { + Some(s.all()) + } else { + s.all_kleene() + }) }) } - fn arg_max(&self) -> Option { - self.series.arg_max() + fn arg_max(&self, py: Python) -> Option { + py.allow_threads(|| self.series.arg_max()) } - fn arg_min(&self) -> Option { - self.series.arg_min() + fn arg_min(&self, py: Python) -> Option { + py.allow_threads(|| self.series.arg_min()) } fn max(&self, py: Python) -> PyResult { Ok(Wrap( - self.series - .max_reduce() - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.max_reduce().map_err(PyPolarsErr::from))? 
.as_any_value(), ) .into_py(py)) @@ -47,49 +49,42 @@ impl PySeries { fn mean(&self, py: Python) -> PyResult { match self.series.dtype() { Boolean => Ok(Wrap( - self.series - .cast(&DataType::UInt8) - .unwrap() - .mean_reduce() + py.allow_threads(|| self.series.cast(&DataType::UInt8).unwrap().mean_reduce()) .as_any_value(), ) .into_py(py)), // For non-numeric output types we require mean_reduce. - dt if dt.is_temporal() => { - Ok(Wrap(self.series.mean_reduce().as_any_value()).into_py(py)) - }, - _ => Ok(self.series.mean().into_py(py)), + dt if dt.is_temporal() => Ok(Wrap( + py.allow_threads(|| self.series.mean_reduce()) + .as_any_value(), + ) + .into_py(py)), + _ => Ok(py.allow_threads(|| self.series.mean()).into_py(py)), } } fn median(&self, py: Python) -> PyResult { match self.series.dtype() { Boolean => Ok(Wrap( - self.series - .cast(&DataType::UInt8) - .unwrap() - .median_reduce() + py.allow_threads(|| self.series.cast(&DataType::UInt8).unwrap().median_reduce()) .map_err(PyPolarsErr::from)? .as_any_value(), ) .into_py(py)), // For non-numeric output types we require median_reduce. dt if dt.is_temporal() => Ok(Wrap( - self.series - .median_reduce() + py.allow_threads(|| self.series.median_reduce()) .map_err(PyPolarsErr::from)? .as_any_value(), ) .into_py(py)), - _ => Ok(self.series.median().into_py(py)), + _ => Ok(py.allow_threads(|| self.series.median()).into_py(py)), } } fn min(&self, py: Python) -> PyResult { Ok(Wrap( - self.series - .min_reduce() - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.min_reduce().map_err(PyPolarsErr::from))? .as_any_value(), ) .into_py(py)) @@ -97,26 +92,27 @@ impl PySeries { fn product(&self, py: Python) -> PyResult { Ok(Wrap( - self.series - .product() - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.product().map_err(PyPolarsErr::from))? .as_any_value(), ) .into_py(py)) } - fn quantile(&self, quantile: f64, interpolation: Wrap) -> PyResult { - let bind = self.series.quantile_reduce(quantile, interpolation.0); + fn quantile( + &self, + py: Python, + quantile: f64, + interpolation: Wrap, + ) -> PyResult { + let bind = py.allow_threads(|| self.series.quantile_reduce(quantile, interpolation.0)); let sc = bind.map_err(PyPolarsErr::from)?; - Ok(Python::with_gil(|py| Wrap(sc.as_any_value()).into_py(py))) + Ok(Wrap(sc.as_any_value()).into_py(py)) } fn std(&self, py: Python, ddof: u8) -> PyResult { Ok(Wrap( - self.series - .std_reduce(ddof) - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.std_reduce(ddof).map_err(PyPolarsErr::from))? .as_any_value(), ) .into_py(py)) @@ -124,9 +120,7 @@ impl PySeries { fn var(&self, py: Python, ddof: u8) -> PyResult { Ok(Wrap( - self.series - .var_reduce(ddof) - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.var_reduce(ddof).map_err(PyPolarsErr::from))? .as_any_value(), ) .into_py(py)) @@ -134,37 +128,31 @@ impl PySeries { fn sum(&self, py: Python) -> PyResult { Ok(Wrap( - self.series - .sum_reduce() - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.sum_reduce().map_err(PyPolarsErr::from))? 
.as_any_value(), ) .into_py(py)) } fn first(&self, py: Python) -> PyObject { - Wrap(self.series.first().as_any_value()).into_py(py) + Wrap(py.allow_threads(|| self.series.first()).as_any_value()).into_py(py) } fn last(&self, py: Python) -> PyObject { - Wrap(self.series.last().as_any_value()).into_py(py) + Wrap(py.allow_threads(|| self.series.last()).as_any_value()).into_py(py) } #[cfg(feature = "approx_unique")] fn approx_n_unique(&self, py: Python) -> PyResult { - Ok(self - .series - .approx_n_unique() - .map_err(PyPolarsErr::from)? + Ok(py + .allow_threads(|| self.series.approx_n_unique().map_err(PyPolarsErr::from))? .into_py(py)) } #[cfg(feature = "bitwise")] fn bitwise_and(&self, py: Python) -> PyResult { Ok(Wrap( - self.series - .and_reduce() - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.and_reduce().map_err(PyPolarsErr::from))? .as_any_value(), ) .into_py(py)) @@ -173,9 +161,7 @@ impl PySeries { #[cfg(feature = "bitwise")] fn bitwise_or(&self, py: Python) -> PyResult { Ok(Wrap( - self.series - .or_reduce() - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.or_reduce().map_err(PyPolarsErr::from))? .as_any_value(), ) .into_py(py)) @@ -184,9 +170,7 @@ impl PySeries { #[cfg(feature = "bitwise")] fn bitwise_xor(&self, py: Python) -> PyResult { Ok(Wrap( - self.series - .xor_reduce() - .map_err(PyPolarsErr::from)? + py.allow_threads(|| self.series.xor_reduce().map_err(PyPolarsErr::from))? .as_any_value(), ) .into_py(py)) diff --git a/crates/polars-python/src/series/arithmetic.rs b/crates/polars-python/src/series/arithmetic.rs index c5483aced1e7..62edd00a7656 100644 --- a/crates/polars-python/src/series/arithmetic.rs +++ b/crates/polars-python/src/series/arithmetic.rs @@ -6,28 +6,33 @@ use crate::error::PyPolarsErr; #[pymethods] impl PySeries { - fn add(&self, other: &PySeries) -> PyResult { - Ok((&self.series + &other.series) + fn add(&self, py: Python, other: &PySeries) -> PyResult { + Ok(py + .allow_threads(|| &self.series + &other.series) .map(Into::into) .map_err(PyPolarsErr::from)?) } - fn sub(&self, other: &PySeries) -> PyResult { - Ok((&self.series - &other.series) + fn sub(&self, py: Python, other: &PySeries) -> PyResult { + Ok(py + .allow_threads(|| &self.series - &other.series) .map(Into::into) .map_err(PyPolarsErr::from)?) } - fn div(&self, other: &PySeries) -> PyResult { - Ok((&self.series / &other.series) + fn div(&self, py: Python, other: &PySeries) -> PyResult { + Ok(py + .allow_threads(|| &self.series / &other.series) .map(Into::into) .map_err(PyPolarsErr::from)?) } - fn mul(&self, other: &PySeries) -> PyResult { - Ok((&self.series * &other.series) + fn mul(&self, py: Python, other: &PySeries) -> PyResult { + Ok(py + .allow_threads(|| &self.series * &other.series) .map(Into::into) .map_err(PyPolarsErr::from)?) } - fn rem(&self, other: &PySeries) -> PyResult { - Ok((&self.series % &other.series) + fn rem(&self, py: Python, other: &PySeries) -> PyResult { + Ok(py + .allow_threads(|| &self.series % &other.series) .map(Into::into) .map_err(PyPolarsErr::from)?) } @@ -37,8 +42,8 @@ macro_rules! impl_arithmetic { ($name:ident, $type:ty, $operand:tt) => { #[pymethods] impl PySeries { - fn $name(&self, other: $type) -> PyResult { - Ok((&self.series $operand other).into()) + fn $name(&self, py: Python, other: $type) -> PyResult { + Ok(py.allow_threads(|| {&self.series $operand other}).into()) } } }; @@ -103,8 +108,8 @@ macro_rules! 
impl_rhs_arithmetic { ($name:ident, $type:ty, $operand:ident) => { #[pymethods] impl PySeries { - fn $name(&self, other: $type) -> PyResult { - Ok(other.$operand(&self.series).into()) + fn $name(&self, py: Python, other: $type) -> PyResult { + Ok(py.allow_threads(|| other.$operand(&self.series)).into()) } } }; diff --git a/crates/polars-python/src/series/buffers.rs b/crates/polars-python/src/series/buffers.rs index 939159220277..e3b9402d4d47 100644 --- a/crates/polars-python/src/series/buffers.rs +++ b/crates/polars-python/src/series/buffers.rs @@ -82,9 +82,9 @@ impl PySeries { } /// Return the underlying values, validity, and offsets buffers as Series. - fn _get_buffers(&self) -> PyResult<(Self, Option, Option)> { + fn _get_buffers(&self, py: Python) -> PyResult<(Self, Option, Option)> { let s = &self.series; - match s.dtype().to_physical() { + py.allow_threads(|| match s.dtype().to_physical() { dt if dt.is_numeric() => get_buffers_from_primitive(s), DataType::Boolean => get_buffers_from_primitive(s), DataType::String => get_buffers_from_string(s), @@ -92,7 +92,7 @@ impl PySeries { let msg = format!("`_get_buffers` not implemented for `dtype` {dt}"); Err(PyTypeError::new_err(msg)) }, - } + }) } } @@ -253,6 +253,7 @@ impl PySeries { #[staticmethod] #[pyo3(signature = (dtype, data, validity=None))] unsafe fn _from_buffers( + py: Python, dtype: Wrap, data: Vec, validity: Option, @@ -320,7 +321,7 @@ impl PySeries { )), }; let values = series_to_buffer::(values); - from_buffers_string_impl(values, validity, offsets)? + py.allow_threads(|| from_buffers_string_impl(values, validity, offsets))? }, dt => { let msg = format!("`_from_buffers` not implemented for `dtype` {dt}"); diff --git a/crates/polars-python/src/series/comparison.rs b/crates/polars-python/src/series/comparison.rs index 7064edb7698a..2b7de37931f9 100644 --- a/crates/polars-python/src/series/comparison.rs +++ b/crates/polars-python/src/series/comparison.rs @@ -6,36 +6,45 @@ use crate::PySeries; #[pymethods] impl PySeries { - fn eq(&self, rhs: &PySeries) -> PyResult { - let s = self.series.equal(&rhs.series).map_err(PyPolarsErr::from)?; + fn eq(&self, py: Python, rhs: &PySeries) -> PyResult { + let s = py + .allow_threads(|| self.series.equal(&rhs.series)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } - fn neq(&self, rhs: &PySeries) -> PyResult { - let s = self - .series - .not_equal(&rhs.series) + fn neq(&self, py: Python, rhs: &PySeries) -> PyResult { + let s = py + .allow_threads(|| self.series.not_equal(&rhs.series)) .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } - fn gt(&self, rhs: &PySeries) -> PyResult { - let s = self.series.gt(&rhs.series).map_err(PyPolarsErr::from)?; + fn gt(&self, py: Python, rhs: &PySeries) -> PyResult { + let s = py + .allow_threads(|| self.series.gt(&rhs.series)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } - fn gt_eq(&self, rhs: &PySeries) -> PyResult { - let s = self.series.gt_eq(&rhs.series).map_err(PyPolarsErr::from)?; + fn gt_eq(&self, py: Python, rhs: &PySeries) -> PyResult { + let s = py + .allow_threads(|| self.series.gt_eq(&rhs.series)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } - fn lt(&self, rhs: &PySeries) -> PyResult { - let s = self.series.lt(&rhs.series).map_err(PyPolarsErr::from)?; + fn lt(&self, py: Python, rhs: &PySeries) -> PyResult { + let s = py + .allow_threads(|| self.series.lt(&rhs.series)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } - fn lt_eq(&self, rhs: &PySeries) -> PyResult { - let s = 
self.series.lt_eq(&rhs.series).map_err(PyPolarsErr::from)?; + fn lt_eq(&self, py: Python, rhs: &PySeries) -> PyResult { + let s = py + .allow_threads(|| self.series.lt_eq(&rhs.series)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } } @@ -44,8 +53,10 @@ macro_rules! impl_eq_num { ($name:ident, $type:ty) => { #[pymethods] impl PySeries { - fn $name(&self, rhs: $type) -> PyResult { - let s = self.series.equal(rhs).map_err(PyPolarsErr::from)?; + fn $name(&self, py: Python, rhs: $type) -> PyResult { + let s = py + .allow_threads(|| self.series.equal(rhs)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } } @@ -69,8 +80,10 @@ macro_rules! impl_neq_num { #[allow(clippy::nonstandard_macro_braces)] #[pymethods] impl PySeries { - fn $name(&self, rhs: $type) -> PyResult { - let s = self.series.not_equal(rhs).map_err(PyPolarsErr::from)?; + fn $name(&self, py: Python, rhs: $type) -> PyResult { + let s = py + .allow_threads(|| self.series.not_equal(rhs)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } } @@ -93,8 +106,10 @@ macro_rules! impl_gt_num { ($name:ident, $type:ty) => { #[pymethods] impl PySeries { - fn $name(&self, rhs: $type) -> PyResult { - let s = self.series.gt(rhs).map_err(PyPolarsErr::from)?; + fn $name(&self, py: Python, rhs: $type) -> PyResult { + let s = py + .allow_threads(|| self.series.gt(rhs)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } } @@ -117,8 +132,10 @@ macro_rules! impl_gt_eq_num { ($name:ident, $type:ty) => { #[pymethods] impl PySeries { - fn $name(&self, rhs: $type) -> PyResult { - let s = self.series.gt_eq(rhs).map_err(PyPolarsErr::from)?; + fn $name(&self, py: Python, rhs: $type) -> PyResult { + let s = py + .allow_threads(|| self.series.gt_eq(rhs)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } } @@ -142,8 +159,10 @@ macro_rules! impl_lt_num { #[allow(clippy::nonstandard_macro_braces)] #[pymethods] impl PySeries { - fn $name(&self, rhs: $type) -> PyResult { - let s = self.series.lt(rhs).map_err(PyPolarsErr::from)?; + fn $name(&self, py: Python, rhs: $type) -> PyResult { + let s = py + .allow_threads(|| self.series.lt(rhs)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } } @@ -166,8 +185,10 @@ macro_rules! impl_lt_eq_num { ($name:ident, $type:ty) => { #[pymethods] impl PySeries { - fn $name(&self, rhs: $type) -> PyResult { - let s = self.series.lt_eq(rhs).map_err(PyPolarsErr::from)?; + fn $name(&self, py: Python, rhs: $type) -> PyResult { + let s = py + .allow_threads(|| self.series.lt_eq(rhs)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } } @@ -226,12 +247,14 @@ macro_rules! 
impl_decimal { ($name:ident, $method:ident) => { #[pymethods] impl PySeries { - fn $name(&self, rhs: PyDecimal) -> PyResult { + fn $name(&self, py: Python, rhs: PyDecimal) -> PyResult { let rhs = Series::new( PlSmallStr::from_static("decimal"), &[AnyValue::Decimal(rhs.0, rhs.1)], ); - let s = self.series.$method(&rhs).map_err(PyPolarsErr::from)?; + let s = py + .allow_threads(|| self.series.$method(&rhs)) + .map_err(PyPolarsErr::from)?; Ok(s.into_series().into()) } } diff --git a/crates/polars-python/src/series/construction.rs b/crates/polars-python/src/series/construction.rs index 5935f1e7b0ce..e9dbdf264d8c 100644 --- a/crates/polars-python/src/series/construction.rs +++ b/crates/polars-python/src/series/construction.rs @@ -71,10 +71,11 @@ impl PySeries { if nan_is_null { let array = array.readonly(); let vals = array.as_slice().unwrap(); - let ca: Float32Chunked = vals - .iter() - .map(|&val| if f32::is_nan(val) { None } else { Some(val) }) - .collect_trusted(); + let ca: Float32Chunked = py.allow_threads(|| { + vals.iter() + .map(|&val| if f32::is_nan(val) { None } else { Some(val) }) + .collect_trusted() + }); ca.with_name(name.into()).into_series().into() } else { mmap_numpy_array(py, name, array) @@ -86,10 +87,11 @@ impl PySeries { if nan_is_null { let array = array.readonly(); let vals = array.as_slice().unwrap(); - let ca: Float64Chunked = vals - .iter() - .map(|&val| if f64::is_nan(val) { None } else { Some(val) }) - .collect_trusted(); + let ca: Float64Chunked = py.allow_threads(|| { + vals.iter() + .map(|&val| if f64::is_nan(val) { None } else { Some(val) }) + .collect_trusted() + }); ca.with_name(name.into()).into_series().into() } else { mmap_numpy_array(py, name, array) diff --git a/crates/polars-python/src/series/export.rs b/crates/polars-python/src/series/export.rs index 886b6114427a..959b2dd47293 100644 --- a/crates/polars-python/src/series/export.rs +++ b/crates/polars-python/src/series/export.rs @@ -147,17 +147,11 @@ impl PySeries { /// Return the underlying Arrow array. 
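/// The Series is rechunked in place first so that chunk 0 spans all values.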
#[allow(clippy::wrong_self_convention)] - fn to_arrow(&mut self, compat_level: PyCompatLevel) -> PyResult { - self.rechunk(true); - Python::with_gil(|py| { - let pyarrow = py.import_bound("pyarrow")?; + fn to_arrow(&mut self, py: Python, compat_level: PyCompatLevel) -> PyResult { + self.rechunk(py, true); + let pyarrow = py.import_bound("pyarrow")?; - interop::arrow::to_py::to_py_array( - self.series.to_arrow(0, compat_level.0), - py, - &pyarrow, - ) - }) + interop::arrow::to_py::to_py_array(self.series.to_arrow(0, compat_level.0), py, &pyarrow) } #[allow(unused_variables)] diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs index b14285e77aa0..3134f5354f09 100644 --- a/crates/polars-python/src/series/general.rs +++ b/crates/polars-python/src/series/general.rs @@ -16,9 +16,9 @@ use crate::py_modules::POLARS; #[pymethods] impl PySeries { - fn struct_unnest(&self) -> PyResult { + fn struct_unnest(&self, py: Python) -> PyResult { let ca = self.series.struct_().map_err(PyPolarsErr::from)?; - let df: DataFrame = ca.clone().unnest(); + let df: DataFrame = py.allow_threads(|| ca.clone().unnest()); Ok(df.into()) } @@ -56,9 +56,9 @@ impl PySeries { Ok(ca.get_rev_map().is_local()) } - pub fn cat_to_local(&self) -> PyResult { + pub fn cat_to_local(&self, py: Python) -> PyResult { let ca = self.series.categorical().map_err(PyPolarsErr::from)?; - Ok(ca.to_local().into_series().into()) + Ok(py.allow_threads(|| ca.to_local().into_series().into())) } fn estimated_size(&self) -> usize { @@ -78,15 +78,14 @@ impl PySeries { } #[cfg(feature = "dtype-array")] - fn reshape(&self, dims: Vec) -> PyResult { + fn reshape(&self, py: Python, dims: Vec) -> PyResult { let dims = dims .into_iter() .map(ReshapeDimension::new) .collect::>(); - let out = self - .series - .reshape_array(&dims) + let out = py + .allow_threads(|| self.series.reshape_array(&dims)) .map_err(PyPolarsErr::from)?; Ok(out.into()) } @@ -114,8 +113,8 @@ impl PySeries { } } - pub fn rechunk(&mut self, in_place: bool) -> Option { - let series = self.series.rechunk(); + pub fn rechunk(&mut self, py: Python, in_place: bool) -> Option { + let series = py.allow_threads(|| self.series.rechunk()); if in_place { self.series = series; None @@ -167,16 +166,23 @@ impl PySeries { self.get_index(py, index) } - fn bitand(&self, other: &PySeries) -> PyResult { - let out = (&self.series & &other.series).map_err(PyPolarsErr::from)?; + fn bitand(&self, py: Python, other: &PySeries) -> PyResult { + let out = py + .allow_threads(|| &self.series & &other.series) + .map_err(PyPolarsErr::from)?; Ok(out.into()) } - fn bitor(&self, other: &PySeries) -> PyResult { - let out = (&self.series | &other.series).map_err(PyPolarsErr::from)?; + + fn bitor(&self, py: Python, other: &PySeries) -> PyResult { + let out = py + .allow_threads(|| &self.series | &other.series) + .map_err(PyPolarsErr::from)?; Ok(out.into()) } - fn bitxor(&self, other: &PySeries) -> PyResult { - let out = (&self.series ^ &other.series).map_err(PyPolarsErr::from)?; + fn bitxor(&self, py: Python, other: &PySeries) -> PyResult { + let out = py + .allow_threads(|| &self.series ^ &other.series) + .map_err(PyPolarsErr::from)?; Ok(out.into()) } @@ -217,48 +223,58 @@ impl PySeries { Ok(()) } - fn extend(&mut self, other: &PySeries) -> PyResult<()> { - self.series - .extend(&other.series) + fn extend(&mut self, py: Python, other: &PySeries) -> PyResult<()> { + py.allow_threads(|| self.series.extend(&other.series)) .map_err(PyPolarsErr::from)?; Ok(()) } - fn 
new_from_index(&self, index: usize, length: usize) -> PyResult { + fn new_from_index(&self, py: Python, index: usize, length: usize) -> PyResult { if index >= self.series.len() { Err(PyValueError::new_err("index is out of bounds")) } else { - Ok(self.series.new_from_index(index, length).into()) + Ok(py.allow_threads(|| self.series.new_from_index(index, length).into())) } } - fn filter(&self, filter: &PySeries) -> PyResult { + fn filter(&self, py: Python, filter: &PySeries) -> PyResult { let filter_series = &filter.series; if let Ok(ca) = filter_series.bool() { - let series = self.series.filter(ca).map_err(PyPolarsErr::from)?; + let series = py + .allow_threads(|| self.series.filter(ca)) + .map_err(PyPolarsErr::from)?; Ok(PySeries { series }) } else { Err(PyRuntimeError::new_err("Expected a boolean mask")) } } - fn sort(&mut self, descending: bool, nulls_last: bool, multithreaded: bool) -> PyResult { - Ok(self - .series - .sort( - SortOptions::default() - .with_order_descending(descending) - .with_nulls_last(nulls_last) - .with_multithreaded(multithreaded), - ) + fn sort( + &mut self, + py: Python, + descending: bool, + nulls_last: bool, + multithreaded: bool, + ) -> PyResult { + Ok(py + .allow_threads(|| { + self.series.sort( + SortOptions::default() + .with_order_descending(descending) + .with_nulls_last(nulls_last) + .with_multithreaded(multithreaded), + ) + }) .map_err(PyPolarsErr::from)? .into()) } - fn gather_with_series(&self, indices: &PySeries) -> PyResult { - let indices = indices.series.idx().map_err(PyPolarsErr::from)?; - let s = self.series.take(indices).map_err(PyPolarsErr::from)?; - Ok(s.into()) + fn gather_with_series(&self, py: Python, indices: &PySeries) -> PyResult { + py.allow_threads(|| { + let indices = indices.series.idx().map_err(PyPolarsErr::from)?; + let s = self.series.take(indices).map_err(PyPolarsErr::from)?; + Ok(s.into()) + }) } fn null_count(&self) -> PyResult { @@ -271,6 +287,7 @@ impl PySeries { fn equals( &self, + py: Python, other: &PySeries, check_dtypes: bool, check_names: bool, @@ -283,9 +300,9 @@ impl PySeries { return false; } if null_equal { - self.series.equals_missing(&other.series) + py.allow_threads(|| self.series.equals_missing(&other.series)) } else { - self.series.equals(&other.series) + py.allow_threads(|| self.series.equals(&other.series)) } } @@ -300,8 +317,10 @@ impl PySeries { /// Rechunk and return a pointer to the start of the Series. 
/// Only implemented for numeric types - fn as_single_ptr(&mut self) -> PyResult { - let ptr = self.series.as_single_ptr().map_err(PyPolarsErr::from)?; + fn as_single_ptr(&mut self, py: Python) -> PyResult { + let ptr = py + .allow_threads(|| self.series.as_single_ptr()) + .map_err(PyPolarsErr::from)?; Ok(ptr) } @@ -309,20 +328,23 @@ impl PySeries { self.series.clone().into() } - fn zip_with(&self, mask: &PySeries, other: &PySeries) -> PyResult { + fn zip_with(&self, py: Python, mask: &PySeries, other: &PySeries) -> PyResult { let mask = mask.series.bool().map_err(PyPolarsErr::from)?; - let s = self - .series - .zip_with(mask, &other.series) + let s = py + .allow_threads(|| self.series.zip_with(mask, &other.series)) .map_err(PyPolarsErr::from)?; Ok(s.into()) } #[pyo3(signature = (separator, drop_first=false))] - fn to_dummies(&self, separator: Option<&str>, drop_first: bool) -> PyResult { - let df = self - .series - .to_dummies(separator, drop_first) + fn to_dummies( + &self, + py: Python, + separator: Option<&str>, + drop_first: bool, + ) -> PyResult { + let df = py + .allow_threads(|| self.series.to_dummies(separator, drop_first)) .map_err(PyPolarsErr::from)?; Ok(df.into()) } @@ -332,18 +354,22 @@ impl PySeries { Some(ca.get_as_series(index)?.into()) } - fn n_unique(&self) -> PyResult { - let n = self.series.n_unique().map_err(PyPolarsErr::from)?; + fn n_unique(&self, py: Python) -> PyResult { + let n = py + .allow_threads(|| self.series.n_unique()) + .map_err(PyPolarsErr::from)?; Ok(n) } - fn floor(&self) -> PyResult { - let s = self.series.floor().map_err(PyPolarsErr::from)?; + fn floor(&self, py: Python) -> PyResult { + let s = py + .allow_threads(|| self.series.floor()) + .map_err(PyPolarsErr::from)?; Ok(s.into()) } - fn shrink_to_fit(&mut self) { - self.series.shrink_to_fit(); + fn shrink_to_fit(&mut self, py: Python) { + py.allow_threads(|| self.series.shrink_to_fit()); } fn dot(&self, other: &PySeries, py: Python) -> PyResult { @@ -358,15 +384,11 @@ impl PySeries { } let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() { - (&self.series * &other.series) - .map_err(PyPolarsErr::from)? - .sum::() + py.allow_threads(|| (&self.series * &other.series)?.sum::()) .map_err(PyPolarsErr::from)? .into() } else { - (&self.series * &other.series) - .map_err(PyPolarsErr::from)? - .sum::() + py.allow_threads(|| (&self.series * &other.series)?.sum::()) .map_err(PyPolarsErr::from)? 
.into() }; @@ -413,20 +435,27 @@ impl PySeries { } } - fn skew(&self, bias: bool) -> PyResult> { - let out = self.series.skew(bias).map_err(PyPolarsErr::from)?; + fn skew(&self, py: Python, bias: bool) -> PyResult> { + let out = py + .allow_threads(|| self.series.skew(bias)) + .map_err(PyPolarsErr::from)?; Ok(out) } - fn kurtosis(&self, fisher: bool, bias: bool) -> PyResult> { - let out = self - .series - .kurtosis(fisher, bias) + fn kurtosis(&self, py: Python, fisher: bool, bias: bool) -> PyResult> { + let out = py + .allow_threads(|| self.series.kurtosis(fisher, bias)) .map_err(PyPolarsErr::from)?; Ok(out) } - fn cast(&self, dtype: Wrap, strict: bool, wrap_numerical: bool) -> PyResult { + fn cast( + &self, + py: Python, + dtype: Wrap, + strict: bool, + wrap_numerical: bool, + ) -> PyResult { let options = if wrap_numerical { CastOptions::Overflowing } else if strict { @@ -436,7 +465,7 @@ impl PySeries { }; let dtype = dtype.0; - let out = self.series.cast_with_options(&dtype, options); + let out = py.allow_threads(|| self.series.cast_with_options(&dtype, options)); let out = out.map_err(PyPolarsErr::from)?; Ok(out.into()) } @@ -451,38 +480,44 @@ impl PySeries { }) } - fn is_sorted(&self, descending: bool, nulls_last: bool) -> PyResult { + fn is_sorted(&self, py: Python, descending: bool, nulls_last: bool) -> PyResult { let options = SortOptions { descending, nulls_last, multithreaded: true, maintain_order: false, + limit: None, }; - Ok(self.series.is_sorted(options).map_err(PyPolarsErr::from)?) + Ok(py + .allow_threads(|| self.series.is_sorted(options)) + .map_err(PyPolarsErr::from)?) } fn clear(&self) -> Self { self.series.clear().into() } - fn head(&self, n: usize) -> Self { - self.series.head(Some(n)).into() + fn head(&self, py: Python, n: usize) -> Self { + py.allow_threads(|| self.series.head(Some(n))).into() } - fn tail(&self, n: usize) -> Self { - self.series.tail(Some(n)).into() + fn tail(&self, py: Python, n: usize) -> Self { + py.allow_threads(|| self.series.tail(Some(n))).into() } fn value_counts( &self, + py: Python, sort: bool, parallel: bool, name: String, normalize: bool, ) -> PyResult { - let out = self - .series - .value_counts(sort, parallel, name.into(), normalize) + let out = py + .allow_threads(|| { + self.series + .value_counts(sort, parallel, name.into(), normalize) + }) .map_err(PyPolarsErr::from)?; Ok(out.into()) } @@ -493,8 +528,10 @@ impl PySeries { self.series.slice(offset, length).into() } - pub fn not_(&self) -> PyResult { - let out = polars_ops::series::negate_bitwise(&self.series).map_err(PyPolarsErr::from)?; + pub fn not_(&self, py: Python) -> PyResult { + let out = py + .allow_threads(|| polars_ops::series::negate_bitwise(&self.series)) + .map_err(PyPolarsErr::from)?; Ok(out.into()) } } @@ -515,8 +552,15 @@ macro_rules! 
impl_set_with_mask { #[pymethods] impl PySeries { #[pyo3(signature = (filter, value))] - fn $name(&self, filter: &PySeries, value: Option<$native>) -> PyResult { - let series = $name(&self.series, filter, value).map_err(PyPolarsErr::from)?; + fn $name( + &self, + py: Python, + filter: &PySeries, + value: Option<$native>, + ) -> PyResult { + let series = py + .allow_threads(|| $name(&self.series, filter, value)) + .map_err(PyPolarsErr::from)?; Ok(Self::new(series)) } } diff --git a/crates/polars-python/src/series/scatter.rs b/crates/polars-python/src/series/scatter.rs index 97df60ef205b..798cd189a9b6 100644 --- a/crates/polars-python/src/series/scatter.rs +++ b/crates/polars-python/src/series/scatter.rs @@ -7,11 +7,12 @@ use crate::error::PyPolarsErr; #[pymethods] impl PySeries { - fn scatter(&mut self, idx: PySeries, values: PySeries) -> PyResult<()> { + fn scatter(&mut self, py: Python, idx: PySeries, values: PySeries) -> PyResult<()> { // we take the value because we want a ref count of 1 so that we can // have mutable access cheaply via _get_inner_mut(). let s = std::mem::take(&mut self.series); - match scatter(s, &idx.series, &values.series) { + let result = py.allow_threads(|| scatter(s, &idx.series, &values.series)); + match result { Ok(out) => { self.series = out; Ok(()) diff --git a/crates/polars-sql/src/sql_expr.rs b/crates/polars-sql/src/sql_expr.rs index 9e068efb6064..a2ada46e1c68 100644 --- a/crates/polars-sql/src/sql_expr.rs +++ b/crates/polars-sql/src/sql_expr.rs @@ -919,7 +919,7 @@ impl SQLExprVisitor<'_> { } let else_res = match else_result { Some(else_res) => self.visit_expr(else_res)?, - None => polars_bail!(SQLSyntax: "ELSE expression is required"), + None => lit(Null), // ELSE clause is optional; when omitted, it is implicitly NULL }; if let Some(operand_expr) = operand { let first_operand_expr = self.visit_expr(operand_expr)?; diff --git a/crates/polars-stream/Cargo.toml b/crates/polars-stream/Cargo.toml index c40f477ff741..f0b3b1c30e35 100644 --- a/crates/polars-stream/Cargo.toml +++ b/crates/polars-stream/Cargo.toml @@ -28,6 +28,7 @@ polars-core = { workspace = true } polars-error = { workspace = true } polars-expr = { workspace = true } polars-mem-engine = { workspace = true } +polars-ops = { workspace = true } polars-parquet = { workspace = true } polars-plan = { workspace = true } diff --git a/crates/polars-stream/src/nodes/io_sources/ipc.rs b/crates/polars-stream/src/nodes/io_sources/ipc.rs new file mode 100644 index 000000000000..3a83c8e3132c --- /dev/null +++ b/crates/polars-stream/src/nodes/io_sources/ipc.rs @@ -0,0 +1,557 @@ +use std::cmp::Reverse; +use std::io::Cursor; +use std::ops::Range; +use std::sync::Arc; + +use polars_core::config; +use polars_core::frame::DataFrame; +use polars_core::prelude::{Column, DataType}; +use polars_core::scalar::Scalar; +use polars_core::utils::arrow::array::TryExtend; +use polars_core::utils::arrow::io::ipc::read::{ + prepare_projection, read_file_metadata, FileMetadata, FileReader, ProjectionInfo, +}; +use polars_error::{ErrString, PolarsError, PolarsResult}; +use polars_expr::prelude::PhysicalExpr; +use polars_expr::state::ExecutionState; +use polars_io::cloud::CloudOptions; +use polars_io::ipc::IpcScanOptions; +use polars_io::utils::columns_to_projection; +use polars_io::RowIndex; +use polars_plan::plans::hive::HivePartitions; +use polars_plan::plans::{FileInfo, ScanSources}; +use polars_plan::prelude::FileScanOptions; +use polars_utils::mmap::MemSlice; +use polars_utils::pl_str::PlSmallStr; +use 
polars_utils::priority::Priority;
+use polars_utils::IdxSize;
+
+use crate::async_primitives::distributor_channel::distributor_channel;
+use crate::async_primitives::linearizer::Linearizer;
+use crate::morsel::{get_ideal_morsel_size, SourceToken};
+use crate::nodes::{
+    ComputeNode, JoinHandle, Morsel, MorselSeq, PortState, TaskPriority, TaskScope,
+};
+use crate::pipe::{RecvPort, SendPort};
+use crate::{DEFAULT_DISTRIBUTOR_BUFFER_SIZE, DEFAULT_LINEARIZER_BUFFER_SIZE};
+
+const ROW_COUNT_OVERFLOW_ERR: PolarsError = PolarsError::ComputeError(ErrString::new_static(
+    "\
+IPC file produces more than 2^32 rows; \
+consider compiling with polars-bigidx feature (polars-u64-idx package on python)",
+));
+
+pub struct IpcSourceNode {
+    sources: ScanSources,
+
+    config: IpcSourceNodeConfig,
+    num_pipelines: usize,
+
+    /// At every phase we need to be able to continue from where we left off, so we save the state
+    /// of the Walker task.
+    state: IpcSourceNodeState,
+}
+
+pub struct IpcSourceNodeConfig {
+    row_index: Option<RowIndex>,
+    projection_info: Option<ProjectionInfo>,
+
+    rechunk: bool,
+    include_file_paths: Option<PlSmallStr>,
+
+    first_metadata: FileMetadata,
+}
+
+pub struct IpcSourceNodeState {
+    morsel_seq: u64,
+    row_idx_offset: IdxSize,
+
+    slice: Range<usize>,
+
+    source_idx: usize,
+    source: Option<Source>,
+}
+
+pub struct Source {
+    file_path: Option<Arc<str>>,
+
+    memslice: Arc<MemSlice>,
+    metadata: Arc<FileMetadata>,
+
+    block_offset: usize,
+}
+
+impl IpcSourceNode {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        sources: ScanSources,
+        _file_info: FileInfo,
+        _hive_parts: Option<Arc<Vec<HivePartitions>>>, // @TODO
+        predicate: Option<Arc<dyn PhysicalExpr>>,
+        options: IpcScanOptions,
+        _cloud_options: Option<CloudOptions>,
+        file_options: FileScanOptions,
+        mut first_metadata: Option<FileMetadata>,
+    ) -> PolarsResult<Self> {
+        // These should have all been removed during lower_ir
+        assert!(predicate.is_none());
+        assert!(!sources.is_empty());
+
+        let IpcScanOptions = options;
+
+        let FileScanOptions {
+            slice,
+            with_columns,
+            cache: _, // @TODO
+            row_index,
+            rechunk,
+            file_counter: _, // @TODO
+            hive_options: _, // @TODO
+            glob: _, // @TODO
+            include_file_paths,
+            allow_missing_columns: _, // @TODO
+        } = file_options;
+
+        let first_metadata = match first_metadata.take() {
+            Some(md) => md,
+            None => {
+                let source = sources.iter().next().unwrap();
+                let source = source.to_memslice()?;
+                read_file_metadata(&mut std::io::Cursor::new(&*source))?
+            },
+        };
+
+        let projection = with_columns
+            .as_ref()
+            .map(|cols| columns_to_projection(cols, &first_metadata.schema))
+            .transpose()?;
+        let projection_info = projection
+            .as_ref()
+            .map(|p| prepare_projection(&first_metadata.schema, p.clone()));
+
+        let state = IpcSourceNodeState {
+            morsel_seq: 0,
+            row_idx_offset: row_index.as_ref().map_or(0, |ri| ri.offset),
+
+            // Always create a slice. If no slice was given, just make the biggest slice possible.
+            slice: slice.map_or(0..usize::MAX, |(offset, length)| {
+                let offset = offset as usize;
+                offset..offset + length
+            }),
+
+            source_idx: 0,
+            source: None,
+        };
+
+        Ok(IpcSourceNode {
+            sources,
+
+            config: IpcSourceNodeConfig {
+                row_index,
+                projection_info,
+
+                rechunk,
+                include_file_paths,
+
+                first_metadata,
+            },
+
+            num_pipelines: 0,
+
+            state,
+        })
+    }
+}
+
+/// Move `slice` forward by `n` and return the slice until then.
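+/// The walker task uses this to carve per-batch row ranges out of the remaining global slice.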
+fn slice_take(slice: &mut Range<usize>, n: usize) -> Range<usize> {
+    let offset = slice.start;
+    let length = slice.len();
+
+    assert!(offset < n);
+
+    let chunk_length = (n - offset).min(length);
+    let rng = offset..offset + chunk_length;
+    *slice = 0..length - chunk_length;
+
+    rng
+}
+
+fn get_max_morsel_size() -> usize {
+    std::env::var("POLARS_STREAMING_IPC_SOURCE_MAX_MORSEL_SIZE")
+        .map_or_else(
+            |_| get_ideal_morsel_size(),
+            |v| {
+                v.parse::<usize>().expect(
+                    "POLARS_STREAMING_IPC_SOURCE_MAX_MORSEL_SIZE does not contain valid size",
+                )
+            },
+        )
+        .max(1)
+}
+
+impl ComputeNode for IpcSourceNode {
+    fn name(&self) -> &str {
+        "ipc_source"
+    }
+
+    fn initialize(&mut self, num_pipelines: usize) {
+        self.num_pipelines = num_pipelines;
+    }
+
+    fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> {
+        assert!(recv.is_empty());
+        assert_eq!(send.len(), 1);
+
+        if self.state.slice.is_empty() || self.state.source_idx >= self.sources.len() {
+            send[0] = PortState::Done;
+        }
+
+        if send[0] != PortState::Done {
+            send[0] = PortState::Ready;
+        }
+
+        Ok(())
+    }
+
+    fn spawn<'env, 's>(
+        &'env mut self,
+        scope: &'s TaskScope<'s, 'env>,
+        recv_ports: &mut [Option<RecvPort<'_>>],
+        send_ports: &mut [Option<SendPort<'_>>],
+        _state: &'s ExecutionState,
+        join_handles: &mut Vec<JoinHandle<PolarsResult<()>>>,
+    ) {
+        assert!(recv_ports.is_empty());
+        assert_eq!(send_ports.len(), 1);
+
+        // Split size for morsels.
+        let max_morsel_size = get_max_morsel_size();
+        let source_token = SourceToken::new();
+
+        let num_pipelines = self.num_pipelines;
+        let config = &self.config;
+        let sources = &self.sources;
+        let state = &mut self.state;
+
+        /// Messages sent from Walker task to Decoder tasks.
+        struct BatchMessage {
+            memslice: Arc<MemSlice>,
+            metadata: Arc<FileMetadata>,
+            file_path: Option<Arc<str>>,
+            row_idx_offset: IdxSize,
+            slice: Range<usize>,
+            block_range: Range<usize>,
+            morsel_seq_base: u64,
+        }
+
+        // Walker task -> Decoder tasks.
+        let (mut batch_tx, batch_rxs) =
+            distributor_channel::<BatchMessage>(num_pipelines, DEFAULT_DISTRIBUTOR_BUFFER_SIZE);
+        // Decoder tasks -> Distributor task.
+        let (mut decoded_rx, decoded_tx) = Linearizer::<Priority<Reverse<MorselSeq>, Morsel>>::new(
+            num_pipelines,
+            DEFAULT_LINEARIZER_BUFFER_SIZE,
+        );
+        // Distributor task -> output.
+        let mut sender = send_ports[0].take().unwrap().serial();
+
+        // Distributor task.
+        //
+        // Shuffles morsels from `n` producers amongst `n` consumers.
+        //
+        // If record batches in the source IPC file are large, one decoder might produce many
+        // morsels at the same time. At the same time, other decoders might not produce anything.
+        // Therefore, we would like to distribute the output of a single decoder task over the
+        // available output pipelines.
+        join_handles.push(scope.spawn_task(TaskPriority::High, async move {
+            while let Some(morsel) = decoded_rx.get().await {
+                if sender.send(morsel.1).await.is_err() {
+                    break;
+                }
+            }
+            PolarsResult::Ok(())
+        }));
+
+        // Decoder tasks.
+        //
+        // Each task takes an IPC file and a certain number of blocks and decodes each block as a
+        // record batch. Then, all record batches are concatenated into a DataFrame. If the
+        // resulting DataFrame is too large, which happens when we have one very large block, the
+        // DataFrame is split into smaller pieces and spread among the pipelines.
+        let decoder_tasks = decoded_tx.into_iter().zip(batch_rxs)
+            .map(|(mut send, mut rx)| {
+                let source_token = source_token.clone();
+                scope.spawn_task(TaskPriority::Low, async move {
+                    // Amortize allocations.
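+                    // (These scratch buffers and the projection info are handed to each
+                    // `FileReader` and reclaimed afterwards via `take_scratches` and
+                    // `take_projection_info`, so allocations are reused across record batches.)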
+                    let mut data_scratch = Vec::new();
+                    let mut message_scratch = Vec::new();
+                    let mut projection_info = config.projection_info.clone();
+
+                    let schema = projection_info.as_ref().map_or(config.first_metadata.schema.as_ref(), |ProjectionInfo { schema, .. }| schema);
+                    let pl_schema = schema
+                        .iter()
+                        .map(|(n, f)| (n.clone(), DataType::from_arrow(&f.dtype, true)))
+                        .collect();
+
+                    while let Ok(m) = rx.recv().await {
+                        let BatchMessage {
+                            memslice: source,
+                            metadata,
+                            file_path,
+                            row_idx_offset,
+                            slice,
+                            morsel_seq_base,
+                            block_range,
+                        } = m;
+
+                        let mut reader = FileReader::new_with_projection_info(
+                            Cursor::new(source.as_ref()),
+                            metadata.as_ref().clone(),
+                            std::mem::take(&mut projection_info),
+                            None,
+                        );
+                        reader.set_current_block(block_range.start);
+                        reader.set_scratches((
+                            std::mem::take(&mut data_scratch),
+                            std::mem::take(&mut message_scratch),
+                        ));
+
+                        // Create the DataFrame with the appropriate schema and append all the record
+                        // batches to it. This will perform schema validation as well.
+                        let mut df = DataFrame::empty_with_schema(&pl_schema);
+                        df.try_extend(reader.by_ref().take(block_range.len()))?;
+
+                        df = df.slice(slice.start as i64, slice.len());
+
+                        if config.rechunk {
+                            df.rechunk_mut();
+                        }
+
+                        if let Some(RowIndex { name, offset: _ }) = &config.row_index {
+                            let offset = row_idx_offset + slice.start as IdxSize;
+                            df = df.with_row_index(name.clone(), Some(offset))?;
+                        }
+
+                        if let Some(col) = config.include_file_paths.as_ref() {
+                            let file_path = file_path.unwrap();
+                            let file_path = Scalar::from(PlSmallStr::from(file_path.as_ref()));
+                            df.with_column(Column::new_scalar(
+                                col.clone(),
+                                file_path,
+                                df.height(),
+                            ))?;
+                        }
+
+                        // If the block is very large, we want to split the block amongst the
+                        // pipelines. That will at least allow some parallelism.
+                        if df.height() > max_morsel_size && config::verbose() {
+                            eprintln!("IPC source encountered a (too) large record batch of {} rows. Splitting and continuing.", df.height());
+                        }
+                        for i in 0..df.height().div_ceil(max_morsel_size) {
+                            let morsel = df.slice((i * max_morsel_size) as i64, max_morsel_size);
+                            let seq = MorselSeq::new(morsel_seq_base + i as u64);
+                            let morsel = Morsel::new(
+                                morsel,
+                                seq,
+                                source_token.clone(),
+                            );
+                            if send.insert(Priority(Reverse(seq), morsel)).await.is_err() {
+                                break;
+                            }
+                        }
+
+                        (data_scratch, message_scratch) = reader.take_scratches();
+                        projection_info = reader.take_projection_info();
+                    }
+
+                    PolarsResult::Ok(())
+                })
+            })
+            .collect::<Vec<_>>();
+
+        // Walker task.
+        //
+        // Walks all the sources and supplies block ranges to the decoder tasks.
+        join_handles.push(scope.spawn_task(TaskPriority::Low, async move {
+            struct Batch {
+                row_idx_offset: IdxSize,
+                block_start: usize,
+                num_rows: usize,
+            }
+
+            // Batch completion parameters
+            let batch_size_limit = get_ideal_morsel_size();
+            let sliced_batch_size_limit = state.slice.len().div_ceil(num_pipelines);
+            let batch_block_limit = if sources.len() >= num_pipelines {
+                // If there are more files than decoder tasks, try to subdivide the files instead
+                // of the blocks.
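+                // (`usize::MAX` effectively disables block-based batch splitting here;
+                // handing whole files to different decoders then provides the parallelism.)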
+                usize::MAX
+            } else {
+                config.first_metadata.blocks.len().div_ceil(num_pipelines)
+            };
+
+            // Amortize allocations
+            let mut data_scratch = Vec::new();
+            let mut message_scratch = Vec::new();
+            let mut projection_info = config.projection_info.clone();
+
+            'source_loop: while !state.slice.is_empty() {
+                let source = match state.source {
+                    Some(ref mut source) => source,
+                    None => {
+                        let Some(source) = sources.get(state.source_idx) else {
+                            break;
+                        };
+
+                        let file_path: Option<Arc<str>> = config
+                            .include_file_paths
+                            .as_ref()
+                            .map(|_| source.to_include_path_name().into());
+                        let memslice = source.to_memslice()?;
+                        let metadata = if state.source_idx == 0 {
+                            config.first_metadata.clone()
+                        } else {
+                            read_file_metadata(&mut std::io::Cursor::new(memslice.as_ref()))?
+                        };
+
+                        state.source.insert(Source {
+                            file_path,
+                            memslice: Arc::new(memslice),
+                            metadata: Arc::new(metadata),
+                            block_offset: 0,
+                        })
+                    },
+                };
+
+                let mut reader = FileReader::new_with_projection_info(
+                    Cursor::new(source.memslice.as_ref()),
+                    source.metadata.as_ref().clone(),
+                    std::mem::take(&mut projection_info),
+                    None,
+                );
+                reader.set_current_block(source.block_offset);
+                reader.set_scratches((
+                    std::mem::take(&mut data_scratch),
+                    std::mem::take(&mut message_scratch),
+                ));
+
+                if state.slice.start > 0 {
+                    // Skip over all blocks that the slice would skip anyway.
+                    let new_offset = reader.skip_blocks_till_limit(state.slice.start as u64)?;
+
+                    state.row_idx_offset += (state.slice.start as u64 - new_offset) as IdxSize;
+                    state.slice = new_offset as usize..new_offset as usize + state.slice.len();
+
+                    // If we skip the entire file, don't even try to read from it.
+                    if reader.get_current_block() == reader.metadata().blocks.len() {
+                        (data_scratch, message_scratch) = reader.take_scratches();
+                        projection_info = reader.take_projection_info();
+                        state.source.take();
+                        state.source_idx += 1;
+                        continue;
+                    }
+                }
+
+                let mut batch = Batch {
+                    row_idx_offset: state.row_idx_offset,
+                    block_start: reader.get_current_block(),
+                    num_rows: 0,
+                };
+
+                // We don't yet want to commit these values to the state in case this batch gets
+                // cancelled.
+                let mut uncommitted_slice = state.slice.clone();
+                let mut uncommitted_row_idx_offset = state.row_idx_offset;
+                while !state.slice.is_empty() {
+                    let mut is_batch_complete = false;
+
+                    match reader.next_record_batch() {
+                        None if batch.num_rows == 0 => break,
+
+                        // If we have no more record batches available, we want to send what is
+                        // left.
+                        None => is_batch_complete = true,
+                        Some(record_batch) => {
+                            let rb_num_rows = record_batch?.length()? as usize;
+                            batch.num_rows += rb_num_rows;
+
+                            // We need to ensure that we are not overflowing the IdxSize maximum
+                            // capacity.
+                            let rb_num_rows = IdxSize::try_from(rb_num_rows)
+                                .map_err(|_| ROW_COUNT_OVERFLOW_ERR)?;
+                            uncommitted_row_idx_offset = uncommitted_row_idx_offset
+                                .checked_add(rb_num_rows)
+                                .ok_or(ROW_COUNT_OVERFLOW_ERR)?;
+                        },
+                    }
+
+                    let current_block = reader.get_current_block();
+
+                    // Subdivide into batches for large files.
+                    is_batch_complete |= batch.num_rows >= batch_size_limit;
+                    // Subdivide into batches if the file is sliced.
+                    is_batch_complete |= batch.num_rows >= sliced_batch_size_limit;
+                    // Subdivide into batches for small files.
+                    is_batch_complete |= current_block - batch.block_start >= batch_block_limit;
+
+                    // Batch blocks such that we send appropriately sized morsels. We guarantee a
+                    // lower bound here, but not an upper bound.
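+                    // (The upper bound is enforced by the decoder tasks, which split any
+                    // oversized DataFrame into `max_morsel_size` pieces.)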
+                    if is_batch_complete {
+                        let batch_slice = slice_take(&mut uncommitted_slice, batch.num_rows);
+                        let batch_slice_len = batch_slice.len();
+                        let block_range = batch.block_start..current_block;
+
+                        let message = BatchMessage {
+                            memslice: source.memslice.clone(),
+                            metadata: source.metadata.clone(),
+                            file_path: source.file_path.clone(),
+                            row_idx_offset: batch.row_idx_offset,
+                            slice: batch_slice,
+                            morsel_seq_base: state.morsel_seq,
+                            block_range,
+                        };
+
+                        if source_token.stop_requested() {
+                            break 'source_loop;
+                        }
+
+                        if batch_tx.send(message).await.is_err() {
+                            // This should only happen if the receiver of the decoder
+                            // has broken off, meaning no further input will be needed.
+                            break 'source_loop;
+                        }
+
+                        // Commit the changes to the state.
+                        // Now we know that a decoder will process it.
+                        //
+                        // This might generate several morsels if the record batch is very large.
+                        state.morsel_seq += batch_slice_len.div_ceil(max_morsel_size) as u64;
+                        state.slice = uncommitted_slice.clone();
+                        state.row_idx_offset = uncommitted_row_idx_offset;
+                        source.block_offset = current_block;
+
+                        batch = Batch {
+                            row_idx_offset: state.row_idx_offset,
+                            block_start: current_block,
+                            num_rows: 0,
+                        };
+                    }
+                }
+
+                (data_scratch, message_scratch) = reader.take_scratches();
+                projection_info = reader.take_projection_info();
+
+                state.source.take();
+                state.source_idx += 1;
+            }
+
+            drop(batch_tx); // Inform decoder tasks to stop.
+            for decoder_task in decoder_tasks {
+                decoder_task.await?;
+            }
+
+            PolarsResult::Ok(())
+        }));
+    }
+}
diff --git a/crates/polars-stream/src/nodes/io_sources/mod.rs b/crates/polars-stream/src/nodes/io_sources/mod.rs
new file mode 100644
index 000000000000..ce14ad3b0f7a
--- /dev/null
+++ b/crates/polars-stream/src/nodes/io_sources/mod.rs
@@ -0,0 +1 @@
+pub mod ipc;
diff --git a/crates/polars-stream/src/nodes/joins/in_memory.rs b/crates/polars-stream/src/nodes/joins/in_memory.rs
new file mode 100644
index 000000000000..a98c23a435b0
--- /dev/null
+++ b/crates/polars-stream/src/nodes/joins/in_memory.rs
@@ -0,0 +1,119 @@
+use std::sync::Arc;
+
+use polars_core::schema::Schema;
+
+use crate::nodes::compute_node_prelude::*;
+use crate::nodes::in_memory_sink::InMemorySinkNode;
+use crate::nodes::in_memory_source::InMemorySourceNode;
+
+enum InMemoryJoinState {
+    Sink {
+        left: InMemorySinkNode,
+        right: InMemorySinkNode,
+    },
+    Source(InMemorySourceNode),
+    Done,
+}
+
+pub struct InMemoryJoinNode {
+    state: InMemoryJoinState,
+    num_pipelines: usize,
+    joiner: Arc<dyn Fn(DataFrame, DataFrame) -> PolarsResult<DataFrame> + Send + Sync>,
+}
+
+impl InMemoryJoinNode {
+    pub fn new(
+        left_input_schema: Arc<Schema>,
+        right_input_schema: Arc<Schema>,
+        joiner: Arc<dyn Fn(DataFrame, DataFrame) -> PolarsResult<DataFrame> + Send + Sync>,
+    ) -> Self {
+        Self {
+            state: InMemoryJoinState::Sink {
+                left: InMemorySinkNode::new(left_input_schema),
+                right: InMemorySinkNode::new(right_input_schema),
+            },
+            num_pipelines: 0,
+            joiner,
+        }
+    }
+}
+
+impl ComputeNode for InMemoryJoinNode {
+    fn name(&self) -> &str {
+        "in_memory_join"
+    }
+
+    fn initialize(&mut self, num_pipelines: usize) {
+        self.num_pipelines = num_pipelines;
+    }
+
+    fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> {
+        assert!(recv.len() == 2 && send.len() == 1);
+
+        // If the output doesn't want any more data, transition to being done.
+        if send[0] == PortState::Done && !matches!(self.state, InMemoryJoinState::Done) {
+            self.state = InMemoryJoinState::Done;
+        }
+
+        // If the input is done, transition to being a source.
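+        // Both inputs must be fully sunk before we can materialize the join: the
+        // in-memory joiner needs the complete left and right DataFrames.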
+ if let InMemoryJoinState::Sink { left, right } = &mut self.state { + if recv[0] == PortState::Done && recv[1] == PortState::Done { + let left_df = left.get_output()?.unwrap(); + let right_df = right.get_output()?.unwrap(); + let mut source_node = + InMemorySourceNode::new(Arc::new((self.joiner)(left_df, right_df)?)); + source_node.initialize(self.num_pipelines); + self.state = InMemoryJoinState::Source(source_node); + } + } + + match &mut self.state { + InMemoryJoinState::Sink { left, right, .. } => { + left.update_state(&mut recv[0..1], &mut [])?; + right.update_state(&mut recv[1..2], &mut [])?; + send[0] = PortState::Blocked; + }, + InMemoryJoinState::Source(source_node) => { + recv[0] = PortState::Done; + recv[1] = PortState::Done; + source_node.update_state(&mut [], send)?; + }, + InMemoryJoinState::Done => { + recv[0] = PortState::Done; + recv[1] = PortState::Done; + send[0] = PortState::Done; + }, + } + Ok(()) + } + + fn is_memory_intensive_pipeline_blocker(&self) -> bool { + matches!(self.state, InMemoryJoinState::Sink { .. }) + } + + fn spawn<'env, 's>( + &'env mut self, + scope: &'s TaskScope<'s, 'env>, + recv_ports: &mut [Option>], + send_ports: &mut [Option>], + state: &'s ExecutionState, + join_handles: &mut Vec>>, + ) { + assert!(recv_ports.len() == 2); + assert!(send_ports.len() == 1); + match &mut self.state { + InMemoryJoinState::Sink { left, right, .. } => { + if recv_ports[0].is_some() { + left.spawn(scope, &mut recv_ports[0..1], &mut [], state, join_handles); + } + if recv_ports[1].is_some() { + right.spawn(scope, &mut recv_ports[1..2], &mut [], state, join_handles); + } + }, + InMemoryJoinState::Source(source) => { + source.spawn(scope, &mut [], send_ports, state, join_handles) + }, + InMemoryJoinState::Done => unreachable!(), + } + } +} diff --git a/crates/polars-stream/src/nodes/joins/mod.rs b/crates/polars-stream/src/nodes/joins/mod.rs new file mode 100644 index 000000000000..fa2e12699f5e --- /dev/null +++ b/crates/polars-stream/src/nodes/joins/mod.rs @@ -0,0 +1 @@ +pub mod in_memory; diff --git a/crates/polars-stream/src/nodes/mod.rs b/crates/polars-stream/src/nodes/mod.rs index 4fb42daddd6b..effebe67c34b 100644 --- a/crates/polars-stream/src/nodes/mod.rs +++ b/crates/polars-stream/src/nodes/mod.rs @@ -5,6 +5,8 @@ pub mod in_memory_sink; pub mod in_memory_source; pub mod input_independent_select; pub mod io_sinks; +pub mod io_sources; +pub mod joins; pub mod map; pub mod multiplexer; pub mod ordered_union; diff --git a/crates/polars-stream/src/nodes/parquet_source/row_group_data_fetch.rs b/crates/polars-stream/src/nodes/parquet_source/row_group_data_fetch.rs index 9a87f0f91b7c..bf2e7e60ea6e 100644 --- a/crates/polars-stream/src/nodes/parquet_source/row_group_data_fetch.rs +++ b/crates/polars-stream/src/nodes/parquet_source/row_group_data_fetch.rs @@ -1,7 +1,7 @@ use std::future::Future; use std::sync::Arc; -use polars_core::prelude::{ArrowSchema, InitHashMaps, PlHashMap}; +use polars_core::prelude::{ArrowSchema, PlHashMap}; use polars_core::series::IsSorted; use polars_core::utils::operation_exceeded_idxsize_msg; use polars_error::{polars_err, PolarsResult}; @@ -197,46 +197,37 @@ impl RowGroupDataFetcher { mem_slice, } } else if let Some(columns) = projection.as_ref() { - let ranges = get_row_group_byte_ranges_for_projection( + let mut ranges = get_row_group_byte_ranges_for_projection( &row_group_metadata, columns.as_ref(), ) .collect::>(); - let bytes = current_byte_source.get_ranges(ranges.as_ref()).await?; + let n_ranges = ranges.len(); - 
assert_eq!(bytes.len(), ranges.len()); + let bytes_map = current_byte_source.get_ranges(&mut ranges).await?; - let mut bytes_map = PlHashMap::with_capacity(ranges.len()); - - for (range, bytes) in ranges.iter().zip(bytes) { - memory_prefetch_func(bytes.as_ref()); - let v = bytes_map.insert(range.start, bytes); - debug_assert!(v.is_none(), "duplicate range start {}", range.start); - } + assert_eq!(bytes_map.len(), n_ranges); FetchedBytes::BytesMap(bytes_map) } else { - // We have a dedicated code-path for a full projection that performs a - // single range request for the entire row group. During testing this - // provided much higher throughput from cloud than making multiple range - // request with `get_ranges()`. - let full_range = row_group_metadata.full_byte_range(); - let full_range = full_range.start as usize..full_range.end as usize; - - let mem_slice = { - let full_range_2 = full_range.clone(); - task_handles_ext::AbortOnDropHandle(io_runtime.spawn(async move { - current_byte_source.get_range(full_range_2).await - })) - .await - .unwrap()? - }; + // We still prefer `get_ranges()` over a single `get_range()` for downloading + // the entire row group, as it can have less memory-copying. A single `get_range()` + // would naively concatenate the memory blocks of the entire row group, while + // `get_ranges()` can skip concatenation since the downloaded blocks are + // aligned to the columns. + let mut ranges = row_group_metadata + .byte_ranges_iter() + .map(|x| x.start as usize..x.end as usize) + .collect::>(); - FetchedBytes::MemSlice { - offset: full_range.start, - mem_slice, - } + let n_ranges = ranges.len(); + + let bytes_map = current_byte_source.get_ranges(&mut ranges).await?; + + assert_eq!(bytes_map.len(), n_ranges); + + FetchedBytes::BytesMap(bytes_map) }; PolarsResult::Ok(RowGroupData { diff --git a/crates/polars-stream/src/physical_plan/fmt.rs b/crates/polars-stream/src/physical_plan/fmt.rs index e0735144da79..7ef74d5b0ad9 100644 --- a/crates/polars-stream/src/physical_plan/fmt.rs +++ b/crates/polars-stream/src/physical_plan/fmt.rs @@ -200,16 +200,34 @@ fn visualize_plan_rec( (out, &[][..]) }, - PhysNodeKind::GroupBy { input, key, aggs } => { - let label = "group-by"; - ( - format!( - "{label}\\nkey:\\n{}\\naggs:\\n{}", - fmt_exprs(key, expr_arena), - fmt_exprs(aggs, expr_arena) - ), - from_ref(input), + PhysNodeKind::GroupBy { input, key, aggs } => ( + format!( + "group-by\\nkey:\\n{}\\naggs:\\n{}", + fmt_exprs(key, expr_arena), + fmt_exprs(aggs, expr_arena) + ), + from_ref(input), + ), + PhysNodeKind::InMemoryJoin { + input_left, + input_right, + left_on, + right_on, + args, + } => { + let mut label = "in-memory-join".to_string(); + write!(label, r"\nleft_on:\n{}", fmt_exprs(left_on, expr_arena)).unwrap(); + write!(label, r"\nright_on:\n{}", fmt_exprs(right_on, expr_arena)).unwrap(); + write!( + label, + r"\nhow: {}", + escape_graphviz(&format!("{:?}", args.how)) ) + .unwrap(); + if args.join_nulls { + write!(label, r"\njoin-nulls").unwrap(); + } + (label, &[*input_left, *input_right][..]) }, }; diff --git a/crates/polars-stream/src/physical_plan/lower_ir.rs b/crates/polars-stream/src/physical_plan/lower_ir.rs index d57a8667c479..063c94081dbc 100644 --- a/crates/polars-stream/src/physical_plan/lower_ir.rs +++ b/crates/polars-stream/src/physical_plan/lower_ir.rs @@ -1,10 +1,11 @@ use std::sync::Arc; +use polars_core::frame::DataFrame; use polars_core::prelude::{InitHashMaps, PlHashMap, PlIndexMap}; use polars_core::schema::Schema; use polars_error::{polars_ensure, 
PolarsResult}; use polars_plan::plans::expr_ir::{ExprIR, OutputName}; -use polars_plan::plans::{AExpr, FunctionIR, IRAggExpr, IR}; +use polars_plan::plans::{AExpr, FileScan, FunctionIR, IRAggExpr, IR}; use polars_plan::prelude::{FileType, SinkType}; use polars_utils::arena::{Arena, Node}; use polars_utils::itertools::Itertools; @@ -314,23 +315,67 @@ pub fn lower_ir( sources: scan_sources, file_info, hive_parts, - output_schema, + output_schema: scan_output_schema, scan_type, - predicate, + mut predicate, file_options, } = v.clone() else { unreachable!(); }; - PhysNodeKind::FileScan { - scan_sources, - file_info, - hive_parts, - output_schema, - scan_type, - predicate, - file_options, + if scan_sources.is_empty() { + // If there are no sources, just provide an empty in-memory source with the right + // schema. + PhysNodeKind::InMemorySource { + df: Arc::new(DataFrame::empty_with_schema(output_schema.as_ref())), + } + } else { + if matches!(scan_type, FileScan::Ipc { .. }) { + // @TODO: All the things the IPC source does not support yet. + if hive_parts.is_some() + || scan_sources.is_cloud_url() + || file_options.allow_missing_columns + || file_options.slice.is_some_and(|(offset, _)| offset < 0) + { + todo!(); + } + } + + // If the node itself would just filter on the whole output then there is no real + // reason to do it in the source node itself. + let do_filter_in_separate_node = + predicate.is_some() && matches!(scan_type, FileScan::Ipc { .. }); + + if do_filter_in_separate_node { + assert!(file_options.slice.is_none()); // Invariant of the scan + let predicate = predicate.take().unwrap(); + + let input = phys_sm.insert(PhysNode::new( + output_schema.clone(), + PhysNodeKind::FileScan { + scan_sources, + file_info, + hive_parts, + output_schema: scan_output_schema, + scan_type, + predicate: None, + file_options, + }, + )); + + PhysNodeKind::Filter { input, predicate } + } else { + PhysNodeKind::FileScan { + scan_sources, + file_info, + hive_parts, + output_schema: scan_output_schema, + scan_type, + predicate, + file_options, + } + } } }, @@ -415,7 +460,29 @@ pub fn lower_ir( } return Ok(node); }, - IR::Join { .. } => todo!(), + IR::Join { + input_left, + input_right, + schema: _, + left_on, + right_on, + options, + } => { + let input_left = *input_left; + let input_right = *input_right; + let left_on = left_on.clone(); + let right_on = right_on.clone(); + let args = options.args.clone(); + let phys_left = lower_ir!(input_left)?; + let phys_right = lower_ir!(input_right)?; + PhysNodeKind::InMemoryJoin { + input_left: phys_left, + input_right: phys_right, + left_on, + right_on, + args, + } + }, IR::Distinct { .. } => todo!(), IR::ExtContext { .. } => todo!(), IR::Invalid => unreachable!(), diff --git a/crates/polars-stream/src/physical_plan/mod.rs b/crates/polars-stream/src/physical_plan/mod.rs index 3b4643100249..707c2a53dec2 100644 --- a/crates/polars-stream/src/physical_plan/mod.rs +++ b/crates/polars-stream/src/physical_plan/mod.rs @@ -5,6 +5,7 @@ use polars_core::frame::DataFrame; use polars_core::prelude::{IdxSize, InitHashMaps, PlHashMap, SortMultipleOptions}; use polars_core::schema::{Schema, SchemaRef}; use polars_error::PolarsResult; +use polars_ops::frame::JoinArgs; use polars_plan::plans::hive::HivePartitions; use polars_plan::plans::{AExpr, DataFrameUdf, FileInfo, FileScan, ScanSources, IR}; use polars_plan::prelude::expr_ir::ExprIR; @@ -100,6 +101,9 @@ pub enum PhysNodeKind { input: PhysNodeKey, }, + /// Generic fallback for (as-of-yet) unsupported streaming mappings. 
+ /// Fully sinks all data to an in-memory data frame and uses the in-memory + /// engine to perform the map. InMemoryMap { input: PhysNodeKey, map: Arc, @@ -149,6 +153,17 @@ pub enum PhysNodeKind { key: Vec, aggs: Vec, }, + + /// Generic fallback for (as-of-yet) unsupported streaming joins. + /// Fully sinks all data to in-memory data frames and uses the in-memory + /// engine to perform the join. + InMemoryJoin { + input_left: PhysNodeKey, + input_right: PhysNodeKey, + left_on: Vec, + right_on: Vec, + args: JoinArgs, + }, } #[recursive::recursive] @@ -198,6 +213,16 @@ fn insert_multiplexers( insert_multiplexers(*input, phys_sm, referenced); }, + PhysNodeKind::InMemoryJoin { + input_left, + input_right, + .. + } => { + let input_right = *input_right; + insert_multiplexers(*input_left, phys_sm, referenced); + insert_multiplexers(input_right, phys_sm, referenced); + }, + PhysNodeKind::OrderedUnion { inputs } | PhysNodeKind::Zip { inputs, .. } => { for input in inputs.clone() { insert_multiplexers(input, phys_sm, referenced); diff --git a/crates/polars-stream/src/physical_plan/to_graph.rs b/crates/polars-stream/src/physical_plan/to_graph.rs index 472cf982a253..b701696972a9 100644 --- a/crates/polars-stream/src/physical_plan/to_graph.rs +++ b/crates/polars-stream/src/physical_plan/to_graph.rs @@ -8,6 +8,7 @@ use polars_expr::planner::{create_physical_expr, get_expr_depth_limit, Expressio use polars_expr::reduce::into_reduction; use polars_expr::state::ExecutionState; use polars_mem_engine::create_physical_plan; +use polars_plan::dsl::JoinOptions; use polars_plan::global::_set_n_rows_for_scan; use polars_plan::plans::expr_ir::ExprIR; use polars_plan::plans::{AExpr, ArenaExprIter, Context, IR}; @@ -366,6 +367,23 @@ fn to_graph_rec<'a>( todo!() } }, + FileScan::Ipc { + options, + cloud_options, + metadata: first_metadata, + } => ctx.graph.add_node( + nodes::io_sources::ipc::IpcSourceNode::new( + scan_sources, + file_info, + hive_parts, + predicate, + options, + cloud_options, + file_options, + first_metadata, + )?, + [], + ), _ => todo!(), } } @@ -410,6 +428,61 @@ fn to_graph_rec<'a>( [input_key], ) }, + + InMemoryJoin { + input_left, + input_right, + left_on, + right_on, + args, + } => { + let left_input_key = to_graph_rec(*input_left, ctx)?; + let right_input_key = to_graph_rec(*input_right, ctx)?; + let left_input_schema = ctx.phys_sm[*input_left].output_schema.clone(); + let right_input_schema = ctx.phys_sm[*input_right].output_schema.clone(); + + let mut lp_arena = Arena::default(); + let left_lmdf = Arc::new(LateMaterializedDataFrame::default()); + let right_lmdf = Arc::new(LateMaterializedDataFrame::default()); + + let left_node = lp_arena.add(left_lmdf.clone().as_ir_node(left_input_schema.clone())); + let right_node = + lp_arena.add(right_lmdf.clone().as_ir_node(right_input_schema.clone())); + let join_node = lp_arena.add(IR::Join { + input_left: left_node, + input_right: right_node, + schema: node.output_schema.clone(), + left_on: left_on.clone(), + right_on: right_on.clone(), + options: Arc::new(JoinOptions { + allow_parallel: true, + force_parallel: false, + args: args.clone(), + rows_left: (None, 0), + rows_right: (None, 0), + }), + }); + + let executor = Mutex::new(create_physical_plan( + join_node, + &mut lp_arena, + ctx.expr_arena, + )?); + + ctx.graph.add_node( + nodes::joins::in_memory::InMemoryJoinNode::new( + left_input_schema, + right_input_schema, + Arc::new(move |left, right| { + left_lmdf.set_materialized_dataframe(left); + right_lmdf.set_materialized_dataframe(right); + 
let mut state = ExecutionState::new(); + executor.lock().execute(&mut state) + }), + ), + [left_input_key, right_input_key], + ) + }, }; ctx.phys_to_graph.insert(phys_node_key, graph_key); diff --git a/crates/polars-utils/src/mmap.rs b/crates/polars-utils/src/mmap.rs index 0ac1a643d93d..ef07714d591f 100644 --- a/crates/polars-utils/src/mmap.rs +++ b/crates/polars-utils/src/mmap.rs @@ -130,6 +130,12 @@ mod private { out } } + + impl From<bytes::Bytes> for MemSlice { + fn from(value: bytes::Bytes) -> Self { + Self::from_bytes(value) + } + } } use memmap::MmapOptions; diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index 9ff45610a3c7..7c054c21f59b 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -417,12 +417,21 @@ docs-selection = [ "replace", "approx_unique", "unique_counts", + "polars_cloud", + "serde", + "ir_serde", + "cloud", + "async", + "cloud_write", ] bench = [ "lazy", ] +# All features except python +full = ["docs-selection", "performant", "fmt"] + [package.metadata.docs.rs] # all-features = true features = ["docs-selection"] diff --git a/docs/source/user-guide/expressions/index.md b/docs/source/user-guide/expressions/index.md index 7e4b6f0a8b1a..b4442d6f4289 100644 --- a/docs/source/user-guide/expressions/index.md +++ b/docs/source/user-guide/expressions/index.md @@ -4,19 +4,21 @@ We [introduced the concept of “expressions” in a previous section](../concep In this section we will focus on exploring the types of expressions that Polars offers. Each section gives an overview of what they do and provides additional examples. + - Essentials: - - [Basic operations](basic-operations.md) – how to do basic operations on dataframe columns, like arithmetic calculations, comparisons, and other common, general-purpose operations - - [Expression expansion](expression-expansion.md) – what is expression expansion and how to use it - - [Casting](casting.md) – how to convert / cast values to different data types + - [Basic operations](basic-operations.md) – how to do basic operations on dataframe columns, like arithmetic calculations, comparisons, and other common, general-purpose operations + - [Expression expansion](expression-expansion.md) – what is expression expansion and how to use it + - [Casting](casting.md) – how to convert / cast values to different data types - How to work with specific types of data or data type namespaces: - - [Strings](strings.md) – how to work with strings and the namespace `str` - - [Lists and arrays](lists-and-arrays.md) – the differences between the data types `List` and `Array`, when to use them, and how to use them - - [Categorical data and enums](categorical-data-and-enums.md) – the differences between the data types `Categorical` and `Enum`, when to use them, and how to use them - - [Structs](structs.md) – when to use the data type `Struct` and how to use it - - [Missing data](missing-data.md) – how to work with missing data and how to fill missing data + - [Strings](strings.md) – how to work with strings and the namespace `str` + - [Lists and arrays](lists-and-arrays.md) – the differences between the data types `List` and `Array`, when to use them, and how to use them + - [Categorical data and enums](categorical-data-and-enums.md) – the differences between the data types `Categorical` and `Enum`, when to use them, and how to use them + - [Structs](structs.md) – when to use the data type `Struct` and how to use it + - [Missing data](missing-data.md) – how to work with missing data and how to fill missing data - Types of operations: - - 
[Aggregation](aggregation.md) – how to work with aggregating contexts like `group_by` - - [Window functions](window-functions.md) – how to apply window functions over columns in a dataframe - - [Folds](folds.md) – how to perform arbitrary computations horizontally across columns + - [Aggregation](aggregation.md) – how to work with aggregating contexts like `group_by` + - [Window functions](window-functions.md) – how to apply window functions over columns in a dataframe + - [Folds](folds.md) – how to perform arbitrary computations horizontally across columns - [User-defined Python functions](user-defined-python-functions.md) – how to apply user-defined Python functions to dataframe columns or to column values - [Numpy functions](numpy-functions.md) – how to use NumPy native functions on Polars dataframes and series + diff --git a/docs/source/user-guide/transformations/joins.md b/docs/source/user-guide/transformations/joins.md index b135a45f53d3..5b55386b70f0 100644 --- a/docs/source/user-guide/transformations/joins.md +++ b/docs/source/user-guide/transformations/joins.md @@ -15,14 +15,14 @@ If you want to learn about joins in general and how to work with them in Polars, === ":fontawesome-brands-python: Python" [:material-api: `join`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join.html) - [:material-api: `join_where`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_asof.html) - [:material-api: `join_asof`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_where.html) + [:material-api: `join_where`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_where.html) + [:material-api: `join_asof`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_asof.html) === ":fontawesome-brands-rust: Rust" [:material-api: `join`](https://docs.pola.rs/api/rust/dev/polars/prelude/trait.DataFrameJoinOps.html#method.join) ([:material-flag-plus: semi_anti_join](/user-guide/installation/#feature-flags "Enable the feature flag semi_anti_join for semi and for anti joins"){.feature-flag} needed for some options.) - [:material-api: `join_asof_by`](https://docs.pola.rs/api/rust/dev/polars/prelude/trait.AsofJoin.html#method.join_asof) + [:material-api: `join_asof_by`](https://docs.pola.rs/api/rust/dev/polars/prelude/trait.AsofJoinBy.html#method.join_asof_by) [:material-flag-plus: Available on feature asof_join](/user-guide/installation/#feature-flags "To use this functionality enable the feature flag asof_join"){.feature-flag} [:material-api: `join_where`](https://docs.rs/polars/latest/polars/prelude/struct.JoinBuilder.html#method.join_where) [:material-flag-plus: Available on feature iejoin](/user-guide/installation/#feature-flags "To use this functionality enable the feature flag iejoin"){.feature-flag} diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index fc3e520e5ecc..a2ff3d9882da 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "py-polars" -version = "1.12.0" +version = "1.13.1" edition = "2021" [lib] diff --git a/py-polars/docs/source/reference/expressions/meta.rst b/py-polars/docs/source/reference/expressions/meta.rst index e70283c4c9b4..514067e0166f 100644 --- a/py-polars/docs/source/reference/expressions/meta.rst +++ b/py-polars/docs/source/reference/expressions/meta.rst @@ -11,6 +11,7 @@ The following methods are available under the `expr.meta` attribute. 
Expr.meta.eq Expr.meta.has_multiple_outputs + Expr.meta.is_column Expr.meta.is_column_selection Expr.meta.is_regex_projection Expr.meta.ne diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 4ff2752fdfb5..49c2e2470534 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -9236,7 +9236,7 @@ def n_chunks(self, strategy: Literal["first"] = ...) -> int: ... @overload def n_chunks(self, strategy: Literal["all"]) -> list[int]: ... - def n_chunks(self, strategy: str = "first") -> int | list[int]: + def n_chunks(self, strategy: Literal["first", "all"] = "first") -> int | list[int]: """ Get number of chunks used by the ChunkedArrays of this DataFrame. diff --git a/py-polars/polars/datatypes/classes.py b/py-polars/polars/datatypes/classes.py index bb538b4f01e8..5543f629a620 100644 --- a/py-polars/polars/datatypes/classes.py +++ b/py-polars/polars/datatypes/classes.py @@ -12,6 +12,7 @@ import polars.functions as F with contextlib.suppress(ImportError): # Module not available when building docs + import polars.polars as plr from polars.polars import dtype_str_repr as _dtype_str_repr if TYPE_CHECKING: @@ -91,7 +92,7 @@ def from_python(cls, py_type: PythonDataType) -> PolarsDataType: # noqa: D102 ... @classmethod - def to_python(self) -> PythonDataType: # noqa: D102 + def to_python(cls) -> PythonDataType: # noqa: D102 ... @@ -238,6 +239,44 @@ def to_python(self) -> PythonDataType: class NumericType(DataType): """Base class for numeric data types.""" + @classmethod + def max(cls) -> pl.Expr: + """ + Return a literal expression representing the maximum value of this data type. + + Examples + -------- + >>> pl.select(pl.Int8.max() == 127) + shape: (1, 1) + ┌─────────┐ + │ literal │ + │ --- │ + │ bool │ + ╞═════════╡ + │ true │ + └─────────┘ + """ + return pl.Expr._from_pyexpr(plr._get_dtype_max(cls)) + + @classmethod + def min(cls) -> pl.Expr: + """ + Return a literal expression representing the minimum value of this data type. + + Examples + -------- + >>> pl.select(pl.Int8.min() == -128) + shape: (1, 1) + ┌─────────┐ + │ literal │ + │ --- │ + │ bool │ + ╞═════════╡ + │ true │ + └─────────┘ + """ + return pl.Expr._from_pyexpr(plr._get_dtype_min(cls)) + class IntegerType(NumericType): """Base class for integer data types.""" diff --git a/py-polars/polars/io/database/_executor.py b/py-polars/polars/io/database/_executor.py index 1cbaf4679db9..85401d582fe7 100644 --- a/py-polars/polars/io/database/_executor.py +++ b/py-polars/polars/io/database/_executor.py @@ -511,7 +511,7 @@ def execute( result = cursor_execute(query, *positional_options) # note: some cursors execute in-place, some access results via a property - result = self.cursor if result is None else result + result = self.cursor if (result is None or result is True) else result if self.driver_name == "duckdb": result = result.cursor diff --git a/py-polars/polars/selectors.py b/py-polars/polars/selectors.py index 4cb11506b3f6..9d3cedb47e85 100644 --- a/py-polars/polars/selectors.py +++ b/py-polars/polars/selectors.py @@ -385,10 +385,6 @@ def __and__(self, other: Any) -> Expr: ... 
     def __and__(self, other: Any) -> SelectorType | Expr:
         if is_column(other):
             colname = other.meta.output_name()
-            if self._attrs["name"] == "by_name" and (
-                params := self._attrs["params"]
-            ).get("require_all", True):
-                return by_name(*params["*names"], colname)
             other = by_name(colname)
         if is_selector(other):
             return _selector_proxy_(
@@ -399,6 +395,12 @@ def __and__(self, other: Any) -> SelectorType | Expr:
         else:
             return self.as_expr().__and__(other)

+    def __rand__(self, other: Any) -> Expr:
+        if is_column(other):
+            colname = other.meta.output_name()
+            return by_name(colname) & self
+        return self.as_expr().__rand__(other)
+
     @overload
     def __or__(self, other: SelectorType) -> SelectorType: ...

@@ -417,6 +419,11 @@ def __or__(self, other: Any) -> SelectorType | Expr:
         else:
             return self.as_expr().__or__(other)

+    def __ror__(self, other: Any) -> Expr:
+        if is_column(other):
+            other = by_name(other.meta.output_name())
+        return self.as_expr().__ror__(other)
+
     @overload
     def __xor__(self, other: SelectorType) -> SelectorType: ...

@@ -435,21 +442,6 @@ def __xor__(self, other: Any) -> SelectorType | Expr:
         else:
             return self.as_expr().__or__(other)

-    def __rand__(self, other: Any) -> Expr:
-        if is_column(other):
-            colname = other.meta.output_name()
-            if self._attrs["name"] == "by_name" and (
-                params := self._attrs["params"]
-            ).get("require_all", True):
-                return by_name(colname, *params["*names"])
-            other = by_name(colname)
-        return self.as_expr().__rand__(other)
-
-    def __ror__(self, other: Any) -> Expr:
-        if is_column(other):
-            other = by_name(other.meta.output_name())
-        return self.as_expr().__ror__(other)
-
     def __rxor__(self, other: Any) -> Expr:
         if is_column(other):
             other = by_name(other.meta.output_name())
diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs
index 859609828d19..f73577319545 100644
--- a/py-polars/src/lib.rs
+++ b/py-polars/src/lib.rs
@@ -20,7 +20,7 @@ use polars_python::lazygroupby::PyLazyGroupBy;
 use polars_python::series::PySeries;
 #[cfg(feature = "sql")]
 use polars_python::sql::PySQLContext;
-use polars_python::{exceptions, functions};
+use polars_python::{datatypes, exceptions, functions};
 use pyo3::prelude::*;
 use pyo3::{wrap_pyfunction, wrap_pymodule};

@@ -279,6 +279,12 @@ fn polars(py: Python, m: &Bound<PyModule>) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(functions::escape_regex))
         .unwrap();

+    // Dtype helpers
+    m.add_wrapped(wrap_pyfunction!(datatypes::_get_dtype_max))
+        .unwrap();
+    m.add_wrapped(wrap_pyfunction!(datatypes::_get_dtype_min))
+        .unwrap();
+
     // Exceptions - Errors
     m.add(
         "PolarsError",
diff --git a/py-polars/tests/unit/io/test_lazy_ipc.py b/py-polars/tests/unit/io/test_lazy_ipc.py
index 0d67b6b06f89..ec75d495ce8d 100644
--- a/py-polars/tests/unit/io/test_lazy_ipc.py
+++ b/py-polars/tests/unit/io/test_lazy_ipc.py
@@ -88,6 +88,7 @@ def test_ipc_list_arg(io_files_path: Path) -> None:
     assert df.row(0) == ("vegetables", 45, 0.5, 2)


+@pytest.mark.may_fail_auto_streaming
 def test_scan_ipc_local_with_async(
     capfd: Any,
     monkeypatch: Any,
diff --git a/py-polars/tests/unit/operations/test_is_sorted.py b/py-polars/tests/unit/operations/test_is_sorted.py
index f81076ced502..093dae47bfbf 100644
--- a/py-polars/tests/unit/operations/test_is_sorted.py
+++ b/py-polars/tests/unit/operations/test_is_sorted.py
@@ -384,12 +384,16 @@ def test_with_pd(
     test_with_pd(dfbpd, dfapd, "b", "left", joined)

     joined = dfb.join(dfa, on="b", how="inner")
-    assert not joined["a"].flags["SORTED_ASC"]
+    if (joined["a"] != sorted(joined["a"])).any():
+        assert not joined["a"].flags["SORTED_ASC"]
joined = dfb.join(dfa, on="b", how="semi") - assert not joined["a"].flags["SORTED_ASC"] + if (joined["a"] != sorted(joined["a"])).any(): + assert not joined["a"].flags["SORTED_ASC"] + joined = dfb.join(dfa, on="b", how="anti") - assert not joined["a"].flags["SORTED_ASC"] + if (joined["a"] != sorted(joined["a"])).any(): + assert not joined["a"].flags["SORTED_ASC"] def test_sorted_flag_group_by_dynamic() -> None: diff --git a/py-polars/tests/unit/operations/test_top_k.py b/py-polars/tests/unit/operations/test_top_k.py index debb3e729274..866ef88e6e10 100644 --- a/py-polars/tests/unit/operations/test_top_k.py +++ b/py-polars/tests/unit/operations/test_top_k.py @@ -397,3 +397,37 @@ def test_bottom_k_nulls(s: pl.Series, should_sort: bool) -> None: def test_top_k_descending_deprecated() -> None: with pytest.deprecated_call(): pl.col("a").top_k_by("b", descending=True) # type: ignore[call-arg] + + +def test_top_k_df() -> None: + df = pl.LazyFrame({"a": [3, 4, 1, 2, 5]}) + expected = [5, 4, 3] + assert df.sort("a", descending=True).limit(3).collect()["a"].to_list() == expected + assert df.top_k(3, by="a").collect()["a"].to_list() == expected + expected = [1, 2, 3] + assert df.sort("a", descending=False).limit(3).collect()["a"].to_list() == expected + assert df.bottom_k(3, by="a").collect()["a"].to_list() == expected + + df = pl.LazyFrame({"a": [1, None, None, 4, 5]}) + expected2 = [5, 4, 1, None] + assert ( + df.sort("a", descending=True, nulls_last=True).limit(4).collect()["a"].to_list() + == expected2 + ) + assert df.top_k(4, by="a").collect()["a"].to_list() == expected2 + expected2 = [1, 4, 5, None] + assert ( + df.sort("a", descending=False, nulls_last=True) + .limit(4) + .collect()["a"] + .to_list() + == expected2 + ) + assert df.bottom_k(4, by="a").collect()["a"].to_list() == expected2 + + assert df.sort("a", descending=False, nulls_last=False).limit(4).collect()[ + "a" + ].to_list() == [None, None, 1, 4] + assert df.sort("a", descending=True, nulls_last=False).limit(4).collect()[ + "a" + ].to_list() == [None, None, 5, 4] diff --git a/py-polars/tests/unit/sql/test_conditional.py b/py-polars/tests/unit/sql/test_conditional.py index b2000ebe37b1..3a80c1234aff 100644 --- a/py-polars/tests/unit/sql/test_conditional.py +++ b/py-polars/tests/unit/sql/test_conditional.py @@ -36,6 +36,24 @@ def test_case_when() -> None: } +@pytest.mark.parametrize("else_clause", ["ELSE NULL ", ""]) +def test_case_when_optional_else(else_clause: str) -> None: + df = pl.DataFrame( + { + "a": [1, 2, 3, 4, 5, 6, 7], + "b": [7, 6, 5, 4, 3, 2, 1], + "c": [3, 4, 0, 3, 4, 1, 1], + } + ) + query = f""" + SELECT + AVG(CASE WHEN a <= b THEN c {else_clause}END) AS conditional_mean + FROM self + """ + res = df.sql(query) + assert res.to_dict(as_series=False) == {"conditional_mean": [2.5]} + + def test_control_flow(foods_ipc_path: Path) -> None: nums = pl.LazyFrame( { diff --git a/py-polars/tests/unit/sql/test_joins.py b/py-polars/tests/unit/sql/test_joins.py index d25610eb6763..c423fc4c45f4 100644 --- a/py-polars/tests/unit/sql/test_joins.py +++ b/py-polars/tests/unit/sql/test_joins.py @@ -663,3 +663,26 @@ def test_nested_join(join_clause: str) -> None: "Species": "Human", }, ] + + +def test_join_nulls_19624() -> None: + df1 = pl.DataFrame({"a": [1, 2, None, None]}) + df2 = pl.DataFrame({"a": [1, 1, 2, 2, None], "b": [0, 1, 2, 3, 4]}) + + # left join + result_df = df1.join(df2, how="left", on="a", join_nulls=False, validate="1:m") + expected_df = pl.DataFrame( + {"a": [1, 1, 2, 2, None, None], "b": [0, 1, 2, 3, None, None]} + ) 
+ assert_frame_equal(result_df, expected_df) + result_df = df2.join(df1, how="left", on="a", join_nulls=False, validate="m:1") + expected_df = pl.DataFrame({"a": [1, 1, 2, 2, None], "b": [0, 1, 2, 3, 4]}) + assert_frame_equal(result_df, expected_df) + + # inner join + result_df = df1.join(df2, how="inner", on="a", join_nulls=False, validate="1:m") + expected_df = pl.DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3]}) + assert_frame_equal(result_df, expected_df) + result_df = df2.join(df1, how="inner", on="a", join_nulls=False, validate="m:1") + expected_df = pl.DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3]}) + assert_frame_equal(result_df, expected_df) diff --git a/py-polars/tests/unit/test_datatypes.py b/py-polars/tests/unit/test_datatypes.py index 4d604f2964e9..ed4b8cd1dd61 100644 --- a/py-polars/tests/unit/test_datatypes.py +++ b/py-polars/tests/unit/test_datatypes.py @@ -202,3 +202,28 @@ def test_struct_field_iter() -> None: def test_raise_invalid_namespace() -> None: with pytest.raises(pl.exceptions.InvalidOperationError): pl.select(pl.lit(1.5).str.replace("1", "2")) + + +@pytest.mark.parametrize( + ("dtype", "lower", "upper"), + [ + (pl.Int8, -128, 127), + (pl.UInt8, 0, 255), + (pl.Int16, -32768, 32767), + (pl.UInt16, 0, 65535), + (pl.Int32, -2147483648, 2147483647), + (pl.UInt32, 0, 4294967295), + (pl.Int64, -9223372036854775808, 9223372036854775807), + (pl.UInt64, 0, 18446744073709551615), + (pl.Float32, float("-inf"), float("inf")), + (pl.Float64, float("-inf"), float("inf")), + ], +) +def test_max_min( + dtype: datatypes.IntegerType | datatypes.Float32 | datatypes.Float64, + upper: int | float, + lower: int | float, +) -> None: + df = pl.select(min=dtype.min(), max=dtype.max()) + assert df.to_series(0).item() == lower + assert df.to_series(1).item() == upper diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py index 78a277a3662f..a8f9e43d84c0 100644 --- a/py-polars/tests/unit/test_schema.py +++ b/py-polars/tests/unit/test_schema.py @@ -246,3 +246,35 @@ def test_lf_agg_lit_explode() -> None: schema = {"k": pl.Int64, "o": pl.List(pl.Int64)} assert q.collect_schema() == schema assert_frame_equal(q.collect(), pl.DataFrame({"k": 1, "o": [[1]]}, schema=schema)) # type: ignore[arg-type] + + +@pytest.mark.parametrize("expr_op", [ + "approx_n_unique", "arg_max", "arg_min", "bitwise_and", "bitwise_or", + "bitwise_xor", "count", "entropy", "first", "has_nulls", "implode", "kurtosis", + "last", "len", "lower_bound", "max", "mean", "median", "min", "n_unique", "nan_max", + "nan_min", "null_count", "product", "sample", "skew", "std", "sum", "upper_bound", + "var" +]) # fmt: skip +def test_lf_agg_auto_agg_list_19752(expr_op: str) -> None: + op = getattr(pl.Expr, expr_op) + + lf = pl.LazyFrame({"a": 1, "b": 1}) + + q = lf.group_by("a").agg(pl.col("b").reverse().pipe(op)) + assert q.collect_schema() == q.collect().collect_schema() + + q = lf.group_by("a").agg(pl.col("b").shuffle().reverse().pipe(op)) + + assert q.collect_schema() == q.collect().collect_schema() + + +@pytest.mark.parametrize( + "expr", [pl.col("b"), pl.col("b").sum(), pl.col("b").reverse()] +) +@pytest.mark.parametrize("mapping_strategy", ["explode", "join", "group_to_rows"]) +def test_lf_window_schema(expr: pl.Expr, mapping_strategy: str) -> None: + q = pl.LazyFrame({"a": 1, "b": 1}).select( + expr.over("a", mapping_strategy=mapping_strategy) # type: ignore[arg-type] + ) + + assert q.collect_schema() == q.collect().collect_schema() diff --git a/py-polars/tests/unit/test_selectors.py 
b/py-polars/tests/unit/test_selectors.py index dd2c415c9a13..f4e29e9194c6 100644 --- a/py-polars/tests/unit/test_selectors.py +++ b/py-polars/tests/unit/test_selectors.py @@ -182,11 +182,17 @@ def test_selector_by_name(df: pl.DataFrame) -> None: # check "by_name & col" for selector_expr, expected in ( - (cs.by_name("abc", "cde") & pl.col("ghi"), ["abc", "cde", "ghi"]), - (pl.col("ghi") & cs.by_name("cde", "abc"), ["ghi", "cde", "abc"]), + (cs.by_name("abc", "cde") & pl.col("ghi"), []), + (cs.by_name("abc", "cde") & pl.col("cde"), ["cde"]), + (pl.col("cde") & cs.by_name("cde", "abc"), ["cde"]), ): assert df.select(selector_expr).columns == expected + # check "by_name & by_name" + assert df.select( + cs.by_name("abc", "cde", "def", "eee") & cs.by_name("cde", "eee", "fgg") + ).columns == ["cde", "eee"] + # expected errors with pytest.raises(ColumnNotFoundError, match="xxx"): df.select(cs.by_name("xxx", "fgg", "!!!"))
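Note on the selectors.py change above: `&` between a `by_name` selector and a plain column is now a true set intersection in both operand orders (via the new `__rand__`), rather than appending the column to the selection. A minimal sketch of the resulting behaviour, mirroring the updated expectations in test_selectors.py; the frame contents are illustrative:

import polars as pl
import polars.selectors as cs

df = pl.DataFrame({"abc": [1], "cde": [2], "ghi": [3]})

# A column outside the `by_name` set now drops out of the selection...
assert df.select(cs.by_name("abc", "cde") & pl.col("ghi")).columns == []

# ...while an overlapping column is kept, in either operand order
# (the right-hand form exercises the new `__rand__`).
assert df.select(cs.by_name("abc", "cde") & pl.col("cde")).columns == ["cde"]
assert df.select(pl.col("cde") & cs.by_name("cde", "abc")).columns == ["cde"]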
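Note on the new dtype bound helpers: per their docstrings, `NumericType.min()`/`.max()` (backed by `_get_dtype_min`/`_get_dtype_max`) return literal expressions, so they compose with other expressions instead of only materialising scalars. A small sketch under that assumption; the column name and the use of `Expr.clip` are illustrative, not part of this diff:

import polars as pl

df = pl.DataFrame({"x": [-300, 0, 300]})

# The bounds are expressions, so they can parameterise other expressions;
# here they clamp a wider integer column to the Int8 value range.
out = df.select(pl.col("x").clip(pl.Int8.min(), pl.Int8.max()))
print(out["x"].to_list())  # expected: [-128, 0, 127]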