From 0bea71ea955b1d8fb8a1904e812b3b0f2fcf3441 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sat, 14 Dec 2024 15:24:25 +0100
Subject: [PATCH 01/23] init

---
 R/000-wrappers.R                   |  312 ++++
 R/lazyframe-frame.R                | 2311 +++++++++++++++++++++++++++-
 R/utils-various.R                  |  113 ++
 man/dataframe__cast.Rd             |   12 +-
 man/lazyframe__bottom_k.Rd         |   39 +
 man/lazyframe__cast.Rd             |   37 +
 man/lazyframe__clear.Rd            |   29 +
 man/lazyframe__clone.Rd            |   42 +
 man/lazyframe__collect.Rd          |   11 +
 man/lazyframe__collect_schema.Rd   |   29 +
 man/lazyframe__count.Rd            |   18 +
 man/lazyframe__drop.Rd             |   34 +
 man/lazyframe__drop_nulls.Rd       |   34 +
 man/lazyframe__explain.Rd          |   78 +
 man/lazyframe__explode.Rd          |   27 +
 man/lazyframe__fill_nan.Rd         |   24 +
 man/lazyframe__filter.Rd           |   43 +
 man/lazyframe__first.Rd            |   18 +
 man/lazyframe__gather_every.Rd     |   25 +
 man/lazyframe__group_by.Rd         |   45 +
 man/lazyframe__group_by_dynamic.Rd |  178 +++
 man/lazyframe__head.Rd             |   22 +
 man/lazyframe__interpolate.Rd      |   23 +
 man/lazyframe__join.Rd             |  108 ++
 man/lazyframe__join_asof.Rd        |  166 ++
 man/lazyframe__join_where.Rd       |   52 +
 man/lazyframe__last.Rd             |   18 +
 man/lazyframe__limit.Rd            |   22 +
 man/lazyframe__max.Rd              |   18 +
 man/lazyframe__mean.Rd             |   18 +
 man/lazyframe__median.Rd           |   18 +
 man/lazyframe__merge_sorted.Rd     |   34 +
 man/lazyframe__min.Rd              |   18 +
 man/lazyframe__null_count.Rd       |   18 +
 man/lazyframe__profile.Rd          |  161 ++
 man/lazyframe__quantile.Rd         |   21 +
 man/lazyframe__rename.Rd           |   40 +
 man/lazyframe__reverse.Rd          |   18 +
 man/lazyframe__rolling.Rd          |   88 ++
 man/lazyframe__select_seq.Rd       |   30 +
 man/lazyframe__serialize.Rd        |   18 +
 man/lazyframe__set_sorted.Rd       |   22 +
 man/lazyframe__shift.Rd            |   35 +
 man/lazyframe__sink_csv.Rd         |  137 ++
 man/lazyframe__sink_ipc.Rd         |   86 ++
 man/lazyframe__sink_ndjson.Rd      |   74 +
 man/lazyframe__sink_parquet.Rd     |  122 ++
 man/lazyframe__slice.Rd            |   24 +
 man/lazyframe__sort.Rd             |   55 +
 man/lazyframe__std.Rd              |   19 +
 man/lazyframe__sum.Rd              |   18 +
 man/lazyframe__tail.Rd             |   34 +
 man/lazyframe__to_dot.Rd           |   71 +
 man/lazyframe__top_k.Rd            |   41 +
 man/lazyframe__unique.Rd           |   50 +
 man/lazyframe__unnest.Rd           |   34 +
 man/lazyframe__unpivot.Rd          |   45 +
 man/lazyframe__var.Rd              |   19 +
 man/lazyframe__with_columns_seq.Rd |   66 +
 man/lazyframe__with_context.Rd     |   39 +
 man/lazyframe__with_row_index.Rd   |   32 +
 man/pl.Rd                          |    2 +-
 man/pl__deserialize_lf.Rd          |   22 +
 src/init.c                         |  258 ++++
 src/rust/Cargo.toml                |    7 +
 src/rust/api.h                     |   43 +
 src/rust/src/conversion/mod.rs     |  227 ++-
 src/rust/src/lazyframe/general.rs  |  722 ++++++++-
 src/rust/src/lazyframe/mod.rs      |    1 +
 src/rust/src/lazyframe/serde.rs    |   12 +
 70 files changed, 6678 insertions(+), 9 deletions(-)
 create mode 100644 man/lazyframe__bottom_k.Rd
 create mode 100644 man/lazyframe__cast.Rd
 create mode 100644 man/lazyframe__clear.Rd
 create mode 100644 man/lazyframe__clone.Rd
 create mode 100644 man/lazyframe__collect_schema.Rd
 create mode 100644 man/lazyframe__count.Rd
 create mode 100644 man/lazyframe__drop.Rd
 create mode 100644 man/lazyframe__drop_nulls.Rd
 create mode 100644 man/lazyframe__explain.Rd
 create mode 100644 man/lazyframe__explode.Rd
 create mode 100644 man/lazyframe__fill_nan.Rd
 create mode 100644 man/lazyframe__filter.Rd
 create mode 100644 man/lazyframe__first.Rd
 create mode 100644 man/lazyframe__gather_every.Rd
 create mode 100644 man/lazyframe__group_by.Rd
 create mode 100644 man/lazyframe__group_by_dynamic.Rd
 create mode 100644 man/lazyframe__head.Rd
 create mode 100644 man/lazyframe__interpolate.Rd
 create mode 100644 man/lazyframe__join.Rd
 create mode 100644 man/lazyframe__join_asof.Rd
 create mode 100644 man/lazyframe__join_where.Rd
 create mode 100644 man/lazyframe__last.Rd
 create mode 100644 man/lazyframe__limit.Rd
 create mode 100644 man/lazyframe__max.Rd
 create mode 100644 man/lazyframe__mean.Rd
 create mode 100644 man/lazyframe__median.Rd
 create mode 100644 man/lazyframe__merge_sorted.Rd
 create mode 100644 man/lazyframe__min.Rd
 create mode 100644 man/lazyframe__null_count.Rd
 create mode 100644 man/lazyframe__profile.Rd
 create mode 100644 man/lazyframe__quantile.Rd
 create mode 100644 man/lazyframe__rename.Rd
 create mode 100644 man/lazyframe__reverse.Rd
 create mode 100644 man/lazyframe__rolling.Rd
 create mode 100644 man/lazyframe__select_seq.Rd
 create mode 100644 man/lazyframe__serialize.Rd
 create mode 100644 man/lazyframe__set_sorted.Rd
 create mode 100644 man/lazyframe__shift.Rd
 create mode 100644 man/lazyframe__sink_csv.Rd
 create mode 100644 man/lazyframe__sink_ipc.Rd
 create mode 100644 man/lazyframe__sink_ndjson.Rd
 create mode 100644 man/lazyframe__sink_parquet.Rd
 create mode 100644 man/lazyframe__slice.Rd
 create mode 100644 man/lazyframe__sort.Rd
 create mode 100644 man/lazyframe__std.Rd
 create mode 100644 man/lazyframe__sum.Rd
 create mode 100644 man/lazyframe__tail.Rd
 create mode 100644 man/lazyframe__to_dot.Rd
 create mode 100644 man/lazyframe__top_k.Rd
 create mode 100644 man/lazyframe__unique.Rd
 create mode 100644 man/lazyframe__unnest.Rd
 create mode 100644 man/lazyframe__unpivot.Rd
 create mode 100644 man/lazyframe__var.Rd
 create mode 100644 man/lazyframe__with_columns_seq.Rd
 create mode 100644 man/lazyframe__with_context.Rd
 create mode 100644 man/lazyframe__with_row_index.Rd
 create mode 100644 man/pl__deserialize_lf.Rd
 create mode 100644 src/rust/src/lazyframe/serde.rs

diff --git a/R/000-wrappers.R b/R/000-wrappers.R
index c68795d0..72bfead1 100644
--- a/R/000-wrappers.R
+++ b/R/000-wrappers.R
@@ -285,6 +285,11 @@ NULL
   .savvy_wrap_PlRWhen(.Call(savvy_when__impl, `condition`))
 }
 
+
+`deserialize_lf` <- function(`json`) {
+  .savvy_wrap_PlRLazyFrame(.Call(savvy_deserialize_lf__impl, `json`))
+}
+
 ### wrapper functions for PlRChainedThen
 
 `PlRChainedThen_when` <- function(self) {
@@ -3361,6 +3366,271 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
   }
 }
 
+`PlRLazyFrame_to_dot` <- function(self) {
+  function(`optimized`) {
+    .savvy_wrap_String(.Call(savvy_PlRLazyFrame_to_dot__impl, `self`, `optimized`))
+  }
+}
+
+`PlRLazyFrame_sort` <- function(self) {
+  function(`by_column`, `descending`, `nulls_last`, `maintain_order`, `multithreaded`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_sort__impl, `self`, `by_column`, `descending`, `nulls_last`, `maintain_order`, `multithreaded`))
+  }
+}
+
+`PlRLazyFrame_top_k` <- function(self) {
+  function(`k`, `by`, `reverse`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_top_k__impl, `self`, `k`, `by`, `reverse`))
+  }
+}
+
+`PlRLazyFrame_bottom_k` <- function(self) {
+  function(`k`, `by`, `reverse`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_bottom_k__impl, `self`, `k`, `by`, `reverse`))
+  }
+}
+
+`PlRLazyFrame_cache` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_cache__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_profile` <- function(self) {
+  function() {
+    .Call(savvy_PlRLazyFrame_profile__impl, `self`)
+  }
+}
+
+`PlRLazyFrame_sink_parquet` <- function(self) {
+  function(`path`, `compression`, `maintain_order`, `statistics`, `retries`, `compression_level` = NULL, `row_group_size` = NULL, `data_page_size` = NULL, `storage_options` = NULL) {
+    invisible(.Call(savvy_PlRLazyFrame_sink_parquet__impl, `self`, `path`, `compression`, `maintain_order`, `statistics`, `retries`, `compression_level`, `row_group_size`, `data_page_size`, `storage_options`))
+  }
+}
+
+`PlRLazyFrame_sink_ipc` <- function(self) {
+  function(`path`, `maintain_order`, `retries`, `compression` = NULL, `storage_options` = NULL) {
+    invisible(.Call(savvy_PlRLazyFrame_sink_ipc__impl, `self`, `path`, `maintain_order`, `retries`, `compression`, `storage_options`))
+  }
+}
+
+`PlRLazyFrame_sink_csv` <- function(self) {
+  function(`path`, `include_bom`, `include_header`, `separator`, `line_terminator`, `quote_char`, `maintain_order`, `batch_size`, `retries`, `datetime_format` = NULL, `date_format` = NULL, `time_format` = NULL, `float_scientific` = NULL, `float_precision` = NULL, `null_value` = NULL, `quote_style` = NULL, `storage_options` = NULL) {
+    invisible(.Call(savvy_PlRLazyFrame_sink_csv__impl, `self`, `path`, `include_bom`, `include_header`, `separator`, `line_terminator`, `quote_char`, `maintain_order`, `batch_size`, `retries`, `datetime_format`, `date_format`, `time_format`, `float_scientific`, `float_precision`, `null_value`, `quote_style`, `storage_options`))
+  }
+}
+
+`PlRLazyFrame_sink_json` <- function(self) {
+  function(`path`, `maintain_order`, `retries`, `storage_options` = NULL) {
+    invisible(.Call(savvy_PlRLazyFrame_sink_json__impl, `self`, `path`, `maintain_order`, `retries`, `storage_options`))
+  }
+}
+
+`PlRLazyFrame_serialize` <- function(self) {
+  function() {
+    .Call(savvy_PlRLazyFrame_serialize__impl, `self`)
+  }
+}
+
+`PlRLazyFrame_select_seq` <- function(self) {
+  function(`exprs`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_select_seq__impl, `self`, `exprs`))
+  }
+}
+
+`PlRLazyFrame_rolling` <- function(self) {
+  function(`index_column`, `period`, `offset`, `closed`, `by`) {
+    `index_column` <- .savvy_extract_ptr(`index_column`, "PlRExpr")
+    .savvy_wrap_PlRLazyGroupBy(.Call(savvy_PlRLazyFrame_rolling__impl, `self`, `index_column`, `period`, `offset`, `closed`, `by`))
+  }
+}
+
+`PlRLazyFrame_group_by_dynamic` <- function(self) {
+  function(`index_column`, `every`, `period`, `offset`, `label`, `include_boundaries`, `closed`, `group_by`, `start_by`) {
+    `index_column` <- .savvy_extract_ptr(`index_column`, "PlRExpr")
+    .savvy_wrap_PlRLazyGroupBy(.Call(savvy_PlRLazyFrame_group_by_dynamic__impl, `self`, `index_column`, `every`, `period`, `offset`, `label`, `include_boundaries`, `closed`, `group_by`, `start_by`))
+  }
+}
+
+`PlRLazyFrame_with_context` <- function(self) {
+  function(`contexts`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_with_context__impl, `self`, `contexts`))
+  }
+}
+
+`PlRLazyFrame_join_asof` <- function(self) {
+  function(`other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `left_by` = NULL, `right_by` = NULL, `tolerance` = NULL, `tolerance_str` = NULL) {
+    `other` <- .savvy_extract_ptr(`other`, "PlRLazyFrame")
+    `left_on` <- .savvy_extract_ptr(`left_on`, "PlRExpr")
+    `right_on` <- .savvy_extract_ptr(`right_on`, "PlRExpr")
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_join_asof__impl, `self`, `other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `left_by`, `right_by`, `tolerance`, `tolerance_str`))
+  }
+}
+
+`PlRLazyFrame_join` <- function(self) {
+  function(`other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `join_nulls`, `how`, `suffix`, `validate`, `coalesce` = NULL) {
+    `other` <- .savvy_extract_ptr(`other`, "PlRLazyFrame")
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_join__impl, `self`, `other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `join_nulls`, `how`, `suffix`, `validate`, `coalesce`))
+  }
+}
+
+`PlRLazyFrame_join_where` <- function(self) {
+  function(`other`, `predicates`, `suffix`) {
+    `other` <- .savvy_extract_ptr(`other`, "PlRLazyFrame")
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_join_where__impl, `self`, `other`, `predicates`, `suffix`))
+  }
+}
+
+`PlRLazyFrame_with_columns_seq` <- function(self) {
+  function(`exprs`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_with_columns_seq__impl, `self`, `exprs`))
+  }
+}
+
+`PlRLazyFrame_rename` <- function(self) {
+  function(`existing`, `new`, `strict`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_rename__impl, `self`, `existing`, `new`, `strict`))
+  }
+}
+
+`PlRLazyFrame_reverse` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_reverse__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_shift` <- function(self) {
+  function(`n`, `fill_value` = NULL) {
+    `n` <- .savvy_extract_ptr(`n`, "PlRExpr")
+    `fill_value` <- .savvy_extract_ptr(`fill_value`, "PlRExpr")
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_shift__impl, `self`, `n`, `fill_value`))
+  }
+}
+
+`PlRLazyFrame_fill_nan` <- function(self) {
+  function(`fill_value`) {
+    `fill_value` <- .savvy_extract_ptr(`fill_value`, "PlRExpr")
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_fill_nan__impl, `self`, `fill_value`))
+  }
+}
+
+`PlRLazyFrame_fill_null` <- function(self) {
+  function(`fill_value`) {
+    `fill_value` <- .savvy_extract_ptr(`fill_value`, "PlRExpr")
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_fill_null__impl, `self`, `fill_value`))
+  }
+}
+
+`PlRLazyFrame_min` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_min__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_max` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_max__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_sum` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_sum__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_mean` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_mean__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_std` <- function(self) {
+  function(`ddof`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_std__impl, `self`, `ddof`))
+  }
+}
+
+`PlRLazyFrame_var` <- function(self) {
+  function(`ddof`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_var__impl, `self`, `ddof`))
+  }
+}
+
+`PlRLazyFrame_median` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_median__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_quantile` <- function(self) {
+  function(`quantile`, `interpolation`) {
+    `quantile` <- .savvy_extract_ptr(`quantile`, "PlRExpr")
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_quantile__impl, `self`, `quantile`, `interpolation`))
+  }
+}
+
+`PlRLazyFrame_explode` <- function(self) {
+  function(`column`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_explode__impl, `self`, `column`))
+  }
+}
+
+`PlRLazyFrame_null_count` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_null_count__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_unique` <- function(self) {
+  function(`maintain_order`, `keep`, `subset` = NULL) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_unique__impl, `self`, `maintain_order`, `keep`, `subset`))
+  }
+}
+
+`PlRLazyFrame_drop_nulls` <- function(self) {
+  function(`subset` = NULL) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_drop_nulls__impl, `self`, `subset`))
+  }
+}
+
+`PlRLazyFrame_unpivot` <- function(self) {
+  function(`on`, `index`, `value_name` = NULL, `variable_name` = NULL) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_unpivot__impl, `self`, `on`, `index`, `value_name`, `variable_name`))
+  }
+}
+
+`PlRLazyFrame_with_row_index` <- function(self) {
+  function(`name`, `offset` = NULL) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_with_row_index__impl, `self`, `name`, `offset`))
+  }
+}
+
+`PlRLazyFrame_clone` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_clone__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_unnest` <- function(self) {
+  function(`columns`) {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_unnest__impl, `self`, `columns`))
+  }
+}
+
+`PlRLazyFrame_count` <- function(self) {
+  function() {
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_count__impl, `self`))
+  }
+}
+
+`PlRLazyFrame_merge_sorted` <- function(self) {
+  function(`other`, `key`) {
+    `other` <- .savvy_extract_ptr(`other`, "PlRLazyFrame")
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_merge_sorted__impl, `self`, `other`, `key`))
+  }
+}
+
 `.savvy_wrap_PlRLazyFrame` <- function(ptr) {
   e <- new.env(parent = emptyenv())
   e$.ptr <- ptr
@@ -3381,6 +3651,48 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
   e$`collect_schema` <- `PlRLazyFrame_collect_schema`(ptr)
   e$`sort_by_exprs` <- `PlRLazyFrame_sort_by_exprs`(ptr)
   e$`with_columns` <- `PlRLazyFrame_with_columns`(ptr)
+  e$`to_dot` <- `PlRLazyFrame_to_dot`(ptr)
+  e$`sort` <- `PlRLazyFrame_sort`(ptr)
+  e$`top_k` <- `PlRLazyFrame_top_k`(ptr)
+  e$`bottom_k` <- `PlRLazyFrame_bottom_k`(ptr)
+  e$`cache` <- `PlRLazyFrame_cache`(ptr)
+  e$`profile` <- `PlRLazyFrame_profile`(ptr)
+  e$`sink_parquet` <- `PlRLazyFrame_sink_parquet`(ptr)
+  e$`sink_ipc` <- `PlRLazyFrame_sink_ipc`(ptr)
+  e$`sink_csv` <- `PlRLazyFrame_sink_csv`(ptr)
+  e$`sink_json` <- `PlRLazyFrame_sink_json`(ptr)
+  e$`serialize` <- `PlRLazyFrame_serialize`(ptr)
+  e$`select_seq` <- `PlRLazyFrame_select_seq`(ptr)
+  e$`rolling` <- `PlRLazyFrame_rolling`(ptr)
+  e$`group_by_dynamic` <- `PlRLazyFrame_group_by_dynamic`(ptr)
+  e$`with_context` <- `PlRLazyFrame_with_context`(ptr)
+  e$`join_asof` <- `PlRLazyFrame_join_asof`(ptr)
+  e$`join` <- `PlRLazyFrame_join`(ptr)
+  e$`join_where` <- `PlRLazyFrame_join_where`(ptr)
+  e$`with_columns_seq` <- `PlRLazyFrame_with_columns_seq`(ptr)
+  e$`rename` <- `PlRLazyFrame_rename`(ptr)
+  e$`reverse` <- `PlRLazyFrame_reverse`(ptr)
+  e$`shift` <- `PlRLazyFrame_shift`(ptr)
+  e$`fill_nan` <- `PlRLazyFrame_fill_nan`(ptr)
+  e$`fill_null` <- `PlRLazyFrame_fill_null`(ptr)
+  e$`min` <- `PlRLazyFrame_min`(ptr)
+  e$`max` <- `PlRLazyFrame_max`(ptr)
+  e$`sum` <- `PlRLazyFrame_sum`(ptr)
+  e$`mean` <- `PlRLazyFrame_mean`(ptr)
+  e$`std` <- `PlRLazyFrame_std`(ptr)
+  e$`var` <- `PlRLazyFrame_var`(ptr)
+  e$`median` <- `PlRLazyFrame_median`(ptr)
+  e$`quantile` <- `PlRLazyFrame_quantile`(ptr)
+  e$`explode` <- `PlRLazyFrame_explode`(ptr)
+  e$`null_count` <- `PlRLazyFrame_null_count`(ptr)
+  e$`unique` <- `PlRLazyFrame_unique`(ptr)
+  e$`drop_nulls` <- `PlRLazyFrame_drop_nulls`(ptr)
+  e$`unpivot` <- `PlRLazyFrame_unpivot`(ptr)
+  e$`with_row_index` <- `PlRLazyFrame_with_row_index`(ptr)
+  e$`clone` <- `PlRLazyFrame_clone`(ptr)
+  e$`unnest` <- `PlRLazyFrame_unnest`(ptr)
+  e$`count` <- `PlRLazyFrame_count`(ptr)
+  e$`merge_sorted` <- `PlRLazyFrame_merge_sorted`(ptr)
 
   class(e) <- c("PlRLazyFrame", "savvy_neopolars__sealed")
   e
diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index 45e14a6e..0465d4a0 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -102,6 +102,61 @@ lazyframe__select <- function(...) {
   })
 }
 
+#' Select columns from this LazyFrame
+#'
+#' This will run all expressions sequentially instead of in parallel. Use this
+#' when the work per expression is cheap.
+#'
+#' @inherit as_polars_lf return
+#' @inheritParams lazyframe__select
+#'
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = 1:3,
+#'   bar = 6:8,
+#'   ham = letters[1:3]
+#' )
+#' lf$select_seq("foo")$collect()
+lazyframe__select_seq <- function(...) {
+  wrap({
+    structify <- parse_env_auto_structify()
+    parse_into_list_of_expressions(..., `__structify` = structify) |>
+      self$`_ldf`$select_seq()
+  })
+}
+
+#' Start a group by operation
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Column(s) to group by.
+#' Accepts expression input. Strings are parsed as column names.
+#' @param .maintain_order Ensure that the order of the groups is consistent with
+#' the input data. This is slower than a default group by. Setting this to
+#' `TRUE` blocks the possibility to run on the streaming engine.
+#'
+# TODO: need a proper definition to link to
+#' @return A lazy groupby
+#' @examples
+#' # Group by one column and call agg() to compute the grouped sum of another
+#' # column.
+#' lf <- pl$LazyFrame(
+#'   a = c("a", "b", "a", "b", "c"),
+#'   b = c(1, 2, 1, 3, 3),
+#'   c = c(5, 4, 3, 2, 1)
+#' )
+#' lf$group_by("a")$agg(pl$col("b")$sum())$collect()
+#'
+#' # Set .maintain_order = TRUE to ensure the order of the groups is consistent
+#' # with the input.
+#' lf$group_by("a", .maintain_order = TRUE)$agg(pl$col("b")$sum())$collect()
+#'
+#' # Group by multiple columns by passing a vector of column names.
+#' lf$group_by(c("a", "b"))$agg(pl$col("c")$max())$collect()
+#'
+#' # Or use positional arguments to group by multiple columns in the same way.
+#' # Expressions are also accepted.
+#' lf$
+#'   group_by("a", pl$col("b") / 2)$
+#'   agg(pl$col("c")$mean())$collect()
 lazyframe__group_by <- function(..., .maintain_order = FALSE) {
   wrap({
     exprs <- parse_into_list_of_expressions(...)
@@ -131,6 +186,18 @@ lazyframe__group_by <- function(..., .maintain_order = FALSE) {
 #' It may be changed at any point without it being considered a breaking change.
 #' @param _eager A logical, indicates to turn off multi-node optimizations and the other optimizations.
 #' This option is intended for internal use only.
+#'
+#' @inherit as_polars_lf return
+#'
+#' @seealso
+#'  - [`$profile()`][lazyframe__profile] - same as `$collect()` but also returns
+#'    a table with each operation profiled.
+#'  - [`$collect_in_background()`][lazyframe__collect_in_background] - non-blocking
+#'    collect returns a future handle. Can also just be used via
+#'    `$collect(collect_in_background = TRUE)`.
+#'  - [`$sink_parquet()`][lazyframe__sink_parquet()] streams query to a parquet file.
+#'  - [`$sink_ipc()`][lazyframe__sink_ipc()] streams query to an arrow file.
+#'
 #' @examples
 #' lf <- pl$LazyFrame(
 #'   a = c("a", "b", "a", "b", "b", "c"),
@@ -185,6 +252,144 @@ lazyframe__collect <- function(
   })
 }
 
+#' Collect and profile a lazy query.
+#'
+#' This will run the query and return a list containing the materialized
+#' DataFrame and a DataFrame that contains profiling information of each node
+#' that is executed.
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams lazyframe__collect
+#' @param show_plot Show a Gantt chart of the profiling result
+#' @param truncate_nodes Truncate the label lengths in the Gantt chart to this
+#' number of characters. If `0` (default), do not truncate.
+#'
+#' @details
+#' The units of the timings are microseconds.
+#'
+#' @return List of two `DataFrame`s: one with the collected result, the other
+#' with the timings of each step. If `show_plot = TRUE`, then the plot is
+#' also stored in the list.
+#' @seealso
+#'  - [`$collect()`][lazyframe__collect] - regular collect.
+#'  - [`$collect_in_background()`][lazyframe__collect_in_background] - non-blocking
+#'    collect returns a future handle. Can also just be used via
+#'    `$collect(collect_in_background = TRUE)`.
+#'  - [`$sink_parquet()`][lazyframe__sink_parquet()] streams query to a parquet file.
+#'  - [`$sink_ipc()`][lazyframe__sink_ipc()] streams query to an arrow file.
+#'
+#' @examples
+#' ## Simplest use case
+#' pl$LazyFrame()$select(pl$lit(2) + 2)$profile()
+#'
+#' ## Use $profile() to compare two queries
+#'
+#' # -1-  map each Species-group with native polars
+#' as_polars_lf(iris)$
+#'   sort("Sepal.Length")$
+#'   group_by("Species", .maintain_order = TRUE)$
+#'   agg(pl$col(pl$Float64)$first() + 5)$
+#'   profile()
+#'
+#' # -2-  map each Species-group of each numeric column with an R function
+#'
+#' # some R function, prints `.` for each time called by polars
+#' r_func <- \(s) {
+#'   cat(".")
+#'   s$to_r()[1] + 5
+#' }
+#'
+#' as_polars_lf(iris)$
+#'   sort("Sepal.Length")$
+#'   group_by("Species", .maintain_order = TRUE)$
+#'   agg(pl$col(pl$Float64)$map_elements(r_func))$
+#'   profile()
+lazyframe__profile <- function(
+    ...,
+    type_coercion = TRUE,
+    predicate_pushdown = TRUE,
+    projection_pushdown = TRUE,
+    simplify_expression = TRUE,
+    slice_pushdown = TRUE,
+    comm_subplan_elim = TRUE,
+    comm_subexpr_elim = TRUE,
+    cluster_with_columns = TRUE,
+    streaming = FALSE,
+    no_optimization = FALSE,
+    collect_in_background = FALSE,
+    show_plot = FALSE,
+    truncate_nodes = 0) {
+  wrap({
+    check_dots_empty0(...)
+
+    if (isTRUE(no_optimization)) {
+      predicate_pushdown <- FALSE
+      projection_pushdown <- FALSE
+      slice_pushdown <- FALSE
+      comm_subplan_elim <- FALSE
+      comm_subexpr_elim <- FALSE
+      cluster_with_columns <- FALSE
+    }
+
+    if (isTRUE(streaming)) {
+      comm_subplan_elim <- FALSE
+    }
+
+    lf <- self$`_ldf`$optimization_toggle(
+      type_coercion = type_coercion,
+      predicate_pushdown = predicate_pushdown,
+      projection_pushdown = projection_pushdown,
+      simplify_expression = simplify_expression,
+      slice_pushdown = slice_pushdown,
+      comm_subplan_elim = comm_subplan_elim,
+      comm_subexpr_elim = comm_subexpr_elim,
+      cluster_with_columns = cluster_with_columns,
+      streaming = streaming,
+      eager = FALSE
+    )
+    # Profile the plan that carries the optimization flags set above.
+    out <- lf$profile()
+
+    if (isTRUE(show_plot)) {
+      out[["plot"]] <- make_profile_plot(out, truncate_nodes) |>
+        wrap()
+    }
+    out
+  })
+}
+
+#' Create a string representation of the query plan
+#'
+#' The query plan is read from bottom to top. When `optimized = FALSE`, the
+#' query as it was written by the user is shown. This is not what Polars runs.
+#' Instead, it applies optimizations that are displayed by default by `$explain()`.
+#' One classic example is the predicate pushdown, which applies the filter as
+#' early as possible (i.e. at the bottom of the plan).
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams lazyframe__collect
+#' @param format The format to use for displaying the logical plan. Must be
+#' either `"plain"` (default) or `"tree"`.
+#' @param optimized Return an optimized query plan. If `TRUE` (default), the
+#' subsequent optimization flags control which optimizations run.
+#'
+#' @return A character value containing the query plan.
+#' @examples
+#' lazy_frame <- as_polars_lf(iris)
+#'
+#' # Prepare your query
+#' lazy_query <- lazy_frame$sort("Species")$filter(pl$col("Species") != "setosa")
+#'
+#' # This is the query that was written by the user, without any optimizations
+#' # (use cat() for better printing)
+#' lazy_query$explain(optimized = FALSE) |> cat()
+#'
+#' # This is the query after `polars` optimizes it: instead of sorting first and
+#' # then filtering, it is faster to filter first and then sort the rest.
+#' lazy_query$explain() |> cat()
+#'
+#' # Also possible to see this as tree format
+#' lazy_query$explain(format = "tree") |> cat()
 lazyframe__explain <- function(
     ...,
     format = c("plain", "tree"),
@@ -232,6 +437,26 @@ lazyframe__explain <- function(
   })
 }
 
+#' Resolve the schema of this LazyFrame
+#'
+#' This resolves the query plan but does not trigger computations.
+#'
+#' @return A named list with names indicating column names and values indicating
+#' column data types.
+#'
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = 1:3,
+#'   bar = 6:8,
+#'   ham = c("a", "b", "c")
+#' )
+#'
+#' lf$collect_schema()
+#'
+#' lf$with_columns(
+#'   baz = (pl$col("foo") + pl$col("bar"))$cast(pl$String),
+#'   pl$col("bar")$cast(pl$Int64)
+#' )$collect_schema()
 lazyframe__collect_schema <- function() {
   self$`_ldf`$collect_schema() |>
     lapply(function(x) {
@@ -241,6 +466,32 @@ lazyframe__collect_schema <- function() {
     wrap()
 }
 
+#' Cast LazyFrame column(s) to the specified dtype(s)
+#'
+#' This allows converting all columns to a datatype or converting only specific
+#' columns. Contrary to the Python implementation, it is not possible to
+#' convert all columns of a specific datatype to another datatype.
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Either a datatype to which
+#' all columns will be cast, or a list where the names are column names and the
+#' values are the datatypes to convert to.
+#' @param .strict If `TRUE` (default), throw an error if a cast could not be done
+#' (for instance, due to an overflow). Otherwise, return `null`.
+#'
+#' @return A LazyFrame
+#'
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = 1:3,
+#'   bar = c(6, 7, 8),
+#'   ham = as.Date(c("2020-01-02", "2020-03-04", "2020-05-06"))
+#' )
+#'
+#' # Cast only some columns
+#' lf$cast(foo = pl$Float32, bar = pl$UInt8)$collect()
+#'
+#' # Cast all columns to the same type
+#' lf$cast(pl$String)$collect()
 lazyframe__cast <- function(..., .strict = TRUE) {
   wrap({
     check_bool(.strict)
@@ -254,12 +505,76 @@ lazyframe__cast <- function(..., .strict = TRUE) {
   })
 }
 
+#' Filter the rows in the LazyFrame based on a predicate expression
+#'
+#' The original order of the remaining rows is preserved. Rows where the filter
+#' does not evaluate to `TRUE` are discarded, including nulls.
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Expression that evaluates to
+#' a boolean Series.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = c(1, 2, 3, NA, 4, NA, 0),
+#'   bar = c(6, 7, 8, NA, NA, 9, 0),
+#'   ham = c("a", "b", "c", NA, "d", "e", "f")
+#' )
+#'
+#' # Filter on one condition
+#' lf$filter(pl$col("foo") > 1)$collect()
+#'
+#' # Filter on multiple conditions
+#' lf$filter((pl$col("foo") < 3) & (pl$col("ham") == "a"))$collect()
+#'
+#' # Filter on an OR condition
+#' lf$filter((pl$col("foo") == 1) | (pl$col("ham") == "c"))$collect()
+#'
+#' # Filter by comparing two columns against each other
+#' lf$filter(pl$col("foo") == pl$col("bar"))$collect()
+#' lf$filter(pl$col("foo") != pl$col("bar"))$collect()
+#'
+#' # Notice how the row with null values is filtered out. In order to keep the
+#' # rows with nulls, use:
+#' lf$filter(pl$col("foo")$ne_missing(pl$col("bar")))$collect()
 lazyframe__filter <- function(...) {
   parse_predicates_constraints_into_expression(...) |>
     self$`_ldf`$filter() |>
     wrap()
 }
 
+#' Sort the LazyFrame by the given columns
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Column(s) to sort by. Can be
+#' character values indicating column names or Expr(s).
+#' @param descending Sort in descending order. When sorting by multiple
+#' columns, this can be specified per column by passing a logical vector.
+#' @param nulls_last Place null values last. When sorting by multiple
+#' columns, this can be specified per column by passing a logical vector.
+#' @param maintain_order Whether the order should be maintained if elements are
+#' equal. If `TRUE`, streaming is not possible and performance might be worse
+#' since this requires a stable sort.
+#' @param multithreaded Sort using multiple threads.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   a = c(1, 2, NA, 4),
+#'   b = c(6, 5, 4, 3),
+#'   c = c("a", "c", "b", "a")
+#' )
+#'
+#' # Pass a single column name to sort by that column.
+#' lf$sort("a")$collect()
+#'
+#' # Sorting by expressions is also supported
+#' lf$sort(pl$col("a") + pl$col("b") * 2, nulls_last = TRUE)$collect()
+#'
+#' # Sort by multiple columns by passing a vector of columns
+#' lf$sort(c("c", "a"), descending = TRUE)$collect()
+#'
+#' # Or use positional arguments to sort by multiple columns in the same way
+#' lf$sort("c", "a", descending = c(FALSE, TRUE))$collect()
 lazyframe__sort <- function(
     ...,
     descending = FALSE,
@@ -269,6 +584,9 @@ lazyframe__sort <- function(
   wrap({
     check_dots_unnamed()
 
+    if (missing(...)) {
+      abort("`...` must contain at least one element.")
+    }
     by <- parse_into_list_of_expressions(...)
     descending <- extend_bool(descending, length(by), "descending", "...")
     nulls_last <- extend_bool(nulls_last, length(by), "nulls_last", "...")
@@ -332,14 +650,93 @@ lazyframe__sort <- function(
 #'   })
 #' }
 lazyframe__with_columns <- function(...) {
+  structify <- parse_env_auto_structify()
+
+  parse_into_list_of_expressions(..., `__structify` = structify) |>
+    self$`_ldf`$with_columns() |>
+    wrap()
+}
+
+#' Modify/append column(s) of a LazyFrame
+#'
+#' @description
+#' This will run all expressions sequentially instead of in parallel. Use this
+#' only when the work per expression is cheap.
+#'
+#' Add columns or modify existing ones with expressions. This is similar to
+#' `dplyr::mutate()` as it keeps unmentioned columns (unlike `$select()`).
+#'
+#' However, unlike `dplyr::mutate()`, one cannot use new variables in subsequent
+#' expressions in the same `$with_columns_seq()` call. For instance, if you create a
+#' variable `x`, you will only be able to use it in another `$with_columns_seq()`
+#' or `$select()` call.
+#'
+#' @inherit as_polars_lf return
+#' @inheritParams lazyframe__select
+#' @examples
+#' # Pass an expression to add it as a new column.
+#' lf <- pl$LazyFrame(
+#'   a = 1:4,
+#'   b = c(0.5, 4, 10, 13),
+#'   c = c(TRUE, TRUE, FALSE, TRUE),
+#' )
+#' lf$with_columns_seq((pl$col("a")^2)$alias("a^2"))$collect()
+#'
+#' # Added columns will replace existing columns with the same name.
+#' lf$with_columns_seq(a = pl$col("a")$cast(pl$Float64))$collect()
+#'
+#' # Multiple columns can be added
+#' lf$with_columns_seq(
+#'   (pl$col("a")^2)$alias("a^2"),
+#'   (pl$col("b") / 2)$alias("b/2"),
+#'   (pl$col("c")$not())$alias("not c"),
+#' )$collect()
+#'
+#' # Name expression instead of `$alias()`
+#' lf$with_columns_seq(
+#'   `a^2` = pl$col("a")^2,
+#'   `b/2` = pl$col("b") / 2,
+#'   `not c` = pl$col("c")$not(),
+#' )$collect()
+#'
+#' # Expressions with multiple outputs can automatically be instantiated
+#' # as Structs by enabling the experimental setting `POLARS_AUTO_STRUCTIFY`:
+#' if (requireNamespace("withr", quietly = TRUE)) {
+#'   withr::with_envvar(c(POLARS_AUTO_STRUCTIFY = "1"), {
+#'     lf$drop("c")$with_columns_seq(
+#'       diffs = pl$col("a", "b")$diff()$name$suffix("_diff"),
+#'     )$collect()
+#'   })
+#' }
+lazyframe__with_columns_seq <- function(...) {
   wrap({
     structify <- parse_env_auto_structify()
 
     parse_into_list_of_expressions(..., `__structify` = structify) |>
-      self$`_ldf`$with_columns()
+      self$`_ldf`$with_columns_seq()
   })
 }
 
+#' Remove columns from the LazyFrame
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Names of the columns that
+#' should be removed from the LazyFrame. Accepts column selector input.
+#' @param strict Validate that all column names exist in the current schema,
+#' and throw an exception if any do not.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' # Drop columns by passing the name of those columns
+#' lf <- pl$LazyFrame(
+#'   foo = 1:3,
+#'   bar = c(6, 7, 8),
+#'   ham = c("a", "b", "c")
+#' )
+#' lf$drop("ham")$collect()
+#' lf$drop("ham", "bar")$collect()
+#'
+#' # Drop multiple columns by passing a selector
+#' lf$drop(cs$all())$collect()
 lazyframe__drop <- function(..., strict = TRUE) {
   wrap({
     check_dots_unnamed()
@@ -358,12 +755,1924 @@ lazyframe__slice <- function(offset, length = NULL) {
   })
 }
 
+#' Get the first `n` rows
+#'
+#' @param n Number of rows to return.
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:6, b = 7:12)
+#' lf$head()$collect()
+#' lf$head(2)$collect()
 lazyframe__head <- function(n = 5) {
   self$slice(0, n) |>
     wrap()
 }
 
+#' Get the first `n` rows
+#'
+#' Alias for [`<LazyFrame>$head()`][lazyframe__head].
+#'
+#' @inheritParams lazyframe__head
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:6, b = 7:12)
+#' lf$limit()$collect()
+#' lf$limit(2)$collect()
+lazyframe__limit <- function(n = 5) {
+  wrap({
+    self$head(n)
+  })
+}
+
+#' Get the last `n` rows
+#'
+#' @inheritParams lazyframe__head
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:6, b = 7:12)
+#' lf$tail()$collect()
+#' lf$tail(2)$collect()
 lazyframe__tail <- function(n = 5) {
   self$`_ldf`$tail(n) |>
     wrap()
 }
+
+
+#' Get the first row of the LazyFrame
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$first()$collect()
+lazyframe__first <- function() {
+  wrap({
+    self$slice(0, 1)
+  })
+}
+
+#' Get the last row of the LazyFrame
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$last()$collect()
+lazyframe__last <- function() {
+  wrap({
+    self$tail(1)
+  })
+}
+
+#' Aggregate the columns in the LazyFrame to their maximum value
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$max()$collect()
+lazyframe__max <- function() {
+  wrap({
+    self$`_ldf`$max()
+  })
+}
+
+#' Aggregate the columns in the LazyFrame to their mean value
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$mean()$collect()
+lazyframe__mean <- function() {
+  wrap({
+    self$`_ldf`$mean()
+  })
+}
+
+#' Aggregate the columns in the LazyFrame to their median value
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$median()$collect()
+lazyframe__median <- function() {
+  wrap({
+    self$`_ldf`$median()
+  })
+}
+
+#' Aggregate the columns in the LazyFrame to their minimum value
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$min()$collect()
+lazyframe__min <- function() {
+  wrap({
+    self$`_ldf`$min()
+  })
+}
+
+#' Aggregate the columns of this LazyFrame to their sum values
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$sum()$collect()
+lazyframe__sum <- function() {
+  wrap({
+    self$`_ldf`$sum()
+  })
+}
+
+#' Aggregate the columns in the LazyFrame to their variance value
+#'
+#' @inheritParams DataFrame_var
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$var()$collect()
+#' lf$var(ddof = 0)$collect()
+lazyframe__var <- function(ddof = 1) {
+  wrap({
+    self$`_ldf`$var(ddof)
+  })
+}
+
+#' Aggregate the columns of this LazyFrame to their standard deviation values
+#'
+#' @inheritParams DataFrame_std
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$std()$collect()
+#' lf$std(ddof = 0)$collect()
+lazyframe__std <- function(ddof = 1) {
+  wrap({
+    self$`_ldf`$std(ddof)
+  })
+}
+
+#' Aggregate the columns in the LazyFrame to a unique quantile value
+#'
+#' @inheritParams DataFrame_quantile
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+#' lf$quantile(0.7)$collect()
+lazyframe__quantile <- function(
+    quantile,
+    interpolation = c("nearest", "higher", "lower", "midpoint", "linear")) {
+  wrap({
+    interpolation <- arg_match0(
+      interpolation,
+      values = c("nearest", "higher", "lower", "midpoint", "linear")
+    )
+    self$`_ldf`$quantile(as_polars_expr(quantile, as_lit = TRUE)$`_rexpr`, interpolation)
+  })
+}
+
+#' @inherit expr__fill_nan title params
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   a = c(1.5, 2, NaN, 4),
+#'   b = c(1.5, NaN, NaN, 4)
+#' )
+#' lf$fill_nan(99)$collect()
+lazyframe__fill_nan <- function(value) {
+  wrap({
+    self$`_ldf`$fill_nan(as_polars_expr(value)$`_rexpr`)
+  })
+}
+
+#' @inherit DataFrame_fill_null title description params
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   a = c(1.5, 2, NA, 4),
+#'   b = c(1.5, NA, NA, 4)
+#' )
+#' lf$fill_null(99)$collect()
+lazyframe__fill_null <- function(fill_value) {
+  wrap({
+    self$`_ldf`$fill_null(as_polars_expr(fill_value)$`_rexpr`)
+  })
+}
+
+#' Shift values by the given number of indices
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param n Number of indices to shift forward. If a negative value is passed,
+#' values are shifted in the opposite direction instead.
+#' @param fill_value Fill the resulting null values with this value. Accepts
+#' expression input. Non-expression inputs are parsed as literals.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = 5:8)
+#'
+#' # By default, values are shifted forward by one index.
+#' lf$shift()$collect()
+#'
+#' # Pass a negative value to shift in the opposite direction instead.
+#' lf$shift(-2)$collect()
+#'
+#' # Specify fill_value to fill the resulting null values.
+#' lf$shift(-2, fill_value = 100)$collect()
+lazyframe__shift <- function(n = 1, ..., fill_value = NULL) {
+  wrap({
+    check_dots_empty0(...)
+    self$`_ldf`$shift(as_polars_expr(n)$`_rexpr`, as_polars_expr(fill_value)$`_rexpr`)
+  })
+}
+
+#' Reverse the LazyFrame
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(key = c("a", "b", "c"), val = 1:3)
+#' lf$reverse()$collect()
+lazyframe__reverse <- function() {
+  wrap({
+    self$`_ldf`$reverse()
+  })
+}
+
+#' Get a slice of the LazyFrame.
+#'
+#' @param offset Start index. Negative indexing is supported.
+#' @param length Length of the slice. If `NULL` (default), all rows starting at
+#' the offset will be selected.
+#'
+#' @return A [LazyFrame][lazyframe__class]
+#' @examples
+#' lf <- pl$LazyFrame(x = c("a", "b", "c"), y = 1:3, z = 4:6)
+#' lf$slice(1, 2)$collect()
+lazyframe__slice <- function(offset, length = NULL) {
+  wrap({
+    self$`_ldf`$slice(offset, length)
+  })
+}
+
+#' Get the last `n` rows.
+#'
+#' @inherit lazyframe__head return params
+#' @inheritParams lazyframe__head
+#' @seealso [`<LazyFrame>$head()`][lazyframe__head]
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:6, b = 7:12)
+#'
+#' lf$tail()$collect()
+#'
+#' lf$tail(2)$collect()
+lazyframe__tail <- function(n = 5L) {
+  wrap({
+    self$`_ldf`$tail(n)
+  })
+}
+
+#' Drop all rows that contain null values
+#'
+#' The original order of the remaining rows is preserved.
+#'
+#' @param subset Column name(s) for which null values are considered. If `NULL`
+#' (default), use all columns.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = 1:3,
+#'   bar = c(6, NA, 8),
+#'   ham = c("a", "b", NA)
+#' )
+#'
+#' # The default behavior of this method is to drop rows where any single value
+#' # of the row is null.
+#' lf$drop_nulls()$collect()
+#'
+#' # This behaviour can be constrained to consider only a subset of columns, as
+#' # defined by name or with a selector. For example, dropping rows if there is
+#' # a null in any of the integer columns:
+#' lf$drop_nulls(subset = cs$integer())$collect()
+lazyframe__drop_nulls <- function(subset = NULL) {
+  wrap({
+    if (!is.null(subset)) {
+      subset <- parse_into_list_of_expressions(!!!subset)
+    }
+    self$`_ldf`$drop_nulls(subset)
+  })
+}
+
+#' Drop duplicate rows from this LazyFrame
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param subset Column name(s) or selector(s), to consider when identifying
+#' duplicate rows. If `NULL` (default), use all columns.
+#' @param keep Which of the duplicate rows to keep. Must be one of:
+#' * `"any"`: does not give any guarantee of which row is kept. This allows
+#'   more optimizations.
+#' * `"none"`: don’t keep duplicate rows.
+#' * `"first"`: keep first unique row.
+#' * `"last"`: keep last unique row.
+#' @param maintain_order Keep the same order as the original LazyFrame. This is
+#' more expensive to compute. Setting this to `TRUE` blocks the possibility to
+#' run on the streaming engine.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = c(1, 2, 3, 1),
+#'   bar = c("a", "a", "a", "a"),
+#'   ham = c("b", "b", "b", "b"),
+#' )
+#' lf$unique(maintain_order = TRUE)$collect()
+#'
+#' lf$unique(subset = c("bar", "ham"), maintain_order = TRUE)$collect()
+#'
+#' lf$unique(keep = "last", maintain_order = TRUE)$collect()
+lazyframe__unique <- function(
+    subset = NULL,
+    ...,
+    keep = c("any", "none", "first", "last"),
+    maintain_order = FALSE) {
+  wrap({
+    check_dots_empty0(...)
+    keep <- arg_match0(keep, values = c("any", "none", "first", "last"))
+    if (!is.null(subset)) {
+      subset <- parse_into_list_of_expressions(!!!subset)
+    }
+    self$`_ldf`$unique(subset = subset, keep = keep, maintain_order = maintain_order)
+  })
+}
+
+#' Join LazyFrames
+#'
+#' This function can do both mutating joins (adding columns based on matching
+#' observations, for example with `how = "left"`) and filtering joins (keeping
+#' observations based on matching observations, for example with `how =
+#' "semi"`).
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param other LazyFrame to join with.
+#' @param on Either a vector of column names or a list of expressions and/or
+#'   strings. Use `left_on` and `right_on` if the column names to match on are
+#'   different between the two LazyFrames.
+#' @param how One of the following methods:
+#' * "inner": returns rows that have matching values in both tables
+#' * "left": returns all rows from the left table, and the matched rows from
+#'   the right table
+#' * "right": returns all rows from the right table, and the matched rows from
+#'   the left table
+#' * "full": returns all rows when there is a match in either left or right
+#'   table
+#' * "cross": returns the Cartesian product of rows from both tables
+#' * "semi": returns rows from the left table that have a match in the right
+#'   table.
+#' * "anti": returns rows from the left table that have no match in the right
+#'   table.
+#' @param left_on,right_on Same as `on` but only for the left or the right
+#'   LazyFrame. They must have the same length.
+#' @param suffix Suffix to add to duplicated column names.
+#' @param validate Checks if join is of specified type:
+#' * `"m:m"` (default): many-to-many, doesn't perform any checks;
+#' * `"1:1"`: one-to-one, check if join keys are unique in both left and right
+#'   datasets;
+#' * `"1:m"`: one-to-many, check if join keys are unique in left dataset
+#' * `"m:1"`: many-to-one, check if join keys are unique in right dataset
+#'
+#' Note that this is currently not supported by the streaming engine.
+#'
+#' @param join_nulls Join on null values. By default null values will never
+#'   produce matches.
+#' @param allow_parallel Allow the physical plan to optionally evaluate the
+#'   computation of both LazyFrames up to the join in parallel.
+#' @param force_parallel Force the physical plan to evaluate the computation of
+#'   both LazyFrames up to the join in parallel.
+#' @param coalesce Coalescing behavior (merging of join columns).
+#' - `NULL`: join specific.
+#' - `TRUE`: Always coalesce join columns.
+#' - `FALSE`: Never coalesce join columns.
+#' Note that joining on any other expressions than `col` will turn off
+#' coalescing.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = 1:3,
+#'   bar = c(6, 7, 8),
+#'   ham = c("a", "b", "c")
+#' )
+#' other_lf <- pl$LazyFrame(
+#'   apple = c("x", "y", "z"),
+#'   ham = c("a", "b", "d")
+#' )
+#' lf$join(other_lf, on = "ham")$collect()
+#'
+#' lf$join(other_lf, on = "ham", how = "full")$collect()
+#'
+#' lf$join(other_lf, on = "ham", how = "left", coalesce = TRUE)$collect()
+#'
+#' lf$join(other_lf, on = "ham", how = "semi")$collect()
+#'
+#' lf$join(other_lf, on = "ham", how = "anti")$collect()
+lazyframe__join <- function(
+    other,
+    on = NULL,
+    how = "inner",
+    ...,
+    left_on = NULL,
+    right_on = NULL,
+    suffix = "_right",
+    validate = "m:m",
+    join_nulls = FALSE,
+    allow_parallel = TRUE,
+    force_parallel = FALSE,
+    coalesce = NULL) {
+  wrap({
+    check_dots_empty0(...)
+    check_polars_lf(other)
+    how <- arg_match0(
+      how,
+      values = c("inner", "full", "left", "right", "semi", "anti", "cross")
+    )
+    validate <- arg_match0(validate, values = c("m:m", "1:m", "m:1", "1:1"))
+    uses_on <- !is.null(on)
+    uses_left_on <- !is.null(left_on)
+    uses_right_on <- !is.null(right_on)
+    uses_lr_on <- uses_left_on | uses_right_on
+    if (uses_on && uses_lr_on) {
+      abort("cannot use 'on' in conjunction with 'left_on' or 'right_on'.")
+    }
+    if (uses_left_on && !uses_right_on) {
+      abort("'left_on' requires corresponding 'right_on'")
+    }
+    if (!uses_left_on && uses_right_on) {
+      abort("'right_on' requires corresponding 'left_on'")
+    }
+    if (how == "cross") {
+      if (uses_on | uses_lr_on) {
+        abort("cross join should not pass join keys.")
+      }
+      return(
+        self$`_ldf`$join(
+          other$`_ldf`, list(), list(),
+          how = how, validate = validate,
+          join_nulls = join_nulls, suffix = suffix,
+          allow_parallel = allow_parallel, force_parallel = force_parallel,
+          coalesce = coalesce
+        )
+      )
+    }
+
+    if (uses_on) {
+      rexprs_right <- rexprs_left <- parse_into_list_of_expressions(!!!on)
+    } else if (uses_lr_on) {
+      rexprs_left <- parse_into_list_of_expressions(!!!left_on)
+      rexprs_right <- parse_into_list_of_expressions(!!!right_on)
+    } else {
+      abort("must specify either `on`, or `left_on` and `right_on`.")
+    }
+    self$`_ldf`$join(
+      other$`_ldf`, rexprs_left, rexprs_right,
+      how = how, validate = validate,
+      join_nulls = join_nulls, suffix = suffix,
+      allow_parallel = allow_parallel, force_parallel = force_parallel,
+      coalesce = coalesce
+    )
+  })
+}
+
+#' Perform a join based on one or multiple (in)equality predicates
+#'
+#' @description
+#' `r lifecycle::badge("experimental")`
+#'
+#' This performs an inner join, so only rows where all predicates are true are
+#' included in the result, and a row from either LazyFrame may be included
+#' multiple times in the result.
+#'
+#' Note that the row order of the input LazyFrames is not preserved.
+#'
+#' @param other LazyFrame to join with.
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> (In)Equality condition to
+#' join the two tables on. When a column name occurs in both tables, the proper
+#' suffix must be applied in the predicate. For example, if both tables have a
+#' column `"x"` that you want to use in the conditions, you must refer to the
+#' column of the right table as `"x<suffix>"`.
+#' @param suffix Suffix to append to columns with a duplicate name.
+#'
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' east <- pl$LazyFrame(
+#'   id = c(100, 101, 102),
+#'   dur = c(120, 140, 160),
+#'   rev = c(12, 14, 16),
+#'   cores = c(2, 8, 4)
+#' )
+#'
+#' west <- pl$LazyFrame(
+#'   t_id = c(404, 498, 676, 742),
+#'   time = c(90, 130, 150, 170),
+#'   cost = c(9, 13, 15, 16),
+#'   cores = c(4, 2, 1, 4)
+#' )
+#'
+#' east$join_where(
+#'   west,
+#'   pl$col("dur") < pl$col("time"),
+#'   pl$col("rev") < pl$col("cost")
+#' )$collect()
+lazyframe__join_where <- function(
+    other,
+    ...,
+    suffix = "_right") {
+  wrap({
+    check_polars_lf(other)
+    by <- parse_into_list_of_expressions(...)
+    self$`_ldf`$join_where(other$`_ldf`, by, suffix)
+  })
+}
+
+#' Unpivot a LazyFrame from wide to long format
+#'
+#' This function is useful to massage a LazyFrame into a format where one or
+#' more columns are identifier variables (`index`) while all other columns,
+#' considered measured variables (`on`), are “unpivoted” to the row axis
+#' leaving just two non-identifier columns, "variable" and "value".
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param on Column(s) to use as measured (value) variables. If `on` is
+#' empty, all columns that are not in `index` will be used.
+#' @param index Columns to use as identifier variables.
+#' @param variable_name Name to give to the new column containing the names of
+#' the melted columns. Defaults to "variable".
+#' @param value_name Name to give to the new column containing the values of
+#' the melted columns. Defaults to `"value"`.
+#'
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   a = c("x", "y", "z"),
+#'   b = c(1, 3, 5),
+#'   c = c(2, 4, 6)
+#' )
+#' lf$unpivot(index = "a", on = c("b", "c"))$collect()
+lazyframe__unpivot <- function(
+    on = NULL,
+    ...,
+    index = NULL,
+    variable_name = NULL,
+    value_name = NULL) {
+  wrap({
+    check_dots_empty0(...)
+    if (!is.null(on)) {
+      on <- parse_into_list_of_expressions(!!!on)
+    }
+    if (!is.null(index)) {
+      index <- parse_into_list_of_expressions(!!!index)
+    }
+    self$`_ldf`$unpivot(on, index, value_name, variable_name)
+  })
+}
+
+#' Rename column names
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Either a function that takes
+#' a character vector as input and returns a character vector as output, or
+#' named values where names are old column names and values are the new ones.
+#' @param .strict Validate that all column names exist in the current schema,
+#' and throw an error if any do not. (Note that this parameter is a no-op when
+#' passing a function to `...`).
+#'
+#' @details
+#' If existing names are swapped (e.g. 'A' points to 'B' and 'B' points to
+#' 'A'), polars will block projection and predicate pushdowns at this node.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = 1:3,
+#'   bar = 6:8,
+#'   ham = letters[1:3]
+#' )
+#'
+#' lf$rename(foo = "apple")$collect()
+#'
+#' lf$rename(
+#'   \(column_name) paste0("c", substr(column_name, 2, 100))
+#' )$collect()
+lazyframe__rename <- function(..., .strict = TRUE) {
+  wrap({
+    mapping <- list2(...)
+    if (length(mapping) == 1 && is_function(mapping[[1]]) && !is_named(mapping)) {
+      # TODO: this requires $name$map()
+      abort("Not implemented yet")
+      return(self$select(pl$all()$name$map(mapping[[1]])))
+    }
+    if (!is_list_of_string(mapping)) {
+      abort("`...` only accepts an unnamed function or named single strings.")
+    }
+    existing <- names(mapping)
+    new <- unlist(mapping)
+    self$`_ldf`$rename(existing, new, .strict)
+  })
+}
+
+#' Collect and profile a lazy query
+#'
+#' @description
+#' This will run the query and return a list containing the
+#' materialized DataFrame and a DataFrame that contains profiling information
+#' of each node that is executed.
+#'
+#' @inheritParams lazyframe__collect
+#' @param show_plot Show a Gantt chart of the profiling result
+#' @param truncate_nodes Truncate the label lengths in the Gantt chart to this
+#' number of characters. If `0` (default), do not truncate.
+#'
+#' @details The units of the timings are microseconds.
+#'
+#' @return List of two `DataFrame`s: one with the collected result, the other
+#' with the timings of each step. If `show_plot = TRUE`, then the plot is
+#' also stored in the list.
+#' @seealso
+#'  - [`$collect()`][lazyframe__collect] - regular collect.
+#'  - [`$collect_in_background()`][lazyframe__collect_in_background] - non-blocking
+#'    collect returns a future handle. Can also just be used via
+#'    `$collect(collect_in_background = TRUE)`.
+#'  - [`$sink_parquet()`][lazyframe__sink_parquet] streams query to a parquet file.
+#'  - [`$sink_ipc()`][lazyframe__sink_ipc] streams query to an Arrow file.
+#'
+#' @examples
+#' ## Simplest use case
+#' pl$LazyFrame()$select(pl$lit(2) + 2)$profile()
+#'
+#' ## Use $profile() to compare two queries
+#'
+#' # -1-  map each Species-group with native polars, takes ~120us only
+#' as_polars_lf(iris)$
+#'   sort("Sepal.Length")$
+#'   group_by("Species", maintain_order = TRUE)$
+#'   agg(pl$col(pl$Float64)$first() + 5)$
+#'   profile()
+#'
+#' # -2-  map each Species-group of each numeric column with an R function, takes ~7000us (slow!)
+#'
+#' # some R function, prints `.` for each time called by polars
+#' r_func <- \(s) {
+#'   cat(".")
+#'   s$to_r()[1] + 5
+#' }
+#'
+#' as_polars_lf(iris)$
+#'   sort("Sepal.Length")$
+#'   group_by("Species", maintain_order = TRUE)$
+#'   agg(pl$col(pl$Float64)$map_elements(r_func))$
+#'   profile()
+lazyframe__profile <- function(
+    type_coercion = TRUE,
+    predicate_pushdown = TRUE,
+    projection_pushdown = TRUE,
+    simplify_expression = TRUE,
+    slice_pushdown = TRUE,
+    comm_subplan_elim = TRUE,
+    comm_subexpr_elim = TRUE,
+    cluster_with_columns = TRUE,
+    streaming = FALSE,
+    no_optimization = FALSE,
+    collect_in_background = FALSE,
+    show_plot = FALSE,
+    truncate_nodes = 0) {
+  if (isTRUE(no_optimization)) {
+    predicate_pushdown <- FALSE
+    projection_pushdown <- FALSE
+    slice_pushdown <- FALSE
+    comm_subplan_elim <- FALSE
+    comm_subexpr_elim <- FALSE
+    cluster_with_columns <- FALSE
+  }
+
+  if (isTRUE(streaming)) {
+    comm_subplan_elim <- FALSE
+  }
+  # Build the plan with the (possibly overridden) optimization flags applied.
+  lf <- self$`_ldf`$optimization_toggle(
+    type_coercion = type_coercion,
+    predicate_pushdown = predicate_pushdown,
+    projection_pushdown = projection_pushdown,
+    simplify_expression = simplify_expression,
+    slice_pushdown = slice_pushdown,
+    comm_subplan_elim = comm_subplan_elim,
+    comm_subexpr_elim = comm_subexpr_elim,
+    cluster_with_columns = cluster_with_columns,
+    streaming = streaming,
+    `_eager` = FALSE
+  )
+
+  # NOTE(review): `collect_in_background` is accepted but never read here - confirm.
+  # Profile the plan returned by `optimization_toggle()`; profiling
+  # `self$`_ldf`` directly would ignore every optimization flag set above.
+  # Each element of the result is wrapped so the caller receives DataFrames.
+  out <- lapply(lf$profile(), \(x) wrap(.savvy_wrap_PlRDataFrame(x)))
+
+  if (isTRUE(show_plot)) {
+    out[["plot"]] <- make_profile_plot(out, truncate_nodes)
+  }
+
+  out
+}
+
+#' Serialize the logical plan of this LazyFrame to a string in JSON format
+#'
+#' @return A character value
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:3)$sum()
+#' lf$serialize()
+lazyframe__serialize <- function() {
+  wrap({
+    self$`_ldf`$serialize()
+  })
+}
+
+#' Read a logical plan from a JSON string to construct a LazyFrame
+#'
+#' @param source String containing the LazyFrame logical plan in JSON format.
+#'
+#' @return A [LazyFrame][lazyframe__class]
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:3)$sum()
+#' ser <- lf$serialize()
+#' pl$deserialize_lf(ser)
+pl__deserialize_lf <- function(source) {
+  wrap({
+    deserialize_lf(source)
+  })
+}
+
+#' Explode the LazyFrame to long format by exploding the given columns
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Column names, expressions, or
+#' a selector defining them. The underlying columns being exploded must be of
+#' the `List` or `Array` data type.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   letters = c("a", "a", "b", "c"),
+#'   numbers = list(1, c(2, 3), c(4, 5), c(6, 7, 8))
+#' )
+#'
+#' lf$explode("numbers")$collect()
+lazyframe__explode <- function(...) {
+  wrap({
+    check_dots_unnamed()
+    by <- parse_into_list_of_expressions(...)
+    self$`_ldf`$explode(by)
+  })
+}
+
+#' Clone a LazyFrame
+#'
+#' This makes a very cheap deep copy/clone of an existing
+#' [`LazyFrame`][lazyframe__class]. Rarely useful as `LazyFrame`s are nearly 100%
+#' immutable. Any modification of a `LazyFrame` should lead to a clone anyways,
+#' but this can be useful when dealing with attributes (see examples).
+#'
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' df1 <- as_polars_lf(iris)
+#'
+#' # Make a function to take a LazyFrame, add an attribute, and return a LazyFrame
+#' give_attr <- function(data) {
+#'   attr(data, "created_on") <- "2024-01-29"
+#'   data
+#' }
+#' df2 <- give_attr(df1)
+#'
+#' # Problem: the original LazyFrame also gets the attribute while it shouldn't!
+#' attributes(df1)
+#'
+#' # Use $clone() inside the function to avoid that
+#' give_attr <- function(data) {
+#'   data <- data$clone()
+#'   attr(data, "created_on") <- "2024-01-29"
+#'   data
+#' }
+#' df1 <- as_polars_lf(iris)
+#' df2 <- give_attr(df1)
+#'
+#' # now, the original LazyFrame doesn't get this attribute
+#' attributes(df1)
+lazyframe__clone <- function() {
+  # Wrap the cloned `_ldf` so that a LazyFrame is returned, as documented.
+  self$`_ldf`$clone() |> wrap()
+}
+
+#' Decompose struct columns into separate columns for each of their fields
+#'
+#' The new columns will be inserted into the LazyFrame at the location of the
+#' struct column.
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Name of the struct column(s)
+#' that should be unnested.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   a = 1:5,
+#'   b = c("one", "two", "three", "four", "five"),
+#'   c = 6:10
+#' )$
+#'   select(
+#'   pl$struct("b"),
+#'   pl$struct(c("a", "c"))$alias("a_and_c")
+#' )
+#' lf$collect()
+#'
+#' lf$unnest("a_and_c")$collect()
+#' lf$unnest(pl$col("a_and_c"))$collect()
+lazyframe__unnest <- function(...) {
+  wrap({
+    check_dots_unnamed()
+    columns <- parse_into_list_of_expressions(...)
+    self$`_ldf`$unnest(columns)
+  })
+}
+
+#' Add an external context to the computation graph
+#'
+#' This allows expressions to also access columns from DataFrames or LazyFrames
+#' that are not part of this one.
+#'
+#' @param other Data/LazyFrame to have access to. This can be a list of DataFrames
+#' and LazyFrames.
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf <- pl$LazyFrame(a = c(1, 2, 3), b = c("a", "c", NA))
+#' lf_other <- pl$LazyFrame(c = c("foo", "ham"))
+#'
+#' lf$with_context(lf_other)$select(
+#'   pl$col("b") + pl$col("c")$first()
+#' )$collect()
+#'
+#' # Fill nulls with the median from another lazyframe:
+#' train_lf <- pl$LazyFrame(
+#'   feature_0 = c(-1.0, 0, 1), feature_1 = c(-1.0, 0, 1)
+#' )
+#' test_lf <- pl$LazyFrame(
+#'   feature_0 = c(-1.0, NA, 1), feature_1 = c(-1.0, 0, 1)
+#' )
+#'
+#' test_lf$with_context(train_lf$select(pl$all()$name$suffix("_train")))$select(
+#'   pl$col("feature_0")$fill_null(pl$col("feature_0_train")$median())
+#' )$collect()
+lazyframe__with_context <- function(other) {
+  # NOTE(review): `other` is forwarded as-is; confirm the binding accepts it.
+  self$`_ldf`$with_context(other) |> wrap()
+}
+
+#' Create rolling groups based on a date/time or integer column
+#'
+#' @description
+#' Different from `group_by_dynamic`, the windows are now determined by the
+#' individual values and are not of constant intervals. For constant intervals
+#' use [`<LazyFrame>$group_by_dynamic()`][lazyframe__group_by_dynamic].
+#'
+#' If you have a time series `<t_0, t_1, ..., t_n>`, then by default the
+#' windows created will be:
+#' * `(t_0 - period, t_0]`
+#' * `(t_1 - period, t_1]`
+#' * …
+#' * `(t_n - period, t_n]`
+#'
+#' whereas if you pass a non-default `offset`, then the windows will be:
+#' * `(t_0 + offset, t_0 + offset + period]`
+#' * `(t_1 + offset, t_1 + offset + period]`
+#' * …
+#' * `(t_n + offset, t_n + offset + period]`
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams lazyframe__group_by_dynamic
+#' @param period Length of the window - must be non-negative.
+#' @param offset Offset of the window. Default is `-period`.
+#'
+#' @inherit expr__rolling_max params details
+#' @return A [LazyGroupBy][LazyGroupBy_class] object
+#' @seealso
+#' - [`<LazyFrame>$group_by_dynamic()`][lazyframe__group_by_dynamic]
+#' @examples
+#' dates <- c(
+#'   "2020-01-01 13:45:48",
+#'   "2020-01-01 16:42:13",
+#'   "2020-01-01 16:45:09",
+#'   "2020-01-02 18:12:48",
+#'   "2020-01-03 19:45:32",
+#'   "2020-01-08 23:16:43"
+#' )
+#'
+#' df <- pl$LazyFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$with_columns(
+#'   pl$col("dt")$str$strptime(pl$Datetime())
+#' )
+#'
+#' df$rolling(index_column = "dt", period = "2d")$agg(
+#'   sum_a = pl$col("a")$sum(),
+#'   min_a = pl$col("a")$min(),
+#'   max_a = pl$col("a")$max()
+#' )$collect()
+lazyframe__rolling <- function(
+    index_column,
+    ...,
+    period,
+    offset = NULL,
+    closed = "right",
+    group_by = NULL) {
+  wrap({
+    check_dots_empty0(...)
+    closed <- arg_match0(closed, values = c("both", "left", "right", "none"))
+    period <- parse_as_duration_string(period)
+
+    # When no offset is supplied it is derived by negating the parsed period.
+    offset <- if (is.null(offset)) {
+      negate_duration_string(period)
+    } else {
+      parse_as_duration_string(offset)
+    }
+
+    # Anything that is neither NULL nor a list is boxed into a one-element
+    # list, so that `!!!` below splices it as a single grouping key.
+    needs_boxing <- !(is.null(group_by) || is.list(group_by))
+    if (needs_boxing) {
+      group_by <- list(group_by)
+    }
+    by_exprs <- parse_into_list_of_expressions(!!!group_by)
+
+    # Arguments are passed positionally: index, period, offset, closed, by.
+    index_rexpr <- as_polars_expr(index_column)$`_rexpr`
+    self$`_ldf`$rolling(index_rexpr, period, offset, closed, by_exprs)
+  })
+}
+
+
+#' Group based on a date/time or integer column
+#'
+#' Time windows are calculated and rows are assigned to windows. Unlike a
+#' normal group by, a row can be a member of multiple groups. By default, the
+#' windows look like:
+#' * [start, start + period)
+#' * [start + every, start + every + period)
+#' * [start + 2*every, start + 2*every + period)
+#' * …
+#'
+#' where `start` is determined by `start_by`, `offset`, `every`, and the
+#' earliest datapoint. See the `start_by` argument description for details.
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param index_column Column used to group based on the time window. Often of
+#' type Date/Datetime. This column must be sorted in ascending order (or, if
+#' `group_by` is specified, then it must be sorted in ascending order within
+#' each group).
+#' In case of a dynamic group by on indices, the data type needs to be either
+#' Int32 or Int64. Note that Int32 gets temporarily cast to Int64, so if
+#' performance matters, use an Int64 column.
+#' @param every Interval of the window.
+#' @param period Length of the window. If `NULL` (default), it will equal
+#' `every`.
+#' @param offset Offset of the window, does not take effect if
+#' `start_by = "datapoint"`. Defaults to zero.
+#' @param include_boundaries Add two columns, `"_lower_boundary"` and
+#' `"_upper_boundary"`, that show the boundaries of the window. This will
+#' impact performance because it’s harder to parallelize.
+#' @param closed Define which sides of the interval are closed (inclusive).
+#' Default is `"left"`.
+#' @param label Define which label to use for the window:
+#' * `"left"`: lower boundary of the window
+#' * `"right"`: upper boundary of the window
+#' * `"datapoint"`: the first value of the index column in the given window. If
+#' you don’t need the label to be at one of the boundaries, choose this option
+#' for maximum performance.
+#' @param start_by The strategy to determine the start of the first window by:
+#' * `"window"`: start by taking the earliest timestamp, truncating it with
+#'   `every`, and then adding `offset`. Note that weekly windows start on
+#'   Monday.
+#' * `"datapoint"`: start from the first encountered data point.
+#' * a day of the week (only takes effect if `every` contains `"w"`): `"monday"`
+#'   starts the window on the Monday before the first data point, etc.
+#'
+#' @details
+#' The `every`, `period`, and `offset` arguments are created with the following
+#' string language:
+#' - 1ns # 1 nanosecond
+#' - 1us # 1 microsecond
+#' - 1ms # 1 millisecond
+#' - 1s  # 1 second
+#' - 1m  # 1 minute
+#' - 1h  # 1 hour
+#' - 1d  # 1 day
+#' - 1w  # 1 calendar week
+#' - 1mo # 1 calendar month
+#' - 1y  # 1 calendar year
+#' These strings can be combined:
+#'   - 3d12h4m25s # 3 days, 12 hours, 4 minutes, and 25 seconds
+#'
+#' In case of a `group_by_dynamic` on an integer column, the windows are
+#' defined by:
+#' - 1i # length 1
+#' - 10i # length 10
+#'
+#' @return A [LazyGroupBy][LazyGroupBy_class] object
+#' @seealso
+#' - [`<LazyFrame>$rolling()`][lazyframe__rolling]
+#'
+#' @examples
+#' lf <- pl$select(
+#'   time = pl$datetime_range(
+#'     start = strptime("2021-12-16 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
+#'     end = strptime("2021-12-16 03:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
+#'     interval = "30m"
+#'   ),
+#'   n = 0:6
+#' )$lazy()
+#' lf$collect()
+#'
+#' # Group by windows of 1 hour.
+#' lf$group_by_dynamic("time", every = "1h", closed = "right")$agg(
+#'   vals = pl$col("n")
+#' )$collect()
+#'
+#' # The window boundaries can also be added to the aggregation result
+#' lf$group_by_dynamic(
+#'   "time",
+#'   every = "1h", include_boundaries = TRUE, closed = "right"
+#' )$agg(
+#'   pl$col("n")$mean()
+#' )$collect()
+#'
+#' # When closed = "left", the window excludes the right end of interval:
+#' # [lower_bound, upper_bound)
+#' lf$group_by_dynamic("time", every = "1h", closed = "left")$agg(
+#'   pl$col("n")
+#' )$collect()
+#'
+#' # When closed = "both" the time values at the window boundaries belong to 2
+#' # groups.
+#' lf$group_by_dynamic("time", every = "1h", closed = "both")$agg(
+#'   pl$col("n")
+#' )$collect()
+#'
+#' # Dynamic group bys can also be combined with grouping on normal keys
+#' lf <- lf$with_columns(
+#'   groups = as_polars_series(c("a", "a", "a", "b", "b", "a", "a"))
+#' )
+#' lf$collect()
+#'
+#' lf$group_by_dynamic(
+#'   "time",
+#'   every = "1h",
+#'   closed = "both",
+#'   group_by = "groups",
+#'   include_boundaries = TRUE
+#' )$agg(pl$col("n"))$collect()
+#'
+#' # We can also create a dynamic group by based on an index column
+#' lf <- pl$LazyFrame(
+#'   idx = 0:5,
+#'   A = c("A", "A", "B", "B", "B", "C")
+#' )$with_columns(pl$col("idx")$set_sorted())
+#' lf$collect()
+#'
+#' lf$group_by_dynamic(
+#'   "idx",
+#'   every = "2i",
+#'   period = "3i",
+#'   include_boundaries = TRUE,
+#'   closed = "right"
+#' )$agg(A_agg_list = pl$col("A"))$collect()
+lazyframe__group_by_dynamic <- function(
+    index_column,
+    ...,
+    every,
+    period = NULL,
+    offset = NULL,
+    include_boundaries = FALSE,
+    closed = "left",
+    label = "left",
+    group_by = NULL,
+    start_by = "window") {
+  wrap({
+    check_dots_empty0(...)
+    # Validate every enumerated option up front, so that a typo fails here
+    # with a message listing the accepted values.
+    closed <- arg_match0(closed, values = c("both", "left", "right", "none"))
+    label <- arg_match0(label, values = c("left", "right", "datapoint"))
+    start_by <- arg_match0(
+      start_by,
+      values = c(
+        "window", "datapoint", "monday", "tuesday", "wednesday", "thursday",
+        "friday", "saturday", "sunday"
+      )
+    )
+    every <- parse_as_duration_string(every)
+    # `%||%` supplies the fallbacks when the parsed value is NULL: a zero
+    # offset, and a period equal to `every`.
+    offset <- parse_as_duration_string(offset) %||% "0ns"
+    period <- parse_as_duration_string(period) %||% every
+    # NOTE(review): unlike rolling(), a non-list `group_by` is spliced as-is
+    # here rather than being boxed into a list first; confirm this is intended.
+    group_by <- parse_into_list_of_expressions(!!!group_by)
+
+    self$`_ldf`$group_by_dynamic(
+      as_polars_expr(index_column)$`_rexpr`, every, period, offset, label,
+      include_boundaries, closed,
+      group_by, start_by
+    )
+  })
+}
+
+#' Plot the query plan
+#'
+#' This only returns the "dot" output that can be passed to other packages, such
+#' as `DiagrammeR::grViz()`.
+#'
+#' @param ... Not used.
+#' @param optimized Optimize the query plan.
+#' @inheritParams lazyframe__explain
+#'
+#' @return A character vector
+#'
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   a = c("a", "b", "a", "b", "b", "c"),
+#'   b = 1:6,
+#'   c = 6:1
+#' )
+#'
+#' query <- lf$group_by("a", maintain_order = TRUE)$agg(
+#'   pl$all()$sum()
+#' )$sort(
+#'   "a"
+#' )
+#'
+#' query$to_dot() |> cat()
+#'
+#' # You could print the graph by using DiagrammeR for example, with
+#' # query$to_dot() |> DiagrammeR::grViz().
+lazyframe__to_dot <- function(
+    ...,
+    optimized = TRUE,
+    type_coercion = TRUE,
+    predicate_pushdown = TRUE,
+    projection_pushdown = TRUE,
+    simplify_expression = TRUE,
+    slice_pushdown = TRUE,
+    comm_subplan_elim = TRUE,
+    comm_subexpr_elim = TRUE,
+    cluster_with_columns = TRUE,
+    streaming = FALSE) {
+  wrap({
+    # `...` only forces the remaining arguments to be named; reject anything
+    # passed through it.
+    check_dots_empty0(...)
+
+    # Apply the optimization switches to the inner frame, using the same
+    # argument names as the sink_*() methods (`type_coercion`, `_eager`).
+    toggled_ldf <- self$`_ldf`$optimization_toggle(
+      type_coercion = type_coercion,
+      predicate_pushdown = predicate_pushdown,
+      projection_pushdown = projection_pushdown,
+      simplify_expression = simplify_expression,
+      slice_pushdown = slice_pushdown,
+      comm_subplan_elim = comm_subplan_elim,
+      comm_subexpr_elim = comm_subexpr_elim,
+      cluster_with_columns = cluster_with_columns,
+      streaming = streaming,
+      `_eager` = FALSE
+    )
+
+    # Render the plan of the toggled frame, not of `self`, so that the
+    # switches above actually take effect.
+    toggled_ldf$to_dot(optimized)
+  })
+}
+
+#' Create an empty or n-row null-filled copy of the LazyFrame
+#'
+#' Returns a n-row null-filled LazyFrame with an identical schema. `n` can be
+#' greater than the current number of rows in the LazyFrame.
+#'
+#' @param n Number of (empty) rows to return in the cleared frame.
+#'
+#' @return A n-row null-filled LazyFrame with an identical schema
+#'
+#' @examples
+#' df <- pl$LazyFrame(
+#'   a = c(NA, 2, 3, 4),
+#'   b = c(0.5, NA, 2.5, 13),
+#'   c = c(TRUE, TRUE, FALSE, NA)
+#' )
+#'
+#' df$clear()
+#'
+#' df$clear(n = 5)
+lazyframe__clear <- function(n = 0) {
+  # Hand this frame's schema to pl$DataFrame(), let the eager clear() produce
+  # the `n` rows, then convert the result back to lazy form.
+  schema_frame <- pl$DataFrame(schema = self$schema)
+  cleared <- schema_frame$clear(n)
+  cleared$lazy()
+}
+
+#' Take every nth row in the LazyFrame
+#'
+#' @param n Gather every `n`-th row.
+#' @param offset Starting index.
+#'
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = 5:8)
+#' lf$gather_every(2)$collect()
+#'
+#' lf$gather_every(2, offset = 1)$collect()
+lazyframe__gather_every <- function(n, offset = 0) {
+  # The frame-level operation is the expression-level gather_every() applied
+  # to the `"*"` column selection.
+  every_nth <- pl$col("*")$gather_every(n, offset)
+  self$select(every_nth)
+}
+
+#' Return the number of non-null elements for each column
+#'
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, NA), c = rep(NA, 4))
+#' lf$count()$collect()
+lazyframe__count <- function() {
+  wrap({
+    # Thin delegation to the count() method of the inner `_ldf` object.
+    inner_ldf <- self$`_ldf`
+    inner_ldf$count()
+  })
+}
+
+#' Return the number of null elements for each column
+#'
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, NA), c = rep(NA, 4))
+#' lf$null_count()$collect()
+lazyframe__null_count <- function() {
+  wrap({
+    # Thin delegation to the null_count() method of the inner `_ldf` object.
+    inner_ldf <- self$`_ldf`
+    inner_ldf$null_count()
+  })
+}
+
+#' Return the `k` smallest rows
+#'
+#' @description
+#' Non-null elements are always preferred over null elements, regardless of the
+#' value of `reverse`. The output is not guaranteed to be in any particular
+#' order, call `sort()` after this function if you wish the output to be sorted.
+#'
+#' @inheritParams rlang::check_dots_empty
+#' @param k Number of rows to return.
+#' @param by Column(s) used to determine the bottom rows. Accepts expression
+#' input. Strings are parsed as column names.
+#' @param reverse Consider the `k` largest elements of the by column(s)
+#' (instead of the k smallest). This can be specified per column by passing a
+#' sequence of booleans.
+#'
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   a = c("a", "b", "a", "b", "b", "c"),
+#'   b = c(2, 1, 1, 3, 2, 1)
+#' )
+#'
+#' # Get the rows which contain the 4 smallest values in column b.
+#' lf$bottom_k(4, by = "b")$collect()
+#'
+#' # Get the rows which contain the 4 smallest values when sorting on column a
+#' # and b.
+#' lf$bottom_k(4, by = c("a", "b"))$collect()
+lazyframe__bottom_k <- function(k, ..., by, reverse = FALSE) {
+  wrap({
+    check_dots_empty0(...)
+    by <- parse_into_list_of_expressions(!!!by)
+    # `reverse` is matched against the number of `by` expressions, so the
+    # argument named alongside it must be `by` (nothing is accepted via `...`).
+    reverse <- extend_bool(reverse, length(by), "reverse", "by")
+    self$`_ldf`$bottom_k(k, by, reverse)
+  })
+}
+
+#' Return the `k` largest rows
+#'
+#' @inherit lazyframe__bottom_k description params
+#' @inheritParams rlang::check_dots_empty0
+#' @param reverse Consider the `k` smallest elements of the `by` column(s)
+#' (instead of the `k` largest). This can be specified per column by passing a
+#' sequence of booleans.
+#'
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   a = c("a", "b", "a", "b", "b", "c"),
+#'   b = c(2, 1, 1, 3, 2, 1)
+#' )
+#'
+#' # Get the rows which contain the 4 largest values in column b.
+#' lf$top_k(4, by = "b")$collect()
+#'
+#' # Get the rows which contain the 4 largest values when sorting on column a
+#' # and b.
+#' lf$top_k(4, by = c("a", "b"))$collect()
+lazyframe__top_k <- function(k, ..., by, reverse = FALSE) {
+  wrap({
+    check_dots_empty0(...)
+    by <- parse_into_list_of_expressions(!!!by)
+    # `reverse` is matched against the number of `by` expressions, so the
+    # argument named alongside it must be `by` (nothing is accepted via `...`).
+    reverse <- extend_bool(reverse, length(by), "reverse", "by")
+    self$`_ldf`$top_k(k, by, reverse)
+  })
+}
+
+#' Interpolate intermediate values
+#'
+#' The interpolation method is linear.
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf <- pl$LazyFrame(
+#'   foo = c(1, NA, 9, 10),
+#'   bar = c(6, 7, 9, NA),
+#'   ham = c(1, NA, NA, 9)
+#' )
+#'
+#' lf$interpolate()$collect()
+lazyframe__interpolate <- function() {
+  wrap({
+    # Apply the expression-level interpolate() to the `"*"` column selection.
+    interpolated <- pl$col("*")$interpolate()
+    self$select(interpolated)
+  })
+}
+
+#' Take two sorted LazyFrames and merge them by the sorted key
+#'
+#' The output of this operation will also be sorted. It is the caller's
+#' responsibility that the frames are sorted by that key, otherwise the output
+#' will not make sense. The schemas of both LazyFrames must be equal.
+#'
+#' @param other Other LazyFrame that must be merged.
+#' @param key Key that is sorted.
+#'
+#' @inherit as_polars_lf return
+#'
+#' @examples
+#' lf1 <- pl$LazyFrame(
+#'   name = c("steve", "elise", "bob"),
+#'   age = c(42, 44, 18)
+#' )$sort("age")
+#'
+#' lf2 <- pl$LazyFrame(
+#'   name = c("anna", "megan", "steve", "thomas"),
+#'   age = c(21, 33, 42, 20)
+#' )$sort("age")
+#'
+#' lf1$merge_sorted(lf2, key = "age")$collect()
+lazyframe__merge_sorted <- function(other, key) {
+  wrap({
+    # The call is made between the inner `_ldf` objects of both frames.
+    other_ldf <- other$`_ldf`
+    self$`_ldf`$merge_sorted(other_ldf, key)
+  })
+}
+
+#' Indicate that one or multiple columns are sorted
+#'
+#' This can speed up future operations, but it can lead to incorrect results if
+#' the data is **not** sorted! Use with care!
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param column Columns that are sorted.
+#' @param descending Whether the columns are sorted in descending order.
+#'
+#' @inherit as_polars_lf return
+lazyframe__set_sorted <- function(column, ..., descending = FALSE) {
+  wrap({
+    check_dots_empty0(...)
+    # Flag the column(s) through the expression-level set_sorted() and write
+    # the flagged expression back with with_columns().
+    flagged <- pl$col(column)$set_sorted(descending = descending)
+    self$with_columns(flagged)
+  })
+}
+
+#' Add a row index as the first column in the LazyFrame
+#'
+#' @description
+#' Using this function can have a negative effect on query performance. This
+#' may, for instance, block predicate pushdown optimization.
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param name Name of the index column.
+#' @param offset Start the index at this offset. Cannot be negative.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' lf <- pl$LazyFrame(x = c(1, 3, 5), y = c(2, 4, 6))
+#' lf$with_row_index()$collect()
+#'
+#' lf$with_row_index("id", offset = 1000)$collect()
+#'
+#' # An index column can also be created using the expressions int_range()
+#' # and len().
+#' lf$with_columns(
+#'   index = pl$int_range(pl$len(), dtype = pl$UInt32)
+#' )$collect()
+lazyframe__with_row_index <- function(name = "index", offset = 0) {
+  wrap({
+    # Both arguments are forwarded positionally to the inner `_ldf` method.
+    inner_ldf <- self$`_ldf`
+    inner_ldf$with_row_index(name, offset)
+  })
+}
+
+#' Evaluate the query in streaming mode and write to a Parquet file
+#'
+#' @description
+#' `r lifecycle::badge("experimental")`
+#'
+#' This allows streaming results that are larger than RAM to be written to disk.
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param path A character. File path to which the file should be written.
+#' @param compression The compression method. Must be one of:
+#' * `"lz4"`: fast compression/decompression.
+#' * `"uncompressed"`
+#' * `"snappy"`: this guarantees that the parquet file will be compatible with
+#'   older parquet readers.
+#' * `"gzip"`
+#' * `"lzo"`
+#' * `"brotli"`
+#' * `"zstd"`: good compression performance.
+#' @param compression_level `NULL` or integer. The level of compression to use.
+#'  Only used if method is one of `"gzip"`, `"brotli"`, or `"zstd"`. Higher
+#' compression means smaller files on disk:
+#'  * `"gzip"`: min-level: 0, max-level: 10.
+#'  * `"brotli"`: min-level: 0, max-level: 11.
+#'  * `"zstd"`: min-level: 1, max-level: 22.
+#' @param statistics Whether statistics should be written to the Parquet
+#' headers. Possible values:
+#' * `TRUE`: enable default set of statistics (default)
+#' * `FALSE`: disable all statistics
+#' * `"full"`: calculate and write all available statistics.
+#' * A named list where all values must be `TRUE` or `FALSE`, e.g.
+#'   `list(min = TRUE, max = FALSE)`. Statistics available are `"min"`, `"max"`,
+#'   `"distinct_count"`, `"null_count"`.
+#' @param row_group_size Size of the row groups in number of rows. If `NULL`
+#' (default), the chunks of the DataFrame are used. Writing in smaller chunks
+#' may reduce memory pressure and improve writing speeds.
+#' @param data_page_size Size of the data page in bytes. If `NULL` (default), it
+#' is set to 1024^2 bytes.
+#' @param maintain_order Maintain the order in which data is processed. Setting
+#' this to `FALSE` will be slightly faster.
+#' @inheritParams lazyframe__collect
+#' @inheritParams pl__scan_parquet
+#'
+#' @return Invisibly returns the input LazyFrame
+#'
+#' @examples
+#' # sink table 'mtcars' from mem to parquet
+#' tmpf <- tempfile()
+#' as_polars_lf(mtcars)$sink_parquet(tmpf)
+#'
+#' # stream a query end-to-end
+#' tmpf2 <- tempfile()
+#' pl$scan_parquet(tmpf)$select(pl$col("cyl") * 2)$sink_parquet(tmpf2)
+#'
+#' # load parquet directly into a DataFrame / memory
+#' pl$scan_parquet(tmpf2)$collect()
+lazyframe__sink_parquet <- function(
+    path,
+    ...,
+    compression = "zstd",
+    compression_level = 3,
+    statistics = TRUE,
+    row_group_size = NULL,
+    data_page_size = NULL,
+    maintain_order = TRUE,
+    type_coercion = TRUE,
+    predicate_pushdown = TRUE,
+    projection_pushdown = TRUE,
+    simplify_expression = TRUE,
+    slice_pushdown = TRUE,
+    no_optimization = FALSE,
+    storage_options = NULL,
+    retries = 2) {
+  wrap({
+    check_dots_empty0(...)
+    compression <- arg_match0(
+      compression,
+      values = c("lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd")
+    )
+
+    # `no_optimization = TRUE` overrides the three pushdown switches.
+    if (isTRUE(no_optimization)) {
+      predicate_pushdown <- projection_pushdown <- slice_pushdown <- FALSE
+    }
+
+    # Only the switches exposed as arguments can vary; the elimination,
+    # clustering, streaming and eager flags are pinned to FALSE here.
+    toggled_ldf <- self$`_ldf`$optimization_toggle(
+      type_coercion = type_coercion,
+      predicate_pushdown = predicate_pushdown,
+      projection_pushdown = projection_pushdown,
+      simplify_expression = simplify_expression,
+      slice_pushdown = slice_pushdown,
+      comm_subplan_elim = FALSE,
+      comm_subexpr_elim = FALSE,
+      cluster_with_columns = FALSE,
+      streaming = FALSE,
+      `_eager` = FALSE
+    )
+
+    # Convert the user-facing `statistics` value before passing it on.
+    parquet_statistics <- translate_statistics(statistics)
+
+    toggled_ldf$sink_parquet(
+      path = path,
+      compression = compression,
+      compression_level = compression_level,
+      statistics = parquet_statistics,
+      row_group_size = row_group_size,
+      data_page_size = data_page_size,
+      maintain_order = maintain_order,
+      storage_options = storage_options,
+      retries = retries
+    )
+
+    # The input frame itself is the (invisible) return value.
+    invisible(self)
+  })
+}
+
+#' Evaluate the query in streaming mode and write to an IPC file
+#'
+#' @inherit lazyframe__sink_parquet description params return
+#' @inheritParams rlang::check_dots_empty0
+#' @param compression `NULL` or one of:
+#' * `"uncompressed"`: same as `NULL`.
+#' * `"lz4"`: fast compression/decompression.
+#' * `"zstd"`: good compression performance.
+#'
+#' @examples
+#' # sink table 'mtcars' from mem to ipc
+#' tmpf <- tempfile()
+#' as_polars_lf(mtcars)$sink_ipc(tmpf)
+#'
+#' # stream a query end-to-end (not supported yet, https://github.com/pola-rs/polars/issues/1040)
+#' # tmpf2 = tempfile()
+#' # pl$scan_ipc(tmpf)$select(pl$col("cyl") * 2)$sink_ipc(tmpf2)
+#'
+#' # load ipc directly into a DataFrame / memory
+#' # pl$scan_ipc(tmpf2)$collect()
+lazyframe__sink_ipc <- function(
+    path,
+    ...,
+    compression = c("zstd", "lz4", "uncompressed"),
+    maintain_order = TRUE,
+    type_coercion = TRUE,
+    predicate_pushdown = TRUE,
+    projection_pushdown = TRUE,
+    simplify_expression = TRUE,
+    slice_pushdown = TRUE,
+    no_optimization = FALSE,
+    storage_options = NULL,
+    retries = 2) {
+  wrap({
+    check_dots_empty0(...)
+    # NULL is accepted as a synonym for "uncompressed".
+    compression <- compression %||% "uncompressed"
+    compression <- arg_match0(
+      compression,
+      values = c("lz4", "uncompressed", "zstd")
+    )
+
+    # `no_optimization = TRUE` overrides the three pushdown switches.
+    if (isTRUE(no_optimization)) {
+      predicate_pushdown <- projection_pushdown <- slice_pushdown <- FALSE
+    }
+
+    # Only the switches exposed as arguments can vary; the elimination,
+    # clustering, streaming and eager flags are pinned to FALSE here.
+    toggled_ldf <- self$`_ldf`$optimization_toggle(
+      type_coercion = type_coercion,
+      predicate_pushdown = predicate_pushdown,
+      projection_pushdown = projection_pushdown,
+      simplify_expression = simplify_expression,
+      slice_pushdown = slice_pushdown,
+      comm_subplan_elim = FALSE,
+      comm_subexpr_elim = FALSE,
+      cluster_with_columns = FALSE,
+      streaming = FALSE,
+      `_eager` = FALSE
+    )
+
+    toggled_ldf$sink_ipc(
+      path = path,
+      compression = compression,
+      maintain_order = maintain_order,
+      storage_options = storage_options,
+      retries = retries
+    )
+
+    # The input frame itself is the (invisible) return value.
+    invisible(self)
+  })
+}
+
+#' Evaluate the query in streaming mode and write to a CSV file
+#'
+#' @inherit lazyframe__sink_parquet description params return
+#' @inheritParams rlang::check_dots_empty0
+#' @param include_bom Logical, whether to include UTF-8 BOM in the CSV output.
+#' @param include_header Logical, whether to include header in the CSV output.
+#' @param separator Separate CSV fields with this symbol.
+#' @param line_terminator String used to end each row.
+#' @param quote_char Byte to use as quoting character.
+#' @param batch_size Number of rows that will be processed per thread.
+#' @param datetime_format A format string, with the specifiers defined by the
+#' [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+#' Rust crate. If no format specified, the default fractional-second precision
+#' is inferred from the maximum timeunit found in the frame’s Datetime cols (if
+#' any).
+#' @param date_format A format string, with the specifiers defined by the
+#' [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+#' Rust crate.
+#' @param time_format A format string, with the specifiers defined by the
+#' [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+#' Rust crate.
+#' @param float_precision Number of decimal places to write, applied to both
+#' Float32 and Float64 datatypes.
+#' @param null_value A string representing null values (defaulting to the empty
+#' string).
+#' @param quote_style Determines the quoting strategy used. Must be one of:
+#' * `"necessary"` (default): This puts quotes around fields only when
+#'   necessary. They are necessary when fields contain a quote, delimiter or
+#'   record terminator. Quotes are also necessary when writing an empty record
+#'   (which is indistinguishable from a record with one empty field). This is
+#'   the default.
+#' * `"always"`: This puts quotes around every field. Always.
+#' * `"never"`: This never puts quotes around fields, even if that results in
+#'   invalid CSV data (e.g.: by not quoting strings containing the separator).
+#' * `"non_numeric"`: This puts quotes around all fields that are non-numeric.
+#'   Namely, when writing a field that does not parse as a valid float or
+#'   integer, then quotes will be used even if they aren't strictly necessary.
+#'
+#' @examples
+#' # sink table 'mtcars' from mem to CSV
+#' tmpf <- tempfile()
+#' pl$LazyFrame(mtcars)$sink_csv(tmpf)
+#'
+#' # stream a query end-to-end
+#' tmpf2 <- tempfile()
+#' pl$scan_csv(tmpf)$select(pl$col("cyl") * 2)$sink_csv(tmpf2)
+#'
+#' # load CSV directly into a DataFrame / memory
+#' pl$scan_csv(tmpf2)$collect()
+lazyframe__sink_csv <- function(
+    path,
+    ...,
+    include_bom = FALSE,
+    include_header = TRUE,
+    separator = ",",
+    line_terminator = "\n",
+    quote_char = '"',
+    batch_size = 1024,
+    datetime_format = NULL,
+    date_format = NULL,
+    time_format = NULL,
+    float_precision = NULL,
+    null_value = "",
+    quote_style = "necessary",
+    maintain_order = TRUE,
+    type_coercion = TRUE,
+    predicate_pushdown = TRUE,
+    projection_pushdown = TRUE,
+    simplify_expression = TRUE,
+    slice_pushdown = TRUE,
+    no_optimization = FALSE,
+    storage_options = NULL,
+    retries = 2) {
+  wrap({
+    check_dots_empty0(...)
+    quote_style <- arg_match0(
+      quote_style,
+      values = c("necessary", "always", "never", "non_numeric")
+    )
+
+    # `no_optimization = TRUE` overrides the three pushdown switches.
+    if (isTRUE(no_optimization)) {
+      predicate_pushdown <- projection_pushdown <- slice_pushdown <- FALSE
+    }
+
+    # Only the switches exposed as arguments can vary; the elimination,
+    # clustering, streaming and eager flags are pinned to FALSE here.
+    toggled_ldf <- self$`_ldf`$optimization_toggle(
+      type_coercion = type_coercion,
+      predicate_pushdown = predicate_pushdown,
+      projection_pushdown = projection_pushdown,
+      simplify_expression = simplify_expression,
+      slice_pushdown = slice_pushdown,
+      comm_subplan_elim = FALSE,
+      comm_subexpr_elim = FALSE,
+      cluster_with_columns = FALSE,
+      streaming = FALSE,
+      `_eager` = FALSE
+    )
+
+    # All CSV formatting options are forwarded by name, unchanged.
+    toggled_ldf$sink_csv(
+      path = path,
+      include_bom = include_bom,
+      include_header = include_header,
+      separator = separator,
+      line_terminator = line_terminator,
+      quote_char = quote_char,
+      batch_size = batch_size,
+      datetime_format = datetime_format,
+      date_format = date_format,
+      time_format = time_format,
+      float_precision = float_precision,
+      null_value = null_value,
+      quote_style = quote_style,
+      maintain_order = maintain_order,
+      storage_options = storage_options,
+      retries = retries
+    )
+
+    # The input frame itself is the (invisible) return value.
+    invisible(self)
+  })
+}
+
+#' Evaluate the query in streaming mode and write to an NDJSON file
+#'
+#' @inherit lazyframe__sink_parquet description params return
+#' @inheritParams rlang::check_dots_empty0
+#'
+#' @examples
+#' # sink table 'mtcars' from mem to NDJSON
+#' tmpf <- tempfile(fileext = ".ndjson")
+#' pl$LazyFrame(mtcars)$sink_ndjson(tmpf)
+#'
+#' # load NDJSON directly into a DataFrame / memory
+#' pl$scan_ndjson(tmpf)$collect()
+lazyframe__sink_ndjson <- function(
+    path,
+    ...,
+    maintain_order = TRUE,
+    type_coercion = TRUE,
+    predicate_pushdown = TRUE,
+    projection_pushdown = TRUE,
+    simplify_expression = TRUE,
+    slice_pushdown = TRUE,
+    no_optimization = FALSE,
+    storage_options = NULL,
+    retries = 2) {
+  wrap({
+    check_dots_empty0(...)
+
+    # `no_optimization = TRUE` overrides the three pushdown switches.
+    if (isTRUE(no_optimization)) {
+      predicate_pushdown <- projection_pushdown <- slice_pushdown <- FALSE
+    }
+
+    # Only the switches exposed as arguments can vary; the elimination,
+    # clustering, streaming and eager flags are pinned to FALSE here.
+    toggled_ldf <- self$`_ldf`$optimization_toggle(
+      type_coercion = type_coercion,
+      predicate_pushdown = predicate_pushdown,
+      projection_pushdown = projection_pushdown,
+      simplify_expression = simplify_expression,
+      slice_pushdown = slice_pushdown,
+      comm_subplan_elim = FALSE,
+      comm_subexpr_elim = FALSE,
+      cluster_with_columns = FALSE,
+      streaming = FALSE,
+      `_eager` = FALSE
+    )
+
+    # The inner method used for NDJSON output is named sink_json().
+    toggled_ldf$sink_json(
+      path = path,
+      maintain_order = maintain_order,
+      storage_options = storage_options,
+      retries = retries
+    )
+
+    # The input frame itself is the (invisible) return value.
+    invisible(self)
+  })
+}
+
+#' Perform joins on nearest keys
+#'
+#' @description
+#' This is similar to a left-join except that we match on nearest key rather
+#' than equal keys. Both frames must be sorted by the `asof_join` key.
+#'
+#' @inheritParams rlang::check_dots_empty0
+#' @param other LazyFrame to join with.
+#' @inheritParams dataframe__join
+#' @param by Join on these columns before performing asof join. Either a vector
+#' of column names or a list of expressions and/or strings. Use `by_left` and
+#' `by_right` if the column names to match on are different between the two
+#' tables.
+#' @param by_left,by_right Same as `by` but only for the left or the right
+#' table. They must have the same length.
+#' @param strategy Strategy for where to find match:
+#' * `"backward"` (default): search for the last row in the right table whose
+#'   `on` key is less than or equal to the left key.
+#' * `"forward"`: search for the first row in the right table whose `on` key is
+#'   greater than or equal to the left key.
+#' * `"nearest"`: search for the last row in the right table whose value is
+#'   nearest to the left key. String keys are not currently supported for a
+#'   nearest search.
+#' @param tolerance Numeric tolerance. By setting this the join will only be
+#' done if the near keys are within this distance. If an asof join is done on
+#' columns of dtype "Date", "Datetime", "Duration" or "Time", use the Polars
+#' duration string language (see details).
+#'
+#' @param coalesce Coalescing behavior (merging of `on` / `left_on` /
+#' `right_on` columns):
+#' * `TRUE`: Always coalesce join columns;
+#' * `FALSE`: Never coalesce join columns.
+#' Note that joining on any other expressions than `col` will turn off
+#' coalescing.
+#'
+#' @inheritSection polars_duration_string Polars duration string language
+#' @examples
+#' gdp <- pl$LazyFrame(
+#'   date = as.Date(c("2016-1-1", "2017-5-1", "2018-1-1", "2019-1-1", "2020-1-1")),
+#'   gdp = c(4164, 4411, 4566, 4696, 4827)
+#' )
+#'
+#' pop <- pl$LazyFrame(
+#'   date = as.Date(c("2016-3-1", "2018-8-1", "2019-1-1")),
+#'   population = c(82.19, 82.66, 83.12)
+#' )
+#'
+#' # optional: make sure the tables are already sorted by the "on" join key
+#' gdp <- gdp$sort("date")
+#' pop <- pop$sort("date")
+#'
+#'
+#' # Note how the dates don’t quite match. If we join them using join_asof and
+#' # strategy = 'backward', then each date from population which doesn’t have
+#' # an exact match is matched with the closest earlier date from gdp:
+#' pop$join_asof(gdp, on = "date", strategy = "backward")$collect()
+#'
+#' # Note how:
+#' # - date 2016-03-01 from population is matched with 2016-01-01 from gdp;
+#' # - date 2018-08-01 from population is matched with 2018-01-01 from gdp.
+#' # You can verify this by passing coalesce = FALSE:
+#' pop$join_asof(
+#'   gdp,
+#'   on = "date", strategy = "backward", coalesce = FALSE
+#' )$collect()
+#'
+#' # If we instead use strategy = 'forward', then each date from population
+#' # which doesn’t have an exact match is matched with the closest later date
+#' # from gdp:
+#' pop$join_asof(gdp, on = "date", strategy = "forward")$collect()
+#'
+#' # Note how:
+#' # - date 2016-03-01 from population is matched with 2017-05-01 from gdp;
+#' # - date 2018-08-01 from population is matched with 2019-01-01 from gdp.
+#'
+#' # Finally, strategy = 'nearest' gives us a mix of the two results above, as
+#' # each date from population which doesn’t have an exact match is matched
+#' # with the closest date from gdp, regardless of whether it’s earlier or
+#' # later:
+#' pop$join_asof(gdp, on = "date", strategy = "nearest")$collect()
+#'
+#' # Note how:
+#' # - date 2016-03-01 from population is matched with 2016-01-01 from gdp;
+#' # - date 2018-08-01 from population is matched with 2019-01-01 from gdp.
+#'
+#' # The `by` argument allows joining on another column first, before the asof
+#' # join. In this example we join by country first, then asof join by date, as
+#' # above.
+#' gdp2 <- pl$LazyFrame(
+#'   country = rep(c("Germany", "Netherlands"), each = 5),
+#'   date = rep(
+#'     as.Date(c("2016-1-1", "2017-1-1", "2018-1-1", "2019-1-1", "2020-1-1")),
+#'     2
+#'   ),
+#'   gdp = c(4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909)
+#' )$sort("country", "date")
+#' gdp2$collect()
+#'
+#' pop2 <- pl$LazyFrame(
+#'   country = rep(c("Germany", "Netherlands"), each = 3),
+#'   date = rep(as.Date(c("2016-3-1", "2018-8-1", "2019-1-1")), 2),
+#'   population = c(82.19, 82.66, 83.12, 17.11, 17.32, 17.40)
+#' )$sort("country", "date")
+#' pop2$collect()
+#'
+#' pop2$join_asof(
+#'   gdp2,
+#'   by = "country", on = "date", strategy = "nearest"
+#' )$collect()
+lazyframe__join_asof <- function(
+    other,
+    ...,
+    left_on = NULL,
+    right_on = NULL,
+    on = NULL,
+    by_left = NULL,
+    by_right = NULL,
+    by = NULL,
+    strategy = c("backward", "forward", "nearest"),
+    suffix = "_right",
+    tolerance = NULL,
+    allow_parallel = TRUE,
+    force_parallel = FALSE,
+    coalesce = TRUE) {
+  wrap({
+    check_dots_empty0(...)
+    strategy <- arg_match0(strategy, values = c("backward", "forward", "nearest"))
+
+    # A shared argument, when given, overwrites both side-specific ones.
+    if (!is.null(by)) {
+      by_left <- by
+      by_right <- by
+    }
+    if (!is.null(on)) {
+      left_on <- on
+      right_on <- on
+    }
+
+    # A character tolerance is routed to `tolerance_str`; any other value
+    # (including NULL) is routed to `tolerance`. The unused slot stays NULL.
+    tolerance_is_string <- is.character(tolerance)
+    tolerance_str <- if (tolerance_is_string) tolerance else NULL
+    tolerance_num <- if (tolerance_is_string) NULL else tolerance
+
+    # Note the naming switch: `by_left`/`by_right` here become
+    # `left_by`/`right_by` in the inner call.
+    self$`_ldf`$join_asof(
+      other = other$`_ldf`,
+      left_on = as_polars_expr(left_on)$`_rexpr`,
+      right_on = as_polars_expr(right_on)$`_rexpr`,
+      left_by = by_left,
+      right_by = by_right,
+      allow_parallel = allow_parallel,
+      force_parallel = force_parallel,
+      suffix = suffix,
+      strategy = strategy,
+      tolerance = tolerance_num,
+      tolerance_str = tolerance_str,
+      coalesce = coalesce
+    )
+  })
+}
diff --git a/R/utils-various.R b/R/utils-various.R
index 5840ed62..c62c2bfe 100644
--- a/R/utils-various.R
+++ b/R/utils-various.R
@@ -16,3 +16,116 @@ extend_bool <- function(value, n_match, value_name, match_name) {
     value
   }
 }
+
+#' @noRd
+make_profile_plot <- function(data, truncate_nodes) { # draws one horizontal segment per profiled node and returns the ggplot object
+  check_installed("ggplot2")
+  timings <- as.data.frame(data[[2]]) # second element of `data` holds the timings; columns `node`, `start`, `end` are used below
+  timings$node <- factor(timings$node, levels = unique(timings$node)) # levels follow order of first appearance, not alphabetical order
+  total_timing <- max(timings$end)
+  if (total_timing > 10000000) { # large totals are rescaled for display; raw values are treated as microseconds (see last branch)
+    unit <- "s"
+    total_timing <- paste0(total_timing / 1000000, "s")
+    timings$start <- timings$start / 1000000
+    timings$end <- timings$end / 1000000
+  } else if (total_timing > 10000) {
+    unit <- "ms"
+    total_timing <- paste0(total_timing / 1000, "ms")
+    timings$start <- timings$start / 1000
+    timings$end <- timings$end / 1000
+  } else {
+    unit <- "\U00B5s"
+    total_timing <- paste0(total_timing, "\U00B5s") # values are shown unscaled in this branch
+  }
+
+  # for some reason, there's an error if I use rlang::.data directly in aes()
+  .data <- rlang::.data
+
+  plot <- ggplot2::ggplot(
+    timings,
+    ggplot2::aes(
+      x = .data[["start"]], xend = .data[["end"]],
+      y = .data[["node"]], yend = .data[["node"]] # same y at both ends, so each segment is horizontal
+    )
+  ) +
+    ggplot2::geom_segment(linewidth = 6) +
+    ggplot2::xlab(
+      paste0("Node duration in ", unit, ". Total duration: ", total_timing)
+    ) +
+    ggplot2::ylab(NULL) +
+    ggplot2::theme(
+      axis.text = ggplot2::element_text(size = 12)
+    )
+
+  if (truncate_nodes > 0) { # a positive value shortens the node labels; otherwise labels are left as is
+    plot <- plot +
+      ggplot2::scale_y_discrete(
+        labels = rev(paste0(strtrim(timings$node, truncate_nodes), "...")), # "..." is appended to every label, truncated or not
+        limits = rev # reverses the order of the nodes on the y axis
+      )
+  } else {
+    plot <- plot +
+      ggplot2::scale_y_discrete(
+        limits = rev
+      )
+  }
+
+  # do not show the plot if we're running testthat
+  if (!identical(Sys.getenv("TESTTHAT"), "true")) {
+    print(plot)
+  }
+  plot
+}
+
+#' @noRd
+translate_statistics <- function(statistics, call = caller_env()) { # normalises `statistics` to a list keyed min/max/distinct_count/null_count
+  if (length(statistics) != 1 && !is.list(statistics)) { # non-list input must be a single value
+    abort("`statistics` must be of length 1.", call = call)
+  }
+  if (is.logical(statistics)) {
+    if (isTRUE(statistics)) { # TRUE: every statistic except distinct_count
+      statistics <- list(
+        min = TRUE,
+        max = TRUE,
+        distinct_count = FALSE,
+        null_count = TRUE
+      )
+    } else { # FALSE (or NA): no statistics at all
+      statistics <- list(
+        min = FALSE,
+        max = FALSE,
+        distinct_count = FALSE,
+        null_count = FALSE
+      )
+    }
+  } else if (is.character(statistics)) {
+    if (statistics == "full") { # "full": every statistic, including distinct_count
+      statistics <- list(
+        min = TRUE,
+        max = TRUE,
+        distinct_count = TRUE,
+        null_count = TRUE
+      )
+    } else {
+      abort("`statistics` must be TRUE/FALSE, \"full\", or a named list.", call = call)
+    }
+  } else if (is.list(statistics)) {
+    default <- list( # same defaults as `statistics = TRUE`; user-supplied entries override them
+      min = TRUE,
+      max = TRUE,
+      distinct_count = FALSE,
+      null_count = TRUE
+    )
+    statistics <- utils::modifyList(default, statistics)
+    nms <- names(statistics)
+    invalid <- nms[!nms %in% c("min", "max", "distinct_count", "null_count")] # keys the user added beyond the four known ones
+    if (length(invalid) > 0) {
+      msg <- paste0("`", invalid, "`", collapse = ", ")
+      abort(
+        paste0("In `statistics`, ", msg, " are not valid keys."), # spaces keep the key names from running into the text
+        call = call
+      )
+    }
+  }
+  statistics # inputs of any other type fall through unchanged
+}
diff --git a/man/dataframe__cast.Rd b/man/dataframe__cast.Rd
index 10011b1c..f168d5a9 100644
--- a/man/dataframe__cast.Rd
+++ b/man/dataframe__cast.Rd
@@ -6,11 +6,21 @@
 \usage{
 dataframe__cast(..., .strict = TRUE)
 }
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Either a datatype to which
+all columns will be cast, or a list where the names are column names and the
+values are the datatypes to convert to.}
+
+\item{.strict}{If \code{TRUE} (default), throw an error if a cast could not be done
+(for instance, due to an overflow). Otherwise, return \code{null}.}
+}
 \value{
 A polars \link{DataFrame}
 }
 \description{
-Cast DataFrame column(s) to the specified dtype
+This allows converting all columns to a datatype or converting only specific
+columns. Contrary to the Python implementation, it is not possible to
+convert all columns of a specific datatype to another datatype.
 }
 \examples{
 df <- pl$DataFrame(
diff --git a/man/lazyframe__bottom_k.Rd b/man/lazyframe__bottom_k.Rd
new file mode 100644
index 00000000..903ffd46
--- /dev/null
+++ b/man/lazyframe__bottom_k.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__bottom_k}
+\alias{lazyframe__bottom_k}
+\title{Return the \code{k} smallest rows}
+\usage{
+lazyframe__bottom_k(k, ..., by, reverse = FALSE)
+}
+\arguments{
+\item{k}{Number of rows to return.}
+
+\item{by}{Column(s) used to determine the bottom rows. Accepts expression
+input. Strings are parsed as column names.}
+
+\item{reverse}{Consider the \code{k} largest elements of the by column(s)
+(instead of the k smallest). This can be specified per column by passing a
+sequence of booleans.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Non-null elements are always preferred over null elements, regardless of the
+value of \code{reverse}. The output is not guaranteed to be in any particular
+order, call \code{sort()} after this function if you wish the output to be sorted.
+}
+\examples{
+lf <- pl$LazyFrame(
+  a = c("a", "b", "a", "b", "b", "c"),
+  b = c(2, 1, 1, 3, 2, 1)
+)
+
+# Get the rows which contain the 4 smallest values in column b.
+lf$bottom_k(4, by = "b")$collect()
+
+# Get the rows which contain the 4 smallest values when sorting on column a
+# and b.
+lf$bottom_k(4, by = c("a", "b"))$collect()
+}
diff --git a/man/lazyframe__cast.Rd b/man/lazyframe__cast.Rd
new file mode 100644
index 00000000..56958008
--- /dev/null
+++ b/man/lazyframe__cast.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__cast}
+\alias{lazyframe__cast}
+\title{Cast LazyFrame column(s) to the specified dtype(s)}
+\usage{
+lazyframe__cast(..., .strict = TRUE)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Either a datatype to which
+all columns will be cast, or a list where the names are column names and the
+values are the datatypes to convert to.}
+
+\item{.strict}{If \code{TRUE} (default), throw an error if a cast could not be done
+(for instance, due to an overflow). Otherwise, return \code{null}.}
+}
+\value{
+A LazyFrame
+}
+\description{
+This allows converting all columns to a datatype or converting only specific
+columns. Contrary to the Python implementation, it is not possible to
+convert all columns of a specific datatype to another datatype.
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = 1:3,
+  bar = c(6, 7, 8),
+  ham = as.Date(c("2020-01-02", "2020-03-04", "2020-05-06"))
+)
+
+# Cast only some columns
+lf$cast(foo = pl$Float32, bar = pl$UInt8)$collect()
+
+# Cast all columns to the same type
+lf$cast(pl$String)$collect()
+}
diff --git a/man/lazyframe__clear.Rd b/man/lazyframe__clear.Rd
new file mode 100644
index 00000000..91e4d73c
--- /dev/null
+++ b/man/lazyframe__clear.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__clear}
+\alias{lazyframe__clear}
+\title{Create an empty or n-row null-filled copy of the LazyFrame}
+\usage{
+lazyframe__clear(n = 0)
+}
+\arguments{
+\item{n}{Number of (empty) rows to return in the cleared frame.}
+}
+\value{
+An n-row null-filled LazyFrame with an identical schema
+}
+\description{
+Returns an n-row null-filled LazyFrame with an identical schema. \code{n} can be
+greater than the current number of rows in the LazyFrame.
+}
+\examples{
+df <- pl$LazyFrame(
+  a = c(NA, 2, 3, 4),
+  b = c(0.5, NA, 2.5, 13),
+  c = c(TRUE, TRUE, FALSE, NA)
+)
+
+df$clear()
+
+df$clear(n = 5)
+}
diff --git a/man/lazyframe__clone.Rd b/man/lazyframe__clone.Rd
new file mode 100644
index 00000000..4e51d5f7
--- /dev/null
+++ b/man/lazyframe__clone.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__clone}
+\alias{lazyframe__clone}
+\title{Clone a LazyFrame}
+\usage{
+lazyframe__clone()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+This makes a very cheap deep copy/clone of an existing
+\code{\link[=lazyframe__class]{LazyFrame}}. Rarely useful as \code{LazyFrame}s are nearly 100\%
+immutable. Any modification of a \code{LazyFrame} should lead to a clone anyways,
+but this can be useful when dealing with attributes (see examples).
+}
+\examples{
+df1 <- as_polars_lf(iris)
+
+# Make a function to take a LazyFrame, add an attribute, and return a LazyFrame
+give_attr <- function(data) {
+  attr(data, "created_on") <- "2024-01-29"
+  data
+}
+df2 <- give_attr(df1)
+
+# Problem: the original LazyFrame also gets the attribute while it shouldn't!
+attributes(df1)
+
+# Use $clone() inside the function to avoid that
+give_attr <- function(data) {
+  data <- data$clone()
+  attr(data, "created_on") <- "2024-01-29"
+  data
+}
+df1 <- as_polars_lf(iris)
+df2 <- give_attr(df1)
+
+# now, the original LazyFrame doesn't get this attribute
+attributes(df1)
+}
diff --git a/man/lazyframe__collect.Rd b/man/lazyframe__collect.Rd
index 3ccae6b3..e6139b19 100644
--- a/man/lazyframe__collect.Rd
+++ b/man/lazyframe__collect.Rd
@@ -68,3 +68,14 @@ lf$group_by("a")$agg(pl$all()$sum())$collect(
   streaming = TRUE
 )
 }
+\seealso{
+\itemize{
+\item \code{\link[=lazyframe__profile]{$profile()}} - same as \verb{$collect()} but also returns
+a table with each operation profiled.
+\item \code{\link[=lazyframe__collect_in_background]{$collect_in_background()}} - non-blocking
+collect returns a future handle. Can also just be used via
+\verb{$collect(collect_in_background = TRUE)}.
+\item \code{\link[=lazyframe__sink_parquet]{$sink_parquet()}} streams query to a parquet file.
+\item \code{\link[=lazyframe__sink_ipc]{$sink_ipc()}} streams query to an Arrow file.
+}
+}
diff --git a/man/lazyframe__collect_schema.Rd b/man/lazyframe__collect_schema.Rd
new file mode 100644
index 00000000..6a59a51f
--- /dev/null
+++ b/man/lazyframe__collect_schema.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__collect_schema}
+\alias{lazyframe__collect_schema}
+\title{Resolve the schema of this LazyFrame}
+\usage{
+lazyframe__collect_schema()
+}
+\value{
+A named list with names indicating column names and values indicating
+column data types.
+}
+\description{
+This resolves the query plan but does not trigger computations.
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = 1:3,
+  bar = 6:8,
+  ham = c("a", "b", "c")
+)
+
+lf$collect_schema()
+
+lf$with_columns(
+  baz = (pl$col("foo") + pl$col("bar"))$cast(pl$String),
+  pl$col("bar")$cast(pl$Int64)
+)$collect_schema()
+}
diff --git a/man/lazyframe__count.Rd b/man/lazyframe__count.Rd
new file mode 100644
index 00000000..a8d52e21
--- /dev/null
+++ b/man/lazyframe__count.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__count}
+\alias{lazyframe__count}
+\title{Return the number of non-null elements for each column}
+\usage{
+lazyframe__count()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Return the number of non-null elements for each column
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, NA), c = rep(NA, 4))
+lf$count()$collect()
+}
diff --git a/man/lazyframe__drop.Rd b/man/lazyframe__drop.Rd
new file mode 100644
index 00000000..95552531
--- /dev/null
+++ b/man/lazyframe__drop.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__drop}
+\alias{lazyframe__drop}
+\title{Remove columns from the LazyFrame}
+\usage{
+lazyframe__drop(..., strict = TRUE)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Names of the columns that
+should be removed from the LazyFrame. Accepts column selector input.}
+
+\item{strict}{Validate that all column names exist in the current schema,
+and throw an exception if any do not.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Remove columns from the LazyFrame
+}
+\examples{
+# Drop columns by passing the name of those columns
+lf <- pl$LazyFrame(
+  foo = 1:3,
+  bar = c(6, 7, 8),
+  ham = c("a", "b", "c")
+)
+lf$drop("ham")$collect()
+lf$drop("ham", "bar")$collect()
+
+# Drop multiple columns by passing a selector
+lf$drop(cs$all())$collect()
+}
diff --git a/man/lazyframe__drop_nulls.Rd b/man/lazyframe__drop_nulls.Rd
new file mode 100644
index 00000000..dd3fcc78
--- /dev/null
+++ b/man/lazyframe__drop_nulls.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__drop_nulls}
+\alias{lazyframe__drop_nulls}
+\title{Drop all rows that contain null values}
+\usage{
+lazyframe__drop_nulls(subset = NULL)
+}
+\arguments{
+\item{subset}{Column name(s) for which null values are considered. If \code{NULL}
+(default), use all columns.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+The original order of the remaining rows is preserved.
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = 1:3,
+  bar = c(6, NA, 8),
+  ham = c("a", "b", NA)
+)
+
+# The default behavior of this method is to drop rows where any single value
+# of the row is null.
+lf$drop_nulls()$collect()
+
+# This behaviour can be constrained to consider only a subset of columns, as
+# defined by name or with a selector. For example, dropping rows if there is
+# a null in any of the integer columns:
+lf$drop_nulls(subset = cs$integer())$collect()
+}
diff --git a/man/lazyframe__explain.Rd b/man/lazyframe__explain.Rd
new file mode 100644
index 00000000..582869d2
--- /dev/null
+++ b/man/lazyframe__explain.Rd
@@ -0,0 +1,78 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__explain}
+\alias{lazyframe__explain}
+\title{Create a string representation of the query plan}
+\usage{
+lazyframe__explain(
+  ...,
+  format = c("plain", "tree"),
+  optimized = TRUE,
+  type_coercion = TRUE,
+  predicate_pushdown = TRUE,
+  projection_pushdown = TRUE,
+  simplify_expression = TRUE,
+  slice_pushdown = TRUE,
+  comm_subplan_elim = TRUE,
+  comm_subexpr_elim = TRUE,
+  cluster_with_columns = TRUE,
+  streaming = FALSE
+)
+}
+\arguments{
+\item{...}{Dots which should be empty.}
+
+\item{format}{The format to use for displaying the logical plan. Must be
+either \code{"plain"} (default) or \code{"tree"}.}
+
+\item{optimized}{Return an optimized query plan. If \code{TRUE} (default), the
+subsequent optimization flags control which optimizations run.}
+
+\item{type_coercion}{A logical, indicates type coercion optimization.}
+
+\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.}
+
+\item{projection_pushdown}{A logical, indicates projection pushdown optimization.}
+
+\item{simplify_expression}{A logical, indicates simplify expression optimization.}
+
+\item{slice_pushdown}{A logical, indicates slice pushdown optimization.}
+
+\item{comm_subplan_elim}{A logical, indicates trying to cache branching subplans that occur on self-joins or unions.}
+
+\item{comm_subexpr_elim}{A logical, indicates trying to cache common subexpressions.}
+
+\item{cluster_with_columns}{A logical, indicates to combine sequential independent calls to with_columns.}
+
+\item{streaming}{A logical. If \code{TRUE}, process the query in batches to handle larger-than-memory data.
+If \code{FALSE} (default), the entire query is processed in a single batch.
+Note that streaming mode is considered unstable.
+It may be changed at any point without it being considered a breaking change.}
+}
+\value{
+A character value containing the query plan.
+}
+\description{
+The query plan is read from bottom to top. When \code{optimized = FALSE}, the
+query as it was written by the user is shown. This is not what Polars runs.
+Instead, it applies optimizations that are displayed by default by \verb{$explain()}.
+One classic example is the predicate pushdown, which applies the filter as
+early as possible (i.e. at the bottom of the plan).
+}
+\examples{
+lazy_frame <- as_polars_lf(iris)
+
+# Prepare your query
+lazy_query <- lazy_frame$sort("Species")$filter(pl$col("Species") != "setosa")
+
+# This is the query that was written by the user, without any optimizations
+# (use cat() for better printing)
+lazy_query$explain(optimized = FALSE) |> cat()
+
+# This is the query after `polars` optimizes it: instead of sorting first and
+# then filtering, it is faster to filter first and then sort the rest.
+lazy_query$explain() |> cat()
+
+# Also possible to see this as tree format
+lazy_query$explain(format = "tree") |> cat()
+}
diff --git a/man/lazyframe__explode.Rd b/man/lazyframe__explode.Rd
new file mode 100644
index 00000000..e438e172
--- /dev/null
+++ b/man/lazyframe__explode.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__explode}
+\alias{lazyframe__explode}
+\title{Explode the LazyFrame to long format by exploding the given columns}
+\usage{
+lazyframe__explode(...)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Column names, expressions, or
+a selector defining them. The underlying columns being exploded must be of
+the \code{List} or \code{Array} data type.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Explode the LazyFrame to long format by exploding the given columns
+}
+\examples{
+lf <- pl$LazyFrame(
+  letters = c("a", "a", "b", "c"),
+  numbers = list(1, c(2, 3), c(4, 5), c(6, 7, 8))
+)
+
+lf$explode("numbers")$collect()
+}
diff --git a/man/lazyframe__fill_nan.Rd b/man/lazyframe__fill_nan.Rd
new file mode 100644
index 00000000..05e05a2c
--- /dev/null
+++ b/man/lazyframe__fill_nan.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__fill_nan}
+\alias{lazyframe__fill_nan}
+\title{Fill floating point \code{NaN} value with a fill value}
+\usage{
+lazyframe__fill_nan(value)
+}
+\arguments{
+\item{value}{Value used to fill \code{NaN} values.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Fill floating point \code{NaN} value with a fill value
+}
+\examples{
+lf <- pl$LazyFrame(
+  a = c(1.5, 2, NaN, 4),
+  b = c(1.5, NaN, NaN, 4)
+)
+lf$fill_nan(99)$collect()
+}
diff --git a/man/lazyframe__filter.Rd b/man/lazyframe__filter.Rd
new file mode 100644
index 00000000..22a94990
--- /dev/null
+++ b/man/lazyframe__filter.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__filter}
+\alias{lazyframe__filter}
+\title{Filter the rows in the LazyFrame based on a predicate expression}
+\usage{
+lazyframe__filter(...)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Expression that evaluates to
+a boolean Series.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+The original order of the remaining rows is preserved. Rows where the filter
+does not evaluate to \code{TRUE} are discarded, including nulls.
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = c(1, 2, 3, NA, 4, NA, 0),
+  bar = c(6, 7, 8, NA, NA, 9, 0),
+  ham = c("a", "b", "c", NA, "d", "e", "f")
+)
+
+# Filter on one condition
+lf$filter(pl$col("foo") > 1)$collect()
+
+# Filter on multiple conditions
+lf$filter((pl$col("foo") < 3) & (pl$col("ham") == "a"))$collect()
+
+# Filter on an OR condition
+lf$filter((pl$col("foo") == 1) | (pl$col("ham") == "c"))$collect()
+
+# Filter by comparing two columns against each other
+lf$filter(pl$col("foo") == pl$col("bar"))$collect()
+lf$filter(pl$col("foo") != pl$col("bar"))$collect()
+
+# Notice how the row with null values is filtered out. In order to keep the
+# rows with nulls, use:
+lf$filter(pl$col("foo")$ne_missing(pl$col("bar")))$collect()
+}
diff --git a/man/lazyframe__first.Rd b/man/lazyframe__first.Rd
new file mode 100644
index 00000000..20d03d55
--- /dev/null
+++ b/man/lazyframe__first.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__first}
+\alias{lazyframe__first}
+\title{Get the first row of the LazyFrame}
+\usage{
+lazyframe__first()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Get the first row of the LazyFrame
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$first()$collect()
+}
diff --git a/man/lazyframe__gather_every.Rd b/man/lazyframe__gather_every.Rd
new file mode 100644
index 00000000..7eaf654d
--- /dev/null
+++ b/man/lazyframe__gather_every.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__gather_every}
+\alias{lazyframe__gather_every}
+\title{Take every nth row in the LazyFrame}
+\usage{
+lazyframe__gather_every(n, offset = 0)
+}
+\arguments{
+\item{n}{Gather every \code{n}-th row.}
+
+\item{offset}{Starting index.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Take every nth row in the LazyFrame
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = 5:8)
+lf$gather_every(2)$collect()
+
+lf$gather_every(2, offset = 1)$collect()
+}
diff --git a/man/lazyframe__group_by.Rd b/man/lazyframe__group_by.Rd
new file mode 100644
index 00000000..51afd1ca
--- /dev/null
+++ b/man/lazyframe__group_by.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__group_by}
+\alias{lazyframe__group_by}
+\title{Start a group by operation}
+\usage{
+lazyframe__group_by(..., .maintain_order = FALSE)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Column(s) to group by.
+Accepts expression input. Strings are parsed as column names.}
+
+\item{.maintain_order}{Ensure that the order of the groups is consistent with
+the input data. This is slower than a default group by. Setting this to
+\code{TRUE} blocks the possibility to run on the streaming engine.}
+}
+\value{
+A lazy groupby
+}
+\description{
+Start a group by operation
+}
+\examples{
+# Group by one column and call agg() to compute the grouped sum of another
+# column.
+lf <- pl$LazyFrame(
+  a = c("a", "b", "a", "b", "c"),
+  b = c(1, 2, 1, 3, 3),
+  c = c(5, 4, 3, 2, 1)
+)
+lf$group_by("a")$agg(pl$col("b")$sum())$collect()
+
+# Set .maintain_order = TRUE to ensure the order of the groups is consistent
+# with the input.
+lf$group_by("a", .maintain_order = TRUE)$agg(pl$col("b")$sum())$collect()
+
+# Group by multiple columns by passing a vector of column names.
+lf$group_by(c("a", "b"))$agg(pl$col("c")$max())$collect()
+
+# Or use positional arguments to group by multiple columns in the same way.
+# Expressions are also accepted.
+lf$
+  group_by("a", pl$col("b") / 2)$
+  agg(pl$col("c")$mean())$collect()
+}
diff --git a/man/lazyframe__group_by_dynamic.Rd b/man/lazyframe__group_by_dynamic.Rd
new file mode 100644
index 00000000..624890ad
--- /dev/null
+++ b/man/lazyframe__group_by_dynamic.Rd
@@ -0,0 +1,178 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__group_by_dynamic}
+\alias{lazyframe__group_by_dynamic}
+\title{Group based on a date/time or integer column}
+\usage{
+lazyframe__group_by_dynamic(
+  index_column,
+  ...,
+  every,
+  period = NULL,
+  offset = NULL,
+  include_boundaries = FALSE,
+  closed = "left",
+  label = "left",
+  group_by = NULL,
+  start_by = "window"
+)
+}
+\arguments{
+\item{index_column}{Column used to group based on the time window. Often of
+type Date/Datetime. This column must be sorted in ascending order (or, if
+\code{group_by} is specified, then it must be sorted in ascending order within
+each group).
+In case of a dynamic group by on indices, the data type needs to be either
+Int32 or Int64. Note that Int32 gets temporarily cast to Int64, so if
+performance matters, use an Int64 column.}
+
+\item{...}{Dots which should be empty.}
+
+\item{every}{Interval of the window.}
+
+\item{period}{Length of the window. If \code{NULL} (default), it will equal
+\code{every}.}
+
+\item{offset}{Offset of the window, does not take effect if
+\code{start_by = "datapoint"}. Defaults to zero.}
+
+\item{include_boundaries}{Add two columns, \code{"_lower_boundary"} and
+\code{"_upper_boundary"}, that show the boundaries of the window. This will
+impact performance because it’s harder to parallelize.}
+
+\item{closed}{Define which sides of the interval are closed (inclusive).
+Default is \code{"left"}.}
+
+\item{label}{Define which label to use for the window:
+\itemize{
+\item \code{"left"}: lower boundary of the window
+\item \code{"right"}: upper boundary of the window
+\item \code{"datapoint"}: the first value of the index column in the given window. If
+you don’t need the label to be at one of the boundaries, choose this option
+for maximum performance.
+}}
+
+\item{start_by}{The strategy to determine the start of the first window by:
+\itemize{
+\item \code{"window"}: start by taking the earliest timestamp, truncating it with
+\code{every}, and then adding \code{offset}. Note that weekly windows start on
+Monday.
+\item \code{"datapoint"}: start from the first encountered data point.
+\item a day of the week (only takes effect if \code{every} contains \code{"w"}): \code{"monday"}
+starts the window on the Monday before the first data point, etc.
+}}
+}
+\value{
+A \link[=LazyGroupBy_class]{LazyGroupBy} object
+}
+\description{
+Time windows are calculated and rows are assigned to windows. Different from
+a normal group by is that a row can be member of multiple groups. By
+default, the windows look like:
+\itemize{
+\item [start, start + period)
+\item [start + every, start + every + period)
+\item [start + 2*every, start + 2*every + period)
+\item …
+}
+}
+\details{
+where \code{start} is determined by \code{start_by}, \code{offset}, \code{every}, and the
+earliest datapoint. See the \code{start_by} argument description for details.
+
+The \code{every}, \code{period}, and \code{offset} arguments are created with the following
+string language:
+\itemize{
+\item 1ns # 1 nanosecond
+\item 1us # 1 microsecond
+\item 1ms # 1 millisecond
+\item 1s  # 1 second
+\item 1m  # 1 minute
+\item 1h  # 1 hour
+\item 1d  # 1 day
+\item 1w  # 1 calendar week
+\item 1mo # 1 calendar month
+\item 1y  # 1 calendar year
+}
+These strings can be combined:
+\itemize{
+\item 3d12h4m25s # 3 days, 12 hours, 4 minutes, and 25 seconds
+}
+
+In case of a \code{group_by_dynamic} on an integer column, the windows are
+defined by:
+\itemize{
+\item 1i # length 1
+\item 10i # length 10
+}
+}
+\examples{
+lf <- pl$select(
+  time = pl$datetime_range(
+    start = strptime("2021-12-16 00:00:00", format = "\%Y-\%m-\%d \%H:\%M:\%S", tz = "UTC"),
+    end = strptime("2021-12-16 03:00:00", format = "\%Y-\%m-\%d \%H:\%M:\%S", tz = "UTC"),
+    interval = "30m"
+  ),
+  n = 0:6
+)$lazy()
+lf$collect()
+
+# Group by windows of 1 hour.
+lf$group_by_dynamic("time", every = "1h", closed = "right")$agg(
+  vals = pl$col("n")
+)$collect()
+
+# The window boundaries can also be added to the aggregation result
+lf$group_by_dynamic(
+  "time",
+  every = "1h", include_boundaries = TRUE, closed = "right"
+)$agg(
+  pl$col("n")$mean()
+)$collect()
+
+# When closed = "left", the window excludes the right end of interval:
+# [lower_bound, upper_bound)
+lf$group_by_dynamic("time", every = "1h", closed = "left")$agg(
+  pl$col("n")
+)$collect()
+
+# When closed = "both" the time values at the window boundaries belong to 2
+# groups.
+lf$group_by_dynamic("time", every = "1h", closed = "both")$agg(
+  pl$col("n")
+)$collect()
+
+# Dynamic group bys can also be combined with grouping on normal keys
+lf <- lf$with_columns(
+  groups = as_polars_series(c("a", "a", "a", "b", "b", "a", "a"))
+)
+lf$collect()
+
+lf$group_by_dynamic(
+  "time",
+  every = "1h",
+  closed = "both",
+  group_by = "groups",
+  include_boundaries = TRUE
+)$agg(pl$col("n"))$collect()
+
+# We can also create a dynamic group by based on an index column
+lf <- pl$LazyFrame(
+  idx = 0:5,
+  A = c("A", "A", "B", "B", "B", "C")
+)$with_columns(pl$col("idx")$set_sorted())
+lf$collect()
+
+lf$group_by_dynamic(
+  "idx",
+  every = "2i",
+  period = "3i",
+  include_boundaries = TRUE,
+  closed = "right"
+)$agg(A_agg_list = pl$col("A"))$collect()
+}
+\seealso{
+\itemize{
+\item \code{\link[=lazyframe__rolling]{<LazyFrame>$rolling()}}
+}
+}
diff --git a/man/lazyframe__head.Rd b/man/lazyframe__head.Rd
new file mode 100644
index 00000000..3b202274
--- /dev/null
+++ b/man/lazyframe__head.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__head}
+\alias{lazyframe__head}
+\title{Get the first \code{n} rows}
+\usage{
+lazyframe__head(n = 5)
+}
+\arguments{
+\item{n}{Number of rows to return.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Get the first \code{n} rows
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:6, b = 7:12)
+lf$head()$collect()
+lf$head(2)$collect()
+}
diff --git a/man/lazyframe__interpolate.Rd b/man/lazyframe__interpolate.Rd
new file mode 100644
index 00000000..b6cafaf6
--- /dev/null
+++ b/man/lazyframe__interpolate.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__interpolate}
+\alias{lazyframe__interpolate}
+\title{Interpolate intermediate values}
+\usage{
+lazyframe__interpolate()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+The interpolation method is linear.
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = c(1, NA, 9, 10),
+  bar = c(6, 7, 9, NA),
+  ham = c(1, NA, NA, 9)
+)
+
+lf$interpolate()$collect()
+}
diff --git a/man/lazyframe__join.Rd b/man/lazyframe__join.Rd
new file mode 100644
index 00000000..653d5d65
--- /dev/null
+++ b/man/lazyframe__join.Rd
@@ -0,0 +1,108 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__join}
+\alias{lazyframe__join}
+\title{Join LazyFrames}
+\usage{
+lazyframe__join(
+  other,
+  on = NULL,
+  how = "inner",
+  ...,
+  left_on = NULL,
+  right_on = NULL,
+  suffix = "_right",
+  validate = "m:m",
+  join_nulls = FALSE,
+  allow_parallel = TRUE,
+  force_parallel = FALSE,
+  coalesce = NULL
+)
+}
+\arguments{
+\item{other}{LazyFrame to join with.}
+
+\item{on}{Either a vector of column names or a list of expressions and/or
+strings. Use \code{left_on} and \code{right_on} if the column names to match on are
+different between the two LazyFrames.}
+
+\item{how}{One of the following methods:
+\itemize{
+\item "inner": returns rows that have matching values in both tables
+\item "left": returns all rows from the left table, and the matched rows from
+the right table
+\item "right": returns all rows from the right table, and the matched rows from
+the left table
+\item "full": returns all rows when there is a match in either left or right
+table
+\item "cross": returns the Cartesian product of rows from both tables
+\item "semi": returns rows from the left table that have a match in the right
+table.
+\item "anti": returns rows from the left table that have no match in the right
+table.
+}}
+
+\item{...}{Dots which should be empty.}
+
+\item{left_on, right_on}{Same as \code{on} but only for the left or the right
+LazyFrame. They must have the same length.}
+
+\item{suffix}{Suffix to add to duplicated column names.}
+
+\item{validate}{Checks if join is of specified type:
+\itemize{
+\item \code{"m:m"} (default): many-to-many, doesn't perform any checks;
+\item \code{"1:1"}: one-to-one, check if join keys are unique in both left and right
+datasets;
+\item \code{"1:m"}: one-to-many, check if join keys are unique in left dataset
+\item \code{"m:1"}: many-to-one, check if join keys are unique in right dataset
+}
+
+Note that this is currently not supported by the streaming engine.}
+
+\item{join_nulls}{Join on null values. By default null values will never
+produce matches.}
+
+\item{allow_parallel}{Allow the physical plan to optionally evaluate the
+computation of both DataFrames up to the join in parallel.}
+
+\item{force_parallel}{Force the physical plan to evaluate the computation of
+both DataFrames up to the join in parallel.}
+
+\item{coalesce}{Coalescing behavior (merging of join columns).
+\itemize{
+\item \code{NULL}: join specific.
+\item \code{TRUE}: Always coalesce join columns.
+\item \code{FALSE}: Never coalesce join columns.
+Note that joining on any other expressions than \code{col} will turn off
+coalescing.
+}}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+This function can do both mutating joins (adding columns based on matching
+observations, for example with \code{how = "left"}) and filtering joins (keeping
+observations based on matching observations, for example with \code{how = "semi"}).
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = 1:3,
+  bar = c(6, 7, 8),
+  ham = c("a", "b", "c")
+)
+other_lf <- pl$LazyFrame(
+  apple = c("x", "y", "z"),
+  ham = c("a", "b", "d")
+)
+lf$join(other_lf, on = "ham")$collect()
+
+lf$join(other_lf, on = "ham", how = "full")$collect()
+
+lf$join(other_lf, on = "ham", how = "left", coalesce = TRUE)$collect()
+
+lf$join(other_lf, on = "ham", how = "semi")$collect()
+
+lf$join(other_lf, on = "ham", how = "anti")$collect()
+}
diff --git a/man/lazyframe__join_asof.Rd b/man/lazyframe__join_asof.Rd
new file mode 100644
index 00000000..fe3cacd3
--- /dev/null
+++ b/man/lazyframe__join_asof.Rd
@@ -0,0 +1,166 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__join_asof}
+\alias{lazyframe__join_asof}
+\title{Perform joins on nearest keys}
+\usage{
+lazyframe__join_asof(
+  other,
+  ...,
+  left_on = NULL,
+  right_on = NULL,
+  on = NULL,
+  by_left = NULL,
+  by_right = NULL,
+  by = NULL,
+  strategy = c("backward", "forward", "nearest"),
+  suffix = "_right",
+  tolerance = NULL,
+  allow_parallel = TRUE,
+  force_parallel = FALSE,
+  coalesce = TRUE
+)
+}
+\arguments{
+\item{other}{LazyFrame to join with.}
+
+\item{...}{Dots which should be empty.}
+
+\item{by_left, by_right}{Same as \code{by} but only for the left or the right
+table. They must have the same length.}
+
+\item{by}{Join on these columns before performing asof join. Either a vector
+of column names or a list of expressions and/or strings. Use \code{by_left} and
+\code{by_right} if the column names to match on are different between the two
+tables.}
+
+\item{strategy}{Strategy for where to find match:
+\itemize{
+\item \code{"backward"} (default): search for the last row in the right table whose
+\code{on} key is less than or equal to the left key.
+\item \code{"forward"}: search for the first row in the right table whose \code{on} key is
+greater than or equal to the left key.
+\item \code{"nearest"}: search for the last row in the right table whose value is
+nearest to the left key. String keys are not currently supported for a
+nearest search.
+}}
+
+\item{tolerance}{Numeric tolerance. By setting this the join will only be
+done if the near keys are within this distance. If an asof join is done on
+columns of dtype "Date", "Datetime", "Duration" or "Time", use the Polars
+duration string language (see details).}
+
+\item{coalesce}{Coalescing behavior (merging of \code{on} / \code{left_on} /
+\code{right_on} columns):
+\itemize{
+\item \code{TRUE}: Always coalesce join columns;
+\item \code{FALSE}: Never coalesce join columns.
+Note that joining on any other expressions than \code{col} will turn off
+coalescing.
+}}
+}
+\description{
+This is similar to a left-join except that we match on nearest key rather
+than equal keys. Both frames must be sorted by the \code{asof_join} key.
+}
+\section{Polars duration string language}{
+
+Polars duration string language is a simple representation of
+durations. It is used in many Polars functions that accept durations.
+
+It has the following format:
+\itemize{
+\item 1ns (1 nanosecond)
+\item 1us (1 microsecond)
+\item 1ms (1 millisecond)
+\item 1s (1 second)
+\item 1m (1 minute)
+\item 1h (1 hour)
+\item 1d (1 calendar day)
+\item 1w (1 calendar week)
+\item 1mo (1 calendar month)
+\item 1q (1 calendar quarter)
+\item 1y (1 calendar year)
+}
+
+Or combine them: \code{"3d12h4m25s"} # 3 days, 12 hours, 4 minutes, and 25 seconds
+
+By "calendar day", we mean the corresponding time on the next day
+(which may not be 24 hours, due to daylight savings).
+Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".
+}
+
+\examples{
+gdp <- pl$LazyFrame(
+  date = as.Date(c("2016-1-1", "2017-5-1", "2018-1-1", "2019-1-1", "2020-1-1")),
+  gdp = c(4164, 4411, 4566, 4696, 4827)
+)
+
+pop <- pl$LazyFrame(
+  date = as.Date(c("2016-3-1", "2018-8-1", "2019-1-1")),
+  population = c(82.19, 82.66, 83.12)
+)
+
+# optional: make sure tables are already sorted by the "on" join key
+gdp <- gdp$sort("date")
+pop <- pop$sort("date")
+
+
+# Note how the dates don’t quite match. If we join them using join_asof and
+# strategy = 'backward', then each date from population which doesn’t have
+# an exact match is matched with the closest earlier date from gdp:
+pop$join_asof(gdp, on = "date", strategy = "backward")$collect()
+
+# Note how:
+# - date 2016-03-01 from population is matched with 2016-01-01 from gdp;
+# - date 2018-08-01 from population is matched with 2018-01-01 from gdp.
+# You can verify this by passing coalesce = FALSE:
+pop$join_asof(
+  gdp,
+  on = "date", strategy = "backward", coalesce = FALSE
+)$collect()
+
+# If we instead use strategy = 'forward', then each date from population
+# which doesn’t have an exact match is matched with the closest later date
+# from gdp:
+pop$join_asof(gdp, on = "date", strategy = "forward")$collect()
+
+# Note how:
+# - date 2016-03-01 from population is matched with 2017-05-01 from gdp;
+# - date 2018-08-01 from population is matched with 2019-01-01 from gdp.
+
+# Finally, strategy = 'nearest' gives us a mix of the two results above, as
+# each date from population which doesn’t have an exact match is matched
+# with the closest date from gdp, regardless of whether it’s earlier or
+# later:
+pop$join_asof(gdp, on = "date", strategy = "nearest")$collect()
+
+# Note how:
+# - date 2016-03-01 from population is matched with 2016-01-01 from gdp;
+# - date 2018-08-01 from population is matched with 2019-01-01 from gdp.
+
+# The `by` argument allows joining on another column first, before the asof
+# join. In this example we join by country first, then asof join by date, as
+# above.
+gdp2 <- pl$LazyFrame(
+  country = rep(c("Germany", "Netherlands"), each = 5),
+  date = rep(
+    as.Date(c("2016-1-1", "2017-1-1", "2018-1-1", "2019-1-1", "2020-1-1")),
+    2
+  ),
+  gdp = c(4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909)
+)$sort("country", "date")
+gdp2$collect()
+
+pop2 <- pl$LazyFrame(
+  country = rep(c("Germany", "Netherlands"), each = 3),
+  date = rep(as.Date(c("2016-3-1", "2018-8-1", "2019-1-1")), 2),
+  population = c(82.19, 82.66, 83.12, 17.11, 17.32, 17.40)
+)$sort("country", "date")
+pop2$collect()
+
+pop2$join_asof(
+  gdp2,
+  by = "country", on = "date", strategy = "nearest"
+)$collect()
+}
diff --git a/man/lazyframe__join_where.Rd b/man/lazyframe__join_where.Rd
new file mode 100644
index 00000000..28a6a450
--- /dev/null
+++ b/man/lazyframe__join_where.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__join_where}
+\alias{lazyframe__join_where}
+\title{Perform a join based on one or multiple (in)equality predicates}
+\usage{
+lazyframe__join_where(other, ..., suffix = "_right")
+}
+\arguments{
+\item{other}{LazyFrame to join with.}
+
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> (In)Equality condition to
+join the two tables on. When a column name occurs in both tables, the proper
+suffix must be applied in the predicate. For example, if both tables have a
+column \code{"x"} that you want to use in the conditions, you must refer to the
+column of the right table as \code{"x<suffix>"}.}
+
+\item{suffix}{Suffix to append to columns with a duplicate name.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+
+This performs an inner join, so only rows where all predicates are true are
+included in the result, and a row from either LazyFrame may be included
+multiple times in the result.
+
+Note that the row order of the input LazyFrames is not preserved.
+}
+\examples{
+east <- pl$LazyFrame(
+  id = c(100, 101, 102),
+  dur = c(120, 140, 160),
+  rev = c(12, 14, 16),
+  cores = c(2, 8, 4)
+)
+
+west <- pl$LazyFrame(
+  t_id = c(404, 498, 676, 742),
+  time = c(90, 130, 150, 170),
+  cost = c(9, 13, 15, 16),
+  cores = c(4, 2, 1, 4)
+)
+
+east$join_where(
+  west,
+  pl$col("dur") < pl$col("time"),
+  pl$col("rev") < pl$col("cost")
+)$collect()
+}
diff --git a/man/lazyframe__last.Rd b/man/lazyframe__last.Rd
new file mode 100644
index 00000000..a1ab582c
--- /dev/null
+++ b/man/lazyframe__last.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__last}
+\alias{lazyframe__last}
+\title{Get the last row of the LazyFrame}
+\usage{
+lazyframe__last()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Get the last row of the LazyFrame
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$last()$collect()
+}
diff --git a/man/lazyframe__limit.Rd b/man/lazyframe__limit.Rd
new file mode 100644
index 00000000..8f323354
--- /dev/null
+++ b/man/lazyframe__limit.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__limit}
+\alias{lazyframe__limit}
+\title{Get the first \code{n} rows}
+\usage{
+lazyframe__limit(n = 5)
+}
+\arguments{
+\item{n}{Number of rows to return.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Alias for \code{\link[=lazyframe__head]{<LazyFrame>$head()}}.
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:6, b = 7:12)
+lf$limit()$collect()
+lf$limit(2)$collect()
+}
diff --git a/man/lazyframe__max.Rd b/man/lazyframe__max.Rd
new file mode 100644
index 00000000..f798a561
--- /dev/null
+++ b/man/lazyframe__max.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__max}
+\alias{lazyframe__max}
+\title{Aggregate the columns in the LazyFrame to their maximum value}
+\usage{
+lazyframe__max()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Aggregate the columns in the LazyFrame to their maximum value
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$max()$collect()
+}
diff --git a/man/lazyframe__mean.Rd b/man/lazyframe__mean.Rd
new file mode 100644
index 00000000..f19405d0
--- /dev/null
+++ b/man/lazyframe__mean.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__mean}
+\alias{lazyframe__mean}
+\title{Aggregate the columns in the LazyFrame to their mean value}
+\usage{
+lazyframe__mean()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Aggregate the columns in the LazyFrame to their mean value
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$mean()$collect()
+}
diff --git a/man/lazyframe__median.Rd b/man/lazyframe__median.Rd
new file mode 100644
index 00000000..7bcf7a69
--- /dev/null
+++ b/man/lazyframe__median.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__median}
+\alias{lazyframe__median}
+\title{Aggregate the columns in the LazyFrame to their median value}
+\usage{
+lazyframe__median()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Aggregate the columns in the LazyFrame to their median value
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$median()$collect()
+}
diff --git a/man/lazyframe__merge_sorted.Rd b/man/lazyframe__merge_sorted.Rd
new file mode 100644
index 00000000..1b7eea03
--- /dev/null
+++ b/man/lazyframe__merge_sorted.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__merge_sorted}
+\alias{lazyframe__merge_sorted}
+\title{Take two sorted LazyFrames and merge them by the sorted key}
+\usage{
+lazyframe__merge_sorted(other, key)
+}
+\arguments{
+\item{other}{Other LazyFrame that must be merged.}
+
+\item{key}{Key that is sorted.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+The output of this operation will also be sorted. It is the caller's
+responsibility that the frames are sorted by that key, otherwise the output
+will not make sense. The schemas of both LazyFrames must be equal.
+}
+\examples{
+lf1 <- pl$LazyFrame(
+  name = c("steve", "elise", "bob"),
+  age = c(42, 44, 18)
+)$sort("age")
+
+lf2 <- pl$LazyFrame(
+  name = c("anna", "megan", "steve", "thomas"),
+  age = c(21, 33, 42, 20)
+)$sort("age")
+
+lf1$merge_sorted(lf2, key = "age")$collect()
+}
diff --git a/man/lazyframe__min.Rd b/man/lazyframe__min.Rd
new file mode 100644
index 00000000..a2946a86
--- /dev/null
+++ b/man/lazyframe__min.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__min}
+\alias{lazyframe__min}
+\title{Aggregate the columns in the LazyFrame to their minimum value}
+\usage{
+lazyframe__min()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Aggregate the columns in the LazyFrame to their minimum value
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$min()$collect()
+}
diff --git a/man/lazyframe__null_count.Rd b/man/lazyframe__null_count.Rd
new file mode 100644
index 00000000..ec9955fe
--- /dev/null
+++ b/man/lazyframe__null_count.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__null_count}
+\alias{lazyframe__null_count}
+\title{Return the number of null elements for each column}
+\usage{
+lazyframe__null_count()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Return the number of null elements for each column
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, NA), c = rep(NA, 4))
+lf$null_count()$collect()
+}
diff --git a/man/lazyframe__profile.Rd b/man/lazyframe__profile.Rd
new file mode 100644
index 00000000..6182fb6d
--- /dev/null
+++ b/man/lazyframe__profile.Rd
@@ -0,0 +1,161 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__profile}
+\alias{lazyframe__profile}
+\title{Collect and profile a lazy query.}
+\usage{
+lazyframe__profile(
+  type_coercion = TRUE,
+  predicate_pushdown = TRUE,
+  projection_pushdown = TRUE,
+  simplify_expression = TRUE,
+  slice_pushdown = TRUE,
+  comm_subplan_elim = TRUE,
+  comm_subexpr_elim = TRUE,
+  cluster_with_columns = TRUE,
+  streaming = FALSE,
+  no_optimization = FALSE,
+  collect_in_background = FALSE,
+  show_plot = FALSE,
+  truncate_nodes = 0
+)
+
+lazyframe__profile(
+  type_coercion = TRUE,
+  predicate_pushdown = TRUE,
+  projection_pushdown = TRUE,
+  simplify_expression = TRUE,
+  slice_pushdown = TRUE,
+  comm_subplan_elim = TRUE,
+  comm_subexpr_elim = TRUE,
+  cluster_with_columns = TRUE,
+  streaming = FALSE,
+  no_optimization = FALSE,
+  collect_in_background = FALSE,
+  show_plot = FALSE,
+  truncate_nodes = 0
+)
+}
+\arguments{
+\item{type_coercion}{A logical, indicates type coercion optimization.}
+
+\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.}
+
+\item{projection_pushdown}{A logical, indicates projection pushdown optimization.}
+
+\item{simplify_expression}{A logical, indicates simplify expression optimization.}
+
+\item{slice_pushdown}{A logical, indicates slice pushdown optimization.}
+
+\item{comm_subplan_elim}{A logical, indicates trying to cache branching subplans that occur on self-joins or unions.}
+
+\item{comm_subexpr_elim}{A logical, indicates trying to cache common subexpressions.}
+
+\item{cluster_with_columns}{A logical, indicates to combine sequential independent calls to with_columns.}
+
+\item{streaming}{A logical. If \code{TRUE}, process the query in batches to handle larger-than-memory data.
+If \code{FALSE} (default), the entire query is processed in a single batch.
+Note that streaming mode is considered unstable.
+It may be changed at any point without it being considered a breaking change.}
+
+\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
+
+\item{show_plot}{Show a Gantt chart of the profiling result}
+
+\item{truncate_nodes}{Truncate the label lengths in the Gantt chart to this
+number of characters. If \code{0} (default), do not truncate.}
+}
+\value{
+List of two \code{DataFrame}s: one with the collected result, the other
+with the timings of each step. If \code{show_plot = TRUE}, then the plot is
+also stored in the list.
+
+List of two \code{DataFrame}s: one with the collected result, the other
+with the timings of each step. If \code{show_plot = TRUE}, then the plot is
+also stored in the list.
+}
+\description{
+This will run the query and return a list containing the materialized
+DataFrame and a DataFrame that contains profiling information of each node
+that is executed.
+
+This will run the query and return a list containing the
+materialized DataFrame and a DataFrame that contains profiling information
+of each node that is executed.
+}
+\details{
+The units of the timings are microseconds.
+
+The units of the timings are microseconds.
+}
+\examples{
+## Simplest use case
+pl$LazyFrame()$select(pl$lit(2) + 2)$profile()
+
+## Use $profile() to compare two queries
+
+# -1-  map each Species-group with native polars
+as_polars_lf(iris)$
+  sort("Sepal.Length")$
+  group_by("Species", maintain_order = TRUE)$
+  agg(pl$col(pl$Float64)$first() + 5)$
+  profile()
+
+# -2-  map each Species-group of each numeric column with an R function
+
+# some R function, prints `.` for each time called by polars
+r_func <- \(s) {
+  cat(".")
+  s$to_r()[1] + 5
+}
+
+as_polars_lf(iris)$
+  sort("Sepal.Length")$
+  group_by("Species", maintain_order = TRUE)$
+  agg(pl$col(pl$Float64)$map_elements(r_func))$
+  profile()
+## Simplest use case
+pl$LazyFrame()$select(pl$lit(2) + 2)$profile()
+
+## Use $profile() to compare two queries
+
+# -1-  map each Species-group with native polars, takes ~120us only
+as_polars_lf(iris)$
+  sort("Sepal.Length")$
+  group_by("Species", maintain_order = TRUE)$
+  agg(pl$col(pl$Float64)$first() + 5)$
+  profile()
+
+# -2-  map each Species-group of each numeric column with an R function, takes ~7000us (slow!)
+
+# some R function, prints `.` for each time called by polars
+r_func <- \(s) {
+  cat(".")
+  s$to_r()[1] + 5
+}
+
+as_polars_lf(iris)$
+  sort("Sepal.Length")$
+  group_by("Species", maintain_order = TRUE)$
+  agg(pl$col(pl$Float64)$map_elements(r_func))$
+  profile()
+}
+\seealso{
+\itemize{
+\item \code{\link[=LazyFrame_collect]{$collect()}} - regular collect.
+\item \code{\link[=LazyFrame_collect_in_background]{$collect_in_background()}} - non-blocking
+collect returns a future handle. Can also just be used via
+\verb{$collect(collect_in_background = TRUE)}.
+\item \code{\link[=LazyFrame_sink_parquet]{$sink_parquet()}} streams query to a parquet file.
+\item \code{\link[=LazyFrame_sink_ipc]{$sink_ipc()}} streams query to an arrow file.
+}
+
+\itemize{
+\item \code{\link[=lazyframe__collect]{$collect()}} - regular collect.
+\item \code{\link[=lazyframe__collect_in_background]{$collect_in_background()}} - non-blocking
+collect returns a future handle. Can also just be used via
+\verb{$collect(collect_in_background = TRUE)}.
+\item \code{\link[=lazyframe__sink_parquet]{$sink_parquet()}} streams query to a parquet file.
+\item \code{\link[=lazyframe__sink_ipc]{$sink_ipc()}} streams query to an arrow file.
+}
+}
diff --git a/man/lazyframe__quantile.Rd b/man/lazyframe__quantile.Rd
new file mode 100644
index 00000000..7621d742
--- /dev/null
+++ b/man/lazyframe__quantile.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__quantile}
+\alias{lazyframe__quantile}
+\title{Aggregate the columns in the LazyFrame to a unique quantile value}
+\usage{
+lazyframe__quantile(
+  quantile,
+  interpolation = c("nearest", "higher", "lower", "midpoint", "linear")
+)
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Aggregate the columns in the LazyFrame to a unique quantile value
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$quantile(0.7)$collect()
+}
diff --git a/man/lazyframe__rename.Rd b/man/lazyframe__rename.Rd
new file mode 100644
index 00000000..454f7e99
--- /dev/null
+++ b/man/lazyframe__rename.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__rename}
+\alias{lazyframe__rename}
+\title{Rename column names}
+\usage{
+lazyframe__rename(..., .strict = TRUE)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Either a function that takes
+a character vector as input and returns a character vector as output, or
+named values where names are old column names and values are the new ones.}
+
+\item{.strict}{Validate that all column names exist in the current schema,
+and throw an error if any do not. (Note that this parameter is a no-op when
+passing a function to \code{...}).}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Rename column names
+}
+\details{
+If existing names are swapped (e.g. 'A' points to 'B' and 'B' points to
+'A'), polars will block projection and predicate pushdowns at this node.
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = 1:3,
+  bar = 6:8,
+  ham = letters[1:3]
+)
+
+lf$rename(foo = "apple")$collect()
+
+lf$rename(
+  \(column_name) paste0("c", substr(column_name, 2, 100))
+)$collect()
+}
diff --git a/man/lazyframe__reverse.Rd b/man/lazyframe__reverse.Rd
new file mode 100644
index 00000000..d9675b14
--- /dev/null
+++ b/man/lazyframe__reverse.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__reverse}
+\alias{lazyframe__reverse}
+\title{Reverse the LazyFrame}
+\usage{
+lazyframe__reverse()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Reverse the LazyFrame
+}
+\examples{
+lf <- pl$LazyFrame(key = c("a", "b", "c"), val = 1:3)
+lf$reverse()$collect()
+}
diff --git a/man/lazyframe__rolling.Rd b/man/lazyframe__rolling.Rd
new file mode 100644
index 00000000..62768257
--- /dev/null
+++ b/man/lazyframe__rolling.Rd
@@ -0,0 +1,88 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__rolling}
+\alias{lazyframe__rolling}
+\title{Create rolling groups based on a date/time or integer column}
+\usage{
+lazyframe__rolling(
+  index_column,
+  ...,
+  period,
+  offset = NULL,
+  closed = "right",
+  group_by = NULL
+)
+}
+\arguments{
+\item{index_column}{Column used to group based on the time window. Often of
+type Date/Datetime. This column must be sorted in ascending order (or, if
+\code{group_by} is specified, then it must be sorted in ascending order within
+each group).
+In case of a dynamic group by on indices, the data type needs to be either
+Int32 or Int64. Note that Int32 gets temporarily cast to Int64, so if
+performance matters, use an Int64 column.}
+
+\item{...}{Dots which should be empty.}
+
+\item{period}{Length of the window - must be non-negative.}
+
+\item{offset}{Offset of the window. Default is \code{-period}.}
+
+\item{closed}{Define which sides of the interval are closed (inclusive).
+Default is \code{"right"}.}
+}
+\value{
+A \link[=LazyGroupBy_class]{LazyGroupBy} object
+}
+\description{
+Different from \code{group_by_dynamic}, the windows are now determined by the
+individual values and are not of constant intervals. For constant intervals
+use \code{\link[=lazyframe__group_by_dynamic]{<LazyFrame>$group_by_dynamic()}}.
+
+If you have a time series \verb{<t_0, t_1, ..., t_n>}, then by default the
+windows created will be:
+\itemize{
+\item \verb{(t_0 - period, t_0]}
+\item \verb{(t_1 - period, t_1]}
+\item …
+\item \verb{(t_n - period, t_n]}
+}
+
+whereas if you pass a non-default \code{offset}, then the windows will be:
+\itemize{
+\item \verb{(t_0 + offset, t_0 + offset + period]}
+\item \verb{(t_1 + offset, t_1 + offset + period]}
+\item …
+\item \verb{(t_n + offset, t_n + offset + period]}
+}
+}
+\details{
+If you want to compute multiple aggregation statistics over the same dynamic
+window, consider using \code{\link[=expr__rolling]{$rolling()}} - this method can cache
+the window size computation.
+}
+\examples{
+dates <- c(
+  "2020-01-01 13:45:48",
+  "2020-01-01 16:42:13",
+  "2020-01-01 16:45:09",
+  "2020-01-02 18:12:48",
+  "2020-01-03 19:45:32",
+  "2020-01-08 23:16:43"
+)
+
+df <- pl$LazyFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$with_columns(
+  pl$col("dt")$str$strptime(pl$Datetime())
+)
+
+df$rolling(index_column = "dt", period = "2d")$agg(
+  sum_a = pl$col("a")$sum(),
+  min_a = pl$col("a")$min(),
+  max_a = pl$col("a")$max()
+)$collect()
+}
+\seealso{
+\itemize{
+\item \code{\link[=lazyframe__group_by_dynamic]{<LazyFrame>$group_by_dynamic()}}
+}
+}
diff --git a/man/lazyframe__select_seq.Rd b/man/lazyframe__select_seq.Rd
new file mode 100644
index 00000000..eec4c8fc
--- /dev/null
+++ b/man/lazyframe__select_seq.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__select_seq}
+\alias{lazyframe__select_seq}
+\title{Select columns from this LazyFrame}
+\usage{
+lazyframe__select_seq(...)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}>
+Name-value pairs of objects to be converted to polars \link[=Expr]{expressions}
+by the \code{\link[=as_polars_expr]{as_polars_expr()}} function.
+Characters are parsed as column names, other non-expression inputs are parsed as \link[=pl__lit]{literals}.
+Each name will be used as the expression name.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+This will run all expressions sequentially instead of in parallel. Use this
+when the work per expression is cheap.
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = 1:3,
+  bar = 6:8,
+  ham = letters[1:3]
+)
+lf$select_seq("foo")$collect()
+}
diff --git a/man/lazyframe__serialize.Rd b/man/lazyframe__serialize.Rd
new file mode 100644
index 00000000..f532da6c
--- /dev/null
+++ b/man/lazyframe__serialize.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__serialize}
+\alias{lazyframe__serialize}
+\title{Serialize the logical plan of this LazyFrame to a string in JSON format}
+\usage{
+lazyframe__serialize()
+}
+\value{
+A character value
+}
+\description{
+Serialize the logical plan of this LazyFrame to a string in JSON format
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:3)$sum()
+lf$serialize()
+}
diff --git a/man/lazyframe__set_sorted.Rd b/man/lazyframe__set_sorted.Rd
new file mode 100644
index 00000000..bf53468a
--- /dev/null
+++ b/man/lazyframe__set_sorted.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__set_sorted}
+\alias{lazyframe__set_sorted}
+\title{Indicate that one or multiple columns are sorted}
+\usage{
+lazyframe__set_sorted(column, ..., descending = FALSE)
+}
+\arguments{
+\item{column}{Columns that are sorted.}
+
+\item{...}{Dots which should be empty.}
+
+\item{descending}{Whether the columns are sorted in descending order.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+This can speed up future operations, but it can lead to incorrect results if
+the data is \strong{not} sorted! Use with care!
+}
diff --git a/man/lazyframe__shift.Rd b/man/lazyframe__shift.Rd
new file mode 100644
index 00000000..e4fafbb2
--- /dev/null
+++ b/man/lazyframe__shift.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__shift}
+\alias{lazyframe__shift}
+\title{Shift values by the given number of indices}
+\usage{
+lazyframe__shift(n = 1, ..., fill_value = NULL)
+}
+\arguments{
+\item{n}{Number of indices to shift forward. If a negative value is passed,
+values are shifted in the opposite direction instead.}
+
+\item{...}{Dots which should be empty.}
+
+\item{fill_value}{Fill the resulting null values with this value. Accepts
+expression input. Non-expression inputs are parsed as literals.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Shift values by the given number of indices
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = 5:8)
+
+# By default, values are shifted forward by one index.
+lf$shift()$collect()
+
+# Pass a negative value to shift in the opposite direction instead.
+lf$shift(-2)$collect()
+
+# Specify fill_value to fill the resulting null values.
+lf$shift(-2, fill_value = 100)$collect()
+}
diff --git a/man/lazyframe__sink_csv.Rd b/man/lazyframe__sink_csv.Rd
new file mode 100644
index 00000000..98202d20
--- /dev/null
+++ b/man/lazyframe__sink_csv.Rd
@@ -0,0 +1,137 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__sink_csv}
+\alias{lazyframe__sink_csv}
+\title{Evaluate the query in streaming mode and write to a CSV file}
+\usage{
+lazyframe__sink_csv(
+  path,
+  ...,
+  include_bom = FALSE,
+  include_header = TRUE,
+  separator = ",",
+  line_terminator = "\\n",
+  quote_char = "\\"",
+  batch_size = 1024,
+  datetime_format = NULL,
+  date_format = NULL,
+  time_format = NULL,
+  float_precision = NULL,
+  null_value = "",
+  quote_style = "necessary",
+  maintain_order = TRUE,
+  type_coercion = TRUE,
+  predicate_pushdown = TRUE,
+  projection_pushdown = TRUE,
+  simplify_expression = TRUE,
+  slice_pushdown = TRUE,
+  no_optimization = FALSE,
+  storage_options = NULL,
+  retries = 2
+)
+}
+\arguments{
+\item{path}{A character. File path to which the file should be written.}
+
+\item{...}{Dots which should be empty.}
+
+\item{include_bom}{Logical, whether to include UTF-8 BOM in the CSV output.}
+
+\item{include_header}{Logical, whether to include header in the CSV output.}
+
+\item{separator}{Separate CSV fields with this symbol.}
+
+\item{line_terminator}{String used to end each row.}
+
+\item{quote_char}{Byte to use as quoting character.}
+
+\item{batch_size}{Number of rows that will be processed per thread.}
+
+\item{datetime_format}{A format string, with the specifiers defined by the
+\href{https://docs.rs/chrono/latest/chrono/format/strftime/index.html}{chrono}
+Rust crate. If no format specified, the default fractional-second precision
+is inferred from the maximum timeunit found in the frame’s Datetime cols (if
+any).}
+
+\item{date_format}{A format string, with the specifiers defined by the
+\href{https://docs.rs/chrono/latest/chrono/format/strftime/index.html}{chrono}
+Rust crate.}
+
+\item{time_format}{A format string, with the specifiers defined by the
+\href{https://docs.rs/chrono/latest/chrono/format/strftime/index.html}{chrono}
+Rust crate.}
+
+\item{float_precision}{Number of decimal places to write, applied to both
+Float32 and Float64 datatypes.}
+
+\item{null_value}{A string representing null values (defaulting to the empty
+string).}
+
+\item{quote_style}{Determines the quoting strategy used. Must be one of:
+\itemize{
+\item \code{"necessary"} (default): This puts quotes around fields only when
+necessary. They are necessary when fields contain a quote, delimiter or
+record terminator. Quotes are also necessary when writing an empty record
+(which is indistinguishable from a record with one empty field). This is
+the default.
+\item \code{"always"}: This puts quotes around every field. Always.
+\item \code{"never"}: This never puts quotes around fields, even if that results in
+invalid CSV data (e.g.: by not quoting strings containing the separator).
+\item \code{"non_numeric"}: This puts quotes around all fields that are non-numeric.
+Namely, when writing a field that does not parse as a valid float or
+integer, then quotes will be used even if they aren't strictly necessary.
+}}
+
+\item{maintain_order}{Maintain the order in which data is processed. Setting
+this to \code{FALSE} will be slightly faster.}
+
+\item{type_coercion}{A logical, indicates type coercion optimization.}
+
+\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.}
+
+\item{projection_pushdown}{A logical, indicates projection pushdown optimization.}
+
+\item{simplify_expression}{A logical, indicates simplify expression optimization.}
+
+\item{slice_pushdown}{A logical, indicates slice pushdown optimization.}
+
+\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
+
+\item{storage_options}{Named vector containing options that indicate how to
+connect to a cloud provider. The cloud providers currently supported are
+AWS, GCP, and Azure.
+See supported keys here:
+\itemize{
+\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws}
+\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp}
+\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure}
+\item Hugging Face (\verb{hf://}): Accepts an API key under the token parameter
+\code{c(token = YOUR_TOKEN)} or by setting the \code{HF_TOKEN} environment
+variable.
+}
+
+If \code{storage_options} is not provided, Polars will try to infer the
+information from environment variables.}
+
+\item{retries}{Number of retries if accessing a cloud instance fails.}
+}
+\value{
+Invisibly returns the input LazyFrame
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+
+This allows streaming results that are larger than RAM to be written to disk.
+}
+\examples{
+# sink table 'mtcars' from mem to CSV
+tmpf <- tempfile()
+pl$LazyFrame(mtcars)$sink_csv(tmpf)
+
+# stream a query end-to-end
+tmpf2 <- tempfile()
+pl$scan_csv(tmpf)$select(pl$col("cyl") * 2)$sink_csv(tmpf2)
+
+# load CSV directly into a DataFrame / memory
+pl$scan_csv(tmpf2)$collect()
+}
diff --git a/man/lazyframe__sink_ipc.Rd b/man/lazyframe__sink_ipc.Rd
new file mode 100644
index 00000000..c9c509a9
--- /dev/null
+++ b/man/lazyframe__sink_ipc.Rd
@@ -0,0 +1,86 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__sink_ipc}
+\alias{lazyframe__sink_ipc}
+\title{Evaluate the query in streaming mode and write to an IPC file}
+\usage{
+lazyframe__sink_ipc(
+  path,
+  ...,
+  compression = c("zstd", "lz4", "uncompressed"),
+  maintain_order = TRUE,
+  type_coercion = TRUE,
+  predicate_pushdown = TRUE,
+  projection_pushdown = TRUE,
+  simplify_expression = TRUE,
+  slice_pushdown = TRUE,
+  no_optimization = FALSE,
+  storage_options = NULL,
+  retries = 2
+)
+}
+\arguments{
+\item{path}{A character. File path to which the file should be written.}
+
+\item{...}{Dots which should be empty.}
+
+\item{compression}{\code{NULL} or one of:
+\itemize{
+\item \code{"uncompressed"}: same as \code{NULL}.
+\item \code{"lz4"}: fast compression/decompression.
+\item \code{"zstd"}: good compression performance.
+}}
+
+\item{maintain_order}{Maintain the order in which data is processed. Setting
+this to \code{FALSE} will be slightly faster.}
+
+\item{type_coercion}{A logical, indicates type coercion optimization.}
+
+\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.}
+
+\item{projection_pushdown}{A logical, indicates projection pushdown optimization.}
+
+\item{simplify_expression}{A logical, indicates simplify expression optimization.}
+
+\item{slice_pushdown}{A logical, indicates slice pushdown optimization.}
+
+\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
+
+\item{storage_options}{Named vector containing options that indicate how to
+connect to a cloud provider. The cloud providers currently supported are
+AWS, GCP, and Azure.
+See supported keys here:
+\itemize{
+\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws}
+\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp}
+\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure}
+\item Hugging Face (\verb{hf://}): Accepts an API key under the token parameter
+\code{c(token = YOUR_TOKEN)} or by setting the \code{HF_TOKEN} environment
+variable.
+}
+
+If \code{storage_options} is not provided, Polars will try to infer the
+information from environment variables.}
+
+\item{retries}{Number of retries if accessing a cloud instance fails.}
+}
+\value{
+Invisibly returns the input LazyFrame
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+
+This allows streaming results that are larger than RAM to be written to disk.
+}
+\examples{
+# sink table 'mtcars' from mem to ipc
+tmpf <- tempfile()
+as_polars_lf(mtcars)$sink_ipc(tmpf)
+
+# stream a query end-to-end (not supported yet, https://github.com/pola-rs/polars/issues/1040)
+# tmpf2 = tempfile()
+# pl$scan_ipc(tmpf)$select(pl$col("cyl") * 2)$sink_ipc(tmpf2)
+
+# load ipc directly into a DataFrame / memory
+# pl$scan_ipc(tmpf2)$collect()
+}
diff --git a/man/lazyframe__sink_ndjson.Rd b/man/lazyframe__sink_ndjson.Rd
new file mode 100644
index 00000000..218c8e2e
--- /dev/null
+++ b/man/lazyframe__sink_ndjson.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__sink_ndjson}
+\alias{lazyframe__sink_ndjson}
+\title{Evaluate the query in streaming mode and write to an NDJSON file}
+\usage{
+lazyframe__sink_ndjson(
+  path,
+  ...,
+  maintain_order = TRUE,
+  type_coercion = TRUE,
+  predicate_pushdown = TRUE,
+  projection_pushdown = TRUE,
+  simplify_expression = TRUE,
+  slice_pushdown = TRUE,
+  no_optimization = FALSE,
+  storage_options = NULL,
+  retries = 2
+)
+}
+\arguments{
+\item{path}{A character. File path to which the file should be written.}
+
+\item{...}{Dots which should be empty.}
+
+\item{maintain_order}{Maintain the order in which data is processed. Setting
+this to \code{FALSE} will be slightly faster.}
+
+\item{type_coercion}{A logical, indicates type coercion optimization.}
+
+\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.}
+
+\item{projection_pushdown}{A logical, indicates projection pushdown optimization.}
+
+\item{simplify_expression}{A logical, indicates simplify expression optimization.}
+
+\item{slice_pushdown}{A logical, indicates slice pushdown optimization.}
+
+\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
+
+\item{storage_options}{Named vector containing options that indicate how to
+connect to a cloud provider. The cloud providers currently supported are
+AWS, GCP, and Azure.
+See supported keys here:
+\itemize{
+\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws}
+\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp}
+\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure}
+\item Hugging Face (\verb{hf://}): Accepts an API key under the token parameter
+\code{c(token = YOUR_TOKEN)} or by setting the \code{HF_TOKEN} environment
+variable.
+}
+
+If \code{storage_options} is not provided, Polars will try to infer the
+information from environment variables.}
+
+\item{retries}{Number of retries if accessing a cloud instance fails.}
+}
+\value{
+Invisibly returns the input LazyFrame
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+
+This allows streaming results that are larger than RAM to be written to disk.
+}
+\examples{
+# sink table 'mtcars' from mem to NDJSON
+tmpf <- tempfile(fileext = ".ndjson")
+pl$LazyFrame(mtcars)$sink_ndjson(tmpf)
+
+# load NDJSON directly into a DataFrame / memory
+pl$scan_ndjson(tmpf)$collect()
+}
diff --git a/man/lazyframe__sink_parquet.Rd b/man/lazyframe__sink_parquet.Rd
new file mode 100644
index 00000000..2e04bd4a
--- /dev/null
+++ b/man/lazyframe__sink_parquet.Rd
@@ -0,0 +1,122 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__sink_parquet}
+\alias{lazyframe__sink_parquet}
+\title{Evaluate the query in streaming mode and write to a Parquet file}
+\usage{
+lazyframe__sink_parquet(
+  path,
+  ...,
+  compression = "zstd",
+  compression_level = 3,
+  statistics = TRUE,
+  row_group_size = NULL,
+  data_page_size = NULL,
+  maintain_order = TRUE,
+  type_coercion = TRUE,
+  predicate_pushdown = TRUE,
+  projection_pushdown = TRUE,
+  simplify_expression = TRUE,
+  slice_pushdown = TRUE,
+  no_optimization = FALSE,
+  storage_options = NULL,
+  retries = 2
+)
+}
+\arguments{
+\item{path}{A character. File path to which the file should be written.}
+
+\item{...}{Dots which should be empty.}
+
+\item{compression}{The compression method. Must be one of:
+\itemize{
+\item \code{"lz4"}: fast compression/decompression.
+\item \code{"uncompressed"}
+\item \code{"snappy"}: this guarantees that the parquet file will be compatible with
+older parquet readers.
+\item \code{"gzip"}
+\item \code{"lzo"}
+\item \code{"brotli"}
+\item \code{"zstd"}: good compression performance.
+}}
+
+\item{compression_level}{\code{NULL} or integer. The level of compression to use.
+Only used if method is one of \code{"gzip"}, \code{"brotli"}, or \code{"zstd"}. Higher
+compression means smaller files on disk:
+\itemize{
+\item \code{"gzip"}: min-level: 0, max-level: 10.
+\item \code{"brotli"}: min-level: 0, max-level: 11.
+\item \code{"zstd"}: min-level: 1, max-level: 22.
+}}
+
+\item{statistics}{Whether statistics should be written to the Parquet
+headers. Possible values:
+\itemize{
+\item \code{TRUE}: enable default set of statistics (default)
+\item \code{FALSE}: disable all statistics
+\item \code{"full"}: calculate and write all available statistics.
+\item A named list where all values must be \code{TRUE} or \code{FALSE}, e.g.
+\code{list(min = TRUE, max = FALSE)}. Statistics available are \code{"min"}, \code{"max"},
+\code{"distinct_count"}, \code{"null_count"}.
+}}
+
+\item{row_group_size}{Size of the row groups in number of rows. If \code{NULL}
+(default), the chunks of the DataFrame are used. Writing in smaller chunks
+may reduce memory pressure and improve writing speeds.}
+
+\item{data_page_size}{Size of the data page in bytes. If \code{NULL} (default), it
+is set to 1024^2 bytes.}
+
+\item{maintain_order}{Maintain the order in which data is processed. Setting
+this to \code{FALSE} will be slightly faster.}
+
+\item{type_coercion}{A logical, indicates type coercion optimization.}
+
+\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.}
+
+\item{projection_pushdown}{A logical, indicates projection pushdown optimization.}
+
+\item{simplify_expression}{A logical, indicates simplify expression optimization.}
+
+\item{slice_pushdown}{A logical, indicates slice pushdown optimization.}
+
+\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
+
+\item{storage_options}{Named vector containing options that indicate how to
+connect to a cloud provider. The cloud providers currently supported are
+AWS, GCP, and Azure.
+See supported keys here:
+\itemize{
+\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws}
+\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp}
+\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure}
+\item Hugging Face (\verb{hf://}): Accepts an API key under the token parameter
+\code{c(token = YOUR_TOKEN)} or by setting the \code{HF_TOKEN} environment
+variable.
+}
+
+If \code{storage_options} is not provided, Polars will try to infer the
+information from environment variables.}
+
+\item{retries}{Number of retries if accessing a cloud instance fails.}
+}
+\value{
+Invisibly returns the input LazyFrame
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+
+This allows streaming results that are larger than RAM to be written to disk.
+}
+\examples{
+# sink table 'mtcars' from mem to parquet
+tmpf <- tempfile()
+as_polars_lf(mtcars)$sink_parquet(tmpf)
+
+# stream a query end-to-end
+tmpf2 <- tempfile()
+pl$scan_parquet(tmpf)$select(pl$col("cyl") * 2)$sink_parquet(tmpf2)
+
+# load parquet directly into a DataFrame / memory
+pl$scan_parquet(tmpf2)$collect()
+}
diff --git a/man/lazyframe__slice.Rd b/man/lazyframe__slice.Rd
new file mode 100644
index 00000000..c268a768
--- /dev/null
+++ b/man/lazyframe__slice.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__slice}
+\alias{lazyframe__slice}
+\title{Get a slice of the LazyFrame.}
+\usage{
+lazyframe__slice(offset, length = NULL)
+}
+\arguments{
+\item{offset}{Start index. Negative indexing is supported.}
+
+\item{length}{Length of the slice. If \code{NULL} (default), all rows starting at
+the offset will be selected.}
+}
+\value{
+A \link[=lazyframe__class]{LazyFrame}
+}
+\description{
+Get a slice of the LazyFrame.
+}
+\examples{
+lf <- pl$LazyFrame(x = c("a", "b", "c"), y = 1:3, z = 4:6)
+lf$slice(1, 2)$collect()
+}
diff --git a/man/lazyframe__sort.Rd b/man/lazyframe__sort.Rd
new file mode 100644
index 00000000..cd537530
--- /dev/null
+++ b/man/lazyframe__sort.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__sort}
+\alias{lazyframe__sort}
+\title{Sort the LazyFrame by the given columns}
+\usage{
+lazyframe__sort(
+  ...,
+  descending = FALSE,
+  nulls_last = FALSE,
+  multithreaded = TRUE,
+  maintain_order = FALSE
+)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Column(s) to sort by. Can be
+character values indicating column names or Expr(s).}
+
+\item{descending}{Sort in descending order. When sorting by multiple
+columns, this can be specified per column by passing a logical vector.}
+
+\item{nulls_last}{Place null values last. When sorting by multiple
+columns, this can be specified per column by passing a logical vector.}
+
+\item{multithreaded}{Sort using multiple threads.}
+
+\item{maintain_order}{Whether the order should be maintained if elements are
+equal. If \code{TRUE}, streaming is not possible and performance might be worse
+since this requires a stable search.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Sort the LazyFrame by the given columns
+}
+\examples{
+lf <- pl$LazyFrame(
+  a = c(1, 2, NA, 4),
+  b = c(6, 5, 4, 3),
+  c = c("a", "c", "b", "a")
+)
+
+# Pass a single column name to sort by that column.
+lf$sort("a")$collect()
+
+# Sorting by expressions is also supported
+lf$sort(pl$col("a") + pl$col("b") * 2, nulls_last = TRUE)$collect()
+
+# Sort by multiple columns by passing a vector of columns
+lf$sort(c("c", "a"), descending = TRUE)$collect()
+
+# Or use positional arguments to sort by multiple columns in the same way
+lf$sort("c", "a", descending = c(FALSE, TRUE))$collect()
+}
diff --git a/man/lazyframe__std.Rd b/man/lazyframe__std.Rd
new file mode 100644
index 00000000..e69e9d76
--- /dev/null
+++ b/man/lazyframe__std.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__std}
+\alias{lazyframe__std}
+\title{Aggregate the columns of this LazyFrame to their standard deviation values}
+\usage{
+lazyframe__std(ddof = 1)
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Aggregate the columns of this LazyFrame to their standard deviation values
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$std()$collect()
+lf$std(ddof = 0)$collect()
+}
diff --git a/man/lazyframe__sum.Rd b/man/lazyframe__sum.Rd
new file mode 100644
index 00000000..b1391c71
--- /dev/null
+++ b/man/lazyframe__sum.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__sum}
+\alias{lazyframe__sum}
+\title{Aggregate the columns of this LazyFrame to their sum values}
+\usage{
+lazyframe__sum()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Aggregate the columns of this LazyFrame to their sum values
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$sum()$collect()
+}
diff --git a/man/lazyframe__tail.Rd b/man/lazyframe__tail.Rd
new file mode 100644
index 00000000..aebd3e0f
--- /dev/null
+++ b/man/lazyframe__tail.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__tail}
+\alias{lazyframe__tail}
+\title{Get the last \code{n} rows}
+\usage{
+lazyframe__tail(n = 5L)
+
+lazyframe__tail(n = 5L)
+}
+\arguments{
+\item{n}{Number of rows to return.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Get the last \code{n} rows
+
+Get the last \code{n} rows.
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:6, b = 7:12)
+lf$tail()$collect()
+lf$tail(2)$collect()
+lf <- pl$LazyFrame(a = 1:6, b = 7:12)
+
+lf$tail()$collect()
+
+lf$tail(2)$collect()
+}
+\seealso{
+\code{\link[=lazyframe__head]{<LazyFrame>$head()}}
+}
diff --git a/man/lazyframe__to_dot.Rd b/man/lazyframe__to_dot.Rd
new file mode 100644
index 00000000..eacace78
--- /dev/null
+++ b/man/lazyframe__to_dot.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__to_dot}
+\alias{lazyframe__to_dot}
+\title{Plot the query plan}
+\usage{
+lazyframe__to_dot(
+  ...,
+  optimized = TRUE,
+  type_coercion = TRUE,
+  predicate_pushdown = TRUE,
+  projection_pushdown = TRUE,
+  simplify_expression = TRUE,
+  slice_pushdown = TRUE,
+  comm_subplan_elim = TRUE,
+  comm_subexpr_elim = TRUE,
+  cluster_with_columns = TRUE,
+  streaming = FALSE
+)
+}
+\arguments{
+\item{...}{Not used.}
+
+\item{optimized}{Optimize the query plan.}
+
+\item{type_coercion}{A logical, indicates type coercion optimization.}
+
+\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.}
+
+\item{projection_pushdown}{A logical, indicates projection pushdown optimization.}
+
+\item{simplify_expression}{A logical, indicates simplify expression optimization.}
+
+\item{slice_pushdown}{A logical, indicates slice pushdown optimization.}
+
+\item{comm_subplan_elim}{A logical, indicates trying to cache branching subplans that occur on self-joins or unions.}
+
+\item{comm_subexpr_elim}{A logical, indicates trying to cache common subexpressions.}
+
+\item{cluster_with_columns}{A logical, indicates whether to combine sequential independent calls to with_columns.}
+
+\item{streaming}{A logical. If \code{TRUE}, process the query in batches to handle larger-than-memory data.
+If \code{FALSE} (default), the entire query is processed in a single batch.
+Note that streaming mode is considered unstable.
+It may be changed at any point without it being considered a breaking change.}
+}
+\value{
+A character vector
+}
+\description{
+This only returns the "dot" output that can be passed to other packages, such
+as \code{DiagrammeR::grViz()}.
+}
+\examples{
+lf <- pl$LazyFrame(
+  a = c("a", "b", "a", "b", "b", "c"),
+  b = 1:6,
+  c = 6:1
+)
+
+query <- lf$group_by("a", maintain_order = TRUE)$agg(
+  pl$all()$sum()
+)$sort(
+  "a"
+)
+
+query$to_dot() |> cat()
+
+# You could print the graph by using DiagrammeR for example, with
+# query$to_dot() |> DiagrammeR::grViz().
+}
diff --git a/man/lazyframe__top_k.Rd b/man/lazyframe__top_k.Rd
new file mode 100644
index 00000000..60e13e1f
--- /dev/null
+++ b/man/lazyframe__top_k.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__top_k}
+\alias{lazyframe__top_k}
+\title{Return the \code{k} largest rows}
+\usage{
+lazyframe__top_k(k, ..., by, reverse = FALSE)
+}
+\arguments{
+\item{k}{Number of rows to return.}
+
+\item{...}{Dots which should be empty.}
+
+\item{by}{Column(s) used to determine the top rows. Accepts expression
+input. Strings are parsed as column names.}
+
+\item{reverse}{Consider the \code{k} smallest elements of the \code{by} column(s)
+(instead of the \code{k} largest). This can be specified per column by passing a
+sequence of booleans.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Non-null elements are always preferred over null elements, regardless of the
+value of \code{reverse}. The output is not guaranteed to be in any particular
+order, call \code{sort()} after this function if you wish the output to be sorted.
+}
+\examples{
+lf <- pl$LazyFrame(
+  a = c("a", "b", "a", "b", "b", "c"),
+  b = c(2, 1, 1, 3, 2, 1)
+)
+
+# Get the rows which contain the 4 largest values in column b.
+lf$top_k(4, by = "b")$collect()
+
+# Get the rows which contain the 4 largest values when sorting on column a
+# and b.
+lf$top_k(4, by = c("a", "b"))$collect()
+}
diff --git a/man/lazyframe__unique.Rd b/man/lazyframe__unique.Rd
new file mode 100644
index 00000000..22d160d3
--- /dev/null
+++ b/man/lazyframe__unique.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__unique}
+\alias{lazyframe__unique}
+\title{Drop duplicate rows from this LazyFrame}
+\usage{
+lazyframe__unique(
+  subset = NULL,
+  ...,
+  keep = c("any", "none", "first", "last"),
+  maintain_order = FALSE
+)
+}
+\arguments{
+\item{subset}{Column name(s) or selector(s), to consider when identifying
+duplicate rows. If \code{NULL} (default), use all columns.}
+
+\item{...}{Dots which should be empty.}
+
+\item{keep}{Which of the duplicate rows to keep. Must be one of:
+\itemize{
+\item \code{"any"}: does not give any guarantee of which row is kept. This allows
+more optimizations.
+\item \code{"none"}: don’t keep duplicate rows.
+\item \code{"first"}: keep first unique row.
+\item \code{"last"}: keep last unique row.
+}}
+
+\item{maintain_order}{Keep the same order as the original LazyFrame. This is
+more expensive to compute. Setting this to \code{TRUE} blocks the possibility to
+run on the streaming engine.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Drop duplicate rows from this LazyFrame
+}
+\examples{
+lf <- pl$LazyFrame(
+  foo = c(1, 2, 3, 1),
+  bar = c("a", "a", "a", "a"),
+  ham = c("b", "b", "b", "b"),
+)
+lf$unique(maintain_order = TRUE)$collect()
+
+lf$unique(subset = c("bar", "ham"), maintain_order = TRUE)$collect()
+
+lf$unique(keep = "last", maintain_order = TRUE)$collect()
+}
diff --git a/man/lazyframe__unnest.Rd b/man/lazyframe__unnest.Rd
new file mode 100644
index 00000000..844d86f1
--- /dev/null
+++ b/man/lazyframe__unnest.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__unnest}
+\alias{lazyframe__unnest}
+\title{Decompose struct columns into separate columns for each of their fields}
+\usage{
+lazyframe__unnest(...)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Name of the struct column(s)
+that should be unnested.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+The new columns will be inserted into the LazyFrame at the location of the
+struct column.
+}
+\examples{
+lf <- pl$LazyFrame(
+  a = 1:5,
+  b = c("one", "two", "three", "four", "five"),
+  c = 6:10
+)$
+  select(
+  pl$struct("b"),
+  pl$struct(c("a", "c"))$alias("a_and_c")
+)
+lf$collect()
+
+lf$unnest("a_and_c")$collect()
+lf$unnest(pl$col("a_and_c"))$collect()
+}
diff --git a/man/lazyframe__unpivot.Rd b/man/lazyframe__unpivot.Rd
new file mode 100644
index 00000000..5c37f657
--- /dev/null
+++ b/man/lazyframe__unpivot.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__unpivot}
+\alias{lazyframe__unpivot}
+\title{Unpivot a LazyFrame from wide to long format}
+\usage{
+lazyframe__unpivot(
+  on = NULL,
+  ...,
+  index = NULL,
+  variable_name = NULL,
+  value_name = NULL
+)
+}
+\arguments{
+\item{on}{Column(s) to use as value variables. If \code{on} is
+empty all columns that are not in \code{index} will be used.}
+
+\item{...}{Dots which should be empty.}
+
+\item{index}{Columns to use as identifier variables.}
+
+\item{variable_name}{Name to give to the new column containing the names of
+the melted columns. Defaults to \code{"variable"}.
+
+\item{value_name}{Name to give to the new column containing the values of
+the melted columns. Defaults to \code{"value"}.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+This function is useful to massage a LazyFrame into a format where one or
+more columns are identifier variables (\code{index}) while all other columns,
+considered measured variables (\code{on}), are “unpivoted” to the row axis
+leaving just two non-identifier columns, "variable" and "value".
+}
+\examples{
+lf <- pl$LazyFrame(
+  a = c("x", "y", "z"),
+  b = c(1, 3, 5),
+  c = c(2, 4, 6)
+)
+lf$unpivot(index = "a", on = c("b", "c"))$collect()
+}
diff --git a/man/lazyframe__var.Rd b/man/lazyframe__var.Rd
new file mode 100644
index 00000000..4e8c3ab5
--- /dev/null
+++ b/man/lazyframe__var.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__var}
+\alias{lazyframe__var}
+\title{Aggregate the columns in the LazyFrame to their variance value}
+\usage{
+lazyframe__var(ddof = 1)
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Aggregate the columns in the LazyFrame to their variance value
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:4, b = c(1, 2, 1, 1))
+lf$var()$collect()
+lf$var(ddof = 0)$collect()
+}
diff --git a/man/lazyframe__with_columns_seq.Rd b/man/lazyframe__with_columns_seq.Rd
new file mode 100644
index 00000000..59dfc908
--- /dev/null
+++ b/man/lazyframe__with_columns_seq.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__with_columns_seq}
+\alias{lazyframe__with_columns_seq}
+\title{Modify/append column(s) of a LazyFrame}
+\usage{
+lazyframe__with_columns_seq(...)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}>
+Name-value pairs of objects to be converted to polars \link[=Expr]{expressions}
+by the \code{\link[=as_polars_expr]{as_polars_expr()}} function.
+Characters are parsed as column names, other non-expression inputs are parsed as \link[=pl__lit]{literals}.
+Each name will be used as the expression name.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+This will run all expressions sequentially instead of in parallel. Use this
+only when the work per expression is cheap.
+
+Add columns or modify existing ones with expressions. This is similar to
+\code{dplyr::mutate()} as it keeps unmentioned columns (unlike \verb{$select()}).
+
+However, unlike \code{dplyr::mutate()}, one cannot use new variables in subsequent
+expressions in the same \verb{$with_columns_seq()} call. For instance, if you create a
+variable \code{x}, you will only be able to use it in another \verb{$with_columns_seq()}
+or \verb{$select()} call.
+}
+\examples{
+# Pass an expression to add it as a new column.
+lf <- pl$LazyFrame(
+  a = 1:4,
+  b = c(0.5, 4, 10, 13),
+  c = c(TRUE, TRUE, FALSE, TRUE),
+)
+lf$with_columns_seq((pl$col("a")^2)$alias("a^2"))$collect()
+
+# Added columns will replace existing columns with the same name.
+lf$with_columns_seq(a = pl$col("a")$cast(pl$Float64))$collect()
+
+# Multiple columns can be added
+lf$with_columns_seq(
+  (pl$col("a")^2)$alias("a^2"),
+  (pl$col("b") / 2)$alias("b/2"),
+  (pl$col("c")$not())$alias("not c"),
+)$collect()
+
+# Name expression instead of `$alias()`
+lf$with_columns_seq(
+  `a^2` = pl$col("a")^2,
+  `b/2` = pl$col("b") / 2,
+  `not c` = pl$col("c")$not(),
+)$collect()
+
+# Expressions with multiple outputs can automatically be instantiated
+# as Structs by enabling the experimental setting `POLARS_AUTO_STRUCTIFY`:
+if (requireNamespace("withr", quietly = TRUE)) {
+  withr::with_envvar(c(POLARS_AUTO_STRUCTIFY = "1"), {
+    lf$drop("c")$with_columns_seq(
+      diffs = pl$col("a", "b")$diff()$name$suffix("_diff"),
+    )$collect()
+  })
+}
+}
diff --git a/man/lazyframe__with_context.Rd b/man/lazyframe__with_context.Rd
new file mode 100644
index 00000000..4a47d8b4
--- /dev/null
+++ b/man/lazyframe__with_context.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__with_context}
+\alias{lazyframe__with_context}
+\title{Add an external context to the computation graph}
+\usage{
+lazyframe__with_context(other)
+}
+\arguments{
+\item{other}{Data/LazyFrame to have access to. This can be a list of DataFrames
+and LazyFrames.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+This allows expressions to also access columns from DataFrames or LazyFrames
+that are not part of this one.
+}
+\examples{
+lf <- pl$LazyFrame(a = c(1, 2, 3), b = c("a", "c", NA))
+lf_other <- pl$LazyFrame(c = c("foo", "ham"))
+
+lf$with_context(lf_other)$select(
+  pl$col("b") + pl$col("c")$first()
+)$collect()
+
+# Fill nulls with the median from another lazyframe:
+train_lf <- pl$LazyFrame(
+  feature_0 = c(-1.0, 0, 1), feature_1 = c(-1.0, 0, 1)
+)
+test_lf <- pl$LazyFrame(
+  feature_0 = c(-1.0, NA, 1), feature_1 = c(-1.0, 0, 1)
+)
+
+test_lf$with_context(train_lf$select(pl$all()$name$suffix("_train")))$select(
+  pl$col("feature_0")$fill_null(pl$col("feature_0_train")$median())
+)$collect()
+}
diff --git a/man/lazyframe__with_row_index.Rd b/man/lazyframe__with_row_index.Rd
new file mode 100644
index 00000000..2a6fc206
--- /dev/null
+++ b/man/lazyframe__with_row_index.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{lazyframe__with_row_index}
+\alias{lazyframe__with_row_index}
+\title{Add a row index as the first column in the LazyFrame}
+\usage{
+lazyframe__with_row_index(name = "index", offset = 0)
+}
+\arguments{
+\item{name}{Name of the index column.}
+
+\item{offset}{Start the index at this offset. Cannot be negative.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Using this function can have a negative effect on query performance. This
+may, for instance, block predicate pushdown optimization.
+}
+\examples{
+lf <- pl$LazyFrame(x = c(1, 3, 5), y = c(2, 4, 6))
+lf$with_row_index()$collect()
+
+lf$with_row_index("id", offset = 1000)$collect()
+
+# An index column can also be created using the expressions int_range()
+# and len().
+lf$with_columns(
+  index = pl$int_range(pl$len(), dtype = pl$UInt32)
+)$collect()
+}
diff --git a/man/pl.Rd b/man/pl.Rd
index 327dee16..33e6b48d 100644
--- a/man/pl.Rd
+++ b/man/pl.Rd
@@ -5,7 +5,7 @@
 \alias{pl}
 \title{Polars top-level function namespace}
 \format{
-An object of class \code{polars_object} of length 74.
+An object of class \code{polars_object} of length 75.
 }
 \usage{
 pl
diff --git a/man/pl__deserialize_lf.Rd b/man/pl__deserialize_lf.Rd
new file mode 100644
index 00000000..0d71fb6f
--- /dev/null
+++ b/man/pl__deserialize_lf.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lazyframe-frame.R
+\name{pl__deserialize_lf}
+\alias{pl__deserialize_lf}
+\title{Read a logical plan from a JSON string to construct a LazyFrame}
+\usage{
+pl__deserialize_lf(source)
+}
+\arguments{
+\item{source}{String containing the LazyFrame logical plan in JSON format.}
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Read a logical plan from a JSON string to construct a LazyFrame
+}
+\examples{
+lf <- pl$LazyFrame(a = 1:3)$sum()
+ser <- lf$serialize()
+pl$deserialize_lf(ser)
+}
diff --git a/src/init.c b/src/init.c
index 21774650..d05a21b1 100644
--- a/src/init.c
+++ b/src/init.c
@@ -244,6 +244,11 @@ SEXP savvy_when__impl(SEXP c_arg__condition) {
     return handle_result(res);
 }
 
+SEXP savvy_deserialize_lf__impl(SEXP c_arg__json) {
+    SEXP res = savvy_deserialize_lf__ffi(c_arg__json);
+    return handle_result(res);
+}
+
 SEXP savvy_PlRChainedThen_when__impl(SEXP self__, SEXP c_arg__condition) {
     SEXP res = savvy_PlRChainedThen_when__ffi(self__, c_arg__condition);
     return handle_result(res);
@@ -2294,6 +2299,216 @@ SEXP savvy_PlRLazyFrame_with_columns__impl(SEXP self__, SEXP c_arg__exprs) {
     return handle_result(res);
 }
 
+SEXP savvy_PlRLazyFrame_to_dot__impl(SEXP self__, SEXP c_arg__optimized) {
+    SEXP res = savvy_PlRLazyFrame_to_dot__ffi(self__, c_arg__optimized);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_sort__impl(SEXP self__, SEXP c_arg__by_column, SEXP c_arg__descending, SEXP c_arg__nulls_last, SEXP c_arg__maintain_order, SEXP c_arg__multithreaded) {
+    SEXP res = savvy_PlRLazyFrame_sort__ffi(self__, c_arg__by_column, c_arg__descending, c_arg__nulls_last, c_arg__maintain_order, c_arg__multithreaded);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_top_k__impl(SEXP self__, SEXP c_arg__k, SEXP c_arg__by, SEXP c_arg__reverse) {
+    SEXP res = savvy_PlRLazyFrame_top_k__ffi(self__, c_arg__k, c_arg__by, c_arg__reverse);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_bottom_k__impl(SEXP self__, SEXP c_arg__k, SEXP c_arg__by, SEXP c_arg__reverse) {
+    SEXP res = savvy_PlRLazyFrame_bottom_k__ffi(self__, c_arg__k, c_arg__by, c_arg__reverse);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_cache__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_cache__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_profile__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_profile__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_sink_parquet__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__compression, SEXP c_arg__maintain_order, SEXP c_arg__statistics, SEXP c_arg__retries, SEXP c_arg__compression_level, SEXP c_arg__row_group_size, SEXP c_arg__data_page_size, SEXP c_arg__storage_options) {
+    SEXP res = savvy_PlRLazyFrame_sink_parquet__ffi(self__, c_arg__path, c_arg__compression, c_arg__maintain_order, c_arg__statistics, c_arg__retries, c_arg__compression_level, c_arg__row_group_size, c_arg__data_page_size, c_arg__storage_options);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_sink_ipc__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__compression, SEXP c_arg__storage_options) {
+    SEXP res = savvy_PlRLazyFrame_sink_ipc__ffi(self__, c_arg__path, c_arg__maintain_order, c_arg__retries, c_arg__compression, c_arg__storage_options);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_sink_csv__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__include_bom, SEXP c_arg__include_header, SEXP c_arg__separator, SEXP c_arg__line_terminator, SEXP c_arg__quote_char, SEXP c_arg__maintain_order, SEXP c_arg__batch_size, SEXP c_arg__retries, SEXP c_arg__datetime_format, SEXP c_arg__date_format, SEXP c_arg__time_format, SEXP c_arg__float_scientific, SEXP c_arg__float_precision, SEXP c_arg__null_value, SEXP c_arg__quote_style, SEXP c_arg__storage_options) {
+    SEXP res = savvy_PlRLazyFrame_sink_csv__ffi(self__, c_arg__path, c_arg__include_bom, c_arg__include_header, c_arg__separator, c_arg__line_terminator, c_arg__quote_char, c_arg__maintain_order, c_arg__batch_size, c_arg__retries, c_arg__datetime_format, c_arg__date_format, c_arg__time_format, c_arg__float_scientific, c_arg__float_precision, c_arg__null_value, c_arg__quote_style, c_arg__storage_options);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_sink_json__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__storage_options) {
+    SEXP res = savvy_PlRLazyFrame_sink_json__ffi(self__, c_arg__path, c_arg__maintain_order, c_arg__retries, c_arg__storage_options);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_serialize__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_serialize__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_select_seq__impl(SEXP self__, SEXP c_arg__exprs) {
+    SEXP res = savvy_PlRLazyFrame_select_seq__ffi(self__, c_arg__exprs);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_rolling__impl(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__closed, SEXP c_arg__by) {
+    SEXP res = savvy_PlRLazyFrame_rolling__ffi(self__, c_arg__index_column, c_arg__period, c_arg__offset, c_arg__closed, c_arg__by);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_group_by_dynamic__impl(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__every, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__label, SEXP c_arg__include_boundaries, SEXP c_arg__closed, SEXP c_arg__group_by, SEXP c_arg__start_by) {
+    SEXP res = savvy_PlRLazyFrame_group_by_dynamic__ffi(self__, c_arg__index_column, c_arg__every, c_arg__period, c_arg__offset, c_arg__label, c_arg__include_boundaries, c_arg__closed, c_arg__group_by, c_arg__start_by);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_with_context__impl(SEXP self__, SEXP c_arg__contexts) {
+    SEXP res = savvy_PlRLazyFrame_with_context__ffi(self__, c_arg__contexts);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_join_asof__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str) {
+    SEXP res = savvy_PlRLazyFrame_join_asof__ffi(self__, c_arg__other, c_arg__left_on, c_arg__right_on, c_arg__allow_parallel, c_arg__force_parallel, c_arg__suffix, c_arg__coalesce, c_arg__strategy, c_arg__left_by, c_arg__right_by, c_arg__tolerance, c_arg__tolerance_str);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_join__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__join_nulls, SEXP c_arg__how, SEXP c_arg__suffix, SEXP c_arg__validate, SEXP c_arg__coalesce) {
+    SEXP res = savvy_PlRLazyFrame_join__ffi(self__, c_arg__other, c_arg__left_on, c_arg__right_on, c_arg__allow_parallel, c_arg__force_parallel, c_arg__join_nulls, c_arg__how, c_arg__suffix, c_arg__validate, c_arg__coalesce);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_join_where__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__predicates, SEXP c_arg__suffix) {
+    SEXP res = savvy_PlRLazyFrame_join_where__ffi(self__, c_arg__other, c_arg__predicates, c_arg__suffix);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_with_columns_seq__impl(SEXP self__, SEXP c_arg__exprs) {
+    SEXP res = savvy_PlRLazyFrame_with_columns_seq__ffi(self__, c_arg__exprs);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_rename__impl(SEXP self__, SEXP c_arg__existing, SEXP c_arg__new, SEXP c_arg__strict) {
+    SEXP res = savvy_PlRLazyFrame_rename__ffi(self__, c_arg__existing, c_arg__new, c_arg__strict);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_reverse__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_reverse__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_shift__impl(SEXP self__, SEXP c_arg__n, SEXP c_arg__fill_value) {
+    SEXP res = savvy_PlRLazyFrame_shift__ffi(self__, c_arg__n, c_arg__fill_value);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_fill_nan__impl(SEXP self__, SEXP c_arg__fill_value) {
+    SEXP res = savvy_PlRLazyFrame_fill_nan__ffi(self__, c_arg__fill_value);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_fill_null__impl(SEXP self__, SEXP c_arg__fill_value) {
+    SEXP res = savvy_PlRLazyFrame_fill_null__ffi(self__, c_arg__fill_value);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_min__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_min__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_max__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_max__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_sum__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_sum__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_mean__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_mean__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_std__impl(SEXP self__, SEXP c_arg__ddof) {
+    SEXP res = savvy_PlRLazyFrame_std__ffi(self__, c_arg__ddof);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_var__impl(SEXP self__, SEXP c_arg__ddof) {
+    SEXP res = savvy_PlRLazyFrame_var__ffi(self__, c_arg__ddof);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_median__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_median__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_quantile__impl(SEXP self__, SEXP c_arg__quantile, SEXP c_arg__interpolation) {
+    SEXP res = savvy_PlRLazyFrame_quantile__ffi(self__, c_arg__quantile, c_arg__interpolation);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_explode__impl(SEXP self__, SEXP c_arg__column) {
+    SEXP res = savvy_PlRLazyFrame_explode__ffi(self__, c_arg__column);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_null_count__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_null_count__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_unique__impl(SEXP self__, SEXP c_arg__maintain_order, SEXP c_arg__keep, SEXP c_arg__subset) {
+    SEXP res = savvy_PlRLazyFrame_unique__ffi(self__, c_arg__maintain_order, c_arg__keep, c_arg__subset);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_drop_nulls__impl(SEXP self__, SEXP c_arg__subset) {
+    SEXP res = savvy_PlRLazyFrame_drop_nulls__ffi(self__, c_arg__subset);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_unpivot__impl(SEXP self__, SEXP c_arg__on, SEXP c_arg__index, SEXP c_arg__value_name, SEXP c_arg__variable_name) {
+    SEXP res = savvy_PlRLazyFrame_unpivot__ffi(self__, c_arg__on, c_arg__index, c_arg__value_name, c_arg__variable_name);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_with_row_index__impl(SEXP self__, SEXP c_arg__name, SEXP c_arg__offset) {
+    SEXP res = savvy_PlRLazyFrame_with_row_index__ffi(self__, c_arg__name, c_arg__offset);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_clone__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_clone__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_unnest__impl(SEXP self__, SEXP c_arg__columns) {
+    SEXP res = savvy_PlRLazyFrame_unnest__ffi(self__, c_arg__columns);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_count__impl(SEXP self__) {
+    SEXP res = savvy_PlRLazyFrame_count__ffi(self__);
+    return handle_result(res);
+}
+
+SEXP savvy_PlRLazyFrame_merge_sorted__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__key) {
+    SEXP res = savvy_PlRLazyFrame_merge_sorted__ffi(self__, c_arg__other, c_arg__key);
+    return handle_result(res);
+}
+
 SEXP savvy_PlRLazyFrame_new_from_ipc__impl(SEXP c_arg__source, SEXP c_arg__cache, SEXP c_arg__rechunk, SEXP c_arg__try_parse_hive_dates, SEXP c_arg__retries, SEXP c_arg__row_index_offset, SEXP c_arg__n_rows, SEXP c_arg__row_index_name, SEXP c_arg__storage_options, SEXP c_arg__hive_partitioning, SEXP c_arg__hive_schema, SEXP c_arg__file_cache_ttl, SEXP c_arg__include_file_paths) {
     SEXP res = savvy_PlRLazyFrame_new_from_ipc__ffi(c_arg__source, c_arg__cache, c_arg__rechunk, c_arg__try_parse_hive_dates, c_arg__retries, c_arg__row_index_offset, c_arg__n_rows, c_arg__row_index_name, c_arg__storage_options, c_arg__hive_partitioning, c_arg__hive_schema, c_arg__file_cache_ttl, c_arg__include_file_paths);
     return handle_result(res);
@@ -2558,6 +2773,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_time_range__impl", (DL_FUNC) &savvy_time_range__impl, 4},
     {"savvy_time_ranges__impl", (DL_FUNC) &savvy_time_ranges__impl, 4},
     {"savvy_when__impl", (DL_FUNC) &savvy_when__impl, 1},
+    {"savvy_deserialize_lf__impl", (DL_FUNC) &savvy_deserialize_lf__impl, 1},
     {"savvy_PlRChainedThen_when__impl", (DL_FUNC) &savvy_PlRChainedThen_when__impl, 2},
     {"savvy_PlRChainedThen_otherwise__impl", (DL_FUNC) &savvy_PlRChainedThen_otherwise__impl, 2},
     {"savvy_PlRChainedWhen_then__impl", (DL_FUNC) &savvy_PlRChainedWhen_then__impl, 2},
@@ -2968,6 +3184,48 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_PlRLazyFrame_collect_schema__impl", (DL_FUNC) &savvy_PlRLazyFrame_collect_schema__impl, 1},
     {"savvy_PlRLazyFrame_sort_by_exprs__impl", (DL_FUNC) &savvy_PlRLazyFrame_sort_by_exprs__impl, 6},
     {"savvy_PlRLazyFrame_with_columns__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_columns__impl, 2},
+    {"savvy_PlRLazyFrame_to_dot__impl", (DL_FUNC) &savvy_PlRLazyFrame_to_dot__impl, 2},
+    {"savvy_PlRLazyFrame_sort__impl", (DL_FUNC) &savvy_PlRLazyFrame_sort__impl, 6},
+    {"savvy_PlRLazyFrame_top_k__impl", (DL_FUNC) &savvy_PlRLazyFrame_top_k__impl, 4},
+    {"savvy_PlRLazyFrame_bottom_k__impl", (DL_FUNC) &savvy_PlRLazyFrame_bottom_k__impl, 4},
+    {"savvy_PlRLazyFrame_cache__impl", (DL_FUNC) &savvy_PlRLazyFrame_cache__impl, 1},
+    {"savvy_PlRLazyFrame_profile__impl", (DL_FUNC) &savvy_PlRLazyFrame_profile__impl, 1},
+    {"savvy_PlRLazyFrame_sink_parquet__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_parquet__impl, 10},
+    {"savvy_PlRLazyFrame_sink_ipc__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_ipc__impl, 6},
+    {"savvy_PlRLazyFrame_sink_csv__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_csv__impl, 18},
+    {"savvy_PlRLazyFrame_sink_json__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_json__impl, 5},
+    {"savvy_PlRLazyFrame_serialize__impl", (DL_FUNC) &savvy_PlRLazyFrame_serialize__impl, 1},
+    {"savvy_PlRLazyFrame_select_seq__impl", (DL_FUNC) &savvy_PlRLazyFrame_select_seq__impl, 2},
+    {"savvy_PlRLazyFrame_rolling__impl", (DL_FUNC) &savvy_PlRLazyFrame_rolling__impl, 6},
+    {"savvy_PlRLazyFrame_group_by_dynamic__impl", (DL_FUNC) &savvy_PlRLazyFrame_group_by_dynamic__impl, 10},
+    {"savvy_PlRLazyFrame_with_context__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_context__impl, 2},
+    {"savvy_PlRLazyFrame_join_asof__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_asof__impl, 13},
+    {"savvy_PlRLazyFrame_join__impl", (DL_FUNC) &savvy_PlRLazyFrame_join__impl, 11},
+    {"savvy_PlRLazyFrame_join_where__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_where__impl, 4},
+    {"savvy_PlRLazyFrame_with_columns_seq__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_columns_seq__impl, 2},
+    {"savvy_PlRLazyFrame_rename__impl", (DL_FUNC) &savvy_PlRLazyFrame_rename__impl, 4},
+    {"savvy_PlRLazyFrame_reverse__impl", (DL_FUNC) &savvy_PlRLazyFrame_reverse__impl, 1},
+    {"savvy_PlRLazyFrame_shift__impl", (DL_FUNC) &savvy_PlRLazyFrame_shift__impl, 3},
+    {"savvy_PlRLazyFrame_fill_nan__impl", (DL_FUNC) &savvy_PlRLazyFrame_fill_nan__impl, 2},
+    {"savvy_PlRLazyFrame_fill_null__impl", (DL_FUNC) &savvy_PlRLazyFrame_fill_null__impl, 2},
+    {"savvy_PlRLazyFrame_min__impl", (DL_FUNC) &savvy_PlRLazyFrame_min__impl, 1},
+    {"savvy_PlRLazyFrame_max__impl", (DL_FUNC) &savvy_PlRLazyFrame_max__impl, 1},
+    {"savvy_PlRLazyFrame_sum__impl", (DL_FUNC) &savvy_PlRLazyFrame_sum__impl, 1},
+    {"savvy_PlRLazyFrame_mean__impl", (DL_FUNC) &savvy_PlRLazyFrame_mean__impl, 1},
+    {"savvy_PlRLazyFrame_std__impl", (DL_FUNC) &savvy_PlRLazyFrame_std__impl, 2},
+    {"savvy_PlRLazyFrame_var__impl", (DL_FUNC) &savvy_PlRLazyFrame_var__impl, 2},
+    {"savvy_PlRLazyFrame_median__impl", (DL_FUNC) &savvy_PlRLazyFrame_median__impl, 1},
+    {"savvy_PlRLazyFrame_quantile__impl", (DL_FUNC) &savvy_PlRLazyFrame_quantile__impl, 3},
+    {"savvy_PlRLazyFrame_explode__impl", (DL_FUNC) &savvy_PlRLazyFrame_explode__impl, 2},
+    {"savvy_PlRLazyFrame_null_count__impl", (DL_FUNC) &savvy_PlRLazyFrame_null_count__impl, 1},
+    {"savvy_PlRLazyFrame_unique__impl", (DL_FUNC) &savvy_PlRLazyFrame_unique__impl, 4},
+    {"savvy_PlRLazyFrame_drop_nulls__impl", (DL_FUNC) &savvy_PlRLazyFrame_drop_nulls__impl, 2},
+    {"savvy_PlRLazyFrame_unpivot__impl", (DL_FUNC) &savvy_PlRLazyFrame_unpivot__impl, 5},
+    {"savvy_PlRLazyFrame_with_row_index__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_row_index__impl, 3},
+    {"savvy_PlRLazyFrame_clone__impl", (DL_FUNC) &savvy_PlRLazyFrame_clone__impl, 1},
+    {"savvy_PlRLazyFrame_unnest__impl", (DL_FUNC) &savvy_PlRLazyFrame_unnest__impl, 2},
+    {"savvy_PlRLazyFrame_count__impl", (DL_FUNC) &savvy_PlRLazyFrame_count__impl, 1},
+    {"savvy_PlRLazyFrame_merge_sorted__impl", (DL_FUNC) &savvy_PlRLazyFrame_merge_sorted__impl, 3},
     {"savvy_PlRLazyFrame_new_from_ipc__impl", (DL_FUNC) &savvy_PlRLazyFrame_new_from_ipc__impl, 13},
     {"savvy_PlRLazyFrame_new_from_csv__impl", (DL_FUNC) &savvy_PlRLazyFrame_new_from_csv__impl, 30},
     {"savvy_PlRLazyFrame_new_from_parquet__impl", (DL_FUNC) &savvy_PlRLazyFrame_new_from_parquet__impl, 18},
diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml
index e3a85e8f..5e10feef 100644
--- a/src/rust/Cargo.toml
+++ b/src/rust/Cargo.toml
@@ -31,6 +31,7 @@ features = [
     "array_any_all",
     "array_count",
     "array_to_struct",
+    "asof_join",
     "binary_encoding",
     "business",
     "cloud",
@@ -41,6 +42,8 @@ features = [
     "cutqcut",
     "diagonal_concat",
     "diff",
+    "dynamic_group_by",
+    "dot_diagram",
     "dot_product",
     "dtype-full",
     "dynamic_group_by",
@@ -51,6 +54,7 @@ features = [
     "find_many",
     "fused",
     "hist",
+    "iejoin",
     "interpolate",
     "interpolate_by",
     "ipc",
@@ -71,6 +75,7 @@ features = [
     "list_sets",
     "list_to_struct",
     "log",
+    "merge_sorted",
     "meta",
     "mode",
     "moment",
@@ -80,6 +85,7 @@ features = [
     "parquet",
     "pct_change",
     "peaks",
+    "pivot",
     "product",
     "propagate_nans",
     "random",
@@ -94,6 +100,7 @@ features = [
     "round_series",
     "row_hash",
     "search_sorted",
+    "semi_anti_join",
     "serde",
     "serde-lazy",
     "sign",
diff --git a/src/rust/api.h b/src/rust/api.h
index d39fb62b..f51c8bfe 100644
--- a/src/rust/api.h
+++ b/src/rust/api.h
@@ -40,6 +40,7 @@ SEXP savvy_datetime_ranges__ffi(SEXP c_arg__start, SEXP c_arg__end, SEXP c_arg__
 SEXP savvy_time_range__ffi(SEXP c_arg__start, SEXP c_arg__end, SEXP c_arg__every, SEXP c_arg__closed);
 SEXP savvy_time_ranges__ffi(SEXP c_arg__start, SEXP c_arg__end, SEXP c_arg__every, SEXP c_arg__closed);
 SEXP savvy_when__ffi(SEXP c_arg__condition);
+SEXP savvy_deserialize_lf__ffi(SEXP c_arg__json);
 
 // methods and associated functions for PlRChainedThen
 SEXP savvy_PlRChainedThen_when__ffi(SEXP self__, SEXP c_arg__condition);
@@ -462,6 +463,48 @@ SEXP savvy_PlRLazyFrame_cast_all__ffi(SEXP self__, SEXP c_arg__dtype, SEXP c_arg
 SEXP savvy_PlRLazyFrame_collect_schema__ffi(SEXP self__);
 SEXP savvy_PlRLazyFrame_sort_by_exprs__ffi(SEXP self__, SEXP c_arg__by, SEXP c_arg__descending, SEXP c_arg__nulls_last, SEXP c_arg__maintain_order, SEXP c_arg__multithreaded);
 SEXP savvy_PlRLazyFrame_with_columns__ffi(SEXP self__, SEXP c_arg__exprs);
+SEXP savvy_PlRLazyFrame_to_dot__ffi(SEXP self__, SEXP c_arg__optimized);
+SEXP savvy_PlRLazyFrame_sort__ffi(SEXP self__, SEXP c_arg__by_column, SEXP c_arg__descending, SEXP c_arg__nulls_last, SEXP c_arg__maintain_order, SEXP c_arg__multithreaded);
+SEXP savvy_PlRLazyFrame_top_k__ffi(SEXP self__, SEXP c_arg__k, SEXP c_arg__by, SEXP c_arg__reverse);
+SEXP savvy_PlRLazyFrame_bottom_k__ffi(SEXP self__, SEXP c_arg__k, SEXP c_arg__by, SEXP c_arg__reverse);
+SEXP savvy_PlRLazyFrame_cache__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_profile__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_sink_parquet__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__compression, SEXP c_arg__maintain_order, SEXP c_arg__statistics, SEXP c_arg__retries, SEXP c_arg__compression_level, SEXP c_arg__row_group_size, SEXP c_arg__data_page_size, SEXP c_arg__storage_options);
+SEXP savvy_PlRLazyFrame_sink_ipc__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__compression, SEXP c_arg__storage_options);
+SEXP savvy_PlRLazyFrame_sink_csv__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__include_bom, SEXP c_arg__include_header, SEXP c_arg__separator, SEXP c_arg__line_terminator, SEXP c_arg__quote_char, SEXP c_arg__maintain_order, SEXP c_arg__batch_size, SEXP c_arg__retries, SEXP c_arg__datetime_format, SEXP c_arg__date_format, SEXP c_arg__time_format, SEXP c_arg__float_scientific, SEXP c_arg__float_precision, SEXP c_arg__null_value, SEXP c_arg__quote_style, SEXP c_arg__storage_options);
+SEXP savvy_PlRLazyFrame_sink_json__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__storage_options);
+SEXP savvy_PlRLazyFrame_serialize__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_select_seq__ffi(SEXP self__, SEXP c_arg__exprs);
+SEXP savvy_PlRLazyFrame_rolling__ffi(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__closed, SEXP c_arg__by);
+SEXP savvy_PlRLazyFrame_group_by_dynamic__ffi(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__every, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__label, SEXP c_arg__include_boundaries, SEXP c_arg__closed, SEXP c_arg__group_by, SEXP c_arg__start_by);
+SEXP savvy_PlRLazyFrame_with_context__ffi(SEXP self__, SEXP c_arg__contexts);
+SEXP savvy_PlRLazyFrame_join_asof__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str);
+SEXP savvy_PlRLazyFrame_join__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__join_nulls, SEXP c_arg__how, SEXP c_arg__suffix, SEXP c_arg__validate, SEXP c_arg__coalesce);
+SEXP savvy_PlRLazyFrame_join_where__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__predicates, SEXP c_arg__suffix);
+SEXP savvy_PlRLazyFrame_with_columns_seq__ffi(SEXP self__, SEXP c_arg__exprs);
+SEXP savvy_PlRLazyFrame_rename__ffi(SEXP self__, SEXP c_arg__existing, SEXP c_arg__new, SEXP c_arg__strict);
+SEXP savvy_PlRLazyFrame_reverse__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_shift__ffi(SEXP self__, SEXP c_arg__n, SEXP c_arg__fill_value);
+SEXP savvy_PlRLazyFrame_fill_nan__ffi(SEXP self__, SEXP c_arg__fill_value);
+SEXP savvy_PlRLazyFrame_fill_null__ffi(SEXP self__, SEXP c_arg__fill_value);
+SEXP savvy_PlRLazyFrame_min__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_max__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_sum__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_mean__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_std__ffi(SEXP self__, SEXP c_arg__ddof);
+SEXP savvy_PlRLazyFrame_var__ffi(SEXP self__, SEXP c_arg__ddof);
+SEXP savvy_PlRLazyFrame_median__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_quantile__ffi(SEXP self__, SEXP c_arg__quantile, SEXP c_arg__interpolation);
+SEXP savvy_PlRLazyFrame_explode__ffi(SEXP self__, SEXP c_arg__column);
+SEXP savvy_PlRLazyFrame_null_count__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_unique__ffi(SEXP self__, SEXP c_arg__maintain_order, SEXP c_arg__keep, SEXP c_arg__subset);
+SEXP savvy_PlRLazyFrame_drop_nulls__ffi(SEXP self__, SEXP c_arg__subset);
+SEXP savvy_PlRLazyFrame_unpivot__ffi(SEXP self__, SEXP c_arg__on, SEXP c_arg__index, SEXP c_arg__value_name, SEXP c_arg__variable_name);
+SEXP savvy_PlRLazyFrame_with_row_index__ffi(SEXP self__, SEXP c_arg__name, SEXP c_arg__offset);
+SEXP savvy_PlRLazyFrame_clone__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_unnest__ffi(SEXP self__, SEXP c_arg__columns);
+SEXP savvy_PlRLazyFrame_count__ffi(SEXP self__);
+SEXP savvy_PlRLazyFrame_merge_sorted__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__key);
 SEXP savvy_PlRLazyFrame_new_from_ipc__ffi(SEXP c_arg__source, SEXP c_arg__cache, SEXP c_arg__rechunk, SEXP c_arg__try_parse_hive_dates, SEXP c_arg__retries, SEXP c_arg__row_index_offset, SEXP c_arg__n_rows, SEXP c_arg__row_index_name, SEXP c_arg__storage_options, SEXP c_arg__hive_partitioning, SEXP c_arg__hive_schema, SEXP c_arg__file_cache_ttl, SEXP c_arg__include_file_paths);
 SEXP savvy_PlRLazyFrame_new_from_csv__ffi(SEXP c_arg__source, SEXP c_arg__separator, SEXP c_arg__has_header, SEXP c_arg__ignore_errors, SEXP c_arg__skip_rows, SEXP c_arg__cache, SEXP c_arg__missing_utf8_is_empty_string, SEXP c_arg__low_memory, SEXP c_arg__rechunk, SEXP c_arg__skip_rows_after_header, SEXP c_arg__encoding, SEXP c_arg__try_parse_dates, SEXP c_arg__eol_char, SEXP c_arg__raise_if_empty, SEXP c_arg__truncate_ragged_lines, SEXP c_arg__decimal_comma, SEXP c_arg__glob, SEXP c_arg__retries, SEXP c_arg__row_index_offset, SEXP c_arg__comment_prefix, SEXP c_arg__quote_char, SEXP c_arg__null_values, SEXP c_arg__infer_schema_length, SEXP c_arg__row_index_name, SEXP c_arg__n_rows, SEXP c_arg__overwrite_dtype, SEXP c_arg__schema, SEXP c_arg__storage_options, SEXP c_arg__file_cache_ttl, SEXP c_arg__include_file_paths);
 SEXP savvy_PlRLazyFrame_new_from_parquet__ffi(SEXP c_arg__source, SEXP c_arg__cache, SEXP c_arg__parallel, SEXP c_arg__rechunk, SEXP c_arg__low_memory, SEXP c_arg__use_statistics, SEXP c_arg__try_parse_hive_dates, SEXP c_arg__retries, SEXP c_arg__glob, SEXP c_arg__allow_missing_columns, SEXP c_arg__row_index_offset, SEXP c_arg__storage_options, SEXP c_arg__n_rows, SEXP c_arg__row_index_name, SEXP c_arg__hive_partitioning, SEXP c_arg__schema, SEXP c_arg__hive_schema, SEXP c_arg__include_file_paths);
diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index 743a2f38..dce3a5d1 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -4,10 +4,10 @@ use crate::prelude::*;
 use crate::{PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRSeries, RPolarsErr};
 use polars::prelude::cloud::CloudOptions;
 use polars::series::ops::NullBehavior;
-use savvy::{ListSexp, NumericScalar, NumericSexp, NumericTypedSexp, StringSexp, TypedSexp};
+use savvy::{ListSexp, NumericScalar, NumericSexp, NumericTypedSexp, Sexp, StringSexp, TypedSexp};
 use search_sorted::SearchSortedSide;
 pub mod base_date;
-mod chunked_array;
+pub mod chunked_array;
 pub mod clock;
 pub mod data_table;
 
@@ -33,6 +33,25 @@ impl<T> From<T> for Wrap<T> {
     }
 }
 
+impl TryFrom<Sexp> for Wrap<AnyValue<'_>> {
+    type Error = String;
+    /// Wraps the first element of an R vector as an `AnyValue`; empty input is an error.
+    fn try_from(obj: Sexp) -> Result<Self, String> {
+        const EMPTY: &str = "Cannot cast a zero-length vector to AnyValue";
+        let out = match obj.into_typed() {
+            TypedSexp::Integer(x) => AnyValue::Int64(*x.to_vec().first().ok_or(EMPTY)? as i64),
+            TypedSexp::Real(x) => AnyValue::Float64(*x.to_vec().first().ok_or(EMPTY)?),
+            TypedSexp::Logical(x) => AnyValue::Boolean(*x.to_vec().first().ok_or(EMPTY)?),
+            TypedSexp::String(x) => {
+                AnyValue::StringOwned((*x.to_vec().first().ok_or(EMPTY)?).into())
+            }
+            TypedSexp::Null(_) => AnyValue::Null,
+            _ => return Err("Cannot cast to AnyValue".to_string()),
+        };
+        Ok(Wrap(out))
+    }
+}
+
 impl TryFrom<&str> for PlRDataType {
     type Error = String;
 
@@ -85,6 +104,20 @@ impl From<ListSexp> for Wrap<Vec<Option<Vec<u8>>>> {
     }
 }
 
+impl TryFrom<&str> for Wrap<u8> {
+    type Error = String;
+
+    /// Extracts the sole byte of `string`; errors unless it is exactly one byte long.
+    fn try_from(string: &str) -> Result<Self, String> {
+        // Slice patterns cover every length, so no `unreachable!` arm is required.
+        match string.as_bytes() {
+            [byte] => Ok(Wrap(*byte)),
+            [] => Err("cannot extract single byte from empty string".to_string()),
+            _ => Err("multi byte-string not allowed".to_string()),
+        }
+    }
+}
+
 impl TryFrom<ListSexp> for Wrap<Vec<DataFrame>> {
     type Error = savvy::Error;
 
@@ -601,6 +634,196 @@ impl TryFrom<&str> for Wrap<QuantileMethod> {
     }
 }
 
+impl TryFrom<&str> for Wrap<UniqueKeepStrategy> {
+    type Error = String;
+
+    fn try_from(strategy: &str) -> Result<Self, String> {
+        // Each accepted string maps to one variant; anything else is rejected.
+        match strategy {
+            "first" => Ok(Wrap(UniqueKeepStrategy::First)),
+            "last" => Ok(Wrap(UniqueKeepStrategy::Last)),
+            "none" => Ok(Wrap(UniqueKeepStrategy::None)),
+            "any" => Ok(Wrap(UniqueKeepStrategy::Any)),
+            _ => Err("unreachable".to_string()),
+        }
+    }
+}
+
+impl TryFrom<&str> for Wrap<JoinType> {
+    type Error = String;
+
+    fn try_from(how: &str) -> Result<Self, String> {
+        // Each accepted join name maps to one `JoinType`; anything else is rejected.
+        match how {
+            "cross" => Ok(Wrap(JoinType::Cross)),
+            "inner" => Ok(Wrap(JoinType::Inner)),
+            "left" => Ok(Wrap(JoinType::Left)),
+            "right" => Ok(Wrap(JoinType::Right)),
+            "full" => Ok(Wrap(JoinType::Full)),
+            "semi" => Ok(Wrap(JoinType::Semi)),
+            "anti" => Ok(Wrap(JoinType::Anti)),
+            _ => Err("unreachable".to_string()),
+        }
+    }
+}
+
+impl TryFrom<&str> for Wrap<JoinValidation> {
+    type Error = String;
+
+    fn try_from(validation: &str) -> Result<Self, String> {
+        // "m" and "1" spell the many/one side of each relationship.
+        match validation {
+            "m:m" => Ok(Wrap(JoinValidation::ManyToMany)),
+            "1:m" => Ok(Wrap(JoinValidation::OneToMany)),
+            "1:1" => Ok(Wrap(JoinValidation::OneToOne)),
+            "m:1" => Ok(Wrap(JoinValidation::ManyToOne)),
+            _ => Err("unreachable".to_string()),
+        }
+    }
+}
+
+impl TryFrom<&str> for Wrap<Label> {
+    type Error = String;
+
+    fn try_from(label: &str) -> Result<Self, String> {
+        // Each accepted string maps to one `Label` variant; anything else is rejected.
+        match label {
+            "left" => Ok(Wrap(Label::Left)),
+            "right" => Ok(Wrap(Label::Right)),
+            "datapoint" => Ok(Wrap(Label::DataPoint)),
+            _ => Err("unreachable".to_string()),
+        }
+    }
+}
+
+impl TryFrom<&str> for Wrap<StartBy> {
+    type Error = String;
+
+    fn try_from(start_by: &str) -> Result<Self, String> {
+        // Accepts "window", "datapoint" or a lowercase weekday name.
+        match start_by {
+            "window" => Ok(Wrap(StartBy::WindowBound)),
+            "datapoint" => Ok(Wrap(StartBy::DataPoint)),
+            "monday" => Ok(Wrap(StartBy::Monday)),
+            "tuesday" => Ok(Wrap(StartBy::Tuesday)),
+            "wednesday" => Ok(Wrap(StartBy::Wednesday)),
+            "thursday" => Ok(Wrap(StartBy::Thursday)),
+            "friday" => Ok(Wrap(StartBy::Friday)),
+            "saturday" => Ok(Wrap(StartBy::Saturday)),
+            "sunday" => Ok(Wrap(StartBy::Sunday)),
+            _ => Err("unreachable".to_string()),
+        }
+    }
+}
+
+/// Parses a parquet compression name plus optional level into a `ParquetCompression`.
+/// Levels that do not fit the codec's integer type are rejected instead of wrapped.
+pub(crate) fn parse_parquet_compression(
+    compression: &str,
+    compression_level: Option<i32>,
+) -> savvy::Result<ParquetCompression> {
+    // Wraps an error message in a `savvy::Error`.
+    let err = |msg: String| savvy::Error::new(msg.as_str());
+    Ok(match compression {
+        "uncompressed" => ParquetCompression::Uncompressed,
+        "snappy" => ParquetCompression::Snappy,
+        "gzip" => ParquetCompression::Gzip(
+            compression_level
+                .map(|lvl| {
+                    let lvl = u8::try_from(lvl).map_err(|e| err(e.to_string()))?;
+                    GzipLevel::try_new(lvl).map_err(|e| err(format!("{e:?}")))
+                })
+                .transpose()?,
+        ),
+        "lzo" => ParquetCompression::Lzo,
+        "brotli" => ParquetCompression::Brotli(
+            compression_level
+                .map(|lvl| {
+                    let lvl = u32::try_from(lvl).map_err(|e| err(e.to_string()))?;
+                    BrotliLevel::try_new(lvl).map_err(|e| err(format!("{e:?}")))
+                })
+                .transpose()?,
+        ),
+        "lz4" => ParquetCompression::Lz4Raw,
+        "zstd" => ParquetCompression::Zstd(
+            compression_level
+                .map(|lvl| ZstdLevel::try_new(lvl).map_err(|e| err(format!("{e:?}"))))
+                .transpose()?,
+        ),
+        _ => return Err(RPolarsErr::Other("unreachable".to_string()).into()),
+    })
+}
+
+impl TryFrom<ListSexp> for Wrap<StatisticsOptions> {
+    type Error = String;
+
+    fn try_from(statistics: ListSexp) -> Result<Self, String> {
+        // Malformed input yields an `Err` rather than a panic inside the R session.
+        let hm = statistics
+            .iter()
+            .map(|(name, value)| match value.into_typed() {
+                TypedSexp::Logical(val) => val
+                    .to_vec()
+                    .first()
+                    .map(|flag| (name, *flag))
+                    .ok_or_else(|| format!("`statistics` element `{name}` is empty")),
+                _ => Err(format!("`statistics` element `{name}` must be logical")),
+            })
+            .collect::<Result<std::collections::HashMap<&str, bool>, String>>()?;
+        let flag = |key: &str| {
+            hm.get(key).copied().ok_or_else(|| format!("`statistics` must contain `{key}`"))
+        };
+        let mut out = StatisticsOptions::default();
+        out.min_value = flag("min")?;
+        out.max_value = flag("max")?;
+        out.distinct_count = flag("distinct_count")?;
+        out.null_count = flag("null_count")?;
+        Ok(Wrap(out))
+    }
+}
+
+impl TryFrom<&str> for Wrap<IpcCompression> {
+    type Error = String;
+
+    fn try_from(compression: &str) -> Result<Self, String> {
+        // Only "lz4" and "zstd" are recognised here; anything else is rejected.
+        match compression {
+            "lz4" => Ok(Wrap(IpcCompression::LZ4)),
+            "zstd" => Ok(Wrap(IpcCompression::ZSTD)),
+            _ => Err("unreachable".to_string()),
+        }
+    }
+}
+
+impl TryFrom<&str> for Wrap<QuoteStyle> {
+    type Error = String;
+
+    fn try_from(quote_style: &str) -> Result<Self, String> {
+        // Each accepted string maps to one `QuoteStyle`; anything else is rejected.
+        match quote_style {
+            "always" => Ok(Wrap(QuoteStyle::Always)),
+            "necessary" => Ok(Wrap(QuoteStyle::Necessary)),
+            "non_numeric" => Ok(Wrap(QuoteStyle::NonNumeric)),
+            "never" => Ok(Wrap(QuoteStyle::Never)),
+            _ => Err("unreachable".to_string()),
+        }
+    }
+}
+
+impl TryFrom<&str> for Wrap<AsofStrategy> {
+    type Error = String;
+
+    fn try_from(strategy: &str) -> Result<Self, String> {
+        // Each accepted string maps to one `AsofStrategy`; anything else is rejected.
+        match strategy {
+            "forward" => Ok(Wrap(AsofStrategy::Forward)),
+            "backward" => Ok(Wrap(AsofStrategy::Backward)),
+            "nearest" => Ok(Wrap(AsofStrategy::Nearest)),
+            _ => Err("unreachable".to_string()),
+        }
+    }
+}
+
 impl TryFrom<&str> for Wrap<CsvEncoding> {
     type Error = String;
 
diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index fd12c4b1..fed0dd1d 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -1,6 +1,5 @@
-use crate::{
-    prelude::*, PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRLazyGroupBy, RPolarsErr,
-};
+use super::*;
+use crate::{PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRLazyGroupBy, RPolarsErr};
 use polars::io::{HiveOptions, RowIndex};
 use savvy::{
     savvy, ListSexp, LogicalSexp, NumericScalar, OwnedListSexp, OwnedStringSexp, Result, Sexp,
@@ -208,6 +207,721 @@ impl PlRLazyFrame {
         Ok(ldf.with_columns(exprs).into())
     }
 
+    // Returns the output of `LazyFrame::to_dot`, converting any polars error.
+    fn to_dot(&self, optimized: bool) -> Result<String> {
+        Ok(self.ldf.to_dot(optimized).map_err(RPolarsErr::from)?)
+    }
+
+    fn sort(
+        &self,
+        by_column: &str,
+        descending: bool,
+        nulls_last: bool,
+        maintain_order: bool,
+        multithreaded: bool,
+    ) -> Result<Self> {
+        let ldf = self.ldf.clone();
+        Ok(ldf
+            .sort(
+                [by_column],
+                SortMultipleOptions {
+                    descending: vec![descending],
+                    nulls_last: vec![nulls_last],
+                    multithreaded,
+                    maintain_order,
+                    limit: None,
+                },
+            )
+            .into())
+    }
+
+    fn top_k(&self, k: NumericScalar, by: ListSexp, reverse: LogicalSexp) -> Result<Self> {
+        let ldf = self.ldf.clone();
+        let k = <Wrap<u32>>::try_from(k)?.0;
+        let exprs = <Wrap<Vec<Expr>>>::from(by).0;
+        let reverse = reverse.to_vec();
+        Ok(ldf
+            .top_k(
+                k,
+                exprs,
+                SortMultipleOptions::new().with_order_descending_multi(reverse),
+            )
+            .into())
+    }
+
+    fn bottom_k(&self, k: NumericScalar, by: ListSexp, reverse: LogicalSexp) -> Result<Self> {
+        let ldf = self.ldf.clone();
+        let k = <Wrap<u32>>::try_from(k)?.0;
+        let exprs = <Wrap<Vec<Expr>>>::from(by).0;
+        let reverse = reverse.to_vec();
+        Ok(ldf
+            .bottom_k(
+                k,
+                exprs,
+                SortMultipleOptions::new().with_order_descending_multi(reverse),
+            )
+            .into())
+    }
+
+    // Delegates to `LazyFrame::cache` on a clone of the wrapped frame.
+    fn cache(&self) -> Result<Self> {
+        Ok(self.ldf.clone().cache().into())
+    }
+
+    // Runs `LazyFrame::profile` and returns a 2-element list with its two data frames.
+    fn profile(&self) -> Result<Sexp> {
+        use crate::{
+            r_threads::{concurrent_handler, ThreadCom},
+            r_udf::{RUdfReturn, RUdfSignature, CONFIG},
+        };
+        fn serve_r(
+            udf_sig: RUdfSignature,
+        ) -> std::result::Result<RUdfReturn, Box<dyn std::error::Error>> {
+            udf_sig.eval()
+        }
+
+        let ldf = self.ldf.clone();
+        let (data, timings) = if ThreadCom::try_from_global(&CONFIG).is_ok() {
+            // A ThreadCom is already available from CONFIG: profile directly on this thread.
+            ldf.profile().map_err(RPolarsErr::from)?
+        } else {
+            concurrent_handler(
+                // closure 1: spawned by main thread
+                // tc is a ThreadCom which any child thread can use to submit R jobs to main thread
+                move |tc| {
+                    let retval = ldf.profile();
+                    // drop the last two ThreadCom clones, signals to main/R-serving thread to shut down.
+                    ThreadCom::kill_global(&CONFIG);
+                    drop(tc);
+
+                    retval
+                },
+                // closure 2: how to serve polars worker R job request in main thread
+                serve_r,
+                // CONFIG is "global variable" where any new thread can request a clone of ThreadCom to establish contact with main thread
+                &CONFIG,
+            )
+            .map_err(|e| e.to_string())?
+            .map_err(RPolarsErr::from)?
+        };
+
+        let data = <PlRDataFrame>::from(data);
+        let timings = <PlRDataFrame>::from(timings);
+
+        let mut out = OwnedListSexp::new(2, true)?;
+        // SAFETY: `out` was allocated with length 2 just above, so indices 0 and 1 exist.
+        unsafe {
+            let _ = out.set_value_unchecked(0, Sexp::try_from(data)?.0);
+            let _ = out.set_value_unchecked(1, Sexp::try_from(timings)?.0);
+        };
+        Ok(out.into())
+    }
+
+    // Delegates to `LazyFrame::sink_parquet` with options built from the R arguments.
+    // Errors reported by the sink are propagated to the caller, not discarded.
+    fn sink_parquet(
+        &self,
+        path: &str,
+        compression: &str,
+        maintain_order: bool,
+        statistics: ListSexp,
+        retries: NumericScalar,
+        compression_level: Option<NumericScalar>,
+        row_group_size: Option<NumericScalar>,
+        data_page_size: Option<NumericScalar>,
+        storage_options: Option<StringSexp>,
+    ) -> Result<()> {
+        let path: PathBuf = path.into();
+        let statistics = <Wrap<StatisticsOptions>>::try_from(statistics)?.0;
+        let compression_level = compression_level.map(|x| x.as_i32()).transpose()?;
+        let compression = parse_parquet_compression(compression, compression_level)?;
+        let row_group_size: Option<usize> = match row_group_size {
+            Some(x) => Some(<Wrap<usize>>::try_from(x)?.0),
+            None => None,
+        };
+        let data_page_size: Option<usize> = match data_page_size {
+            Some(x) => Some(<Wrap<usize>>::try_from(x)?.0),
+            None => None,
+        };
+        let retries = <Wrap<usize>>::try_from(retries)?.0;
+
+        let options = ParquetWriteOptions {
+            compression,
+            statistics,
+            row_group_size,
+            data_page_size,
+            maintain_order,
+        };
+        // `storage_options`, when given, is converted into (key, value) string pairs.
+        let cloud_options = match storage_options {
+            Some(x) => {
+                let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
+                    RPolarsErr::Other(
+                        "`storage_options` must be a named character vector".to_string(),
+                    )
+                })?;
+                Some(out.0)
+            }
+            None => None,
+        };
+        let cloud_options = {
+            let cloud_options =
+                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
+            Some(cloud_options.with_max_retries(retries))
+        };
+        // `?` surfaces a failed write as an R error instead of returning `Ok(())`.
+        self.ldf
+            .clone()
+            .sink_parquet(&path, options, cloud_options)
+            .map_err(RPolarsErr::from)?;
+        Ok(())
+    }
+
+    // Delegates to `LazyFrame::sink_ipc`; "uncompressed" (or no value) disables compression.
+    // Errors reported by the sink are propagated to the caller, not discarded.
+    fn sink_ipc(
+        &self,
+        path: &str,
+        maintain_order: bool,
+        retries: NumericScalar,
+        compression: Option<&str>,
+        storage_options: Option<StringSexp>,
+    ) -> Result<()> {
+        let path: PathBuf = path.into();
+
+        let retries = <Wrap<usize>>::try_from(retries)?.0;
+        let compression: Option<IpcCompression> = match compression {
+            Some(x) => {
+                if x == "uncompressed" {
+                    None
+                } else {
+                    Some(<Wrap<IpcCompression>>::try_from(x)?.0)
+                }
+            }
+            None => None,
+        };
+        let options = IpcWriterOptions {
+            compression,
+            maintain_order,
+        };
+
+        let cloud_options = match storage_options {
+            Some(x) => {
+                let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
+                    RPolarsErr::Other(
+                        "`storage_options` must be a named character vector".to_string(),
+                    )
+                })?;
+                Some(out.0)
+            }
+            None => None,
+        };
+        let cloud_options = {
+            let cloud_options =
+                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
+            Some(cloud_options.with_max_retries(retries))
+        };
+
+        self.ldf
+            .clone()
+            .sink_ipc(&path, options, cloud_options)
+            .map_err(RPolarsErr::from)?;
+        Ok(())
+    }
+
+    // Delegates to `LazyFrame::sink_csv`; a failed write is propagated, not discarded.
+    fn sink_csv(
+        &self,
+        path: &str,
+        include_bom: bool,
+        include_header: bool,
+        separator: &str,
+        line_terminator: &str,
+        quote_char: &str,
+        maintain_order: bool,
+        batch_size: NumericScalar,
+        retries: NumericScalar,
+        datetime_format: Option<&str>,
+        date_format: Option<&str>,
+        time_format: Option<&str>,
+        float_scientific: Option<bool>,
+        float_precision: Option<NumericScalar>,
+        null_value: Option<&str>,
+        quote_style: Option<&str>,
+        storage_options: Option<StringSexp>,
+    ) -> Result<()> {
+        let path: PathBuf = path.into();
+        let quote_style = match quote_style {
+            Some(x) => <Wrap<QuoteStyle>>::try_from(x)?.0,
+            None => QuoteStyle::default(),
+        };
+        let retries = <Wrap<usize>>::try_from(retries)?.0;
+        let null_value = null_value
+            .map(|x| x.to_string())
+            .unwrap_or(SerializeOptions::default().null);
+        let batch_size = <Wrap<NonZeroUsize>>::try_from(batch_size)?.0;
+        let float_precision = match float_precision {
+            Some(x) => Some(<Wrap<usize>>::try_from(x)?.0),
+            None => None,
+        };
+        let separator = <Wrap<u8>>::try_from(separator)?.0;
+        let quote_char = <Wrap<u8>>::try_from(quote_char)?.0;
+
+        let serialize_options = SerializeOptions {
+            date_format: date_format.map(|x| x.to_string()),
+            time_format: time_format.map(|x| x.to_string()),
+            datetime_format: datetime_format.map(|x| x.to_string()),
+            float_scientific,
+            float_precision,
+            separator,
+            quote_char,
+            null: null_value.to_string(),
+            line_terminator: line_terminator.to_string(),
+            quote_style,
+        };
+
+        let options = CsvWriterOptions {
+            include_bom,
+            include_header,
+            maintain_order,
+            batch_size,
+            serialize_options,
+        };
+        let cloud_options = match storage_options {
+            Some(x) => {
+                let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
+                    RPolarsErr::Other(
+                        "`storage_options` must be a named character vector".to_string(),
+                    )
+                })?;
+                Some(out.0)
+            }
+            None => None,
+        };
+        let cloud_options = {
+            let cloud_options =
+                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
+            Some(cloud_options.with_max_retries(retries))
+        };
+
+        self.ldf
+            .clone()
+            .sink_csv(&path, options, cloud_options)
+            .map_err(RPolarsErr::from)?;
+        Ok(())
+    }
+
+    // Delegates to `LazyFrame::sink_json`; a failed write is propagated, not discarded.
+    fn sink_json(
+        &self,
+        path: &str,
+        maintain_order: bool,
+        retries: NumericScalar,
+        storage_options: Option<StringSexp>,
+    ) -> Result<()> {
+        let path: PathBuf = path.into();
+        let retries = <Wrap<usize>>::try_from(retries)?.0;
+        let options = JsonWriterOptions { maintain_order };
+        let cloud_options = match storage_options {
+            Some(x) => {
+                let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
+                    RPolarsErr::Other(
+                        "`storage_options` must be a named character vector".to_string(),
+                    )
+                })?;
+                Some(out.0)
+            }
+            None => None,
+        };
+        let cloud_options = {
+            let cloud_options =
+                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
+            Some(cloud_options.with_max_retries(retries))
+        };
+
+        self.ldf
+            .clone()
+            .sink_json(&path, options, cloud_options)
+            .map_err(RPolarsErr::from)?;
+        Ok(())
+    }
+
+    fn serialize(&self) -> Result<Sexp> {
+        let dump = serde_json::to_string(&self.ldf.logical_plan)
+            .map_err(|err| RPolarsErr::Other(err.to_string()))?;
+        dump.try_into()
+    }
+
+    // Converts the R list into expressions and forwards them to `LazyFrame::select_seq`.
+    fn select_seq(&mut self, exprs: ListSexp) -> Result<Self> {
+        let selected = <Wrap<Vec<Expr>>>::from(exprs).0;
+        Ok(self.ldf.clone().select_seq(selected).into())
+    }
+
+    fn rolling(
+        &mut self,
+        index_column: &PlRExpr,
+        period: &str,
+        offset: &str,
+        closed: &str,
+        by: ListSexp,
+    ) -> Result<PlRLazyGroupBy> {
+        let closed_window = <Wrap<ClosedWindow>>::try_from(closed)?.0;
+        let ldf = self.ldf.clone();
+        let by = <Wrap<Vec<Expr>>>::from(by).0;
+        let lazy_gb = ldf.rolling(
+            index_column.inner.clone(),
+            by,
+            RollingGroupOptions {
+                index_column: "".into(),
+                period: Duration::try_parse(period).map_err(RPolarsErr::from)?,
+                offset: Duration::try_parse(offset).map_err(RPolarsErr::from)?,
+                closed_window,
+            },
+        );
+
+        Ok(PlRLazyGroupBy { lgb: Some(lazy_gb) })
+    }
+
+    fn group_by_dynamic(
+        &mut self,
+        index_column: &PlRExpr,
+        every: &str,
+        period: &str,
+        offset: &str,
+        label: &str,
+        include_boundaries: bool,
+        closed: &str,
+        group_by: ListSexp,
+        start_by: &str,
+    ) -> Result<PlRLazyGroupBy> {
+        let closed_window = <Wrap<ClosedWindow>>::try_from(closed)?.0;
+        let group_by = <Wrap<Vec<Expr>>>::from(group_by).0;
+        let ldf = self.ldf.clone();
+        let label = <Wrap<Label>>::try_from(label)?.0;
+        let start_by = <Wrap<StartBy>>::try_from(start_by)?.0;
+        let lazy_gb = ldf.group_by_dynamic(
+            index_column.inner.clone(),
+            group_by,
+            DynamicGroupOptions {
+                every: Duration::try_parse(every).map_err(RPolarsErr::from)?,
+                period: Duration::try_parse(period).map_err(RPolarsErr::from)?,
+                offset: Duration::try_parse(offset).map_err(RPolarsErr::from)?,
+                label,
+                include_boundaries,
+                closed_window,
+                start_by,
+                ..Default::default()
+            },
+        );
+
+        Ok(PlRLazyGroupBy { lgb: Some(lazy_gb) })
+    }
+
+    fn with_context(&self, contexts: ListSexp) -> Result<Self> {
+        let contexts = <Wrap<Vec<LazyFrame>>>::try_from(contexts)?.0;
+        Ok(self.ldf.clone().with_context(contexts).into())
+    }
+
+    fn join_asof(
+        &self,
+        other: &PlRLazyFrame,
+        left_on: &PlRExpr,
+        right_on: &PlRExpr,
+        allow_parallel: bool,
+        force_parallel: bool,
+        suffix: &str,
+        coalesce: bool,
+        strategy: &str,
+        left_by: Option<StringSexp>,
+        right_by: Option<StringSexp>,
+        tolerance: Option<Sexp>,
+        tolerance_str: Option<&str>,
+    ) -> Result<Self> {
+        let coalesce = if coalesce {
+            JoinCoalesce::CoalesceColumns
+        } else {
+            JoinCoalesce::KeepColumns
+        };
+        let strategy = <Wrap<AsofStrategy>>::try_from(strategy)?.0;
+        let ldf = self.ldf.clone();
+        let other = other.ldf.clone();
+        let left_on = left_on.inner.clone();
+        let right_on = right_on.inner.clone();
+        let left_by = left_by.map(|x| x.to_vec().into_iter().map(|y| y.into()).collect());
+        let right_by = right_by.map(|x| x.to_vec().into_iter().map(|y| y.into()).collect());
+        let tolerance = match tolerance {
+            Some(x) => Some(<Wrap<AnyValue<'_>>>::try_from(x)?.0),
+            None => None,
+        };
+        Ok(ldf
+            .join_builder()
+            .with(other)
+            .left_on([left_on])
+            .right_on([right_on])
+            .allow_parallel(allow_parallel)
+            .force_parallel(force_parallel)
+            .coalesce(coalesce)
+            .how(JoinType::AsOf(AsOfOptions {
+                strategy,
+                left_by,
+                right_by,
+                tolerance: tolerance.map(|t| t.into_static()),
+                tolerance_str: tolerance_str.map(|s| s.into()),
+            }))
+            .suffix(suffix)
+            .finish()
+            .into())
+    }
+
+    fn join(
+        &self,
+        other: &PlRLazyFrame,
+        left_on: ListSexp,
+        right_on: ListSexp,
+        allow_parallel: bool,
+        force_parallel: bool,
+        join_nulls: bool,
+        how: &str,
+        suffix: &str,
+        validate: &str,
+        coalesce: Option<bool>,
+    ) -> Result<Self> {
+        let coalesce = match coalesce {
+            None => JoinCoalesce::JoinSpecific,
+            Some(true) => JoinCoalesce::CoalesceColumns,
+            Some(false) => JoinCoalesce::KeepColumns,
+        };
+        let ldf = self.ldf.clone();
+        let other = other.ldf.clone();
+        let left_on = <Wrap<Vec<Expr>>>::from(left_on).0;
+        let right_on = <Wrap<Vec<Expr>>>::from(right_on).0;
+
+        let how = <Wrap<JoinType>>::try_from(how)?.0;
+        let validate = <Wrap<JoinValidation>>::try_from(validate)?.0;
+        Ok(ldf
+            .join_builder()
+            .with(other)
+            .left_on(left_on)
+            .right_on(right_on)
+            .allow_parallel(allow_parallel)
+            .force_parallel(force_parallel)
+            .join_nulls(join_nulls)
+            .how(how)
+            .coalesce(coalesce)
+            .validate(validate)
+            .suffix(suffix)
+            .finish()
+            .into())
+    }
+
+    // Builds a join of `self` with `other` using `suffix`, then finishes it with
+    // `join_where` on the predicate expressions.
+    fn join_where(&self, other: &PlRLazyFrame, predicates: ListSexp, suffix: &str) -> Result<Self> {
+        let predicates = <Wrap<Vec<Expr>>>::from(predicates).0;
+
+        let builder = self
+            .ldf
+            .clone()
+            .join_builder()
+            .with(other.ldf.clone())
+            .suffix(suffix);
+        Ok(builder.join_where(predicates).into())
+    }
+
+    // Converts the R list into expressions and forwards them to `with_columns_seq`.
+    fn with_columns_seq(&mut self, exprs: ListSexp) -> Result<Self> {
+        let columns = <Wrap<Vec<Expr>>>::from(exprs).0;
+        Ok(self.ldf.clone().with_columns_seq(columns).into())
+    }
+
+    fn rename(&mut self, existing: StringSexp, new: StringSexp, strict: bool) -> Result<Self> {
+        let ldf = self.ldf.clone();
+        Ok(ldf.rename(existing.to_vec(), new.to_vec(), strict).into())
+    }
+
+    // Delegates to `LazyFrame::reverse` on a clone of the wrapped frame.
+    fn reverse(&self) -> Result<Self> {
+        Ok(self.ldf.clone().reverse().into())
+    }
+
+    fn shift(&self, n: &PlRExpr, fill_value: Option<&PlRExpr>) -> Result<Self> {
+        let lf = self.ldf.clone();
+        let out = match fill_value {
+            Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
+            None => lf.shift(n.inner.clone()),
+        };
+        Ok(out.into())
+    }
+
+    // Delegates to `LazyFrame::fill_nan` with a clone of the fill expression.
+    fn fill_nan(&self, fill_value: &PlRExpr) -> Result<Self> {
+        Ok(self.ldf.clone().fill_nan(fill_value.inner.clone()).into())
+    }
+
+    // Delegates to `LazyFrame::fill_null` with a clone of the fill expression.
+    fn fill_null(&self, fill_value: &PlRExpr) -> Result<Self> {
+        Ok(self.ldf.clone().fill_null(fill_value.inner.clone()).into())
+    }
+
+    // Delegates to `LazyFrame::min` on a clone of the wrapped frame.
+    fn min(&self) -> Result<Self> {
+        let reduced = self.ldf.clone().min();
+        Ok(reduced.into())
+    }
+
+    // Delegates to `LazyFrame::max` on a clone of the wrapped frame.
+    fn max(&self) -> Result<Self> {
+        let reduced = self.ldf.clone().max();
+        Ok(reduced.into())
+    }
+
+    // Delegates to `LazyFrame::sum` on a clone of the wrapped frame.
+    fn sum(&self) -> Result<Self> {
+        let reduced = self.ldf.clone().sum();
+        Ok(reduced.into())
+    }
+
+    // Delegates to `LazyFrame::mean` on a clone of the wrapped frame.
+    fn mean(&self) -> Result<Self> {
+        let reduced = self.ldf.clone().mean();
+        Ok(reduced.into())
+    }
+
+    // Converts `ddof` to `u8` and forwards it to `LazyFrame::std`.
+    fn std(&self, ddof: NumericScalar) -> Result<Self> {
+        let ddof = <Wrap<u8>>::try_from(ddof)?.0;
+        let reduced = self.ldf.clone().std(ddof);
+        Ok(reduced.into())
+    }
+
+    // Converts `ddof` to `u8` and forwards it to `LazyFrame::var`.
+    fn var(&self, ddof: NumericScalar) -> Result<Self> {
+        let ddof = <Wrap<u8>>::try_from(ddof)?.0;
+        let reduced = self.ldf.clone().var(ddof);
+        Ok(reduced.into())
+    }
+
+    // Delegates to `LazyFrame::median` on a clone of the wrapped frame.
+    fn median(&self) -> Result<Self> {
+        let reduced = self.ldf.clone().median();
+        Ok(reduced.into())
+    }
+
+    // Parses `interpolation` into a `QuantileMethod` and forwards to `LazyFrame::quantile`.
+    fn quantile(&self, quantile: &PlRExpr, interpolation: &str) -> Result<Self> {
+        let method = <Wrap<QuantileMethod>>::try_from(interpolation)?.0;
+        let out = self.ldf.clone().quantile(quantile.inner.clone(), method);
+        Ok(out.into())
+    }
+
+    // Converts the R list into expressions and forwards them to `LazyFrame::explode`.
+    fn explode(&self, column: ListSexp) -> Result<Self> {
+        let columns = <Wrap<Vec<Expr>>>::from(column).0;
+        Ok(self.ldf.clone().explode(columns).into())
+    }
+
+    // Delegates to `LazyFrame::null_count` on a clone of the wrapped frame.
+    fn null_count(&self) -> Result<Self> {
+        Ok(self.ldf.clone().null_count().into())
+    }
+
+    // `maintain_order` selects `unique_stable_generic`; otherwise `unique_generic` is used.
+    fn unique(&self, maintain_order: bool, keep: &str, subset: Option<ListSexp>) -> Result<Self> {
+        let keep = <Wrap<UniqueKeepStrategy>>::try_from(keep)?.0;
+        let subset = subset.map(|e| <Wrap<Vec<Expr>>>::from(e).0);
+        let out = match maintain_order {
+            true => self.ldf.clone().unique_stable_generic(subset, keep),
+            false => self.ldf.clone().unique_generic(subset, keep),
+        };
+        Ok(out.into())
+    }
+
+    // Converts the optional subset into expressions and forwards to `LazyFrame::drop_nulls`.
+    fn drop_nulls(&self, subset: Option<ListSexp>) -> Result<Self> {
+        let subset = subset.map(|e| <Wrap<Vec<Expr>>>::from(e).0);
+        Ok(self.ldf.clone().drop_nulls(subset).into())
+    }
+
+    fn unpivot(
+        &self,
+        on: ListSexp,
+        index: ListSexp,
+        value_name: Option<&str>,
+        variable_name: Option<&str>,
+    ) -> Result<Self> {
+        let on = <Wrap<Vec<Expr>>>::from(on).0;
+        let index = <Wrap<Vec<Expr>>>::from(index).0;
+        let args = UnpivotArgsDSL {
+            on: on.into_iter().map(|e| e.into()).collect(),
+            index: index.into_iter().map(|e| e.into()).collect(),
+            value_name: value_name.map(|s| s.into()),
+            variable_name: variable_name.map(|s| s.into()),
+        };
+
+        let ldf = self.ldf.clone();
+        Ok(ldf.unpivot(args).into())
+    }
+
+    // Converts the optional `offset` to `u32` and forwards to `LazyFrame::with_row_index`.
+    fn with_row_index(&self, name: &str, offset: Option<NumericScalar>) -> Result<Self> {
+        let offset: Option<u32> = offset
+            .map(|x| <Wrap<u32>>::try_from(x))
+            .transpose()?
+            .map(|w| w.0);
+        Ok(self.ldf.clone().with_row_index(name, offset).into())
+    }
+
+    // fn map_batches(
+    //     &self,
+    //     lambda: PyObject,
+    //     predicate_pushdown: bool,
+    //     projection_pushdown: bool,
+    //     slice_pushdown: bool,
+    //     streamable: bool,
+    //     schema: Option<Wrap<Schema>>,
+    //     validate_output: bool,
+    // ) -> Result<Self> {
+    //     let mut opt = OptFlags::default();
+    //     opt.set(OptFlags::PREDICATE_PUSHDOWN, predicate_pushdown);
+    //     opt.set(OptFlags::PROJECTION_PUSHDOWN, projection_pushdown);
+    //     opt.set(OptFlags::SLICE_PUSHDOWN, slice_pushdown);
+    //     opt.set(OptFlags::STREAMING, streamable);
+
+    //     self.ldf
+    //         .clone()
+    //         .map_python(
+    //             lambda.into(),
+    //             opt,
+    //             schema.map(|s| Arc::new(s.0)),
+    //             validate_output,
+    //         )
+    //         .into()
+    // }
+
+    fn clone(&self) -> Result<Self> {
+        Ok(self.ldf.clone().into())
+    }
+
+    fn unnest(&self, columns: ListSexp) -> Result<Self> {
+        let columns = <Wrap<Vec<Expr>>>::from(columns).0;
+        Ok(self.ldf.clone().unnest(columns).into())
+    }
+
+    // Delegates to `LazyFrame::count` on a clone of the wrapped frame.
+    fn count(&self) -> Result<Self> {
+        Ok(self.ldf.clone().count().into())
+    }
+
+    // Forwards clones of both frames to `LazyFrame::merge_sorted` with `key`,
+    // converting a polars error into the crate's error type.
+    fn merge_sorted(&self, other: &PlRLazyFrame, key: &str) -> Result<Self> {
+        let left = self.ldf.clone();
+        let right = other.ldf.clone();
+        let merged = left.merge_sorted(right, key).map_err(RPolarsErr::from)?;
+        Ok(merged.into())
+    }
+
     fn new_from_ipc(
         source: StringSexp,
         cache: bool,
@@ -319,7 +1033,7 @@ impl PlRLazyFrame {
         storage_options: Option<StringSexp>,
         file_cache_ttl: Option<NumericScalar>,
         include_file_paths: Option<&str>,
-    ) -> Result<PlRLazyFrame> {
+    ) -> Result<Self> {
         let source = source
             .to_vec()
             .iter()
diff --git a/src/rust/src/lazyframe/mod.rs b/src/rust/src/lazyframe/mod.rs
index d3615cfc..0afc04c7 100644
--- a/src/rust/src/lazyframe/mod.rs
+++ b/src/rust/src/lazyframe/mod.rs
@@ -1,4 +1,5 @@
 mod general;
+mod serde;
 
 use crate::prelude::*;
 use savvy::{savvy, EnvironmentSexp};
diff --git a/src/rust/src/lazyframe/serde.rs b/src/rust/src/lazyframe/serde.rs
new file mode 100644
index 00000000..feefb4f8
--- /dev/null
+++ b/src/rust/src/lazyframe/serde.rs
@@ -0,0 +1,12 @@
+use crate::{prelude::*, PlRLazyFrame, RPolarsErr};
+use savvy::{savvy, Result};
+
+// Rebuild a LazyFrame from a JSON-serialized logical plan (`DslPlan`).
+#[savvy]
+fn deserialize_lf(json: &str) -> Result<PlRLazyFrame> {
+    // Keep the serde_json error in the message so callers can see what failed.
+    let lp = serde_json::from_str::<DslPlan>(json).map_err(|err| {
+        RPolarsErr::Other(format!("could not deserialize input into a LazyFrame: {err}"))
+    })?;
+    Ok(<PlRLazyFrame>::from(LazyFrame::from(lp)))
+}

From d967634dab338d47d13e13442b242bdba14faedd Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:10:25 +0100
Subject: [PATCH 02/23] remove pub keyword

---
 src/rust/src/conversion/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index dce3a5d1..eddfefb4 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -7,7 +7,7 @@ use polars::series::ops::NullBehavior;
 use savvy::{ListSexp, NumericScalar, NumericSexp, NumericTypedSexp, Sexp, StringSexp, TypedSexp};
 use search_sorted::SearchSortedSide;
 pub mod base_date;
-pub mod chunked_array;
+mod chunked_array;
 pub mod clock;
 pub mod data_table;
 

From 75c2a2ea71875e20cb428e02ce5a643ea148c9dd Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:17:12 +0100
Subject: [PATCH 03/23] docs of limit

---
 R/lazyframe-frame.R     | 13 +++----------
 man/lazyframe__head.Rd  |  5 ++++-
 man/lazyframe__limit.Rd | 22 ----------------------
 3 files changed, 7 insertions(+), 33 deletions(-)
 delete mode 100644 man/lazyframe__limit.Rd

diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index 0465d4a0..1d2fb9e9 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -757,6 +757,8 @@ lazyframe__slice <- function(offset, length = NULL) {
 
 #' Get the first `n` rows
 #'
+#' `$limit()` is an alias for `$head()`.
+#'
 #' @param n Number of rows to return.
 #' @inherit as_polars_lf return
 #' @examples
@@ -768,16 +770,7 @@ lazyframe__head <- function(n = 5) {
     wrap()
 }
 
-#' Get the first `n` rows
-#'
-#' Alias for [`<LazyFrame>$head()`][lazyframe__head].
-#'
-#' @inheritParams lazyframe__head
-#' @inherit as_polars_lf return
-#' @examples
-#' lf <- pl$LazyFrame(a = 1:6, b = 7:12)
-#' lf$limit()$collect()
-#' lf$limit(2)$collect()
+#' @rdname lazyframe__head
 lazyframe__limit <- function(n = 5) {
   wrap({
     self$head(n)
diff --git a/man/lazyframe__head.Rd b/man/lazyframe__head.Rd
index 3b202274..2cb69058 100644
--- a/man/lazyframe__head.Rd
+++ b/man/lazyframe__head.Rd
@@ -2,9 +2,12 @@
 % Please edit documentation in R/lazyframe-frame.R
 \name{lazyframe__head}
 \alias{lazyframe__head}
+\alias{lazyframe__limit}
 \title{Get the first \code{n} rows}
 \usage{
 lazyframe__head(n = 5)
+
+lazyframe__limit(n = 5)
 }
 \arguments{
 \item{n}{Number of rows to return.}
@@ -13,7 +16,7 @@ lazyframe__head(n = 5)
 A polars \link{LazyFrame}
 }
 \description{
-Get the first \code{n} rows
+\verb{$limit()} is an alias for \verb{$head()}.
 }
 \examples{
 lf <- pl$LazyFrame(a = 1:6, b = 7:12)
diff --git a/man/lazyframe__limit.Rd b/man/lazyframe__limit.Rd
deleted file mode 100644
index 8f323354..00000000
--- a/man/lazyframe__limit.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lazyframe-frame.R
-\name{lazyframe__limit}
-\alias{lazyframe__limit}
-\title{Get the first \code{n} rows}
-\usage{
-lazyframe__limit(n = 5)
-}
-\arguments{
-\item{n}{Number of rows to return.}
-}
-\value{
-A polars \link{LazyFrame}
-}
-\description{
-Alias for \code{\link[=lazyframe__head]{<LazyFrame>$head()}}.
-}
-\examples{
-lf <- pl$LazyFrame(a = 1:6, b = 7:12)
-lf$limit()$collect()
-lf$limit(2)$collect()
-}

From a63d6234ad67b3ea2e8114affc8175d9a22d95ae Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:19:17 +0100
Subject: [PATCH 04/23] only named args in profile()

---
 R/lazyframe-frame.R       | 75 +++++++++++++++++++++------------------
 man/lazyframe__profile.Rd |  4 +++
 2 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index 1d2fb9e9..dc2b10ae 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -1380,6 +1380,7 @@ lazyframe__rename <- function(..., .strict = TRUE) {
 #' materialized DataFrame and a DataFrame that contains profiling information
 #' of each node that is executed.
 #'
+#' @inheritParams rlang::check_dots_empty0
 #' @inheritParams lazyframe__collect
 #' @param show_plot Show a Gantt chart of the profiling result
 #' @param truncate_nodes Truncate the label lengths in the Gantt chart to this
@@ -1425,6 +1426,7 @@ lazyframe__rename <- function(..., .strict = TRUE) {
 #'   agg(pl$col(pl$Float64)$map_elements(r_func))$
 #'   profile()
 lazyframe__profile <- function(
+    ...,
     type_coercion = TRUE,
     predicate_pushdown = TRUE,
     projection_pushdown = TRUE,
@@ -1438,43 +1440,46 @@ lazyframe__profile <- function(
     collect_in_background = FALSE,
     show_plot = FALSE,
     truncate_nodes = 0) {
-  if (isTRUE(no_optimization)) {
-    predicate_pushdown <- FALSE
-    projection_pushdown <- FALSE
-    slice_pushdown <- FALSE
-    comm_subplan_elim <- FALSE
-    comm_subexpr_elim <- FALSE
-    cluster_with_columns <- FALSE
-  }
-
-  if (isTRUE(streaming)) {
-    comm_subplan_elim <- FALSE
-  }
-
-  lf <- self$`_ldf`$optimization_toggle(
-    type_coercion = type_coercion,
-    predicate_pushdown = predicate_pushdown,
-    projection_pushdown = projection_pushdown,
-    simplify_expression = simplify_expression,
-    slice_pushdown = slice_pushdown,
-    comm_subplan_elim = comm_subplan_elim,
-    comm_subexpr_elim = comm_subexpr_elim,
-    cluster_with_columns = cluster_with_columns,
-    streaming = streaming,
-    `_eager` = FALSE
-  )
-
-  out <- lapply(self$`_ldf`$profile(), \(x) {
-    x |>
-      .savvy_wrap_PlRDataFrame() |>
-      wrap()
-  })
+  wrap({
+    check_dots_empty0(...)
+    if (isTRUE(no_optimization)) {
+      predicate_pushdown <- FALSE
+      projection_pushdown <- FALSE
+      slice_pushdown <- FALSE
+      comm_subplan_elim <- FALSE
+      comm_subexpr_elim <- FALSE
+      cluster_with_columns <- FALSE
+    }
+
+    if (isTRUE(streaming)) {
+      comm_subplan_elim <- FALSE
+    }
+
+    lf <- self$`_ldf`$optimization_toggle(
+      type_coercion = type_coercion,
+      predicate_pushdown = predicate_pushdown,
+      projection_pushdown = projection_pushdown,
+      simplify_expression = simplify_expression,
+      slice_pushdown = slice_pushdown,
+      comm_subplan_elim = comm_subplan_elim,
+      comm_subexpr_elim = comm_subexpr_elim,
+      cluster_with_columns = cluster_with_columns,
+      streaming = streaming,
+      `_eager` = FALSE
+    )
 
-  if (isTRUE(show_plot)) {
-    out[["plot"]] <- make_profile_plot(out, truncate_nodes)
-  }
+    # Profile `lf`, the plan returned by optimization_toggle(), so that the
+    # optimization flags chosen above are actually applied to the query.
+    out <- lapply(lf$profile(), \(x) {
+      wrap(.savvy_wrap_PlRDataFrame(x))
+    })
 
-  out
+    if (isTRUE(show_plot)) {
+      out[["plot"]] <- make_profile_plot(out, truncate_nodes)
+    }
+
+    out
+  })
 }
 
 #' Serialize the logical plan of this LazyFrame to a string in JSON format
diff --git a/man/lazyframe__profile.Rd b/man/lazyframe__profile.Rd
index 6182fb6d..d276e2fd 100644
--- a/man/lazyframe__profile.Rd
+++ b/man/lazyframe__profile.Rd
@@ -5,6 +5,7 @@
 \title{Collect and profile a lazy query.}
 \usage{
 lazyframe__profile(
+  ...,
   type_coercion = TRUE,
   predicate_pushdown = TRUE,
   projection_pushdown = TRUE,
@@ -21,6 +22,7 @@ lazyframe__profile(
 )
 
 lazyframe__profile(
+  ...,
   type_coercion = TRUE,
   predicate_pushdown = TRUE,
   projection_pushdown = TRUE,
@@ -37,6 +39,8 @@ lazyframe__profile(
 )
 }
 \arguments{
+\item{...}{Dots which should be empty.}
+
 \item{type_coercion}{A logical, indicats type coercion optimization.}
 
 \item{predicate_pushdown}{A logical, indicats predicate pushdown optimization.}

From ba5deb41ac120db7cddb7e876f9b4370f7109b48 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:20:23 +0100
Subject: [PATCH 05/23] wrap clone()

---
 R/lazyframe-frame.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index dc2b10ae..26512d55 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -1565,7 +1565,8 @@ lazyframe__explode <- function(...) {
 #' # now, the original LazyFrame doesn't get this attribute
 #' attributes(df1)
 lazyframe__clone <- function() {
-  self$`_ldf`$clone()
+  self$`_ldf`$clone() |>
+    wrap()
 }
 
 

From 9219ccdf7ae03ea3a51ddd6aa520c55c1798f952 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:24:33 +0100
Subject: [PATCH 06/23] remove with_context()

---
 R/000-wrappers.R                  |  7 ------
 R/lazyframe-frame.R               | 33 --------------------------
 man/lazyframe__with_context.Rd    | 39 -------------------------------
 src/init.c                        |  6 -----
 src/rust/api.h                    |  1 -
 src/rust/src/lazyframe/general.rs |  5 ----
 6 files changed, 91 deletions(-)
 delete mode 100644 man/lazyframe__with_context.Rd

diff --git a/R/000-wrappers.R b/R/000-wrappers.R
index 72bfead1..14e1ca38 100644
--- a/R/000-wrappers.R
+++ b/R/000-wrappers.R
@@ -3452,12 +3452,6 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
   }
 }
 
-`PlRLazyFrame_with_context` <- function(self) {
-  function(`contexts`) {
-    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_with_context__impl, `self`, `contexts`))
-  }
-}
-
 `PlRLazyFrame_join_asof` <- function(self) {
   function(`other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `left_by` = NULL, `right_by` = NULL, `tolerance` = NULL, `tolerance_str` = NULL) {
     `other` <- .savvy_extract_ptr(`other`, "PlRLazyFrame")
@@ -3665,7 +3659,6 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
   e$`select_seq` <- `PlRLazyFrame_select_seq`(ptr)
   e$`rolling` <- `PlRLazyFrame_rolling`(ptr)
   e$`group_by_dynamic` <- `PlRLazyFrame_group_by_dynamic`(ptr)
-  e$`with_context` <- `PlRLazyFrame_with_context`(ptr)
   e$`join_asof` <- `PlRLazyFrame_join_asof`(ptr)
   e$`join` <- `PlRLazyFrame_join`(ptr)
   e$`join_where` <- `PlRLazyFrame_join_where`(ptr)
diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index 26512d55..53830735 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -1601,39 +1601,6 @@ lazyframe__unnest <- function(...) {
   })
 }
 
-#' Add an external context to the computation graph
-#'
-#' This allows expressions to also access columns from DataFrames or LazyFrames
-#' that are not part of this one.
-#'
-#' @param other Data/LazyFrame to have access to. This can be a list of DataFrames
-#' and LazyFrames.
-#' @inherit as_polars_lf return
-#'
-#' @examples
-#' lf <- pl$LazyFrame(a = c(1, 2, 3), b = c("a", "c", NA))
-#' lf_other <- pl$LazyFrame(c = c("foo", "ham"))
-#'
-#' lf$with_context(lf_other)$select(
-#'   pl$col("b") + pl$col("c")$first()
-#' )$collect()
-#'
-#' # Fill nulls with the median from another lazyframe:
-#' train_lf <- pl$LazyFrame(
-#'   feature_0 = c(-1.0, 0, 1), feature_1 = c(-1.0, 0, 1)
-#' )
-#' test_lf <- pl$LazyFrame(
-#'   feature_0 = c(-1.0, NA, 1), feature_1 = c(-1.0, 0, 1)
-#' )
-#'
-#' test_lf$with_context(train_lf$select(pl$all()$name$suffix("_train")))$select(
-#'   pl$col("feature_0")$fill_null(pl$col("feature_0_train")$median())
-#' )$collect()
-lazyframe__with_context <- function(other) {
-  self$`_ldf`$with_context(other)
-}
-
-
 #' Create rolling groups based on a date/time or integer column
 #'
 #' @description
diff --git a/man/lazyframe__with_context.Rd b/man/lazyframe__with_context.Rd
deleted file mode 100644
index 4a47d8b4..00000000
--- a/man/lazyframe__with_context.Rd
+++ /dev/null
@@ -1,39 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lazyframe-frame.R
-\name{lazyframe__with_context}
-\alias{lazyframe__with_context}
-\title{Add an external context to the computation graph}
-\usage{
-lazyframe__with_context(other)
-}
-\arguments{
-\item{other}{Data/LazyFrame to have access to. This can be a list of DataFrames
-and LazyFrames.}
-}
-\value{
-A polars \link{LazyFrame}
-}
-\description{
-This allows expressions to also access columns from DataFrames or LazyFrames
-that are not part of this one.
-}
-\examples{
-lf <- pl$LazyFrame(a = c(1, 2, 3), b = c("a", "c", NA))
-lf_other <- pl$LazyFrame(c = c("foo", "ham"))
-
-lf$with_context(lf_other)$select(
-  pl$col("b") + pl$col("c")$first()
-)$collect()
-
-# Fill nulls with the median from another lazyframe:
-train_lf <- pl$LazyFrame(
-  feature_0 = c(-1.0, 0, 1), feature_1 = c(-1.0, 0, 1)
-)
-test_lf <- pl$LazyFrame(
-  feature_0 = c(-1.0, NA, 1), feature_1 = c(-1.0, 0, 1)
-)
-
-test_lf$with_context(train_lf$select(pl$all()$name$suffix("_train")))$select(
-  pl$col("feature_0")$fill_null(pl$col("feature_0_train")$median())
-)$collect()
-}
diff --git a/src/init.c b/src/init.c
index d05a21b1..2ef1be85 100644
--- a/src/init.c
+++ b/src/init.c
@@ -2369,11 +2369,6 @@ SEXP savvy_PlRLazyFrame_group_by_dynamic__impl(SEXP self__, SEXP c_arg__index_co
     return handle_result(res);
 }
 
-SEXP savvy_PlRLazyFrame_with_context__impl(SEXP self__, SEXP c_arg__contexts) {
-    SEXP res = savvy_PlRLazyFrame_with_context__ffi(self__, c_arg__contexts);
-    return handle_result(res);
-}
-
 SEXP savvy_PlRLazyFrame_join_asof__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str) {
     SEXP res = savvy_PlRLazyFrame_join_asof__ffi(self__, c_arg__other, c_arg__left_on, c_arg__right_on, c_arg__allow_parallel, c_arg__force_parallel, c_arg__suffix, c_arg__coalesce, c_arg__strategy, c_arg__left_by, c_arg__right_by, c_arg__tolerance, c_arg__tolerance_str);
     return handle_result(res);
@@ -3198,7 +3193,6 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_PlRLazyFrame_select_seq__impl", (DL_FUNC) &savvy_PlRLazyFrame_select_seq__impl, 2},
     {"savvy_PlRLazyFrame_rolling__impl", (DL_FUNC) &savvy_PlRLazyFrame_rolling__impl, 6},
     {"savvy_PlRLazyFrame_group_by_dynamic__impl", (DL_FUNC) &savvy_PlRLazyFrame_group_by_dynamic__impl, 10},
-    {"savvy_PlRLazyFrame_with_context__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_context__impl, 2},
     {"savvy_PlRLazyFrame_join_asof__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_asof__impl, 13},
     {"savvy_PlRLazyFrame_join__impl", (DL_FUNC) &savvy_PlRLazyFrame_join__impl, 11},
     {"savvy_PlRLazyFrame_join_where__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_where__impl, 4},
diff --git a/src/rust/api.h b/src/rust/api.h
index f51c8bfe..4bf76b74 100644
--- a/src/rust/api.h
+++ b/src/rust/api.h
@@ -477,7 +477,6 @@ SEXP savvy_PlRLazyFrame_serialize__ffi(SEXP self__);
 SEXP savvy_PlRLazyFrame_select_seq__ffi(SEXP self__, SEXP c_arg__exprs);
 SEXP savvy_PlRLazyFrame_rolling__ffi(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__closed, SEXP c_arg__by);
 SEXP savvy_PlRLazyFrame_group_by_dynamic__ffi(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__every, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__label, SEXP c_arg__include_boundaries, SEXP c_arg__closed, SEXP c_arg__group_by, SEXP c_arg__start_by);
-SEXP savvy_PlRLazyFrame_with_context__ffi(SEXP self__, SEXP c_arg__contexts);
 SEXP savvy_PlRLazyFrame_join_asof__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str);
 SEXP savvy_PlRLazyFrame_join__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__join_nulls, SEXP c_arg__how, SEXP c_arg__suffix, SEXP c_arg__validate, SEXP c_arg__coalesce);
 SEXP savvy_PlRLazyFrame_join_where__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__predicates, SEXP c_arg__suffix);
diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index fed0dd1d..64d2f4cc 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -618,11 +618,6 @@ impl PlRLazyFrame {
         Ok(PlRLazyGroupBy { lgb: Some(lazy_gb) })
     }
 
-    fn with_context(&self, contexts: ListSexp) -> Result<Self> {
-        let contexts = <Wrap<Vec<LazyFrame>>>::try_from(contexts)?.0;
-        Ok(self.ldf.clone().with_context(contexts).into())
-    }
-
     fn join_asof(
         &self,
         other: &PlRLazyFrame,

From a1b5f162f0e2ee3a50fb683f5e1b4a78991f8519 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:31:27 +0100
Subject: [PATCH 07/23] adjust $clear()

---
 R/lazyframe-frame.R | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index 53830735..df9f8d05 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -1921,7 +1921,12 @@ lazyframe__to_dot <- function(
 #'
 #' df$clear(n = 5)
 lazyframe__clear <- function(n = 0) {
-  pl$DataFrame(schema = self$schema)$clear(n)$lazy()
+  cols <- names(self)
+  dat <- vector("list", length(cols))
+  names(dat) <- cols
+  pl$DataFrame(!!!dat, .schema_overrides = self$collect_schema())$
+    clear(n)$
+    lazy()
 }
 
 #' Take every nth row in the LazyFrame

From 10710ca3fbdb5795ddd6e6c56eff7f3dd95cbc67 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:35:45 +0100
Subject: [PATCH 08/23] inherit from args_dots_empty in docs

---
 R/lazyframe-frame.R                | 36 +++++++++++++++---------------
 man/lazyframe__explain.Rd          |  2 +-
 man/lazyframe__group_by_dynamic.Rd |  2 +-
 man/lazyframe__join.Rd             |  2 +-
 man/lazyframe__join_asof.Rd        |  2 +-
 man/lazyframe__profile.Rd          |  2 +-
 man/lazyframe__rolling.Rd          |  2 +-
 man/lazyframe__set_sorted.Rd       |  2 +-
 man/lazyframe__shift.Rd            |  2 +-
 man/lazyframe__sink_csv.Rd         |  2 +-
 man/lazyframe__sink_ipc.Rd         |  2 +-
 man/lazyframe__sink_ndjson.Rd      |  2 +-
 man/lazyframe__sink_parquet.Rd     |  2 +-
 man/lazyframe__top_k.Rd            |  2 +-
 man/lazyframe__unique.Rd           |  2 +-
 man/lazyframe__unpivot.Rd          |  2 +-
 16 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index df9f8d05..c9511b0b 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -258,7 +258,7 @@ lazyframe__collect <- function(
 #' DataFrame and a DataFrame that contains profiling information of each node
 #' that is executed.
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @inheritParams lazyframe__collect
 #' @param show_plot Show a Gantt chart of the profiling result
 #' @param truncate_nodes Truncate the label lengths in the Gantt chart to this
@@ -366,7 +366,7 @@ lazyframe__profile <- function(
 #' One classic example is the predicate pushdown, which applies the filter as
 #' early as possible (i.e. at the bottom of the plan).
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @inheritParams lazyframe__collect
 #' @param format The format to use for displaying the logical plan. Must be
 #' either `"plain"` (default) or `"tree"`.
@@ -954,7 +954,7 @@ lazyframe__fill_null <- function(fill_value) {
 
 #' Shift values by the given number of indices
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param n Number of indices to shift forward. If a negative value is passed,
 #' values are shifted in the opposite direction instead.
 #' @param fill_value Fill the resulting null values with this value. Accepts
@@ -1058,7 +1058,7 @@ lazyframe__drop_nulls <- function(subset = NULL) {
 
 #' Drop duplicate rows from this DataFrame
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param subset Column name(s) or selector(s), to consider when identifying
 #' duplicate rows. If `NULL` (default), use all columns.
 #' @param keep Which of the duplicate rows to keep. Must be one of:
@@ -1105,7 +1105,7 @@ lazyframe__unique <- function(
 #' observations based on matching observations, for example with `how =
 #' "inner"`).
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param other LazyFrame to join with.
 #' @param on Either a vector of column names or a list of expressions and/or
 #'   strings. Use `left_on` and `right_on` if the column names to match on are
@@ -1294,7 +1294,7 @@ lazyframe__join_where <- function(
 #' considered measured variables (`on`), are “unpivoted” to the row axis
 #' leaving just two non-identifier columns, "variable" and "value".
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param on Values to use as identifier variables. If `value_vars` is
 #' empty all columns that are not in `id_vars` will be used.
 #' @param index Columns to use as identifier variables.
@@ -1380,7 +1380,7 @@ lazyframe__rename <- function(..., .strict = TRUE) {
 #' materialized DataFrame and a DataFrame that contains profiling information
 #' of each node that is executed.
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @inheritParams lazyframe__collect
 #' @param show_plot Show a Gantt chart of the profiling result
 #' @param truncate_nodes Truncate the label lengths in the Gantt chart to this
@@ -1621,7 +1621,7 @@ lazyframe__unnest <- function(...) {
 #' * …
 #' * `(t_n + offset, t_n + offset + period]`
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @inheritParams lazyframe__group_by_dynamic
 #' @param period Length of the window - must be non-negative.
 #' @param offset Offset of the window. Default is `-period`.
@@ -1689,7 +1689,7 @@ lazyframe__rolling <- function(
 #' where `start` is determined by `start_by`, `offset`, `every`, and the
 #' earliest datapoint. See the `start_by` argument description for details.
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param index_column Column used to group based on the time window. Often of
 #' type Date/Datetime. This column must be sorted in ascending order (or, if
 #' `group_by` is specified, then it must be sorted in ascending order within
@@ -1978,7 +1978,7 @@ lazyframe__null_count <- function() {
 #' value of `reverse`. The output is not guaranteed to be in any particular
 #' order, call `sort()` after this function if you wish the output to be sorted.
 #'
-#' @inheritParams rlang::check_dots_empty
+#' @inheritParams rlang::args_dots_empty
 #' @param k Number of rows to return.
 #' @param by Column(s) used to determine the bottom rows. Accepts expression
 #' input. Strings are parsed as column names.
@@ -2012,7 +2012,7 @@ lazyframe__bottom_k <- function(k, ..., by, reverse = FALSE) {
 #' Return the `k` largest rows
 #'
 #' @inherit lazyframe__bottom_k description params
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param reverse Consider the `k` smallest elements of the `by` column(s)
 #' (instead of the `k` largest). This can be specified per column by passing a
 #' sequence of booleans.
@@ -2093,7 +2093,7 @@ lazyframe__merge_sorted <- function(other, key) {
 #' This can speed up future operations, but it can lead to incorrect results if
 #' the data is **not** sorted! Use with care!
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param column Columns that are sorted.
 #' @param descending Whether the columns are sorted in descending order.
 #'
@@ -2111,7 +2111,7 @@ lazyframe__set_sorted <- function(column, ..., descending = FALSE) {
 #' Using this function can have a negative effect on query performance. This
 #' may, for instance, block predicate pushdown optimization.
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param name Name of the index column.
 #' @param offset Start the index at this offset. Cannot be negative.
 #'
@@ -2140,7 +2140,7 @@ lazyframe__with_row_index <- function(name = "index", offset = 0) {
 #'
 #' This allows streaming results that are larger than RAM to be written to disk.
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param path A character. File path to which the file should be written.
 #' @param compression The compression method. Must be one of:
 #' * `"lz4"`: fast compression/decompression.
@@ -2252,7 +2252,7 @@ lazyframe__sink_parquet <- function(
 #' Evaluate the query in streaming mode and write to an IPC file
 #'
 #' @inherit lazyframe__sink_parquet description params return
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param compression `NULL` or one of:
 #' * `"uncompressed"`: same as `NULL`.
 #' * `"lz4"`: fast compression/decompression.
@@ -2324,7 +2324,7 @@ lazyframe__sink_ipc <- function(
 #' Evaluate the query in streaming mode and write to a CSV file
 #'
 #' @inherit lazyframe__sink_parquet description params return
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param include_bom Logical, whether to include UTF-8 BOM in the CSV output.
 #' @param include_header Logical, hether to include header in the CSV output.
 #' @param separator Separate CSV fields with this symbol.
@@ -2446,7 +2446,7 @@ lazyframe__sink_csv <- function(
 #' Evaluate the query in streaming mode and write to an NDJSON file
 #'
 #' @inherit lazyframe__sink_parquet description params return
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #'
 #' @examples
 #' # sink table 'mtcars' from mem to NDJSON
@@ -2505,7 +2505,7 @@ lazyframe__sink_ndjson <- function(
 #' This is similar to a left-join except that we match on nearest key rather
 #' than equal keys. Both frames must be sorted by the `asof_join` key.
 #'
-#' @inheritParams rlang::check_dots_empty0
+#' @inheritParams rlang::args_dots_empty
 #' @param other LazyFrame to join with.
 #' @inheritParams dataframe__join
 #' @param by Join on these columns before performing asof join. Either a vector
diff --git a/man/lazyframe__explain.Rd b/man/lazyframe__explain.Rd
index 582869d2..3982a52a 100644
--- a/man/lazyframe__explain.Rd
+++ b/man/lazyframe__explain.Rd
@@ -20,7 +20,7 @@ lazyframe__explain(
 )
 }
 \arguments{
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{format}{The format to use for displaying the logical plan. Must be
 either \code{"plain"} (default) or \code{"tree"}.}
diff --git a/man/lazyframe__group_by_dynamic.Rd b/man/lazyframe__group_by_dynamic.Rd
index 624890ad..9c7301f5 100644
--- a/man/lazyframe__group_by_dynamic.Rd
+++ b/man/lazyframe__group_by_dynamic.Rd
@@ -26,7 +26,7 @@ In case of a dynamic group by on indices, the data type needs to be either
 Int32 or In64. Note that Int32 gets temporarily cast to Int64, so if
 performance matters, use an Int64 column.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{every}{Interval of the window.}
 
diff --git a/man/lazyframe__join.Rd b/man/lazyframe__join.Rd
index 653d5d65..46074ac3 100644
--- a/man/lazyframe__join.Rd
+++ b/man/lazyframe__join.Rd
@@ -42,7 +42,7 @@ table.
 table.
 }}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{left_on, right_on}{Same as \code{on} but only for the left or the right
 DataFrame. They must have the same length.}
diff --git a/man/lazyframe__join_asof.Rd b/man/lazyframe__join_asof.Rd
index fe3cacd3..5c9575d4 100644
--- a/man/lazyframe__join_asof.Rd
+++ b/man/lazyframe__join_asof.Rd
@@ -24,7 +24,7 @@ lazyframe__join_asof(
 \arguments{
 \item{other}{LazyFrame to join with.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{by_left, by_right}{Same as \code{by} but only for the left or the right
 table. They must have the same length.}
diff --git a/man/lazyframe__profile.Rd b/man/lazyframe__profile.Rd
index d276e2fd..6f27320c 100644
--- a/man/lazyframe__profile.Rd
+++ b/man/lazyframe__profile.Rd
@@ -39,7 +39,7 @@ lazyframe__profile(
 )
 }
 \arguments{
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{type_coercion}{A logical, indicats type coercion optimization.}
 
diff --git a/man/lazyframe__rolling.Rd b/man/lazyframe__rolling.Rd
index 62768257..42d2a21b 100644
--- a/man/lazyframe__rolling.Rd
+++ b/man/lazyframe__rolling.Rd
@@ -22,7 +22,7 @@ In case of a dynamic group by on indices, the data type needs to be either
 Int32 or In64. Note that Int32 gets temporarily cast to Int64, so if
 performance matters, use an Int64 column.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{period}{Length of the window - must be non-negative.}
 
diff --git a/man/lazyframe__set_sorted.Rd b/man/lazyframe__set_sorted.Rd
index bf53468a..61374b10 100644
--- a/man/lazyframe__set_sorted.Rd
+++ b/man/lazyframe__set_sorted.Rd
@@ -9,7 +9,7 @@ lazyframe__set_sorted(column, ..., descending = FALSE)
 \arguments{
 \item{column}{Columns that are sorted.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{descending}{Whether the columns are sorted in descending order.}
 }
diff --git a/man/lazyframe__shift.Rd b/man/lazyframe__shift.Rd
index e4fafbb2..82b4e53f 100644
--- a/man/lazyframe__shift.Rd
+++ b/man/lazyframe__shift.Rd
@@ -10,7 +10,7 @@ lazyframe__shift(n = 1, ..., fill_value = NULL)
 \item{n}{Number of indices to shift forward. If a negative value is passed,
 values are shifted in the opposite direction instead.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{fill_value}{Fill the resulting null values with this value. Accepts
 expression input. Non-expression inputs are parsed as literals.}
diff --git a/man/lazyframe__sink_csv.Rd b/man/lazyframe__sink_csv.Rd
index 98202d20..7810747f 100644
--- a/man/lazyframe__sink_csv.Rd
+++ b/man/lazyframe__sink_csv.Rd
@@ -33,7 +33,7 @@ lazyframe__sink_csv(
 \arguments{
 \item{path}{A character. File path to which the file should be written.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{include_bom}{Logical, whether to include UTF-8 BOM in the CSV output.}
 
diff --git a/man/lazyframe__sink_ipc.Rd b/man/lazyframe__sink_ipc.Rd
index c9c509a9..2910291f 100644
--- a/man/lazyframe__sink_ipc.Rd
+++ b/man/lazyframe__sink_ipc.Rd
@@ -22,7 +22,7 @@ lazyframe__sink_ipc(
 \arguments{
 \item{path}{A character. File path to which the file should be written.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{compression}{\code{NULL} or one of:
 \itemize{
diff --git a/man/lazyframe__sink_ndjson.Rd b/man/lazyframe__sink_ndjson.Rd
index 218c8e2e..11ad024f 100644
--- a/man/lazyframe__sink_ndjson.Rd
+++ b/man/lazyframe__sink_ndjson.Rd
@@ -21,7 +21,7 @@ lazyframe__sink_ndjson(
 \arguments{
 \item{path}{A character. File path to which the file should be written.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{maintain_order}{Maintain the order in which data is processed. Setting
 this to \code{FALSE} will be slightly faster.}
diff --git a/man/lazyframe__sink_parquet.Rd b/man/lazyframe__sink_parquet.Rd
index 2e04bd4a..fb35e05a 100644
--- a/man/lazyframe__sink_parquet.Rd
+++ b/man/lazyframe__sink_parquet.Rd
@@ -26,7 +26,7 @@ lazyframe__sink_parquet(
 \arguments{
 \item{path}{A character. File path to which the file should be written.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{compression}{The compression method. Must be one of:
 \itemize{
diff --git a/man/lazyframe__top_k.Rd b/man/lazyframe__top_k.Rd
index 60e13e1f..da2b5b0b 100644
--- a/man/lazyframe__top_k.Rd
+++ b/man/lazyframe__top_k.Rd
@@ -9,7 +9,7 @@ lazyframe__top_k(k, ..., by, reverse = FALSE)
 \arguments{
 \item{k}{Number of rows to return.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{by}{Column(s) used to determine the bottom rows. Accepts expression
 input. Strings are parsed as column names.}
diff --git a/man/lazyframe__unique.Rd b/man/lazyframe__unique.Rd
index 22d160d3..b77072fc 100644
--- a/man/lazyframe__unique.Rd
+++ b/man/lazyframe__unique.Rd
@@ -15,7 +15,7 @@ lazyframe__unique(
 \item{subset}{Column name(s) or selector(s), to consider when identifying
 duplicate rows. If \code{NULL} (default), use all columns.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{keep}{Which of the duplicate rows to keep. Must be one of:
 \itemize{
diff --git a/man/lazyframe__unpivot.Rd b/man/lazyframe__unpivot.Rd
index 5c37f657..b9d5f9a1 100644
--- a/man/lazyframe__unpivot.Rd
+++ b/man/lazyframe__unpivot.Rd
@@ -16,7 +16,7 @@ lazyframe__unpivot(
 \item{on}{Values to use as identifier variables. If \code{value_vars} is
 empty all columns that are not in \code{id_vars} will be used.}
 
-\item{...}{Dots which should be empty.}
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{index}{Columns to use as identifier variables.}
 

From 91e2823a4531f875e65cab8d9eeac70b83080c1e Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:39:49 +0100
Subject: [PATCH 09/23] unnecessary change in rust

---
 src/rust/src/lazyframe/general.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index 64d2f4cc..16f609cc 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -1,5 +1,6 @@
-use super::*;
-use crate::{PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRLazyGroupBy, RPolarsErr};
+use crate::{
+    prelude::*, PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRLazyGroupBy, RPolarsErr,
+};
 use polars::io::{HiveOptions, RowIndex};
 use savvy::{
     savvy, ListSexp, LogicalSexp, NumericScalar, OwnedListSexp, OwnedStringSexp, Result, Sexp,

From 0f11c25ee4afe028000a737732bdd29ba8d657c4 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Sun, 15 Dec 2024 17:40:44 +0100
Subject: [PATCH 10/23] same

---
 src/rust/src/lazyframe/general.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index 16f609cc..f1264a9d 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -1029,7 +1029,7 @@ impl PlRLazyFrame {
         storage_options: Option<StringSexp>,
         file_cache_ttl: Option<NumericScalar>,
         include_file_paths: Option<&str>,
-    ) -> Result<Self> {
+    ) -> Result<PlRLazyFrame> {
         let source = source
             .to_vec()
             .iter()

From 8d283f7bfd8cbd8433be4a058650e3b688849a48 Mon Sep 17 00:00:00 2001
From: eitsupi <ts1s1andn@gmail.com>
Date: Mon, 16 Dec 2024 16:03:05 +0000
Subject: [PATCH 11/23] fix: remove alternative way to convert R objects

---
 R/000-wrappers.R                  |  1 +
 R/lazyframe-frame.R               | 16 ++++++++++++++--
 src/rust/src/conversion/mod.rs    | 21 +--------------------
 src/rust/src/lazyframe/general.rs | 15 +++++++++++----
 4 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/R/000-wrappers.R b/R/000-wrappers.R
index 14e1ca38..06462887 100644
--- a/R/000-wrappers.R
+++ b/R/000-wrappers.R
@@ -3457,6 +3457,7 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
     `other` <- .savvy_extract_ptr(`other`, "PlRLazyFrame")
     `left_on` <- .savvy_extract_ptr(`left_on`, "PlRExpr")
     `right_on` <- .savvy_extract_ptr(`right_on`, "PlRExpr")
+    `tolerance` <- .savvy_extract_ptr(`tolerance`, "PlRSeries")
     .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_join_asof__impl, `self`, `other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `left_by`, `right_by`, `tolerance`, `tolerance_str`))
   }
 }
diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index c9511b0b..f413a523 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -2628,8 +2628,20 @@ lazyframe__join_asof <- function(
     strategy <- arg_match0(strategy, values = c("backward", "forward", "nearest"))
     if (!is.null(by)) by_left <- by_right <- by
     if (!is.null(on)) left_on <- right_on <- on
-    tolerance_str <- if (is.character(tolerance)) tolerance else NULL
-    tolerance_num <- if (!is.character(tolerance)) tolerance else NULL
+
+    tolerance_str <- NULL
+    tolerance_num <- NULL
+    if (is_string(tolerance)) {
+      tolerance_str <- tolerance
+    } else if (!is.null(tolerance)) {
+      # TODO: duration string conversion support
+      series <- as_polars_series(tolerance)
+      if (series$len() == 1L) {
+        tolerance_num <- series$`_s`
+      } else {
+        abort("`tolerance` must be one of NULL, a single string, or an R object that can be converted to a Polars Series of length 1.")
+      }
+    }
 
     self$`_ldf`$join_asof(
       other = other$`_ldf`,
diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index eddfefb4..9774906f 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -4,7 +4,7 @@ use crate::prelude::*;
 use crate::{PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRSeries, RPolarsErr};
 use polars::prelude::cloud::CloudOptions;
 use polars::series::ops::NullBehavior;
-use savvy::{ListSexp, NumericScalar, NumericSexp, NumericTypedSexp, Sexp, StringSexp, TypedSexp};
+use savvy::{ListSexp, NumericScalar, NumericSexp, NumericTypedSexp, StringSexp, TypedSexp};
 use search_sorted::SearchSortedSide;
 pub mod base_date;
 mod chunked_array;
@@ -33,25 +33,6 @@ impl<T> From<T> for Wrap<T> {
     }
 }
 
-impl TryFrom<Sexp> for Wrap<AnyValue<'_>> {
-    type Error = String;
-    fn try_from(obj: Sexp) -> Result<Self, String> {
-        let typed = obj.into_typed();
-        let out = match typed {
-            TypedSexp::Integer(x) => AnyValue::Int64(*(x.to_vec().first().unwrap()) as i64),
-            TypedSexp::Real(x) => AnyValue::Float64(*(x.to_vec().first().unwrap())),
-            TypedSexp::Logical(x) => AnyValue::Boolean(*(x.to_vec().first().unwrap())),
-            TypedSexp::String(x) => {
-                let val = x.to_vec();
-                AnyValue::StringOwned((*val.first().unwrap()).into())
-            }
-            TypedSexp::Null(_) => AnyValue::Null,
-            _ => return Err("Cannot cast to AnyValue".to_string()),
-        };
-        Ok(Wrap(out))
-    }
-}
-
 impl TryFrom<&str> for PlRDataType {
     type Error = String;
 
diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index f1264a9d..d5615d29 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -1,5 +1,6 @@
 use crate::{
-    prelude::*, PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRLazyGroupBy, RPolarsErr,
+    prelude::*, PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRLazyGroupBy, PlRSeries,
+    RPolarsErr,
 };
 use polars::io::{HiveOptions, RowIndex};
 use savvy::{
@@ -631,7 +632,7 @@ impl PlRLazyFrame {
         strategy: &str,
         left_by: Option<StringSexp>,
         right_by: Option<StringSexp>,
-        tolerance: Option<Sexp>,
+        tolerance: Option<&PlRSeries>,
         tolerance_str: Option<&str>,
     ) -> Result<Self> {
         let coalesce = if coalesce {
@@ -647,7 +648,13 @@ impl PlRLazyFrame {
         let left_by = left_by.map(|x| x.to_vec().into_iter().map(|y| y.into()).collect());
         let right_by = right_by.map(|x| x.to_vec().into_iter().map(|y| y.into()).collect());
         let tolerance = match tolerance {
-            Some(x) => Some(<Wrap<AnyValue<'_>>>::try_from(x)?.0),
+            Some(x) => Some(
+                x.series
+                    .clone()
+                    .get(0)
+                    .map_err(RPolarsErr::from)?
+                    .into_static(),
+            ),
             None => None,
         };
         Ok(ldf
@@ -662,7 +669,7 @@ impl PlRLazyFrame {
                 strategy,
                 left_by,
                 right_by,
-                tolerance: tolerance.map(|t| t.into_static()),
+                tolerance,
                 tolerance_str: tolerance_str.map(|s| s.into()),
             }))
             .suffix(suffix)

From 29a8cadc4f5fa341e6f3abd10c6539dbf3947621 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Tue, 17 Dec 2024 12:47:11 +0100
Subject: [PATCH 12/23] remove deserialize_lf()

---
 R/000-wrappers.R                |  5 -----
 R/lazyframe-frame.R             | 17 ++---------------
 man/pl.Rd                       |  2 +-
 man/pl__deserialize_lf.Rd       | 22 ----------------------
 src/init.c                      |  6 ------
 src/rust/api.h                  |  1 -
 src/rust/src/lazyframe/mod.rs   |  1 -
 src/rust/src/lazyframe/serde.rs | 12 ------------
 8 files changed, 3 insertions(+), 63 deletions(-)
 delete mode 100644 man/pl__deserialize_lf.Rd
 delete mode 100644 src/rust/src/lazyframe/serde.rs

diff --git a/R/000-wrappers.R b/R/000-wrappers.R
index 06462887..d0edd968 100644
--- a/R/000-wrappers.R
+++ b/R/000-wrappers.R
@@ -285,11 +285,6 @@ NULL
   .savvy_wrap_PlRWhen(.Call(savvy_when__impl, `condition`))
 }
 
-
-`deserialize_lf` <- function(`json`) {
-  .savvy_wrap_PlRLazyFrame(.Call(savvy_deserialize_lf__impl, `json`))
-}
-
 ### wrapper functions for PlRChainedThen
 
 `PlRChainedThen_when` <- function(self) {
diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index f413a523..c581e0be 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -1494,21 +1494,6 @@ lazyframe__serialize <- function() {
   })
 }
 
-#' Read a logical plan from a file to construct a LazyFrame
-#'
-#' @param source String containing the LazyFrame logical plan in JSON format.
-#'
-#' @return A character value
-#' @examples
-#' lf <- pl$LazyFrame(a = 1:3)$sum()
-#' ser <- lf$serialize()
-#' pl$deserialize_lf(ser)
-pl__deserialize_lf <- function(source) {
-  wrap({
-    deserialize_lf(source)
-  })
-}
-
 #' Explode the DataFrame to long format by exploding the given columns
 #'
 #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Column names, expressions, or
@@ -2659,3 +2644,5 @@ lazyframe__join_asof <- function(
     )
   })
 }
+
+# TODO-REWRITE: implement $deserialize() for LazyFrame
diff --git a/man/pl.Rd b/man/pl.Rd
index 33e6b48d..327dee16 100644
--- a/man/pl.Rd
+++ b/man/pl.Rd
@@ -5,7 +5,7 @@
 \alias{pl}
 \title{Polars top-level function namespace}
 \format{
-An object of class \code{polars_object} of length 75.
+An object of class \code{polars_object} of length 74.
 }
 \usage{
 pl
diff --git a/man/pl__deserialize_lf.Rd b/man/pl__deserialize_lf.Rd
deleted file mode 100644
index 0d71fb6f..00000000
--- a/man/pl__deserialize_lf.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lazyframe-frame.R
-\name{pl__deserialize_lf}
-\alias{pl__deserialize_lf}
-\title{Read a logical plan from a file to construct a LazyFrame}
-\usage{
-pl__deserialize_lf(source)
-}
-\arguments{
-\item{source}{String containing the LazyFrame logical plan in JSON format.}
-}
-\value{
-A character value
-}
-\description{
-Read a logical plan from a file to construct a LazyFrame
-}
-\examples{
-lf <- pl$LazyFrame(a = 1:3)$sum()
-ser <- lf$serialize()
-pl$deserialize_lf(ser)
-}
diff --git a/src/init.c b/src/init.c
index 2ef1be85..3271e29c 100644
--- a/src/init.c
+++ b/src/init.c
@@ -244,11 +244,6 @@ SEXP savvy_when__impl(SEXP c_arg__condition) {
     return handle_result(res);
 }
 
-SEXP savvy_deserialize_lf__impl(SEXP c_arg__json) {
-    SEXP res = savvy_deserialize_lf__ffi(c_arg__json);
-    return handle_result(res);
-}
-
 SEXP savvy_PlRChainedThen_when__impl(SEXP self__, SEXP c_arg__condition) {
     SEXP res = savvy_PlRChainedThen_when__ffi(self__, c_arg__condition);
     return handle_result(res);
@@ -2768,7 +2763,6 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_time_range__impl", (DL_FUNC) &savvy_time_range__impl, 4},
     {"savvy_time_ranges__impl", (DL_FUNC) &savvy_time_ranges__impl, 4},
     {"savvy_when__impl", (DL_FUNC) &savvy_when__impl, 1},
-    {"savvy_deserialize_lf__impl", (DL_FUNC) &savvy_deserialize_lf__impl, 1},
     {"savvy_PlRChainedThen_when__impl", (DL_FUNC) &savvy_PlRChainedThen_when__impl, 2},
     {"savvy_PlRChainedThen_otherwise__impl", (DL_FUNC) &savvy_PlRChainedThen_otherwise__impl, 2},
     {"savvy_PlRChainedWhen_then__impl", (DL_FUNC) &savvy_PlRChainedWhen_then__impl, 2},
diff --git a/src/rust/api.h b/src/rust/api.h
index 4bf76b74..e53c3bc2 100644
--- a/src/rust/api.h
+++ b/src/rust/api.h
@@ -40,7 +40,6 @@ SEXP savvy_datetime_ranges__ffi(SEXP c_arg__start, SEXP c_arg__end, SEXP c_arg__
 SEXP savvy_time_range__ffi(SEXP c_arg__start, SEXP c_arg__end, SEXP c_arg__every, SEXP c_arg__closed);
 SEXP savvy_time_ranges__ffi(SEXP c_arg__start, SEXP c_arg__end, SEXP c_arg__every, SEXP c_arg__closed);
 SEXP savvy_when__ffi(SEXP c_arg__condition);
-SEXP savvy_deserialize_lf__ffi(SEXP c_arg__json);
 
 // methods and associated functions for PlRChainedThen
 SEXP savvy_PlRChainedThen_when__ffi(SEXP self__, SEXP c_arg__condition);
diff --git a/src/rust/src/lazyframe/mod.rs b/src/rust/src/lazyframe/mod.rs
index 0afc04c7..d3615cfc 100644
--- a/src/rust/src/lazyframe/mod.rs
+++ b/src/rust/src/lazyframe/mod.rs
@@ -1,5 +1,4 @@
 mod general;
-mod serde;
 
 use crate::prelude::*;
 use savvy::{savvy, EnvironmentSexp};
diff --git a/src/rust/src/lazyframe/serde.rs b/src/rust/src/lazyframe/serde.rs
deleted file mode 100644
index feefb4f8..00000000
--- a/src/rust/src/lazyframe/serde.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-use crate::{prelude::*, PlRLazyFrame, RPolarsErr};
-use savvy::{savvy, Result};
-
-#[savvy]
-fn deserialize_lf(json: &str) -> Result<PlRLazyFrame> {
-    let lp = serde_json::from_str::<DslPlan>(json).map_err(|_| {
-        let msg = "could not deserialize input into a LazyFrame";
-        RPolarsErr::Other(msg.to_string())
-    })?;
-    let out = LazyFrame::from(lp);
-    Ok(<PlRLazyFrame>::from(out))
-}

From fb21d782b6f9c78de7bc7f234e607c10b42a0ca3 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Thu, 16 Jan 2025 14:39:55 +0100
Subject: [PATCH 13/23] try partial fix wasm32

---
 src/rust/src/conversion/mod.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index 94f14be1..65c1002d 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -698,6 +698,7 @@ impl TryFrom<&str> for Wrap<StartBy> {
     }
 }
 
+#[cfg(not(target_arch = "wasm32"))]
 pub(crate) fn parse_parquet_compression(
     compression: &str,
     compression_level: Option<i32>,
@@ -736,6 +737,7 @@ pub(crate) fn parse_parquet_compression(
     Ok(parsed)
 }
 
+#[cfg(not(target_arch = "wasm32"))]
 impl TryFrom<ListSexp> for Wrap<StatisticsOptions> {
     type Error = String;
 

From c2cd38646fc6b33b8f6d3bcc4dd9f3792339bba8 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Thu, 23 Jan 2025 23:20:53 +0100
Subject: [PATCH 14/23] change parquet statistics handling

---
 R/lazyframe-frame.R               | 34 +++++++++++---
 R/utils-various.R                 | 73 ++++++++++---------------------
 src/rust/src/conversion/mod.rs    | 29 ------------
 src/rust/src/lazyframe/general.rs | 13 +++++-
 4 files changed, 61 insertions(+), 88 deletions(-)

diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index c581e0be..b3abdd3e 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -2146,10 +2146,8 @@ lazyframe__with_row_index <- function(name = "index", offset = 0) {
 #' headers. Possible values:
 #' * `TRUE`: enable default set of statistics (default)
 #' * `FALSE`: disable all statistics
-#' * `"full"`: calculate and write all available statistics.
-#' * A named list where all values must be `TRUE` or `FALSE`, e.g.
-#'   `list(min = TRUE, max = FALSE)`. Statistics available are `"min"`, `"max"`,
-#'   `"distinct_count"`, `"null_count"`.
+#' * A list created via [parquet_statistics()] to specify which statistics to
+#'   include.
 #' @param row_group_size Size of the row groups in number of rows. If `NULL`
 #' (default), the chunks of the DataFrame are used. Writing in smaller chunks
 #' may reduce memory pressure and improve writing speeds.
@@ -2216,13 +2214,37 @@ lazyframe__sink_parquet <- function(
       `_eager` = FALSE
     )
 
-    statistics <- translate_statistics(statistics)
+    if (isTRUE(statistics)) {
+      statistics <- parquet_statistics(
+        min = TRUE,
+        max = TRUE,
+        distinct_count = FALSE,
+        null_count = TRUE
+      )
+    } else if (isFALSE(statistics)) {
+      statistics <- parquet_statistics(
+        min = FALSE,
+        max = FALSE,
+        distinct_count = FALSE,
+        null_count = FALSE
+      )
+    }
+    if (!inherits(statistics, "polars_parquet_statistics")) {
+      abort("`statistics` must be TRUE, FALSE, or a call to `parquet_statistics()`.")
+    }
+    stat_min <- statistics[["min"]]
+    stat_max <- statistics[["max"]]
+    stat_distinct_count <- statistics[["distinct_count"]]
+    stat_null_count <- statistics[["null_count"]]
 
     lf$sink_parquet(
       path = path,
       compression = compression,
       compression_level = compression_level,
-      statistics = statistics,
+      stat_min = stat_min,
+      stat_max = stat_max,
+      stat_null_count = stat_null_count,
+      stat_distinct_count = stat_distinct_count,
       row_group_size = row_group_size,
       data_page_size = data_page_size,
       maintain_order = maintain_order,
diff --git a/R/utils-various.R b/R/utils-various.R
index c62c2bfe..59cd4743 100644
--- a/R/utils-various.R
+++ b/R/utils-various.R
@@ -77,55 +77,26 @@ make_profile_plot <- function(data, truncate_nodes) {
   plot
 }
 
-#' @noRd
-translate_statistics <- function(statistics, call = caller_env()) {
-  if (length(statistics) != 1 && !is.list(statistics)) {
-    abort("`statistics` must be of length 1.", call = call)
-  }
-  if (is.logical(statistics)) {
-    if (isTRUE(statistics)) {
-      statistics <- list(
-        min = TRUE,
-        max = TRUE,
-        distinct_count = FALSE,
-        null_count = TRUE
-      )
-    } else {
-      statistics <- list(
-        min = FALSE,
-        max = FALSE,
-        distinct_count = FALSE,
-        null_count = FALSE
-      )
-    }
-  } else if (is.character(statistics)) {
-    if (statistics == "full") {
-      statistics <- list(
-        min = TRUE,
-        max = TRUE,
-        distinct_count = TRUE,
-        null_count = TRUE
-      )
-    } else {
-      abort("`statistics` must be TRUE/FALSE, \"full\", or a named list.", call = call)
-    }
-  } else if (is.list(statistics)) {
-    default <- list(
-      min = TRUE,
-      max = TRUE,
-      distinct_count = FALSE,
-      null_count = TRUE
-    )
-    statistics <- utils::modifyList(default, statistics)
-    nms <- names(statistics)
-    invalid <- nms[!nms %in% c("min", "max", "distinct_count", "null_count")]
-    if (length(invalid) > 0) {
-      msg <- paste0("`", invalid, "`", collapse = ", ")
-      abort(
-        paste0("In `statistics`,", msg, "are not valid keys."),
-        call = call
-      )
-    }
-  }
-  statistics
+#' Prepare statistics for writing to Parquet file
+#'
+#' @param min Include stats on the minimum values in the column.
+#' @param max Include stats on the maximum values in the column.
+#' @param distinct_count Include stats on the number of distinct values in the
+#' column.
+#' @param null_count Include stats on the number of null values in the column.
+#'
+#' @export
+parquet_statistics <- function(
+    min = TRUE,
+    max = TRUE,
+    distinct_count = TRUE,
+    null_count = TRUE) {
+  out <- list(
+    min = min,
+    max = max,
+    distinct_count = distinct_count,
+    null_count = null_count
+  )
+  class(out) <- "polars_parquet_statistics"
+  out
 }
diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index 65c1002d..50c45790 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -737,35 +737,6 @@ pub(crate) fn parse_parquet_compression(
     Ok(parsed)
 }
 
-#[cfg(not(target_arch = "wasm32"))]
-impl TryFrom<ListSexp> for Wrap<StatisticsOptions> {
-    type Error = String;
-
-    fn try_from(statistics: ListSexp) -> Result<Self, String> {
-        let hm = statistics
-            .iter()
-            .map(|xi| {
-                let name = xi.0;
-                let value = xi.1.into_typed();
-                let value = match value {
-                    TypedSexp::Logical(val) => {
-                        let tmp = val.to_vec();
-                        *tmp.first().unwrap()
-                    }
-                    _ => unreachable!(),
-                };
-                (name, value)
-            })
-            .collect::<std::collections::HashMap<&str, bool>>();
-        let mut out = StatisticsOptions::default();
-        out.min_value = *hm.get(&"min").unwrap();
-        out.max_value = *hm.get(&"max").unwrap();
-        out.distinct_count = *hm.get(&"distinct_count").unwrap();
-        out.null_count = *hm.get(&"null_count").unwrap();
-        Ok(Wrap(out))
-    }
-}
-
 impl TryFrom<&str> for Wrap<IpcCompression> {
     type Error = String;
 
diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index 5b930c57..e80d5088 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -324,12 +324,16 @@ impl PlRLazyFrame {
         Ok(out.into())
     }
 
+    #[cfg(not(target_arch = "wasm32"))]
     fn sink_parquet(
         &self,
         path: &str,
         compression: &str,
         maintain_order: bool,
-        statistics: ListSexp,
+        stat_min: bool,
+        stat_max: bool,
+        stat_distinct_count: bool,
+        stat_null_count: bool,
         retries: NumericScalar,
         compression_level: Option<NumericScalar>,
         row_group_size: Option<NumericScalar>,
@@ -337,7 +341,12 @@ impl PlRLazyFrame {
         storage_options: Option<StringSexp>,
     ) -> Result<()> {
         let path: PathBuf = path.into();
-        let statistics = <Wrap<StatisticsOptions>>::try_from(statistics)?.0;
+        let statistics = StatisticsOptions {
+            min_value: stat_min,
+            max_value: stat_max,
+            null_count: stat_null_count,
+            distinct_count: stat_distinct_count,
+        };
         let compression_level: Option<i32> = match compression_level {
             Some(x) => Some(x.as_i32()?),
             None => None,

From 56aae32e18c93a12608ddafe5b17fdd0700d0db2 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Thu, 23 Jan 2025 23:24:38 +0100
Subject: [PATCH 15/23] add allow_exact_matches in asof_join

---
 R/000-wrappers.R                  |  8 ++++----
 R/lazyframe-frame.R               | 10 ++++++++--
 src/init.c                        | 12 ++++++------
 src/rust/api.h                    |  4 ++--
 src/rust/src/lazyframe/general.rs |  2 ++
 5 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/R/000-wrappers.R b/R/000-wrappers.R
index ae25ea7c..361b9ec2 100644
--- a/R/000-wrappers.R
+++ b/R/000-wrappers.R
@@ -3440,12 +3440,12 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
 }
 
 `PlRLazyFrame_join_asof` <- function(self) {
-  function(`other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `left_by` = NULL, `right_by` = NULL, `tolerance` = NULL, `tolerance_str` = NULL) {
+  function(`other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `allow_eq`, `left_by` = NULL, `right_by` = NULL, `tolerance` = NULL, `tolerance_str` = NULL) {
     `other` <- .savvy_extract_ptr(`other`, "PlRLazyFrame")
     `left_on` <- .savvy_extract_ptr(`left_on`, "PlRExpr")
     `right_on` <- .savvy_extract_ptr(`right_on`, "PlRExpr")
     `tolerance` <- .savvy_extract_ptr(`tolerance`, "PlRSeries")
-    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_join_asof__impl, `self`, `other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `left_by`, `right_by`, `tolerance`, `tolerance_str`))
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_join_asof__impl, `self`, `other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `allow_eq`, `left_by`, `right_by`, `tolerance`, `tolerance_str`))
   }
 }
 
@@ -3576,8 +3576,8 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
 }
 
 `PlRLazyFrame_sink_parquet` <- function(self) {
-  function(`path`, `compression`, `maintain_order`, `statistics`, `retries`, `compression_level` = NULL, `row_group_size` = NULL, `data_page_size` = NULL, `storage_options` = NULL) {
-    invisible(.Call(savvy_PlRLazyFrame_sink_parquet__impl, `self`, `path`, `compression`, `maintain_order`, `statistics`, `retries`, `compression_level`, `row_group_size`, `data_page_size`, `storage_options`))
+  function(`path`, `compression`, `maintain_order`, `stat_min`, `stat_max`, `stat_distinct_count`, `stat_null_count`, `retries`, `compression_level` = NULL, `row_group_size` = NULL, `data_page_size` = NULL, `storage_options` = NULL) {
+    invisible(.Call(savvy_PlRLazyFrame_sink_parquet__impl, `self`, `path`, `compression`, `maintain_order`, `stat_min`, `stat_max`, `stat_distinct_count`, `stat_null_count`, `retries`, `compression_level`, `row_group_size`, `data_page_size`, `storage_options`))
   }
 }
 
diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index b3abdd3e..26960dfa 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -2540,6 +2540,10 @@ lazyframe__sink_ndjson <- function(
 #' * `FALSE`: Never coalesce join columns.
 #' Note that joining on any other expressions than `col` will turn off
 #' coalescing.
+#' @param allow_exact_matches Whether exact matches are valid join predicates.
+#' If `TRUE` (default), allow matching with the same on value (i.e.
+#' less-than-or-equal-to / greater-than-or-equal-to). Otherwise, don’t match
+#' the same on value (i.e., strictly less-than / strictly greater-than).
 #'
 #' @inheritSection polars_duration_string Polars duration string language
 #' @examples
@@ -2629,7 +2633,8 @@ lazyframe__join_asof <- function(
     tolerance = NULL,
     allow_parallel = TRUE,
     force_parallel = FALSE,
-    coalesce = TRUE) {
+    coalesce = TRUE,
+    allow_exact_matches = TRUE) {
   wrap({
     check_dots_empty0(...)
     strategy <- arg_match0(strategy, values = c("backward", "forward", "nearest"))
@@ -2662,7 +2667,8 @@ lazyframe__join_asof <- function(
       strategy = strategy,
       tolerance = tolerance_num,
       tolerance_str = tolerance_str,
-      coalesce = coalesce
+      coalesce = coalesce,
+      allow_eq = allow_exact_matches
     )
   })
 }
diff --git a/src/init.c b/src/init.c
index 1811da30..ff113c67 100644
--- a/src/init.c
+++ b/src/init.c
@@ -2349,8 +2349,8 @@ SEXP savvy_PlRLazyFrame_join__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__l
     return handle_result(res);
 }
 
-SEXP savvy_PlRLazyFrame_join_asof__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str) {
-    SEXP res = savvy_PlRLazyFrame_join_asof__ffi(self__, c_arg__other, c_arg__left_on, c_arg__right_on, c_arg__allow_parallel, c_arg__force_parallel, c_arg__suffix, c_arg__coalesce, c_arg__strategy, c_arg__left_by, c_arg__right_by, c_arg__tolerance, c_arg__tolerance_str);
+SEXP savvy_PlRLazyFrame_join_asof__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__allow_eq, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str) {
+    SEXP res = savvy_PlRLazyFrame_join_asof__ffi(self__, c_arg__other, c_arg__left_on, c_arg__right_on, c_arg__allow_parallel, c_arg__force_parallel, c_arg__suffix, c_arg__coalesce, c_arg__strategy, c_arg__allow_eq, c_arg__left_by, c_arg__right_by, c_arg__tolerance, c_arg__tolerance_str);
     return handle_result(res);
 }
 
@@ -2474,8 +2474,8 @@ SEXP savvy_PlRLazyFrame_sink_json__impl(SEXP self__, SEXP c_arg__path, SEXP c_ar
     return handle_result(res);
 }
 
-SEXP savvy_PlRLazyFrame_sink_parquet__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__compression, SEXP c_arg__maintain_order, SEXP c_arg__statistics, SEXP c_arg__retries, SEXP c_arg__compression_level, SEXP c_arg__row_group_size, SEXP c_arg__data_page_size, SEXP c_arg__storage_options) {
-    SEXP res = savvy_PlRLazyFrame_sink_parquet__ffi(self__, c_arg__path, c_arg__compression, c_arg__maintain_order, c_arg__statistics, c_arg__retries, c_arg__compression_level, c_arg__row_group_size, c_arg__data_page_size, c_arg__storage_options);
+SEXP savvy_PlRLazyFrame_sink_parquet__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__compression, SEXP c_arg__maintain_order, SEXP c_arg__stat_min, SEXP c_arg__stat_max, SEXP c_arg__stat_distinct_count, SEXP c_arg__stat_null_count, SEXP c_arg__retries, SEXP c_arg__compression_level, SEXP c_arg__row_group_size, SEXP c_arg__data_page_size, SEXP c_arg__storage_options) {
+    SEXP res = savvy_PlRLazyFrame_sink_parquet__ffi(self__, c_arg__path, c_arg__compression, c_arg__maintain_order, c_arg__stat_min, c_arg__stat_max, c_arg__stat_distinct_count, c_arg__stat_null_count, c_arg__retries, c_arg__compression_level, c_arg__row_group_size, c_arg__data_page_size, c_arg__storage_options);
     return handle_result(res);
 }
 
@@ -3234,7 +3234,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_PlRLazyFrame_group_by__impl", (DL_FUNC) &savvy_PlRLazyFrame_group_by__impl, 3},
     {"savvy_PlRLazyFrame_group_by_dynamic__impl", (DL_FUNC) &savvy_PlRLazyFrame_group_by_dynamic__impl, 10},
     {"savvy_PlRLazyFrame_join__impl", (DL_FUNC) &savvy_PlRLazyFrame_join__impl, 11},
-    {"savvy_PlRLazyFrame_join_asof__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_asof__impl, 13},
+    {"savvy_PlRLazyFrame_join_asof__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_asof__impl, 14},
     {"savvy_PlRLazyFrame_join_where__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_where__impl, 4},
     {"savvy_PlRLazyFrame_max__impl", (DL_FUNC) &savvy_PlRLazyFrame_max__impl, 1},
     {"savvy_PlRLazyFrame_mean__impl", (DL_FUNC) &savvy_PlRLazyFrame_mean__impl, 1},
@@ -3259,7 +3259,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_PlRLazyFrame_sink_csv__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_csv__impl, 18},
     {"savvy_PlRLazyFrame_sink_ipc__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_ipc__impl, 6},
     {"savvy_PlRLazyFrame_sink_json__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_json__impl, 5},
-    {"savvy_PlRLazyFrame_sink_parquet__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_parquet__impl, 10},
+    {"savvy_PlRLazyFrame_sink_parquet__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_parquet__impl, 13},
     {"savvy_PlRLazyFrame_slice__impl", (DL_FUNC) &savvy_PlRLazyFrame_slice__impl, 3},
     {"savvy_PlRLazyFrame_sort__impl", (DL_FUNC) &savvy_PlRLazyFrame_sort__impl, 6},
     {"savvy_PlRLazyFrame_sort_by_exprs__impl", (DL_FUNC) &savvy_PlRLazyFrame_sort_by_exprs__impl, 6},
diff --git a/src/rust/api.h b/src/rust/api.h
index 9a7ba25a..524e79a3 100644
--- a/src/rust/api.h
+++ b/src/rust/api.h
@@ -473,7 +473,7 @@ SEXP savvy_PlRLazyFrame_filter__ffi(SEXP self__, SEXP c_arg__predicate);
 SEXP savvy_PlRLazyFrame_group_by__ffi(SEXP self__, SEXP c_arg__by, SEXP c_arg__maintain_order);
 SEXP savvy_PlRLazyFrame_group_by_dynamic__ffi(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__every, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__label, SEXP c_arg__include_boundaries, SEXP c_arg__closed, SEXP c_arg__group_by, SEXP c_arg__start_by);
 SEXP savvy_PlRLazyFrame_join__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__join_nulls, SEXP c_arg__how, SEXP c_arg__suffix, SEXP c_arg__validate, SEXP c_arg__coalesce);
-SEXP savvy_PlRLazyFrame_join_asof__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str);
+SEXP savvy_PlRLazyFrame_join_asof__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__allow_eq, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str);
 SEXP savvy_PlRLazyFrame_join_where__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__predicates, SEXP c_arg__suffix);
 SEXP savvy_PlRLazyFrame_max__ffi(SEXP self__);
 SEXP savvy_PlRLazyFrame_mean__ffi(SEXP self__);
@@ -498,7 +498,7 @@ SEXP savvy_PlRLazyFrame_shift__ffi(SEXP self__, SEXP c_arg__n, SEXP c_arg__fill_
 SEXP savvy_PlRLazyFrame_sink_csv__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__include_bom, SEXP c_arg__include_header, SEXP c_arg__separator, SEXP c_arg__line_terminator, SEXP c_arg__quote_char, SEXP c_arg__maintain_order, SEXP c_arg__batch_size, SEXP c_arg__retries, SEXP c_arg__datetime_format, SEXP c_arg__date_format, SEXP c_arg__time_format, SEXP c_arg__float_scientific, SEXP c_arg__float_precision, SEXP c_arg__null_value, SEXP c_arg__quote_style, SEXP c_arg__storage_options);
 SEXP savvy_PlRLazyFrame_sink_ipc__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__compression, SEXP c_arg__storage_options);
 SEXP savvy_PlRLazyFrame_sink_json__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__storage_options);
-SEXP savvy_PlRLazyFrame_sink_parquet__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__compression, SEXP c_arg__maintain_order, SEXP c_arg__statistics, SEXP c_arg__retries, SEXP c_arg__compression_level, SEXP c_arg__row_group_size, SEXP c_arg__data_page_size, SEXP c_arg__storage_options);
+SEXP savvy_PlRLazyFrame_sink_parquet__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__compression, SEXP c_arg__maintain_order, SEXP c_arg__stat_min, SEXP c_arg__stat_max, SEXP c_arg__stat_distinct_count, SEXP c_arg__stat_null_count, SEXP c_arg__retries, SEXP c_arg__compression_level, SEXP c_arg__row_group_size, SEXP c_arg__data_page_size, SEXP c_arg__storage_options);
 SEXP savvy_PlRLazyFrame_slice__ffi(SEXP self__, SEXP c_arg__offset, SEXP c_arg__len);
 SEXP savvy_PlRLazyFrame_sort__ffi(SEXP self__, SEXP c_arg__by_column, SEXP c_arg__descending, SEXP c_arg__nulls_last, SEXP c_arg__maintain_order, SEXP c_arg__multithreaded);
 SEXP savvy_PlRLazyFrame_sort_by_exprs__ffi(SEXP self__, SEXP c_arg__by, SEXP c_arg__descending, SEXP c_arg__nulls_last, SEXP c_arg__maintain_order, SEXP c_arg__multithreaded);
diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index e80d5088..405e8b99 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -644,6 +644,7 @@ impl PlRLazyFrame {
         suffix: &str,
         coalesce: bool,
         strategy: &str,
+        allow_eq: bool,
         left_by: Option<StringSexp>,
         right_by: Option<StringSexp>,
         tolerance: Option<&PlRSeries>,
@@ -685,6 +686,7 @@ impl PlRLazyFrame {
                 right_by,
                 tolerance,
                 tolerance_str: tolerance_str.map(|s| s.into()),
+                allow_eq,
             }))
             .suffix(suffix)
             .finish()

From eb7920f5848f9fd058121ba6de4b54a29995f54a Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Thu, 23 Jan 2025 23:28:21 +0100
Subject: [PATCH 16/23] redoc [skip ci]

---
 NAMESPACE                      |  1 +
 man/lazyframe__join_asof.Rd    |  8 +++++++-
 man/lazyframe__sink_parquet.Rd |  6 ++----
 man/parquet_statistics.Rd      | 26 ++++++++++++++++++++++++++
 4 files changed, 36 insertions(+), 5 deletions(-)
 create mode 100644 man/parquet_statistics.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 5aa01e8e..aadc2864 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -145,6 +145,7 @@ export(is_polars_expr)
 export(is_polars_lf)
 export(is_polars_selector)
 export(is_polars_series)
+export(parquet_statistics)
 export(pl)
 import(rlang)
 useDynLib(neopolars, .registration = TRUE)
diff --git a/man/lazyframe__join_asof.Rd b/man/lazyframe__join_asof.Rd
index 5c9575d4..748e957d 100644
--- a/man/lazyframe__join_asof.Rd
+++ b/man/lazyframe__join_asof.Rd
@@ -18,7 +18,8 @@ lazyframe__join_asof(
   tolerance = NULL,
   allow_parallel = TRUE,
   force_parallel = FALSE,
-  coalesce = TRUE
+  coalesce = TRUE,
+  allow_exact_matches = TRUE
 )
 }
 \arguments{
@@ -58,6 +59,11 @@ duration string language (see details).}
 Note that joining on any other expressions than \code{col} will turn off
 coalescing.
 }}
+
+\item{allow_exact_matches}{Whether exact matches are valid join predicates.
+If \code{TRUE} (default), allow matching with the same on value (i.e.
+less-than-or-equal-to / greater-than-or-equal-to). Otherwise, don’t match
+the same on value (i.e., strictly less-than / strictly greater-than).}
 }
 \description{
 This is similar to a left-join except that we match on nearest key rather
diff --git a/man/lazyframe__sink_parquet.Rd b/man/lazyframe__sink_parquet.Rd
index fb35e05a..42c86ccb 100644
--- a/man/lazyframe__sink_parquet.Rd
+++ b/man/lazyframe__sink_parquet.Rd
@@ -54,10 +54,8 @@ headers. Possible values:
 \itemize{
 \item \code{TRUE}: enable default set of statistics (default)
 \item \code{FALSE}: disable all statistics
-\item \code{"full"}: calculate and write all available statistics.
-\item A named list where all values must be \code{TRUE} or \code{FALSE}, e.g.
-\code{list(min = TRUE, max = FALSE)}. Statistics available are \code{"min"}, \code{"max"},
-\code{"distinct_count"}, \code{"null_count"}.
+\item A list created via \code{\link[=parquet_statistics]{parquet_statistics()}} to specify which statistics to
+include.
 }}
 
 \item{row_group_size}{Size of the row groups in number of rows. If \code{NULL}
diff --git a/man/parquet_statistics.Rd b/man/parquet_statistics.Rd
new file mode 100644
index 00000000..d6b672d7
--- /dev/null
+++ b/man/parquet_statistics.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-various.R
+\name{parquet_statistics}
+\alias{parquet_statistics}
+\title{Prepare statistics for writing to Parquet file}
+\usage{
+parquet_statistics(
+  min = TRUE,
+  max = TRUE,
+  distinct_count = TRUE,
+  null_count = TRUE
+)
+}
+\arguments{
+\item{min}{Include stats on the minimum values in the column.}
+
+\item{max}{Include stats on the maximum values in the column.}
+
+\item{distinct_count}{Include stats on the number of distinct values in the
+column.}
+
+\item{null_count}{Include stats on the number of null values in the column.}
+}
+\description{
+Prepare statistics for writing to Parquet file
+}

From 93860a50f37ca484332771d34f9338a1480ccb61 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Mon, 3 Feb 2025 11:14:16 +0100
Subject: [PATCH 17/23] remove `sink_*` functions

---
 R/000-wrappers.R                  |  28 ---
 R/lazyframe-frame.R               | 388 ------------------------------
 R/utils-various.R                 |  24 --
 src/init.c                        |  24 --
 src/rust/api.h                    |   4 -
 src/rust/src/lazyframe/general.rs | 238 ------------------
 6 files changed, 706 deletions(-)

diff --git a/R/000-wrappers.R b/R/000-wrappers.R
index 361b9ec2..0b22a653 100644
--- a/R/000-wrappers.R
+++ b/R/000-wrappers.R
@@ -3557,30 +3557,6 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
   }
 }
 
-`PlRLazyFrame_sink_csv` <- function(self) {
-  function(`path`, `include_bom`, `include_header`, `separator`, `line_terminator`, `quote_char`, `maintain_order`, `batch_size`, `retries`, `datetime_format` = NULL, `date_format` = NULL, `time_format` = NULL, `float_scientific` = NULL, `float_precision` = NULL, `null_value` = NULL, `quote_style` = NULL, `storage_options` = NULL) {
-    invisible(.Call(savvy_PlRLazyFrame_sink_csv__impl, `self`, `path`, `include_bom`, `include_header`, `separator`, `line_terminator`, `quote_char`, `maintain_order`, `batch_size`, `retries`, `datetime_format`, `date_format`, `time_format`, `float_scientific`, `float_precision`, `null_value`, `quote_style`, `storage_options`))
-  }
-}
-
-`PlRLazyFrame_sink_ipc` <- function(self) {
-  function(`path`, `maintain_order`, `retries`, `compression` = NULL, `storage_options` = NULL) {
-    invisible(.Call(savvy_PlRLazyFrame_sink_ipc__impl, `self`, `path`, `maintain_order`, `retries`, `compression`, `storage_options`))
-  }
-}
-
-`PlRLazyFrame_sink_json` <- function(self) {
-  function(`path`, `maintain_order`, `retries`, `storage_options` = NULL) {
-    invisible(.Call(savvy_PlRLazyFrame_sink_json__impl, `self`, `path`, `maintain_order`, `retries`, `storage_options`))
-  }
-}
-
-`PlRLazyFrame_sink_parquet` <- function(self) {
-  function(`path`, `compression`, `maintain_order`, `stat_min`, `stat_max`, `stat_distinct_count`, `stat_null_count`, `retries`, `compression_level` = NULL, `row_group_size` = NULL, `data_page_size` = NULL, `storage_options` = NULL) {
-    invisible(.Call(savvy_PlRLazyFrame_sink_parquet__impl, `self`, `path`, `compression`, `maintain_order`, `stat_min`, `stat_max`, `stat_distinct_count`, `stat_null_count`, `retries`, `compression_level`, `row_group_size`, `data_page_size`, `storage_options`))
-  }
-}
-
 `PlRLazyFrame_slice` <- function(self) {
   function(`offset`, `len` = NULL) {
     .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_slice__impl, `self`, `offset`, `len`))
@@ -3713,10 +3689,6 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
   e$`select_seq` <- `PlRLazyFrame_select_seq`(ptr)
   e$`serialize` <- `PlRLazyFrame_serialize`(ptr)
   e$`shift` <- `PlRLazyFrame_shift`(ptr)
-  e$`sink_csv` <- `PlRLazyFrame_sink_csv`(ptr)
-  e$`sink_ipc` <- `PlRLazyFrame_sink_ipc`(ptr)
-  e$`sink_json` <- `PlRLazyFrame_sink_json`(ptr)
-  e$`sink_parquet` <- `PlRLazyFrame_sink_parquet`(ptr)
   e$`slice` <- `PlRLazyFrame_slice`(ptr)
   e$`sort` <- `PlRLazyFrame_sort`(ptr)
   e$`sort_by_exprs` <- `PlRLazyFrame_sort_by_exprs`(ptr)
diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index 26960dfa..884c2b83 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -2118,394 +2118,6 @@ lazyframe__with_row_index <- function(name = "index", offset = 0) {
   })
 }
 
-#' Evaluate the query in streaming mode and write to a Parquet file
-#'
-#' @description
-#' `r lifecycle::badge("experimental")`
-#'
-#' This allows streaming results that are larger than RAM to be written to disk.
-#'
-#' @inheritParams rlang::args_dots_empty
-#' @param path A character. File path to which the file should be written.
-#' @param compression The compression method. Must be one of:
-#' * `"lz4"`: fast compression/decompression.
-#' * `"uncompressed"`
-#' * `"snappy"`: this guarantees that the parquet file will be compatible with
-#'   older parquet readers.
-#' * `"gzip"`
-#' * `"lzo"`
-#' * `"brotli"`
-#' * `"zstd"`: good compression performance.
-#' @param compression_level `NULL` or integer. The level of compression to use.
-#'  Only used if method is one of `"gzip"`, `"brotli"`, or `"zstd"`. Higher
-#' compression means smaller files on disk:
-#'  * `"gzip"`: min-level: 0, max-level: 10.
-#'  * `"brotli"`: min-level: 0, max-level: 11.
-#'  * `"zstd"`: min-level: 1, max-level: 22.
-#' @param statistics Whether statistics should be written to the Parquet
-#' headers. Possible values:
-#' * `TRUE`: enable default set of statistics (default)
-#' * `FALSE`: disable all statistics
-#' * A list created via [parquet_statistics()] to specify which statistics to
-#'   include.
-#' @param row_group_size Size of the row groups in number of rows. If `NULL`
-#' (default), the chunks of the DataFrame are used. Writing in smaller chunks
-#' may reduce memory pressure and improve writing speeds.
-#' @param data_page_size Size of the data page in bytes. If `NULL` (default), it
-#' is set to 1024^2 bytes.
-#' @param maintain_order Maintain the order in which data is processed. Setting
-#' this to `FALSE` will be slightly faster.
-#' @inheritParams lazyframe__collect
-#' @inheritParams pl__scan_parquet
-#'
-#' @return Invisibly returns the input LazyFrame
-#'
-#' @examples
-#' # sink table 'mtcars' from mem to parquet
-#' tmpf <- tempfile()
-#' as_polars_lf(mtcars)$sink_parquet(tmpf)
-#'
-#' # stream a query end-to-end
-#' tmpf2 <- tempfile()
-#' pl$scan_parquet(tmpf)$select(pl$col("cyl") * 2)$sink_parquet(tmpf2)
-#'
-#' # load parquet directly into a DataFrame / memory
-#' pl$scan_parquet(tmpf2)$collect()
-lazyframe__sink_parquet <- function(
-    path,
-    ...,
-    compression = "zstd",
-    compression_level = 3,
-    statistics = TRUE,
-    row_group_size = NULL,
-    data_page_size = NULL,
-    maintain_order = TRUE,
-    type_coercion = TRUE,
-    predicate_pushdown = TRUE,
-    projection_pushdown = TRUE,
-    simplify_expression = TRUE,
-    slice_pushdown = TRUE,
-    no_optimization = FALSE,
-    storage_options = NULL,
-    retries = 2) {
-  wrap({
-    check_dots_empty0(...)
-    compression <- arg_match0(
-      compression,
-      values = c("lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd")
-    )
-
-    if (isTRUE(no_optimization)) {
-      predicate_pushdown <- FALSE
-      projection_pushdown <- FALSE
-      slice_pushdown <- FALSE
-    }
-
-    lf <- self$`_ldf`$optimization_toggle(
-      type_coercion = type_coercion,
-      predicate_pushdown = predicate_pushdown,
-      projection_pushdown = projection_pushdown,
-      simplify_expression = simplify_expression,
-      slice_pushdown = slice_pushdown,
-      comm_subplan_elim = FALSE,
-      comm_subexpr_elim = FALSE,
-      cluster_with_columns = FALSE,
-      streaming = FALSE,
-      `_eager` = FALSE
-    )
-
-    if (isTRUE(statistics)) {
-      statistics <- parquet_statistics(
-        min = TRUE,
-        max = TRUE,
-        distinct_count = TRUE,
-        null_count = TRUE
-      )
-    } else if (isFALSE(statistics)) {
-      statistics <- parquet_statistics(
-        min = FALSE,
-        max = FALSE,
-        distinct_count = FALSE,
-        null_count = FALSE
-      )
-    }
-    statistics <- if (!inherits(statistics, "polars_parquet_statistics")) {
-      abort("`statistics` must be TRUE, FALSE, or a call to `parquet_statistics()`.")
-    }
-    stat_min <- statistics[["min"]]
-    stat_max <- statistics[["max"]]
-    stat_distinct_count <- statistics[["distinct_count"]]
-    stat_null_count <- statistics[["null_count"]]
-
-    lf$sink_parquet(
-      path = path,
-      compression = compression,
-      compression_level = compression_level,
-      stat_min = stat_min,
-      stat_max = stat_max,
-      stat_null_count = stat_null_count,
-      stat_distinct_count = stat_distinct_count,
-      row_group_size = row_group_size,
-      data_page_size = data_page_size,
-      maintain_order = maintain_order,
-      storage_options = storage_options,
-      retries = retries
-    )
-
-    invisible(self)
-  })
-}
-
-#' Evaluate the query in streaming mode and write to an IPC file
-#'
-#' @inherit lazyframe__sink_parquet description params return
-#' @inheritParams rlang::args_dots_empty
-#' @param compression `NULL` or one of:
-#' * `"uncompressed"`: same as `NULL`.
-#' * `"lz4"`: fast compression/decompression.
-#' * `"zstd"`: good compression performance.
-#'
-#' @examples
-#' # sink table 'mtcars' from mem to ipc
-#' tmpf <- tempfile()
-#' as_polars_lf(mtcars)$sink_ipc(tmpf)
-#'
-#' # stream a query end-to-end (not supported yet, https://github.com/pola-rs/polars/issues/1040)
-#' # tmpf2 = tempfile()
-#' # pl$scan_ipc(tmpf)$select(pl$col("cyl") * 2)$sink_ipc(tmpf2)
-#'
-#' # load ipc directly into a DataFrame / memory
-#' # pl$scan_ipc(tmpf2)$collect()
-lazyframe__sink_ipc <- function(
-    path,
-    ...,
-    compression = c("zstd", "lz4", "uncompressed"),
-    maintain_order = TRUE,
-    type_coercion = TRUE,
-    predicate_pushdown = TRUE,
-    projection_pushdown = TRUE,
-    simplify_expression = TRUE,
-    slice_pushdown = TRUE,
-    no_optimization = FALSE,
-    storage_options = NULL,
-    retries = 2) {
-  wrap({
-    check_dots_empty0(...)
-    compression <- compression %||% "uncompressed"
-    compression <- arg_match0(
-      compression,
-      values = c("lz4", "uncompressed", "zstd")
-    )
-
-    if (isTRUE(no_optimization)) {
-      predicate_pushdown <- FALSE
-      projection_pushdown <- FALSE
-      slice_pushdown <- FALSE
-    }
-
-    lf <- self$`_ldf`$optimization_toggle(
-      type_coercion = type_coercion,
-      predicate_pushdown = predicate_pushdown,
-      projection_pushdown = projection_pushdown,
-      simplify_expression = simplify_expression,
-      slice_pushdown = slice_pushdown,
-      comm_subplan_elim = FALSE,
-      comm_subexpr_elim = FALSE,
-      cluster_with_columns = FALSE,
-      streaming = FALSE,
-      `_eager` = FALSE
-    )
-
-    lf$sink_ipc(
-      path = path,
-      compression = compression,
-      maintain_order = maintain_order,
-      storage_options = storage_options,
-      retries = retries
-    )
-
-    invisible(self)
-  })
-}
-
-#' Evaluate the query in streaming mode and write to a CSV file
-#'
-#' @inherit lazyframe__sink_parquet description params return
-#' @inheritParams rlang::args_dots_empty
-#' @param include_bom Logical, whether to include UTF-8 BOM in the CSV output.
-#' @param include_header Logical, hether to include header in the CSV output.
-#' @param separator Separate CSV fields with this symbol.
-#' @param line_terminator String used to end each row.
-#' @param quote_char Byte to use as quoting character.
-#' @param batch_size Number of rows that will be processed per thread.
-#' @param datetime_format A format string, with the specifiers defined by the
-#' [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
-#' Rust crate. If no format specified, the default fractional-second precision
-#' is inferred from the maximum timeunit found in the frame’s Datetime cols (if
-#' any).
-#' @param date_format A format string, with the specifiers defined by the
-#' [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
-#' Rust crate.
-#' @param time_format A format string, with the specifiers defined by the
-#' [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
-#' Rust crate.
-#' @param float_precision Whether to use scientific form always (`TRUE`), never
-#' (`FALSE`), or automatically (`NULL`) for Float32 and Float64 datatypes.
-#' @param null_value A string representing null values (defaulting to the empty
-#' string).
-#' @param quote_style Determines the quoting strategy used. Must be one of:
-#' * `"necessary"` (default): This puts quotes around fields only when
-#'   necessary. They are necessary when fields contain a quote, delimiter or
-#'   record terminator. Quotes are also necessary when writing an empty record
-#'   (which is indistinguishable from a record with one empty field). This is
-#'   the default.
-#' * `"always"`: This puts quotes around every field. Always.
-#' * `"never"`: This never puts quotes around fields, even if that results in
-#'   invalid CSV data (e.g.: by not quoting strings containing the separator).
-#' * `"non_numeric"`: This puts quotes around all fields that are non-numeric.
-#'   Namely, when writing a field that does not parse as a valid float or
-#'   integer, then quotes will be used even if they aren`t strictly necessary.
-#'
-#' @examples
-#' # sink table 'mtcars' from mem to CSV
-#' tmpf <- tempfile()
-#' pl$LazyFrame(mtcars)$sink_csv(tmpf)
-#'
-#' # stream a query end-to-end
-#' tmpf2 <- tempfile()
-#' pl$scan_csv(tmpf)$select(pl$col("cyl") * 2)$sink_csv(tmpf2)
-#'
-#' # load parquet directly into a DataFrame / memory
-#' pl$scan_csv(tmpf2)$collect()
-lazyframe__sink_csv <- function(
-    path,
-    ...,
-    include_bom = FALSE,
-    include_header = TRUE,
-    separator = ",",
-    line_terminator = "\n",
-    quote_char = '"',
-    batch_size = 1024,
-    datetime_format = NULL,
-    date_format = NULL,
-    time_format = NULL,
-    float_precision = NULL,
-    null_value = "",
-    quote_style = "necessary",
-    maintain_order = TRUE,
-    type_coercion = TRUE,
-    predicate_pushdown = TRUE,
-    projection_pushdown = TRUE,
-    simplify_expression = TRUE,
-    slice_pushdown = TRUE,
-    no_optimization = FALSE,
-    storage_options = NULL,
-    retries = 2) {
-  wrap({
-    check_dots_empty0(...)
-    quote_style <- arg_match0(
-      quote_style,
-      values = c("necessary", "always", "never", "non_numeric")
-    )
-
-    if (isTRUE(no_optimization)) {
-      predicate_pushdown <- FALSE
-      projection_pushdown <- FALSE
-      slice_pushdown <- FALSE
-    }
-
-    lf <- self$`_ldf`$optimization_toggle(
-      type_coercion = type_coercion,
-      predicate_pushdown = predicate_pushdown,
-      projection_pushdown = projection_pushdown,
-      simplify_expression = simplify_expression,
-      slice_pushdown = slice_pushdown,
-      comm_subplan_elim = FALSE,
-      comm_subexpr_elim = FALSE,
-      cluster_with_columns = FALSE,
-      streaming = FALSE,
-      `_eager` = FALSE
-    )
-
-    lf$sink_csv(
-      path = path,
-      include_bom = include_bom,
-      include_header = include_header,
-      separator = separator,
-      line_terminator = line_terminator,
-      quote_char = quote_char,
-      batch_size = batch_size,
-      datetime_format = datetime_format,
-      date_format = date_format,
-      time_format = time_format,
-      float_precision = float_precision,
-      null_value = null_value,
-      quote_style = quote_style,
-      maintain_order = maintain_order,
-      storage_options = storage_options,
-      retries = retries
-    )
-
-    invisible(self)
-  })
-}
-
-#' Evaluate the query in streaming mode and write to an NDJSON file
-#'
-#' @inherit lazyframe__sink_parquet description params return
-#' @inheritParams rlang::args_dots_empty
-#'
-#' @examples
-#' # sink table 'mtcars' from mem to NDJSON
-#' tmpf <- tempfile(fileext = ".ndjson")
-#' pl$LazyFrame(mtcars)$sink_ndjson(tmpf)
-#'
-#' # load parquet directly into a DataFrame / memory
-#' pl$scan_ndjson(tmpf)$collect()
-lazyframe__sink_ndjson <- function(
-    path,
-    ...,
-    maintain_order = TRUE,
-    type_coercion = TRUE,
-    predicate_pushdown = TRUE,
-    projection_pushdown = TRUE,
-    simplify_expression = TRUE,
-    slice_pushdown = TRUE,
-    no_optimization = FALSE,
-    storage_options = NULL,
-    retries = 2) {
-  wrap({
-    check_dots_empty0(...)
-    if (isTRUE(no_optimization)) {
-      predicate_pushdown <- FALSE
-      projection_pushdown <- FALSE
-      slice_pushdown <- FALSE
-    }
-
-    lf <- self$`_ldf`$optimization_toggle(
-      type_coercion = type_coercion,
-      predicate_pushdown = predicate_pushdown,
-      projection_pushdown = projection_pushdown,
-      simplify_expression = simplify_expression,
-      slice_pushdown = slice_pushdown,
-      comm_subplan_elim = FALSE,
-      comm_subexpr_elim = FALSE,
-      cluster_with_columns = FALSE,
-      streaming = FALSE,
-      `_eager` = FALSE
-    )
-
-    lf$sink_json(
-      path = path,
-      maintain_order = maintain_order,
-      storage_options = storage_options,
-      retries = retries
-    )
-
-    invisible(self)
-  })
-}
-
 #' Perform joins on nearest keys
 #'
 #' @description
diff --git a/R/utils-various.R b/R/utils-various.R
index 59cd4743..2aab4e6f 100644
--- a/R/utils-various.R
+++ b/R/utils-various.R
@@ -76,27 +76,3 @@ make_profile_plot <- function(data, truncate_nodes) {
   }
   plot
 }
-
-#' Prepare statistics for writing to Parquet file
-#'
-#' @param min Include stats on the minimum values in the column.
-#' @param max Include stats on the maximum values in the column.
-#' @param distinct_count Include stats on the number of distinct values in the
-#' column.
-#' @param null_count Include stats on the number of null values in the column.
-#'
-#' @export
-parquet_statistics <- function(
-    min = TRUE,
-    max = TRUE,
-    distinct_count = TRUE,
-    null_count = TRUE) {
-  out <- list(
-    min = min,
-    max = max,
-    distinct_count = distinct_count,
-    null_count = null_count
-  )
-  class(out) <- "polars_parquet_statistics"
-  out
-}
diff --git a/src/init.c b/src/init.c
index ff113c67..489c3477 100644
--- a/src/init.c
+++ b/src/init.c
@@ -2459,26 +2459,6 @@ SEXP savvy_PlRLazyFrame_shift__impl(SEXP self__, SEXP c_arg__n, SEXP c_arg__fill
     return handle_result(res);
 }
 
-SEXP savvy_PlRLazyFrame_sink_csv__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__include_bom, SEXP c_arg__include_header, SEXP c_arg__separator, SEXP c_arg__line_terminator, SEXP c_arg__quote_char, SEXP c_arg__maintain_order, SEXP c_arg__batch_size, SEXP c_arg__retries, SEXP c_arg__datetime_format, SEXP c_arg__date_format, SEXP c_arg__time_format, SEXP c_arg__float_scientific, SEXP c_arg__float_precision, SEXP c_arg__null_value, SEXP c_arg__quote_style, SEXP c_arg__storage_options) {
-    SEXP res = savvy_PlRLazyFrame_sink_csv__ffi(self__, c_arg__path, c_arg__include_bom, c_arg__include_header, c_arg__separator, c_arg__line_terminator, c_arg__quote_char, c_arg__maintain_order, c_arg__batch_size, c_arg__retries, c_arg__datetime_format, c_arg__date_format, c_arg__time_format, c_arg__float_scientific, c_arg__float_precision, c_arg__null_value, c_arg__quote_style, c_arg__storage_options);
-    return handle_result(res);
-}
-
-SEXP savvy_PlRLazyFrame_sink_ipc__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__compression, SEXP c_arg__storage_options) {
-    SEXP res = savvy_PlRLazyFrame_sink_ipc__ffi(self__, c_arg__path, c_arg__maintain_order, c_arg__retries, c_arg__compression, c_arg__storage_options);
-    return handle_result(res);
-}
-
-SEXP savvy_PlRLazyFrame_sink_json__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__storage_options) {
-    SEXP res = savvy_PlRLazyFrame_sink_json__ffi(self__, c_arg__path, c_arg__maintain_order, c_arg__retries, c_arg__storage_options);
-    return handle_result(res);
-}
-
-SEXP savvy_PlRLazyFrame_sink_parquet__impl(SEXP self__, SEXP c_arg__path, SEXP c_arg__compression, SEXP c_arg__maintain_order, SEXP c_arg__stat_min, SEXP c_arg__stat_max, SEXP c_arg__stat_distinct_count, SEXP c_arg__stat_null_count, SEXP c_arg__retries, SEXP c_arg__compression_level, SEXP c_arg__row_group_size, SEXP c_arg__data_page_size, SEXP c_arg__storage_options) {
-    SEXP res = savvy_PlRLazyFrame_sink_parquet__ffi(self__, c_arg__path, c_arg__compression, c_arg__maintain_order, c_arg__stat_min, c_arg__stat_max, c_arg__stat_distinct_count, c_arg__stat_null_count, c_arg__retries, c_arg__compression_level, c_arg__row_group_size, c_arg__data_page_size, c_arg__storage_options);
-    return handle_result(res);
-}
-
 SEXP savvy_PlRLazyFrame_slice__impl(SEXP self__, SEXP c_arg__offset, SEXP c_arg__len) {
     SEXP res = savvy_PlRLazyFrame_slice__ffi(self__, c_arg__offset, c_arg__len);
     return handle_result(res);
@@ -3256,10 +3236,6 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_PlRLazyFrame_select_seq__impl", (DL_FUNC) &savvy_PlRLazyFrame_select_seq__impl, 2},
     {"savvy_PlRLazyFrame_serialize__impl", (DL_FUNC) &savvy_PlRLazyFrame_serialize__impl, 1},
     {"savvy_PlRLazyFrame_shift__impl", (DL_FUNC) &savvy_PlRLazyFrame_shift__impl, 3},
-    {"savvy_PlRLazyFrame_sink_csv__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_csv__impl, 18},
-    {"savvy_PlRLazyFrame_sink_ipc__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_ipc__impl, 6},
-    {"savvy_PlRLazyFrame_sink_json__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_json__impl, 5},
-    {"savvy_PlRLazyFrame_sink_parquet__impl", (DL_FUNC) &savvy_PlRLazyFrame_sink_parquet__impl, 13},
     {"savvy_PlRLazyFrame_slice__impl", (DL_FUNC) &savvy_PlRLazyFrame_slice__impl, 3},
     {"savvy_PlRLazyFrame_sort__impl", (DL_FUNC) &savvy_PlRLazyFrame_sort__impl, 6},
     {"savvy_PlRLazyFrame_sort_by_exprs__impl", (DL_FUNC) &savvy_PlRLazyFrame_sort_by_exprs__impl, 6},
diff --git a/src/rust/api.h b/src/rust/api.h
index 524e79a3..3ca7b89e 100644
--- a/src/rust/api.h
+++ b/src/rust/api.h
@@ -495,10 +495,6 @@ SEXP savvy_PlRLazyFrame_select__ffi(SEXP self__, SEXP c_arg__exprs);
 SEXP savvy_PlRLazyFrame_select_seq__ffi(SEXP self__, SEXP c_arg__exprs);
 SEXP savvy_PlRLazyFrame_serialize__ffi(SEXP self__);
 SEXP savvy_PlRLazyFrame_shift__ffi(SEXP self__, SEXP c_arg__n, SEXP c_arg__fill_value);
-SEXP savvy_PlRLazyFrame_sink_csv__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__include_bom, SEXP c_arg__include_header, SEXP c_arg__separator, SEXP c_arg__line_terminator, SEXP c_arg__quote_char, SEXP c_arg__maintain_order, SEXP c_arg__batch_size, SEXP c_arg__retries, SEXP c_arg__datetime_format, SEXP c_arg__date_format, SEXP c_arg__time_format, SEXP c_arg__float_scientific, SEXP c_arg__float_precision, SEXP c_arg__null_value, SEXP c_arg__quote_style, SEXP c_arg__storage_options);
-SEXP savvy_PlRLazyFrame_sink_ipc__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__compression, SEXP c_arg__storage_options);
-SEXP savvy_PlRLazyFrame_sink_json__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__maintain_order, SEXP c_arg__retries, SEXP c_arg__storage_options);
-SEXP savvy_PlRLazyFrame_sink_parquet__ffi(SEXP self__, SEXP c_arg__path, SEXP c_arg__compression, SEXP c_arg__maintain_order, SEXP c_arg__stat_min, SEXP c_arg__stat_max, SEXP c_arg__stat_distinct_count, SEXP c_arg__stat_null_count, SEXP c_arg__retries, SEXP c_arg__compression_level, SEXP c_arg__row_group_size, SEXP c_arg__data_page_size, SEXP c_arg__storage_options);
 SEXP savvy_PlRLazyFrame_slice__ffi(SEXP self__, SEXP c_arg__offset, SEXP c_arg__len);
 SEXP savvy_PlRLazyFrame_sort__ffi(SEXP self__, SEXP c_arg__by_column, SEXP c_arg__descending, SEXP c_arg__nulls_last, SEXP c_arg__maintain_order, SEXP c_arg__multithreaded);
 SEXP savvy_PlRLazyFrame_sort_by_exprs__ffi(SEXP self__, SEXP c_arg__by, SEXP c_arg__descending, SEXP c_arg__nulls_last, SEXP c_arg__maintain_order, SEXP c_arg__multithreaded);
diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index 405e8b99..ce162040 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -324,244 +324,6 @@ impl PlRLazyFrame {
         Ok(out.into())
     }
 
-    #[cfg(not(target_arch = "wasm32"))]
-    fn sink_parquet(
-        &self,
-        path: &str,
-        compression: &str,
-        maintain_order: bool,
-        stat_min: bool,
-        stat_max: bool,
-        stat_distinct_count: bool,
-        stat_null_count: bool,
-        retries: NumericScalar,
-        compression_level: Option<NumericScalar>,
-        row_group_size: Option<NumericScalar>,
-        data_page_size: Option<NumericScalar>,
-        storage_options: Option<StringSexp>,
-    ) -> Result<()> {
-        let path: PathBuf = path.into();
-        let statistics = StatisticsOptions {
-            min_value: stat_min,
-            max_value: stat_max,
-            null_count: stat_null_count,
-            distinct_count: stat_distinct_count,
-        };
-        let compression_level: Option<i32> = match compression_level {
-            Some(x) => Some(x.as_i32()?),
-            None => None,
-        };
-        let compression = parse_parquet_compression(compression, compression_level)?;
-        let row_group_size: Option<usize> = match row_group_size {
-            Some(x) => Some(<Wrap<usize>>::try_from(x)?.0),
-            None => None,
-        };
-        let data_page_size: Option<usize> = match data_page_size {
-            Some(x) => Some(<Wrap<usize>>::try_from(x)?.0),
-            None => None,
-        };
-        let retries = <Wrap<usize>>::try_from(retries)?.0;
-
-        let options = ParquetWriteOptions {
-            compression,
-            statistics,
-            row_group_size,
-            data_page_size,
-            maintain_order,
-        };
-        let cloud_options = match storage_options {
-            Some(x) => {
-                let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
-                    RPolarsErr::Other(
-                        "`storage_options` must be a named character vector".to_string(),
-                    )
-                })?;
-                Some(out.0)
-            }
-            None => None,
-        };
-        let cloud_options = {
-            let cloud_options =
-                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
-            Some(cloud_options.with_max_retries(retries))
-        };
-        let _ = self
-            .ldf
-            .clone()
-            .sink_parquet(&path, options, cloud_options)
-            .map_err(RPolarsErr::from);
-        Ok(())
-    }
-
-    fn sink_ipc(
-        &self,
-        path: &str,
-        maintain_order: bool,
-        retries: NumericScalar,
-        compression: Option<&str>,
-        storage_options: Option<StringSexp>,
-    ) -> Result<()> {
-        let path: PathBuf = path.into();
-
-        let retries = <Wrap<usize>>::try_from(retries)?.0;
-        let compression: Option<IpcCompression> = match compression {
-            Some(x) => {
-                if x == "uncompressed" {
-                    None
-                } else {
-                    Some(<Wrap<IpcCompression>>::try_from(x)?.0)
-                }
-            }
-
-            None => None,
-        };
-        let options = IpcWriterOptions {
-            compression,
-            maintain_order,
-        };
-
-        let cloud_options = match storage_options {
-            Some(x) => {
-                let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
-                    RPolarsErr::Other(
-                        "`storage_options` must be a named character vector".to_string(),
-                    )
-                })?;
-                Some(out.0)
-            }
-            None => None,
-        };
-        let cloud_options = {
-            let cloud_options =
-                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
-            Some(cloud_options.with_max_retries(retries))
-        };
-
-        let _ = self
-            .ldf
-            .clone()
-            .sink_ipc(&path, options, cloud_options)
-            .map_err(RPolarsErr::from);
-        Ok(())
-    }
-
-    fn sink_csv(
-        &self,
-        path: &str,
-        include_bom: bool,
-        include_header: bool,
-        separator: &str,
-        line_terminator: &str,
-        quote_char: &str,
-        maintain_order: bool,
-        batch_size: NumericScalar,
-        retries: NumericScalar,
-        datetime_format: Option<&str>,
-        date_format: Option<&str>,
-        time_format: Option<&str>,
-        float_scientific: Option<bool>,
-        float_precision: Option<NumericScalar>,
-        null_value: Option<&str>,
-        quote_style: Option<&str>,
-        storage_options: Option<StringSexp>,
-    ) -> Result<()> {
-        let path: PathBuf = path.into();
-        let quote_style = match quote_style {
-            Some(x) => <Wrap<QuoteStyle>>::try_from(x)?.0,
-            None => QuoteStyle::default(),
-        };
-        let retries = <Wrap<usize>>::try_from(retries)?.0;
-        let null_value = null_value
-            .map(|x| x.to_string())
-            .unwrap_or(SerializeOptions::default().null);
-        let batch_size = <Wrap<NonZeroUsize>>::try_from(batch_size)?.0;
-        let float_precision = match float_precision {
-            Some(x) => Some(<Wrap<usize>>::try_from(x)?.0),
-            None => None,
-        };
-        let separator = <Wrap<u8>>::try_from(separator)?.0;
-        let quote_char = <Wrap<u8>>::try_from(quote_char)?.0;
-
-        let serialize_options = SerializeOptions {
-            date_format: date_format.map(|x| x.to_string()),
-            time_format: time_format.map(|x| x.to_string()),
-            datetime_format: datetime_format.map(|x| x.to_string()),
-            float_scientific,
-            float_precision,
-            separator,
-            quote_char,
-            null: null_value.to_string(),
-            line_terminator: line_terminator.to_string(),
-            quote_style,
-        };
-
-        let options = CsvWriterOptions {
-            include_bom,
-            include_header,
-            maintain_order,
-            batch_size,
-            serialize_options,
-        };
-        let cloud_options = match storage_options {
-            Some(x) => {
-                let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
-                    RPolarsErr::Other(
-                        "`storage_options` must be a named character vector".to_string(),
-                    )
-                })?;
-                Some(out.0)
-            }
-            None => None,
-        };
-        let cloud_options = {
-            let cloud_options =
-                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
-            Some(cloud_options.with_max_retries(retries))
-        };
-
-        let _ = self
-            .ldf
-            .clone()
-            .sink_csv(&path, options, cloud_options)
-            .map_err(RPolarsErr::from);
-        Ok(())
-    }
-
-    fn sink_json(
-        &self,
-        path: &str,
-        maintain_order: bool,
-        retries: NumericScalar,
-        storage_options: Option<StringSexp>,
-    ) -> Result<()> {
-        let path: PathBuf = path.into();
-        let retries = <Wrap<usize>>::try_from(retries)?.0;
-        let options = JsonWriterOptions { maintain_order };
-        let cloud_options = match storage_options {
-            Some(x) => {
-                let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
-                    RPolarsErr::Other(
-                        "`storage_options` must be a named character vector".to_string(),
-                    )
-                })?;
-                Some(out.0)
-            }
-            None => None,
-        };
-        let cloud_options = {
-            let cloud_options =
-                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
-            Some(cloud_options.with_max_retries(retries))
-        };
-
-        let _ = self
-            .ldf
-            .clone()
-            .sink_json(&path, options, cloud_options)
-            .map_err(RPolarsErr::from);
-        Ok(())
-    }
-
     fn serialize(&self) -> Result<Sexp> {
         let dump = serde_json::to_string(&self.ldf.logical_plan)
             .map_err(|err| RPolarsErr::Other(err.to_string()))?;

From d2727469d2ad317745cfe64f8c714ae36158eece Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Mon, 3 Feb 2025 11:29:33 +0100
Subject: [PATCH 18/23] remove parse_parquet_compression()

---
 src/rust/src/conversion/mod.rs | 39 ----------------------------------
 1 file changed, 39 deletions(-)

diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index 50c45790..e25abeb9 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -698,45 +698,6 @@ impl TryFrom<&str> for Wrap<StartBy> {
     }
 }
 
-#[cfg(not(target_arch = "wasm32"))]
-pub(crate) fn parse_parquet_compression(
-    compression: &str,
-    compression_level: Option<i32>,
-) -> savvy::Result<ParquetCompression> {
-    let parsed = match compression {
-        "uncompressed" => ParquetCompression::Uncompressed,
-        "snappy" => ParquetCompression::Snappy,
-        "gzip" => ParquetCompression::Gzip(
-            compression_level
-                .map(|lvl| {
-                    GzipLevel::try_new(lvl as u8)
-                        .map_err(|e| savvy::Error::new(format!("{e:?}").as_str()))
-                })
-                .transpose()?,
-        ),
-        "lzo" => ParquetCompression::Lzo,
-        "brotli" => ParquetCompression::Brotli(
-            compression_level
-                .map(|lvl| {
-                    BrotliLevel::try_new(lvl as u32)
-                        .map_err(|e| savvy::Error::new(format!("{e:?}").as_str()))
-                })
-                .transpose()?,
-        ),
-        "lz4" => ParquetCompression::Lz4Raw,
-        "zstd" => ParquetCompression::Zstd(
-            compression_level
-                .map(|lvl| {
-                    ZstdLevel::try_new(lvl)
-                        .map_err(|e| savvy::Error::new(format!("{e:?}").as_str()))
-                })
-                .transpose()?,
-        ),
-        _ => return Err(RPolarsErr::Other("unreachable".to_string()).into()),
-    };
-    Ok(parsed)
-}
-
 impl TryFrom<&str> for Wrap<IpcCompression> {
     type Error = String;
 

From 9050ab1cbe63c69ac68c1e2bb3a60a59af0a1769 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Mon, 3 Feb 2025 11:30:11 +0100
Subject: [PATCH 19/23] add arg `check_sortedness` in `join_asof()`

---
 R/000-wrappers.R                  | 4 ++--
 R/lazyframe-frame.R               | 6 +++++-
 src/init.c                        | 6 +++---
 src/rust/api.h                    | 2 +-
 src/rust/src/lazyframe/general.rs | 2 ++
 5 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/R/000-wrappers.R b/R/000-wrappers.R
index 0b22a653..e9ad9a79 100644
--- a/R/000-wrappers.R
+++ b/R/000-wrappers.R
@@ -3440,12 +3440,12 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_neopolars__sealed")
 }
 
 `PlRLazyFrame_join_asof` <- function(self) {
-  function(`other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `allow_eq`, `left_by` = NULL, `right_by` = NULL, `tolerance` = NULL, `tolerance_str` = NULL) {
+  function(`other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `allow_eq`, `check_sortedness`, `left_by` = NULL, `right_by` = NULL, `tolerance` = NULL, `tolerance_str` = NULL) {
     `other` <- .savvy_extract_ptr(`other`, "PlRLazyFrame")
     `left_on` <- .savvy_extract_ptr(`left_on`, "PlRExpr")
     `right_on` <- .savvy_extract_ptr(`right_on`, "PlRExpr")
     `tolerance` <- .savvy_extract_ptr(`tolerance`, "PlRSeries")
-    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_join_asof__impl, `self`, `other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `allow_eq`, `left_by`, `right_by`, `tolerance`, `tolerance_str`))
+    .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_join_asof__impl, `self`, `other`, `left_on`, `right_on`, `allow_parallel`, `force_parallel`, `suffix`, `coalesce`, `strategy`, `allow_eq`, `check_sortedness`, `left_by`, `right_by`, `tolerance`, `tolerance_str`))
   }
 }
 
diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index 884c2b83..35634bd7 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -2156,6 +2156,9 @@ lazyframe__with_row_index <- function(name = "index", offset = 0) {
 #' If `TRUE` (default), allow matching with the same on value (i.e.
 #' less-than-or-equal-to / greater-than-or-equal-to). Otherwise, don’t match
 #' the same on value (i.e., strictly less-than / strictly greater-than).
+#' @param check_sortedness Check the sortedness of the asof keys. If the keys
+#' are not sorted, polars will error, or raise a warning if the `by` argument
+#' is provided. This might become a hard error in the future.
 #'
 #' @inheritSection polars_duration_string Polars duration string language
 #' @examples
@@ -2246,7 +2249,8 @@ lazyframe__join_asof <- function(
     allow_parallel = TRUE,
     force_parallel = FALSE,
     coalesce = TRUE,
-    allow_exact_matches = TRUE) {
+    allow_exact_matches = TRUE,
+    check_sortedness = TRUE) {
   wrap({
     check_dots_empty0(...)
     strategy <- arg_match0(strategy, values = c("backward", "forward", "nearest"))
diff --git a/src/init.c b/src/init.c
index 489c3477..3f6dd4b9 100644
--- a/src/init.c
+++ b/src/init.c
@@ -2349,8 +2349,8 @@ SEXP savvy_PlRLazyFrame_join__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__l
     return handle_result(res);
 }
 
-SEXP savvy_PlRLazyFrame_join_asof__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__allow_eq, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str) {
-    SEXP res = savvy_PlRLazyFrame_join_asof__ffi(self__, c_arg__other, c_arg__left_on, c_arg__right_on, c_arg__allow_parallel, c_arg__force_parallel, c_arg__suffix, c_arg__coalesce, c_arg__strategy, c_arg__allow_eq, c_arg__left_by, c_arg__right_by, c_arg__tolerance, c_arg__tolerance_str);
+SEXP savvy_PlRLazyFrame_join_asof__impl(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__allow_eq, SEXP c_arg__check_sortedness, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str) {
+    SEXP res = savvy_PlRLazyFrame_join_asof__ffi(self__, c_arg__other, c_arg__left_on, c_arg__right_on, c_arg__allow_parallel, c_arg__force_parallel, c_arg__suffix, c_arg__coalesce, c_arg__strategy, c_arg__allow_eq, c_arg__check_sortedness, c_arg__left_by, c_arg__right_by, c_arg__tolerance, c_arg__tolerance_str);
     return handle_result(res);
 }
 
@@ -3214,7 +3214,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_PlRLazyFrame_group_by__impl", (DL_FUNC) &savvy_PlRLazyFrame_group_by__impl, 3},
     {"savvy_PlRLazyFrame_group_by_dynamic__impl", (DL_FUNC) &savvy_PlRLazyFrame_group_by_dynamic__impl, 10},
     {"savvy_PlRLazyFrame_join__impl", (DL_FUNC) &savvy_PlRLazyFrame_join__impl, 11},
-    {"savvy_PlRLazyFrame_join_asof__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_asof__impl, 14},
+    {"savvy_PlRLazyFrame_join_asof__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_asof__impl, 15},
     {"savvy_PlRLazyFrame_join_where__impl", (DL_FUNC) &savvy_PlRLazyFrame_join_where__impl, 4},
     {"savvy_PlRLazyFrame_max__impl", (DL_FUNC) &savvy_PlRLazyFrame_max__impl, 1},
     {"savvy_PlRLazyFrame_mean__impl", (DL_FUNC) &savvy_PlRLazyFrame_mean__impl, 1},
diff --git a/src/rust/api.h b/src/rust/api.h
index 3ca7b89e..cb1b8ddd 100644
--- a/src/rust/api.h
+++ b/src/rust/api.h
@@ -473,7 +473,7 @@ SEXP savvy_PlRLazyFrame_filter__ffi(SEXP self__, SEXP c_arg__predicate);
 SEXP savvy_PlRLazyFrame_group_by__ffi(SEXP self__, SEXP c_arg__by, SEXP c_arg__maintain_order);
 SEXP savvy_PlRLazyFrame_group_by_dynamic__ffi(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__every, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__label, SEXP c_arg__include_boundaries, SEXP c_arg__closed, SEXP c_arg__group_by, SEXP c_arg__start_by);
 SEXP savvy_PlRLazyFrame_join__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__join_nulls, SEXP c_arg__how, SEXP c_arg__suffix, SEXP c_arg__validate, SEXP c_arg__coalesce);
-SEXP savvy_PlRLazyFrame_join_asof__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__allow_eq, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str);
+SEXP savvy_PlRLazyFrame_join_asof__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__left_on, SEXP c_arg__right_on, SEXP c_arg__allow_parallel, SEXP c_arg__force_parallel, SEXP c_arg__suffix, SEXP c_arg__coalesce, SEXP c_arg__strategy, SEXP c_arg__allow_eq, SEXP c_arg__check_sortedness, SEXP c_arg__left_by, SEXP c_arg__right_by, SEXP c_arg__tolerance, SEXP c_arg__tolerance_str);
 SEXP savvy_PlRLazyFrame_join_where__ffi(SEXP self__, SEXP c_arg__other, SEXP c_arg__predicates, SEXP c_arg__suffix);
 SEXP savvy_PlRLazyFrame_max__ffi(SEXP self__);
 SEXP savvy_PlRLazyFrame_mean__ffi(SEXP self__);
diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index ce162040..74aad3e7 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -407,6 +407,7 @@ impl PlRLazyFrame {
         coalesce: bool,
         strategy: &str,
         allow_eq: bool,
+        check_sortedness: bool,
         left_by: Option<StringSexp>,
         right_by: Option<StringSexp>,
         tolerance: Option<&PlRSeries>,
@@ -449,6 +450,7 @@ impl PlRLazyFrame {
                 tolerance,
                 tolerance_str: tolerance_str.map(|s| s.into()),
                 allow_eq,
+                check_sortedness,
             }))
             .suffix(suffix)
             .finish()

From 73277f0c2e59f74b8d6e6535d58b101a4ad9f441 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Mon, 3 Feb 2025 11:30:18 +0100
Subject: [PATCH 20/23] redoc

---
 NAMESPACE                      |   1 -
 man/lazyframe__join_asof.Rd    |   7 +-
 man/lazyframe__sink_csv.Rd     | 137 ---------------------------------
 man/lazyframe__sink_ipc.Rd     |  86 ---------------------
 man/lazyframe__sink_ndjson.Rd  |  74 ------------------
 man/lazyframe__sink_parquet.Rd | 120 -----------------------------
 man/parquet_statistics.Rd      |  26 -------
 7 files changed, 6 insertions(+), 445 deletions(-)
 delete mode 100644 man/lazyframe__sink_csv.Rd
 delete mode 100644 man/lazyframe__sink_ipc.Rd
 delete mode 100644 man/lazyframe__sink_ndjson.Rd
 delete mode 100644 man/lazyframe__sink_parquet.Rd
 delete mode 100644 man/parquet_statistics.Rd

diff --git a/NAMESPACE b/NAMESPACE
index aadc2864..5aa01e8e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -145,7 +145,6 @@ export(is_polars_expr)
 export(is_polars_lf)
 export(is_polars_selector)
 export(is_polars_series)
-export(parquet_statistics)
 export(pl)
 import(rlang)
 useDynLib(neopolars, .registration = TRUE)
diff --git a/man/lazyframe__join_asof.Rd b/man/lazyframe__join_asof.Rd
index 748e957d..a0e5b054 100644
--- a/man/lazyframe__join_asof.Rd
+++ b/man/lazyframe__join_asof.Rd
@@ -19,7 +19,8 @@ lazyframe__join_asof(
   allow_parallel = TRUE,
   force_parallel = FALSE,
   coalesce = TRUE,
-  allow_exact_matches = TRUE
+  allow_exact_matches = TRUE,
+  check_sortedness = TRUE
 )
 }
 \arguments{
@@ -64,6 +65,10 @@ coalescing.
 If \code{TRUE} (default), allow matching with the same on value (i.e.
 less-than-or-equal-to / greater-than-or-equal-to). Otherwise, don’t match
 the same on value (i.e., strictly less-than / strictly greater-than).}
+
+\item{check_sortedness}{Check the sortedness of the asof keys. If the keys
+are not sorted, polars will error, or raise a warning if the \code{by} argument
+is provided. This might become a hard error in the future.}
 }
 \description{
 This is similar to a left-join except that we match on nearest key rather
diff --git a/man/lazyframe__sink_csv.Rd b/man/lazyframe__sink_csv.Rd
deleted file mode 100644
index 7810747f..00000000
--- a/man/lazyframe__sink_csv.Rd
+++ /dev/null
@@ -1,137 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lazyframe-frame.R
-\name{lazyframe__sink_csv}
-\alias{lazyframe__sink_csv}
-\title{Evaluate the query in streaming mode and write to a CSV file}
-\usage{
-lazyframe__sink_csv(
-  path,
-  ...,
-  include_bom = FALSE,
-  include_header = TRUE,
-  separator = ",",
-  line_terminator = "\\n",
-  quote_char = "\\"",
-  batch_size = 1024,
-  datetime_format = NULL,
-  date_format = NULL,
-  time_format = NULL,
-  float_precision = NULL,
-  null_value = "",
-  quote_style = "necessary",
-  maintain_order = TRUE,
-  type_coercion = TRUE,
-  predicate_pushdown = TRUE,
-  projection_pushdown = TRUE,
-  simplify_expression = TRUE,
-  slice_pushdown = TRUE,
-  no_optimization = FALSE,
-  storage_options = NULL,
-  retries = 2
-)
-}
-\arguments{
-\item{path}{A character. File path to which the file should be written.}
-
-\item{...}{These dots are for future extensions and must be empty.}
-
-\item{include_bom}{Logical, whether to include UTF-8 BOM in the CSV output.}
-
-\item{include_header}{Logical, hether to include header in the CSV output.}
-
-\item{separator}{Separate CSV fields with this symbol.}
-
-\item{line_terminator}{String used to end each row.}
-
-\item{quote_char}{Byte to use as quoting character.}
-
-\item{batch_size}{Number of rows that will be processed per thread.}
-
-\item{datetime_format}{A format string, with the specifiers defined by the
-\href{https://docs.rs/chrono/latest/chrono/format/strftime/index.html}{chrono}
-Rust crate. If no format specified, the default fractional-second precision
-is inferred from the maximum timeunit found in the frame’s Datetime cols (if
-any).}
-
-\item{date_format}{A format string, with the specifiers defined by the
-\href{https://docs.rs/chrono/latest/chrono/format/strftime/index.html}{chrono}
-Rust crate.}
-
-\item{time_format}{A format string, with the specifiers defined by the
-\href{https://docs.rs/chrono/latest/chrono/format/strftime/index.html}{chrono}
-Rust crate.}
-
-\item{float_precision}{Whether to use scientific form always (\code{TRUE}), never
-(\code{FALSE}), or automatically (\code{NULL}) for Float32 and Float64 datatypes.}
-
-\item{null_value}{A string representing null values (defaulting to the empty
-string).}
-
-\item{quote_style}{Determines the quoting strategy used. Must be one of:
-\itemize{
-\item \code{"necessary"} (default): This puts quotes around fields only when
-necessary. They are necessary when fields contain a quote, delimiter or
-record terminator. Quotes are also necessary when writing an empty record
-(which is indistinguishable from a record with one empty field). This is
-the default.
-\item \code{"always"}: This puts quotes around every field. Always.
-\item \code{"never"}: This never puts quotes around fields, even if that results in
-invalid CSV data (e.g.: by not quoting strings containing the separator).
-\item \code{"non_numeric"}: This puts quotes around all fields that are non-numeric.
-Namely, when writing a field that does not parse as a valid float or
-integer, then quotes will be used even if they aren`t strictly necessary.
-}}
-
-\item{maintain_order}{Maintain the order in which data is processed. Setting
-this to \code{FALSE} will be slightly faster.}
-
-\item{type_coercion}{A logical, indicats type coercion optimization.}
-
-\item{predicate_pushdown}{A logical, indicats predicate pushdown optimization.}
-
-\item{projection_pushdown}{A logical, indicats projection pushdown optimization.}
-
-\item{simplify_expression}{A logical, indicats simplify expression optimization.}
-
-\item{slice_pushdown}{A logical, indicats slice pushdown optimization.}
-
-\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
-
-\item{storage_options}{Named vector containing options that indicate how to
-connect to a cloud provider. The cloud providers currently supported are
-AWS, GCP, and Azure.
-See supported keys here:
-\itemize{
-\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws}
-\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp}
-\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure}
-\item Hugging Face (\verb{hf://}): Accepts an API key under the token parameter
-\code{c(token = YOUR_TOKEN)} or by setting the \code{HF_TOKEN} environment
-variable.
-}
-
-If \code{storage_options} is not provided, Polars will try to infer the
-information from environment variables.}
-
-\item{retries}{Number of retries if accessing a cloud instance fails.}
-}
-\value{
-Invisibly returns the input LazyFrame
-}
-\description{
-\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
-
-This allows streaming results that are larger than RAM to be written to disk.
-}
-\examples{
-# sink table 'mtcars' from mem to CSV
-tmpf <- tempfile()
-pl$LazyFrame(mtcars)$sink_csv(tmpf)
-
-# stream a query end-to-end
-tmpf2 <- tempfile()
-pl$scan_csv(tmpf)$select(pl$col("cyl") * 2)$sink_csv(tmpf2)
-
-# load parquet directly into a DataFrame / memory
-pl$scan_csv(tmpf2)$collect()
-}
diff --git a/man/lazyframe__sink_ipc.Rd b/man/lazyframe__sink_ipc.Rd
deleted file mode 100644
index 2910291f..00000000
--- a/man/lazyframe__sink_ipc.Rd
+++ /dev/null
@@ -1,86 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lazyframe-frame.R
-\name{lazyframe__sink_ipc}
-\alias{lazyframe__sink_ipc}
-\title{Evaluate the query in streaming mode and write to an IPC file}
-\usage{
-lazyframe__sink_ipc(
-  path,
-  ...,
-  compression = c("zstd", "lz4", "uncompressed"),
-  maintain_order = TRUE,
-  type_coercion = TRUE,
-  predicate_pushdown = TRUE,
-  projection_pushdown = TRUE,
-  simplify_expression = TRUE,
-  slice_pushdown = TRUE,
-  no_optimization = FALSE,
-  storage_options = NULL,
-  retries = 2
-)
-}
-\arguments{
-\item{path}{A character. File path to which the file should be written.}
-
-\item{...}{These dots are for future extensions and must be empty.}
-
-\item{compression}{\code{NULL} or one of:
-\itemize{
-\item \code{"uncompressed"}: same as \code{NULL}.
-\item \code{"lz4"}: fast compression/decompression.
-\item \code{"zstd"}: good compression performance.
-}}
-
-\item{maintain_order}{Maintain the order in which data is processed. Setting
-this to \code{FALSE} will be slightly faster.}
-
-\item{type_coercion}{A logical, indicats type coercion optimization.}
-
-\item{predicate_pushdown}{A logical, indicats predicate pushdown optimization.}
-
-\item{projection_pushdown}{A logical, indicats projection pushdown optimization.}
-
-\item{simplify_expression}{A logical, indicats simplify expression optimization.}
-
-\item{slice_pushdown}{A logical, indicats slice pushdown optimization.}
-
-\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
-
-\item{storage_options}{Named vector containing options that indicate how to
-connect to a cloud provider. The cloud providers currently supported are
-AWS, GCP, and Azure.
-See supported keys here:
-\itemize{
-\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws}
-\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp}
-\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure}
-\item Hugging Face (\verb{hf://}): Accepts an API key under the token parameter
-\code{c(token = YOUR_TOKEN)} or by setting the \code{HF_TOKEN} environment
-variable.
-}
-
-If \code{storage_options} is not provided, Polars will try to infer the
-information from environment variables.}
-
-\item{retries}{Number of retries if accessing a cloud instance fails.}
-}
-\value{
-Invisibly returns the input LazyFrame
-}
-\description{
-\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
-
-This allows streaming results that are larger than RAM to be written to disk.
-}
-\examples{
-# sink table 'mtcars' from mem to ipc
-tmpf <- tempfile()
-as_polars_lf(mtcars)$sink_ipc(tmpf)
-
-# stream a query end-to-end (not supported yet, https://github.com/pola-rs/polars/issues/1040)
-# tmpf2 = tempfile()
-# pl$scan_ipc(tmpf)$select(pl$col("cyl") * 2)$sink_ipc(tmpf2)
-
-# load ipc directly into a DataFrame / memory
-# pl$scan_ipc(tmpf2)$collect()
-}
diff --git a/man/lazyframe__sink_ndjson.Rd b/man/lazyframe__sink_ndjson.Rd
deleted file mode 100644
index 11ad024f..00000000
--- a/man/lazyframe__sink_ndjson.Rd
+++ /dev/null
@@ -1,74 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lazyframe-frame.R
-\name{lazyframe__sink_ndjson}
-\alias{lazyframe__sink_ndjson}
-\title{Evaluate the query in streaming mode and write to an NDJSON file}
-\usage{
-lazyframe__sink_ndjson(
-  path,
-  ...,
-  maintain_order = TRUE,
-  type_coercion = TRUE,
-  predicate_pushdown = TRUE,
-  projection_pushdown = TRUE,
-  simplify_expression = TRUE,
-  slice_pushdown = TRUE,
-  no_optimization = FALSE,
-  storage_options = NULL,
-  retries = 2
-)
-}
-\arguments{
-\item{path}{A character. File path to which the file should be written.}
-
-\item{...}{These dots are for future extensions and must be empty.}
-
-\item{maintain_order}{Maintain the order in which data is processed. Setting
-this to \code{FALSE} will be slightly faster.}
-
-\item{type_coercion}{A logical, indicats type coercion optimization.}
-
-\item{predicate_pushdown}{A logical, indicats predicate pushdown optimization.}
-
-\item{projection_pushdown}{A logical, indicats projection pushdown optimization.}
-
-\item{simplify_expression}{A logical, indicats simplify expression optimization.}
-
-\item{slice_pushdown}{A logical, indicats slice pushdown optimization.}
-
-\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
-
-\item{storage_options}{Named vector containing options that indicate how to
-connect to a cloud provider. The cloud providers currently supported are
-AWS, GCP, and Azure.
-See supported keys here:
-\itemize{
-\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws}
-\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp}
-\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure}
-\item Hugging Face (\verb{hf://}): Accepts an API key under the token parameter
-\code{c(token = YOUR_TOKEN)} or by setting the \code{HF_TOKEN} environment
-variable.
-}
-
-If \code{storage_options} is not provided, Polars will try to infer the
-information from environment variables.}
-
-\item{retries}{Number of retries if accessing a cloud instance fails.}
-}
-\value{
-Invisibly returns the input LazyFrame
-}
-\description{
-\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
-
-This allows streaming results that are larger than RAM to be written to disk.
-}
-\examples{
-# sink table 'mtcars' from mem to NDJSON
-tmpf <- tempfile(fileext = ".ndjson")
-pl$LazyFrame(mtcars)$sink_ndjson(tmpf)
-
-# load parquet directly into a DataFrame / memory
-pl$scan_ndjson(tmpf)$collect()
-}
diff --git a/man/lazyframe__sink_parquet.Rd b/man/lazyframe__sink_parquet.Rd
deleted file mode 100644
index 42c86ccb..00000000
--- a/man/lazyframe__sink_parquet.Rd
+++ /dev/null
@@ -1,120 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lazyframe-frame.R
-\name{lazyframe__sink_parquet}
-\alias{lazyframe__sink_parquet}
-\title{Evaluate the query in streaming mode and write to a Parquet file}
-\usage{
-lazyframe__sink_parquet(
-  path,
-  ...,
-  compression = "zstd",
-  compression_level = 3,
-  statistics = TRUE,
-  row_group_size = NULL,
-  data_page_size = NULL,
-  maintain_order = TRUE,
-  type_coercion = TRUE,
-  predicate_pushdown = TRUE,
-  projection_pushdown = TRUE,
-  simplify_expression = TRUE,
-  slice_pushdown = TRUE,
-  no_optimization = FALSE,
-  storage_options = NULL,
-  retries = 2
-)
-}
-\arguments{
-\item{path}{A character. File path to which the file should be written.}
-
-\item{...}{These dots are for future extensions and must be empty.}
-
-\item{compression}{The compression method. Must be one of:
-\itemize{
-\item \code{"lz4"}: fast compression/decompression.
-\item \code{"uncompressed"}
-\item \code{"snappy"}: this guarantees that the parquet file will be compatible with
-older parquet readers.
-\item \code{"gzip"}
-\item \code{"lzo"}
-\item \code{"brotli"}
-\item \code{"zstd"}: good compression performance.
-}}
-
-\item{compression_level}{\code{NULL} or integer. The level of compression to use.
-Only used if method is one of \code{"gzip"}, \code{"brotli"}, or \code{"zstd"}. Higher
-compression means smaller files on disk:
-\itemize{
-\item \code{"gzip"}: min-level: 0, max-level: 10.
-\item \code{"brotli"}: min-level: 0, max-level: 11.
-\item \code{"zstd"}: min-level: 1, max-level: 22.
-}}
-
-\item{statistics}{Whether statistics should be written to the Parquet
-headers. Possible values:
-\itemize{
-\item \code{TRUE}: enable default set of statistics (default)
-\item \code{FALSE}: disable all statistics
-\item A list created via \code{\link[=parquet_statistics]{parquet_statistics()}} to specify which statistics to
-include.
-}}
-
-\item{row_group_size}{Size of the row groups in number of rows. If \code{NULL}
-(default), the chunks of the DataFrame are used. Writing in smaller chunks
-may reduce memory pressure and improve writing speeds.}
-
-\item{data_page_size}{Size of the data page in bytes. If \code{NULL} (default), it
-is set to 1024^2 bytes.}
-
-\item{maintain_order}{Maintain the order in which data is processed. Setting
-this to \code{FALSE} will be slightly faster.}
-
-\item{type_coercion}{A logical, indicats type coercion optimization.}
-
-\item{predicate_pushdown}{A logical, indicats predicate pushdown optimization.}
-
-\item{projection_pushdown}{A logical, indicats projection pushdown optimization.}
-
-\item{simplify_expression}{A logical, indicats simplify expression optimization.}
-
-\item{slice_pushdown}{A logical, indicats slice pushdown optimization.}
-
-\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.}
-
-\item{storage_options}{Named vector containing options that indicate how to
-connect to a cloud provider. The cloud providers currently supported are
-AWS, GCP, and Azure.
-See supported keys here:
-\itemize{
-\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws}
-\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp}
-\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure}
-\item Hugging Face (\verb{hf://}): Accepts an API key under the token parameter
-\code{c(token = YOUR_TOKEN)} or by setting the \code{HF_TOKEN} environment
-variable.
-}
-
-If \code{storage_options} is not provided, Polars will try to infer the
-information from environment variables.}
-
-\item{retries}{Number of retries if accessing a cloud instance fails.}
-}
-\value{
-Invisibly returns the input LazyFrame
-}
-\description{
-\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
-
-This allows streaming results that are larger than RAM to be written to disk.
-}
-\examples{
-# sink table 'mtcars' from mem to parquet
-tmpf <- tempfile()
-as_polars_lf(mtcars)$sink_parquet(tmpf)
-
-# stream a query end-to-end
-tmpf2 <- tempfile()
-pl$scan_parquet(tmpf)$select(pl$col("cyl") * 2)$sink_parquet(tmpf2)
-
-# load parquet directly into a DataFrame / memory
-pl$scan_parquet(tmpf2)$collect()
-}
diff --git a/man/parquet_statistics.Rd b/man/parquet_statistics.Rd
deleted file mode 100644
index d6b672d7..00000000
--- a/man/parquet_statistics.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-various.R
-\name{parquet_statistics}
-\alias{parquet_statistics}
-\title{Prepare statistics for writing to Parquet file}
-\usage{
-parquet_statistics(
-  min = TRUE,
-  max = TRUE,
-  distinct_count = TRUE,
-  null_count = TRUE
-)
-}
-\arguments{
-\item{min}{Include stats on the minimum values in the column.}
-
-\item{max}{Include stats on the maximum values in the column.}
-
-\item{distinct_count}{Include stats on the number of distinct values in the
-column.}
-
-\item{null_count}{Include stats on the number of null values in the column.}
-}
-\description{
-Prepare statistics for writing to Parquet file
-}

From 975a569b3b3164bf0fe31b3ef24cc03c43ade4bb Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Mon, 3 Feb 2025 11:33:18 +0100
Subject: [PATCH 21/23] remove conversion to IpcCompression

---
 src/rust/src/conversion/mod.rs | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index e25abeb9..e32a6592 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -698,19 +698,6 @@ impl TryFrom<&str> for Wrap<StartBy> {
     }
 }
 
-impl TryFrom<&str> for Wrap<IpcCompression> {
-    type Error = String;
-
-    fn try_from(compression: &str) -> Result<Self, String> {
-        let parsed = match compression {
-            "lz4" => IpcCompression::LZ4,
-            "zstd" => IpcCompression::ZSTD,
-            _ => return Err("unreachable".to_string()),
-        };
-        Ok(Wrap(parsed))
-    }
-}
-
 impl TryFrom<&str> for Wrap<QuoteStyle> {
     type Error = String;
 

From abf4366fafe3d8f13c12da9ca544e9674879f354 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Mon, 3 Feb 2025 11:33:25 +0100
Subject: [PATCH 22/23] typo

---
 src/rust/src/conversion/mod.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index e32a6592..7c24b544 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -701,8 +701,8 @@ impl TryFrom<&str> for Wrap<StartBy> {
 impl TryFrom<&str> for Wrap<QuoteStyle> {
     type Error = String;
 
-    fn try_from(compression: &str) -> Result<Self, String> {
-        let parsed = match compression {
+    fn try_from(quote_style: &str) -> Result<Self, String> {
+        let parsed = match quote_style {
             "always" => QuoteStyle::Always,
             "necessary" => QuoteStyle::Necessary,
             "non_numeric" => QuoteStyle::NonNumeric,

From 37ce3de62ad1ae61e101bb5fb50b4e7972c83043 Mon Sep 17 00:00:00 2001
From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
Date: Mon, 3 Feb 2025 12:16:32 +0100
Subject: [PATCH 23/23] refactor: fix some lints in Rust code (#76)

---
 src/rust/src/conversion/mod.rs      |  8 +++---
 src/rust/src/expr/general.rs        | 21 +++++----------
 src/rust/src/expr/rolling.rs        | 40 ++++++-----------------------
 src/rust/src/lazyframe/general.rs   | 39 +++++++++-------------------
 src/rust/src/series/construction.rs |  2 +-
 5 files changed, 31 insertions(+), 79 deletions(-)

diff --git a/src/rust/src/conversion/mod.rs b/src/rust/src/conversion/mod.rs
index 0d6a0ab7..1e0f83ff 100644
--- a/src/rust/src/conversion/mod.rs
+++ b/src/rust/src/conversion/mod.rs
@@ -625,13 +625,13 @@ impl TryFrom<StringSexp> for Wrap<NullValues> {
             let names = null_values.get_names().unwrap();
             let res = names
                 .into_iter()
-                .zip(values.into_iter())
+                .zip(values)
                 .map(|(xi, yi)| (xi.into(), yi.into()))
                 .collect::<Vec<(PlSmallStr, PlSmallStr)>>();
-            return Ok(Wrap(NullValues::Named(res)));
+            Ok(Wrap(NullValues::Named(res)))
         } else if null_values.len() == 1 {
             let vals = null_values.to_vec();
-            let val = *(vals.get(0).unwrap());
+            let val = *(vals.first().unwrap());
             return Ok(Wrap(NullValues::AllColumnsSingle(val.into())));
         } else {
             let vals = null_values
@@ -639,7 +639,7 @@ impl TryFrom<StringSexp> for Wrap<NullValues> {
                 .into_iter()
                 .map(|x| x.into())
                 .collect::<Vec<PlSmallStr>>();
-            return Ok(Wrap(NullValues::AllColumns(vals.into())));
+            return Ok(Wrap(NullValues::AllColumns(vals)));
         }
     }
 }
diff --git a/src/rust/src/expr/general.rs b/src/rust/src/expr/general.rs
index 1ba2a768..c7f04436 100644
--- a/src/rust/src/expr/general.rs
+++ b/src/rust/src/expr/general.rs
@@ -767,7 +767,7 @@ impl PlRExpr {
 
     fn backward_fill(&self, limit: Option<NumericScalar>) -> Result<Self> {
         let limit: FillNullLimit = match limit {
-            Some(x) => Some(<Wrap<u32>>::try_from(x)?.0.into()),
+            Some(x) => Some(<Wrap<u32>>::try_from(x)?.0),
             None => None,
         };
         Ok(self.inner.clone().backward_fill(limit).into())
@@ -775,7 +775,7 @@ impl PlRExpr {
 
     fn forward_fill(&self, limit: Option<NumericScalar>) -> Result<Self> {
         let limit: FillNullLimit = match limit {
-            Some(x) => Some(<Wrap<u32>>::try_from(x)?.0.into()),
+            Some(x) => Some(<Wrap<u32>>::try_from(x)?.0),
             None => None,
         };
         Ok(self.inner.clone().forward_fill(limit).into())
@@ -800,7 +800,7 @@ impl PlRExpr {
         limit: Option<NumericScalar>,
     ) -> Result<Self> {
         let limit: FillNullLimit = match limit {
-            Some(x) => Some(<Wrap<u32>>::try_from(x)?.0.into()),
+            Some(x) => Some(<Wrap<u32>>::try_from(x)?.0),
             None => None,
         };
         let strategy = parse_fill_null_strategy(strategy, limit)?;
@@ -870,10 +870,7 @@ impl PlRExpr {
         labels: Option<StringSexp>,
     ) -> Result<Self> {
         let breaks: Vec<f64> = breaks.as_slice_f64().into();
-        let labels = match labels {
-            Some(x) => Some(x.to_vec()),
-            None => None,
-        };
+        let labels = labels.map(|x| x.to_vec());
         Ok(self
             .inner
             .clone()
@@ -890,10 +887,7 @@ impl PlRExpr {
         labels: Option<StringSexp>,
     ) -> Result<Self> {
         let probs: Vec<f64> = probs.as_slice_f64().into();
-        let labels = match labels {
-            Some(x) => Some(x.to_vec()),
-            None => None,
-        };
+        let labels = labels.map(|x| x.to_vec());
         Ok(self
             .inner
             .clone()
@@ -910,10 +904,7 @@ impl PlRExpr {
         labels: Option<StringSexp>,
     ) -> Result<Self> {
         let n_bins = <Wrap<usize>>::try_from(n_bins)?.0;
-        let labels = match labels {
-            Some(x) => Some(x.to_vec()),
-            None => None,
-        };
+        let labels = labels.map(|x| x.to_vec());
         Ok(self
             .inner
             .clone()
diff --git a/src/rust/src/expr/rolling.rs b/src/rust/src/expr/rolling.rs
index 14df71ed..4a15c370 100644
--- a/src/rust/src/expr/rolling.rs
+++ b/src/rust/src/expr/rolling.rs
@@ -12,10 +12,7 @@ impl PlRExpr {
         min_periods: Option<NumericScalar>,
     ) -> Result<Self> {
         let window_size = <Wrap<usize>>::try_from(window_size)?.0;
-        let weights: Option<Vec<f64>> = match weights {
-            Some(x) => Some(x.as_slice_f64().into()),
-            None => None,
-        };
+        let weights: Option<Vec<f64>> = weights.map(|x| x.as_slice_f64().into());
         let min_periods: usize = match min_periods {
             Some(x) => <Wrap<usize>>::try_from(x)?.0,
             None => window_size,
@@ -60,10 +57,7 @@ impl PlRExpr {
         min_periods: Option<NumericScalar>,
     ) -> Result<Self> {
         let window_size = <Wrap<usize>>::try_from(window_size)?.0;
-        let weights: Option<Vec<f64>> = match weights {
-            Some(x) => Some(x.as_slice_f64().into()),
-            None => None,
-        };
+        let weights: Option<Vec<f64>> = weights.map(|x| x.as_slice_f64().into());
         let min_periods: usize = match min_periods {
             Some(x) => <Wrap<usize>>::try_from(x)?.0,
             None => window_size,
@@ -108,10 +102,7 @@ impl PlRExpr {
         min_periods: Option<NumericScalar>,
     ) -> Result<Self> {
         let window_size = <Wrap<usize>>::try_from(window_size)?.0;
-        let weights: Option<Vec<f64>> = match weights {
-            Some(x) => Some(x.as_slice_f64().into()),
-            None => None,
-        };
+        let weights: Option<Vec<f64>> = weights.map(|x| x.as_slice_f64().into());
         let min_periods: usize = match min_periods {
             Some(x) => <Wrap<usize>>::try_from(x)?.0,
             None => window_size,
@@ -156,10 +147,7 @@ impl PlRExpr {
         min_periods: Option<NumericScalar>,
     ) -> Result<Self> {
         let window_size = <Wrap<usize>>::try_from(window_size)?.0;
-        let weights: Option<Vec<f64>> = match weights {
-            Some(x) => Some(x.as_slice_f64().into()),
-            None => None,
-        };
+        let weights: Option<Vec<f64>> = weights.map(|x| x.as_slice_f64().into());
         let min_periods: usize = match min_periods {
             Some(x) => <Wrap<usize>>::try_from(x)?.0,
             None => window_size,
@@ -208,10 +196,7 @@ impl PlRExpr {
     ) -> Result<Self> {
         let ddof = <Wrap<u8>>::try_from(ddof)?.0;
         let window_size = <Wrap<usize>>::try_from(window_size)?.0;
-        let weights: Option<Vec<f64>> = match weights {
-            Some(x) => Some(x.as_slice_f64().into()),
-            None => None,
-        };
+        let weights: Option<Vec<f64>> = weights.map(|x| x.as_slice_f64().into());
         let min_periods: usize = match min_periods {
             Some(x) => <Wrap<usize>>::try_from(x)?.0,
             None => window_size,
@@ -262,10 +247,7 @@ impl PlRExpr {
     ) -> Result<Self> {
         let ddof = <Wrap<u8>>::try_from(ddof)?.0;
         let window_size = <Wrap<usize>>::try_from(window_size)?.0;
-        let weights: Option<Vec<f64>> = match weights {
-            Some(x) => Some(x.as_slice_f64().into()),
-            None => None,
-        };
+        let weights: Option<Vec<f64>> = weights.map(|x| x.as_slice_f64().into());
         let min_periods: usize = match min_periods {
             Some(x) => <Wrap<usize>>::try_from(x)?.0,
             None => window_size,
@@ -314,10 +296,7 @@ impl PlRExpr {
         min_periods: Option<NumericScalar>,
     ) -> Result<Self> {
         let window_size = <Wrap<usize>>::try_from(window_size)?.0;
-        let weights: Option<Vec<f64>> = match weights {
-            Some(x) => Some(x.as_slice_f64().into()),
-            None => None,
-        };
+        let weights: Option<Vec<f64>> = weights.map(|x| x.as_slice_f64().into());
         let min_periods: usize = match min_periods {
             Some(x) => <Wrap<usize>>::try_from(x)?.0,
             None => window_size,
@@ -364,10 +343,7 @@ impl PlRExpr {
         min_periods: Option<NumericScalar>,
     ) -> Result<Self> {
         let window_size = <Wrap<usize>>::try_from(window_size)?.0;
-        let weights: Option<Vec<f64>> = match weights {
-            Some(x) => Some(x.as_slice_f64().into()),
-            None => None,
-        };
+        let weights: Option<Vec<f64>> = weights.map(|x| x.as_slice_f64().into());
         let interpolation = <Wrap<QuantileMethod>>::try_from(interpolation)?.0;
         let min_periods: usize = match min_periods {
             Some(x) => <Wrap<usize>>::try_from(x)?.0,
diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs
index db7c2c07..997bcd4b 100644
--- a/src/rust/src/lazyframe/general.rs
+++ b/src/rust/src/lazyframe/general.rs
@@ -383,13 +383,10 @@ impl PlRLazyFrame {
             .copied()
             .map_err(RPolarsErr::from)?;
 
-        let row_index = match row_index_name {
-            Some(x) => Some(RowIndex {
+        let row_index = row_index_name.map(|x| RowIndex {
                 name: x.into(),
                 offset: row_index_offset,
-            }),
-            None => None,
-        };
+            });
 
         let overwrite_dtype = match overwrite_dtype {
             Some(x) => Some(<Wrap<Schema>>::try_from(x)?.0),
@@ -406,9 +403,7 @@ impl PlRLazyFrame {
         let cloud_options = match storage_options {
             Some(x) => {
                 let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
-                    RPolarsErr::Other(format!(
-                        "`storage_options` must be a named character vector"
-                    ))
+                    RPolarsErr::Other("`storage_options` must be a named character vector".to_string())
                 })?;
                 Some(out.0)
             }
@@ -509,13 +504,10 @@ impl PlRLazyFrame {
                 None => None,
             };
 
-            let row_index = match row_index_name {
-                Some(x) => Some(RowIndex {
+            let row_index = row_index_name.map(|x| RowIndex {
                     name: x.into(),
                     offset: row_index_offset,
-                }),
-                None => None,
-            };
+                });
 
             let hive_options = HiveOptions {
                 enabled: hive_partitioning,
@@ -533,7 +525,7 @@ impl PlRLazyFrame {
                 low_memory,
                 cloud_options: None,
                 use_statistics,
-                schema: schema.map(|x| Arc::new(x)),
+                schema: schema.map(Arc::new),
                 hive_options,
                 glob,
                 include_file_paths: include_file_paths.map(|x| x.into()),
@@ -547,9 +539,7 @@ impl PlRLazyFrame {
             let cloud_options = match storage_options {
                 Some(x) => {
                     let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
-                        RPolarsErr::Other(format!(
-                            "`storage_options` must be a named character vector"
-                        ))
+                        RPolarsErr::Other("`storage_options` must be a named character vector".to_string())
                     })?;
                     Some(out.0)
                 }
@@ -631,13 +621,10 @@ impl PlRLazyFrame {
                 None => None,
             };
 
-            let row_index = match row_index_name {
-                Some(x) => Some(RowIndex {
+            let row_index = row_index_name.map(|x| RowIndex {
                     name: x.into(),
                     offset: row_index_offset,
-                }),
-                None => None,
-            };
+                });
 
             let first_path = source.first().unwrap().clone().into();
 
@@ -648,9 +635,7 @@ impl PlRLazyFrame {
             let cloud_options = match storage_options {
                 Some(x) => {
                     let out = <Wrap<Vec<(String, String)>>>::try_from(x).map_err(|_| {
-                        RPolarsErr::Other(format!(
-                            "`storage_options` must be a named character vector"
-                        ))
+                        RPolarsErr::Other("`storage_options` must be a named character vector".to_string())
                     })?;
                     Some(out.0)
                 }
@@ -682,8 +667,8 @@ impl PlRLazyFrame {
                 .with_n_rows(n_rows)
                 .low_memory(low_memory)
                 .with_rechunk(rechunk)
-                .with_schema(schema.map(|schema| Arc::new(schema)))
-                .with_schema_overwrite(schema_overrides.map(|x| Arc::new(x)))
+                .with_schema(schema.map(Arc::new))
+                .with_schema_overwrite(schema_overrides.map(Arc::new))
                 .with_row_index(row_index)
                 .with_ignore_errors(ignore_errors)
                 .with_include_file_paths(include_file_paths.map(|x| x.into()))
diff --git a/src/rust/src/series/construction.rs b/src/rust/src/series/construction.rs
index 9c0da0bf..f4379951 100644
--- a/src/rust/src/series/construction.rs
+++ b/src/rust/src/series/construction.rs
@@ -226,7 +226,7 @@ impl PlRSeries {
                 } else {
                     let left_u32 = *l as u32;
                     let right_u32 = *r as u32;
-                    let out_u64 = (left_u32 as u64) << 32 | right_u32 as u64;
+                    let out_u64 = ((left_u32 as u64) << 32) | right_u32 as u64;
                     Some(
                         i64::from_ne_bytes(
                             (out_u64.wrapping_sub(9_223_372_036_854_775_808u64)).to_ne_bytes(),