From ddaf3932c79c5cc0df0b3fb29e321b3a56352c92 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 22 May 2024 21:10:56 +0200 Subject: [PATCH] docs(rust): Update outdated performance section (#16409) --- crates/polars/src/docs/mod.rs | 1 - crates/polars/src/docs/performance.rs | 101 -------------------------- crates/polars/src/lib.rs | 14 ++-- 3 files changed, 9 insertions(+), 107 deletions(-) delete mode 100644 crates/polars/src/docs/performance.rs diff --git a/crates/polars/src/docs/mod.rs b/crates/polars/src/docs/mod.rs index f2c7ba77c0f1..be809c6ea356 100644 --- a/crates/polars/src/docs/mod.rs +++ b/crates/polars/src/docs/mod.rs @@ -1,3 +1,2 @@ pub mod eager; pub mod lazy; -pub mod performance; diff --git a/crates/polars/src/docs/performance.rs b/crates/polars/src/docs/performance.rs deleted file mode 100644 index 647d7bc3ada3..000000000000 --- a/crates/polars/src/docs/performance.rs +++ /dev/null @@ -1,101 +0,0 @@ -//! # Performance -//! -//! Understanding the memory format used by Arrow/Polars can really increase performance of your -//! queries. This is especially true for large string data. The figure below shows how an Arrow UTF8 -//! array is laid out in memory. -//! -//! The array `["foo", "bar", "ham"]` is encoded by -//! -//! * a concatenated string `"foobarham"` -//! * an offset array indicating the start (and end) of each string `[0, 2, 5, 8]` -//! * a null bitmap, indicating null values -//! -//! ![](https://raw.githubusercontent.com/pola-rs/polars-static/master/docs/arrow-string.svg) -//! -//! This memory structure is very cache efficient if we are to read the string values. Especially if -//! we compare it to a [`Vec`]. -//! -//! ![](https://raw.githubusercontent.com/pola-rs/polars-static/master/docs/pandas-string.svg) -//! -//! However, if we need to reorder the Arrow UTF8 array, we need to swap around all the bytes of the -//! string values, which can become very expensive when we're dealing with large strings. On the -//! 
other hand, for the [`Vec`], we only need to swap pointers around which is only 8 bytes data -//! that have to be moved. -//! -//! If you have a [`DataFrame`] with a large number of -//! [`StringChunked`] columns and you need to reorder them due to an -//! operation like a FILTER, JOIN, GROUPBY, etc. than this can become quite expensive. -//! -//! ## Categorical type -//! For this reason Polars has a [`CategoricalType`]. -//! A [`CategoricalChunked`] is an array filled with `u32` values that each represent a unique string value. -//! Thereby maintaining cache-efficiency, whilst also making it cheap to move values around. -//! -//! [`DataFrame`]: crate::frame::DataFrame -//! [`StringChunked`]: crate::datatypes::StringChunked -//! [`CategoricalType`]: crate::datatypes::CategoricalType -//! [`CategoricalChunked`]: crate::datatypes::CategoricalChunked -//! -//! ### Example: Single DataFrame -//! -//! In the example below we show how you can cast a [`StringChunked`] column to a [`CategoricalChunked`]. -//! -//! ```rust -//! use polars::prelude::*; -//! -//! fn example(path: &str) -> PolarsResult { -//! let mut df = CsvReader::from_path(path)? -//! .finish()?; -//! -//! df.try_apply("string-column", |s| s.categorical().cloned())?; -//! Ok(df) -//! } -//! -//! ``` -//! -//! ### Example: Eager join multiple DataFrames on a Categorical -//! When the strings of one column need to be joined with the string data from another [`DataFrame`]. -//! The [`Categorical`] data needs to be synchronized (Categories in df A need to point to the same -//! underlying string data as Categories in df B). You can do that by turning the global string cache -//! on. -//! -//! [`Categorical`]: crate::datatypes::CategoricalChunked -//! -//! ```rust -//! use polars::prelude::*; -//! use polars::enable_string_cache; -//! -//! fn example(mut df_a: DataFrame, mut df_b: DataFrame) -> PolarsResult { -//! // Set a global string cache -//! enable_string_cache(); -//! -//! 
df_a.try_apply("a", |s| s.categorical().cloned())?; -//! df_b.try_apply("b", |s| s.categorical().cloned())?; -//! df_a.join(&df_b, ["a"], ["b"], JoinArgs::new(JoinType::Inner)) -//! } -//! ``` -//! -//! ### Example: Lazy join multiple DataFrames on a Categorical -//! A lazy Query always has a global string cache (unless you opt-out) for the duration of that query (until [`collect`] is called). -//! The example below shows how you could join two [`DataFrame`]s with [`Categorical`] types. -//! -//! [`collect`]: polars_lazy::frame::LazyFrame::collect -//! -//! ```rust -//! # #[cfg(feature = "lazy")] -//! # { -//! use polars::prelude::*; -//! -//! fn lazy_example(mut df_a: LazyFrame, mut df_b: LazyFrame) -> PolarsResult { -//! -//! let q1 = df_a.with_columns(vec![ -//! col("a").cast(DataType::Categorical(None)), -//! ]); -//! -//! let q2 = df_b.with_columns(vec![ -//! col("b").cast(DataType::Categorical(None)) -//! ]); -//! q1.inner_join(q2, col("a"), col("b")).collect() -//! } -//! # } -//! ``` diff --git a/crates/polars/src/lib.rs b/crates/polars/src/lib.rs index 569dea1d6f26..fd01e7301f00 100644 --- a/crates/polars/src/lib.rs +++ b/crates/polars/src/lib.rs @@ -315,13 +315,17 @@ //! * `dtype-full` - all opt-in dtypes. //! * `dtype-slim` - slim preset of opt-in dtypes. //! -//! ## Performance and string data -//! Large string data can really slow down your queries. -//! Read more in the [performance section](crate::docs::performance) +//! ## Performance +//! To gain the most performance out of Polars, we recommend compiling on a nightly compiler +//! with the features `simd` and `performant` activated. The activated CPU features also influence +//! the amount of SIMD acceleration we can use. +//! +//! See the features we activate for our Python builds, or if you just run locally and want to +//! use all available features on your CPU, set `RUSTFLAGS='-C target-cpu=native'`. //! //! ### Custom allocator -//! 
A DataFrame library naturally does a lot of heap allocations. It is recommended to use a custom -//! allocator. +//! An OLAP query engine does a lot of heap allocations. It is recommended to use a custom +//! allocator (we have found this to have up to ~25% runtime impact). //! [JeMalloc](https://crates.io/crates/jemallocator) and //! [Mimalloc](https://crates.io/crates/mimalloc) for instance, show a significant //! performance gain in runtime as well as memory usage.