From be0fc6c40e6a787b0bb53c4719fc7963e4e699fb Mon Sep 17 00:00:00 2001 From: ritchie Date: Tue, 30 Jul 2024 11:07:13 +0200 Subject: [PATCH] split arrow args --- .../src/executors/scan/python_scan.rs | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/crates/polars-mem-engine/src/executors/scan/python_scan.rs b/crates/polars-mem-engine/src/executors/scan/python_scan.rs index ace26f9f591d..190120cb1f3e 100644 --- a/crates/polars-mem-engine/src/executors/scan/python_scan.rs +++ b/crates/polars-mem-engine/src/executors/scan/python_scan.rs @@ -64,21 +64,26 @@ impl Executor for PythonScanExec { }, }; - let batch_size = if self.options.is_pyarrow { - None + let generator_init = if self.options.is_pyarrow { + let args = (python_scan_function, with_columns, predicate, n_rows); + callable.call1(args).map_err(to_compute_err) } else { - Some(100_000usize) - }; - - let generator_init = callable - .call1(( + // If there are filters, take smaller chunks to ensure we can keep memory + // pressure low. + let batch_size = if self.predicate.is_some() { + Some(100_000usize) + } else { + None + }; + let args = ( python_scan_function, with_columns, predicate, n_rows, batch_size, - )) - .map_err(to_compute_err)?; + ); + callable.call1(args).map_err(to_compute_err) + }?; // This isn't a generator, but a `DataFrame`. if generator_init.getattr(intern!(py, "_df")).is_ok() {