Skip to content

Commit

Permalink
migrate string functions to invoke_with_args (#14722)
Browse files Browse the repository at this point in the history
* migrate string functions to invoke_with_args

* move clone of args in bench out of black_box

* modify obsolete calls in to_hex bench
  • Loading branch information
zjregee authored Feb 18, 2025
1 parent 04dc656 commit 2be19e5
Show file tree
Hide file tree
Showing 28 changed files with 293 additions and 231 deletions.
15 changes: 12 additions & 3 deletions datafusion/functions/benches/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
// under the License.

use arrow::array::ArrayRef;
use arrow::datatypes::DataType;
use arrow::util::bench_util::create_string_array_with_len;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
use datafusion_functions::string::concat;
use std::sync::Arc;

Expand All @@ -39,8 +40,16 @@ fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("concat function");
group.bench_function(BenchmarkId::new("concat", size), |b| {
b.iter(|| {
// TODO use invoke_with_args
criterion::black_box(concat().invoke_batch(&args, size).unwrap())
let args_cloned = args.clone();
criterion::black_box(
concat()
.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
})
.unwrap(),
)
})
});
group.finish();
Expand Down
51 changes: 38 additions & 13 deletions datafusion/functions/benches/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
extern crate criterion;

use arrow::array::{ArrayRef, StringArray, StringViewBuilder};
use arrow::datatypes::DataType;
use arrow::util::bench_util::{
create_string_array_with_len, create_string_view_array_with_len,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
use datafusion_functions::string;
use std::sync::Arc;

Expand Down Expand Up @@ -125,8 +126,12 @@ fn criterion_benchmark(c: &mut Criterion) {
let args = create_args1(size, 32);
c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(lower.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(lower.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
}))
})
});

Expand All @@ -135,8 +140,12 @@ fn criterion_benchmark(c: &mut Criterion) {
&format!("lower_the_first_value_is_nonascii: {}", size),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(lower.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(lower.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
}))
})
},
);
Expand All @@ -146,8 +155,12 @@ fn criterion_benchmark(c: &mut Criterion) {
&format!("lower_the_middle_value_is_nonascii: {}", size),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(lower.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(lower.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
}))
})
},
);
Expand All @@ -167,8 +180,12 @@ fn criterion_benchmark(c: &mut Criterion) {
&format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}",
size, str_len, null_density, mixed),
|b| b.iter(|| {
// TODO use invoke_with_args
black_box(lower.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(lower.invoke_with_args(ScalarFunctionArgs{
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
}))
}),
);

Expand All @@ -177,8 +194,12 @@ fn criterion_benchmark(c: &mut Criterion) {
&format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}",
size, str_len, null_density, mixed),
|b| b.iter(|| {
// TODO use invoke_with_args
black_box(lower.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(lower.invoke_with_args(ScalarFunctionArgs{
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
}))
}),
);

Expand All @@ -187,8 +208,12 @@ fn criterion_benchmark(c: &mut Criterion) {
&format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}",
size, str_len, 0.1, null_density, mixed),
|b| b.iter(|| {
// TODO use invoke_with_args
black_box(lower.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(lower.invoke_with_args(ScalarFunctionArgs{
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
}))
}),
);
}
Expand Down
11 changes: 8 additions & 3 deletions datafusion/functions/benches/ltrim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
extern crate criterion;

use arrow::array::{ArrayRef, LargeStringArray, StringArray, StringViewArray};
use arrow::datatypes::DataType;
use criterion::{
black_box, criterion_group, criterion_main, measurement::Measurement, BenchmarkGroup,
Criterion, SamplingMode,
};
use datafusion_common::ScalarValue;
use datafusion_expr::{ColumnarValue, ScalarUDF};
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF};
use datafusion_functions::string;
use rand::{distributions::Alphanumeric, rngs::StdRng, Rng, SeedableRng};
use std::{fmt, sync::Arc};
Expand Down Expand Up @@ -141,8 +142,12 @@ fn run_with_string_type<M: Measurement>(
),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(ltrim.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(ltrim.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
}))
})
},
);
Expand Down
59 changes: 44 additions & 15 deletions datafusion/functions/benches/repeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
extern crate criterion;

use arrow::array::{ArrayRef, Int64Array, OffsetSizeTrait};
use arrow::datatypes::DataType;
use arrow::util::bench_util::{
create_string_array_with_len, create_string_view_array_with_len,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode};
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
use datafusion_functions::string;
use std::sync::Arc;
use std::time::Duration;
Expand Down Expand Up @@ -73,8 +74,12 @@ fn criterion_benchmark(c: &mut Criterion) {
),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(repeat.invoke_batch(&args, repeat_times as usize))
let args_cloned = args.clone();
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: repeat_times as usize,
return_type: &DataType::Utf8,
}))
})
},
);
Expand All @@ -87,8 +92,12 @@ fn criterion_benchmark(c: &mut Criterion) {
),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(repeat.invoke_batch(&args, repeat_times as usize))
let args_cloned = args.clone();
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: repeat_times as usize,
return_type: &DataType::Utf8,
}))
})
},
);
Expand All @@ -101,8 +110,12 @@ fn criterion_benchmark(c: &mut Criterion) {
),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(repeat.invoke_batch(&args, repeat_times as usize))
let args_cloned = args.clone();
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: repeat_times as usize,
return_type: &DataType::Utf8,
}))
})
},
);
Expand All @@ -124,8 +137,12 @@ fn criterion_benchmark(c: &mut Criterion) {
),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(repeat.invoke_batch(&args, repeat_times as usize))
let args_cloned = args.clone();
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: repeat_times as usize,
return_type: &DataType::Utf8,
}))
})
},
);
Expand All @@ -138,8 +155,12 @@ fn criterion_benchmark(c: &mut Criterion) {
),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(repeat.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: repeat_times as usize,
return_type: &DataType::Utf8,
}))
})
},
);
Expand All @@ -152,8 +173,12 @@ fn criterion_benchmark(c: &mut Criterion) {
),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(repeat.invoke_batch(&args, repeat_times as usize))
let args_cloned = args.clone();
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: repeat_times as usize,
return_type: &DataType::Utf8,
}))
})
},
);
Expand All @@ -175,8 +200,12 @@ fn criterion_benchmark(c: &mut Criterion) {
),
|b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(repeat.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: repeat_times as usize,
return_type: &DataType::Utf8,
}))
})
},
);
Expand Down
32 changes: 25 additions & 7 deletions datafusion/functions/benches/to_hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@

extern crate criterion;

use arrow::{
datatypes::{Int32Type, Int64Type},
util::bench_util::create_primitive_array,
};
use arrow::datatypes::{DataType, Int32Type, Int64Type};
use arrow::util::bench_util::create_primitive_array;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
use datafusion_functions::string;
use std::sync::Arc;

Expand All @@ -33,13 +31,33 @@ fn criterion_benchmark(c: &mut Criterion) {
let batch_len = i32_array.len();
let i32_args = vec![ColumnarValue::Array(i32_array)];
c.bench_function(&format!("to_hex i32 array: {}", size), |b| {
b.iter(|| black_box(hex.invoke_batch(&i32_args, batch_len).unwrap()))
b.iter(|| {
let args_cloned = i32_args.clone();
black_box(
hex.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: batch_len,
return_type: &DataType::Utf8,
})
.unwrap(),
)
})
});
let i64_array = Arc::new(create_primitive_array::<Int64Type>(size, 0.2));
let batch_len = i64_array.len();
let i64_args = vec![ColumnarValue::Array(i64_array)];
c.bench_function(&format!("to_hex i64 array: {}", size), |b| {
b.iter(|| black_box(hex.invoke_batch(&i64_args, batch_len).unwrap()))
b.iter(|| {
let args_cloned = i64_args.clone();
black_box(
hex.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: batch_len,
return_type: &DataType::Utf8,
})
.unwrap(),
)
})
});
}

Expand Down
11 changes: 8 additions & 3 deletions datafusion/functions/benches/upper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@

extern crate criterion;

use arrow::datatypes::DataType;
use arrow::util::bench_util::create_string_array_with_len;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
use datafusion_functions::string;
use std::sync::Arc;

Expand All @@ -38,8 +39,12 @@ fn criterion_benchmark(c: &mut Criterion) {
let args = create_args(size, 32);
c.bench_function("upper_all_values_are_ascii", |b| {
b.iter(|| {
// TODO use invoke_with_args
black_box(upper.invoke_batch(&args, size))
let args_cloned = args.clone();
black_box(upper.invoke_with_args(ScalarFunctionArgs {
args: args_cloned,
number_rows: size,
return_type: &DataType::Utf8,
}))
})
});
}
Expand Down
10 changes: 9 additions & 1 deletion datafusion/functions/benches/uuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,21 @@

extern crate criterion;

use arrow::datatypes::DataType;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ScalarFunctionArgs;
use datafusion_functions::string;

fn criterion_benchmark(c: &mut Criterion) {
let uuid = string::uuid();
c.bench_function("uuid", |b| {
b.iter(|| black_box(uuid.invoke_batch(&[], 1024)))
b.iter(|| {
black_box(uuid.invoke_with_args(ScalarFunctionArgs {
args: vec![],
number_rows: 1024,
return_type: &DataType::Utf8,
}))
})
});
}

Expand Down
Loading

0 comments on commit 2be19e5

Please sign in to comment.