From 1018067defe54c8b5095b1165ecf6d1e30c189ad Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 20 Feb 2025 17:22:36 +0200 Subject: [PATCH 1/3] feat: add to concat different data types error message the data types --- arrow-select/src/concat.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs index 050f4ae96a8a..87092e2160a3 100644 --- a/arrow-select/src/concat.rs +++ b/arrow-select/src/concat.rs @@ -37,7 +37,7 @@ use arrow_array::*; use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer}; use arrow_data::transform::{Capacities, MutableArrayData}; use arrow_schema::{ArrowError, DataType, FieldRef, SchemaRef}; -use std::sync::Arc; +use std::{collections::HashSet, sync::Arc}; fn binary_capacity(arrays: &[&dyn Array]) -> Capacities { let mut item_capacity = 0; @@ -223,8 +223,22 @@ pub fn concat(arrays: &[&dyn Array]) -> Result { let d = arrays[0].data_type(); if arrays.iter().skip(1).any(|array| array.data_type() != d) { + // Get all the unique data types + let input_data_types = { + let unique = arrays + .iter() + .map(|array| array.data_type()) + .collect::>(); + + unique + .iter() + .map(|dt| format!("{dt}")) + .collect::>() + .join(", ") + }; + return Err(ArrowError::InvalidArgumentError( - "It is not possible to concatenate arrays of different data types.".to_string(), + format!("It is not possible to concatenate arrays of different data types ({input_data_types})."), )); } @@ -342,7 +356,13 @@ mod tests { &PrimitiveArray::::from(vec![Some(-1), Some(2), None]), &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]), ]); - assert!(re.is_err()); + + match re.expect_err("concat should have failed") { + ArrowError::InvalidArgumentError(desc) => { + assert_eq!(desc, "It is not possible to concatenate arrays of different data types (Int64, Utf8)."); + } + _ => panic!("Expected InvalidArgumentError"), + } } #[test] From baa63194f0e0855898b303d7c469d610ba63d7eb Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 20 Feb 2025 18:11:32 +0200 Subject: [PATCH 2/3] improve test, fix it and made the names to be in the order of appearance --- arrow-select/src/concat.rs | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs index 87092e2160a3..2f127b63ba5f 100644 --- a/arrow-select/src/concat.rs +++ b/arrow-select/src/concat.rs @@ -225,16 +225,18 @@ pub fn concat(arrays: &[&dyn Array]) -> Result { if arrays.iter().skip(1).any(|array| array.data_type() != d) { // Get all the unique data types let input_data_types = { - let unique = arrays - .iter() - .map(|array| array.data_type()) - .collect::>(); - - unique - .iter() - .map(|dt| format!("{dt}")) - .collect::>() - .join(", ") + let (names, _) = arrays.iter().map(|array| array.data_type()).fold( + (vec![], HashSet::<&DataType>::new()), + |(mut names, mut unique_data_types), dt| { + if unique_data_types.insert(dt) { + names.push(format!("{}", dt)); + } + + (names, unique_data_types) + }, + ); + + names.join(", ") }; return Err(ArrowError::InvalidArgumentError( @@ -354,15 +356,14 @@ mod tests { fn test_concat_incompatible_datatypes() { let re = concat(&[ &PrimitiveArray::::from(vec![Some(-1), Some(2), None]), + // 2 string to make sure we only mention unique types &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]), + &StringArray::from(vec![Some("hey"), Some(""), Some("you")]), + // Another type to make sure we are showing all the incompatible types + &PrimitiveArray::::from(vec![Some(-1), Some(2), None]), ]); - match re.expect_err("concat should have failed") { - ArrowError::InvalidArgumentError(desc) => { - assert_eq!(desc, "It is not possible to concatenate arrays of different data types (Int64, Utf8)."); - } - _ => panic!("Expected InvalidArgumentError"), - } + assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)."); } #[test] @@ -944,7 +945,7 @@ mod tests { .unwrap(); let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err(); - assert_eq!(error.to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types."); + assert_eq!(error.to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8)."); } #[test] From fd03ad3c2985e79f6f1bec78725a084f4e5e5065 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 20 Feb 2025 18:21:45 +0200 Subject: [PATCH 3/3] simplify --- arrow-select/src/concat.rs | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs index 2f127b63ba5f..2187ee49e4eb 100644 --- a/arrow-select/src/concat.rs +++ b/arrow-select/src/concat.rs @@ -225,16 +225,21 @@ pub fn concat(arrays: &[&dyn Array]) -> Result { if arrays.iter().skip(1).any(|array| array.data_type() != d) { // Get all the unique data types let input_data_types = { - let (names, _) = arrays.iter().map(|array| array.data_type()).fold( - (vec![], HashSet::<&DataType>::new()), - |(mut names, mut unique_data_types), dt| { - if unique_data_types.insert(dt) { - names.push(format!("{}", dt)); - } + let unique = arrays + .iter() + .map(|array| array.data_type()) + .collect::>(); + + // Allow unused mut as we need it for tests + #[allow(unused_mut)] + let mut names = unique + .into_iter() + .map(|dt| format!("{dt}")) + .collect::>(); - (names, unique_data_types) - }, - ); + // Only sort in tests to make the error message is deterministic + #[cfg(test)] + names.sort(); names.join(", ") }; @@ -363,7 +368,7 @@ mod tests { &PrimitiveArray::::from(vec![Some(-1), Some(2), None]), ]); - assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)."); + assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Int64, Utf8)."); } #[test]