diff --git a/fuzz/fuzz_targets/array_ops.rs b/fuzz/fuzz_targets/array_ops.rs index fedc61857c..e4362f3886 100644 --- a/fuzz/fuzz_targets/array_ops.rs +++ b/fuzz/fuzz_targets/array_ops.rs @@ -42,7 +42,7 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus { } Action::SearchSorted(s, side) => { // TODO(robert): Ideally we'd preserve the encoding perfectly but this is close enough - let mut sorted = sort_canonical_array(¤t_array); + let mut sorted = sort_canonical_array(¤t_array).unwrap(); if !HashSet::from([ &PrimitiveEncoding as EncodingRef, &VarBinEncoding, @@ -85,8 +85,8 @@ fn assert_search_sorted( ) { let search_result = search_sorted(&array, s.clone(), side).unwrap(); assert_eq!( - search_result, expected, + search_result, "Expected to find {s}({}) at {expected} in {} from {side} but instead found it at {search_result} in step {step}", s.dtype(), array.tree_display() diff --git a/fuzz/fuzz_targets/file_io.rs b/fuzz/fuzz_targets/file_io.rs index 71fe2c3b64..eb02ae8fd7 100644 --- a/fuzz/fuzz_targets/file_io.rs +++ b/fuzz/fuzz_targets/file_io.rs @@ -9,7 +9,7 @@ use libfuzzer_sys::{fuzz_target, Corpus}; use vortex_array::array::ChunkedArray; use vortex_array::compute::{compare, Operator}; use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant, IntoCanonical}; -use vortex_dtype::Nullability; +use vortex_dtype::DType; use vortex_file::{Scan, VortexOpenOptions, VortexWriteOptions}; use vortex_sampling_compressor::ALL_ENCODINGS_CONTEXT; @@ -18,7 +18,7 @@ fuzz_target!(|array_data: ArrayData| -> Corpus { return Corpus::Reject; } - if array_data.dtype().nullability() != Nullability::NonNullable { + if has_nullable_struct(array_data.dtype()) { return Corpus::Reject; } @@ -68,25 +68,31 @@ fuzz_target!(|array_data: ArrayData| -> Corpus { Corpus::Keep }); -fn compare_struct(lhs: ArrayData, rhs: ArrayData) { - assert!(lhs.dtype().eq_ignore_nullability(rhs.dtype())); - assert_eq!(lhs.len(), rhs.len(), "Arrays length isn't preserved"); +fn compare_struct(expected: ArrayData, actual: ArrayData) { + assert_eq!( + expected.dtype(), + actual.dtype(), + "DTypes aren't preserved expected {}, actual {}", + expected.dtype(), + actual.dtype() + ); + assert_eq!( + expected.len(), + actual.len(), + "Arrays length isn't preserved expected: {}, actual: {}", + expected.len(), + actual.len() + ); - if lhs.dtype().as_struct().unwrap().names().len() == 0 - && lhs.dtype().as_struct().unwrap().names().len() - == rhs.dtype().as_struct().unwrap().names().len() + if expected.dtype().as_struct().unwrap().names().len() == 0 + && expected.dtype().as_struct().unwrap().names().len() + == actual.dtype().as_struct().unwrap().names().len() { return; } - if let Some(st) = lhs.dtype().as_struct() { - if st.names().len() == 0 { - return; - } - } - - let arrow_lhs = lhs.clone().into_arrow().unwrap(); - let arrow_rhs = rhs.clone().into_arrow().unwrap(); + let arrow_lhs = expected.clone().into_arrow().unwrap(); + let arrow_rhs = actual.clone().into_arrow().unwrap(); let cmp_fn = make_comparator(&arrow_lhs, &arrow_rhs, SortOptions::default()).unwrap(); @@ -99,8 +105,16 @@ fn compare_struct(lhs: ArrayData, rhs: ArrayData) { assert_eq!( bool_builder.finish().count_set_bits(), arrow_lhs.len(), - "\nLHS: {}RHS: {}", - lhs.tree_display(), - rhs.tree_display() + "\nEXPECTED: {}ACTUAL: {}", + expected.tree_display(), + actual.tree_display() ); } + +fn has_nullable_struct(dtype: &DType) -> bool { + dtype.is_nullable() + || dtype + .as_struct() + .map(|sdt| sdt.dtypes().any(|dtype| has_nullable_struct(&dtype))) + .unwrap_or(false) +} diff --git a/fuzz/src/filter.rs b/fuzz/src/filter.rs index 2a98a585fc..b35ae3590e 100644 --- a/fuzz/src/filter.rs +++ b/fuzz/src/filter.rs @@ -5,18 +5,16 @@ use vortex_array::variants::StructArrayTrait; use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant}; use vortex_buffer::Buffer; use vortex_dtype::{match_each_native_ptype, DType}; -use vortex_error::VortexExpect; +use vortex_error::VortexResult; use crate::take::take_canonical_array; -pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData { +pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> VortexResult { let validity = if array.dtype().is_nullable() { let validity_buff = array - .logical_validity() - .vortex_expect("Failed to get logical validity") + .logical_validity()? .into_array() - .into_bool() - .unwrap() + .into_bool()? .boolean_buffer(); Validity::from_iter( filter @@ -31,7 +29,7 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData { match array.dtype() { DType::Bool(_) => { - let bool_array = array.clone().into_bool().unwrap(); + let bool_array = array.clone().into_bool()?; BoolArray::try_new( BooleanBuffer::from_iter( filter @@ -42,12 +40,11 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData { ), validity, ) - .vortex_expect("Validity length cannot mismatch") - .into_array() + .map(|a| a.into_array()) } DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| { - let primitive_array = array.clone().into_primitive().unwrap(); - PrimitiveArray::new( + let primitive_array = array.clone().into_primitive()?; + Ok(PrimitiveArray::new( filter .iter() .zip(primitive_array.as_slice::<$P>().iter().copied()) @@ -56,26 +53,24 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData { .collect::>(), validity, ) - .into_array() + .into_array()) }), DType::Utf8(_) | DType::Binary(_) => { - let utf8 = array.clone().into_varbinview().unwrap(); - let values = utf8 - .with_iterator(|iter| { - iter.zip(filter.iter()) - .filter(|(_, f)| **f) - .map(|(v, _)| v.map(|u| u.to_vec())) - .collect::>() - }) - .unwrap(); - VarBinViewArray::from_iter(values, array.dtype().clone()).into_array() + let utf8 = array.clone().into_varbinview()?; + let values = utf8.with_iterator(|iter| { + iter.zip(filter.iter()) + .filter(|(_, f)| **f) + .map(|(v, _)| v.map(|u| u.to_vec())) + .collect::>() + })?; + Ok(VarBinViewArray::from_iter(values, array.dtype().clone()).into_array()) } DType::Struct(..) => { - let struct_array = array.clone().into_struct().unwrap(); + let struct_array = array.clone().into_struct()?; let filtered_children = struct_array .children() .map(|c| filter_canonical_array(&c, filter)) - .collect::>(); + .collect::>>()?; StructArray::try_new( struct_array.names().clone(), @@ -83,8 +78,7 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData { filter.iter().filter(|b| **b).map(|b| *b as usize).sum(), validity, ) - .unwrap() - .into_array() + .map(|a| a.into_array()) } DType::List(..) => { let mut indices = Vec::new(); diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index 1fec59fa62..82dfba31ff 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -91,7 +91,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction { 1 => { let start = u.choose_index(current_array.len())?; let stop = u.int_in_range(start..=current_array.len())?; - current_array = slice_canonical_array(¤t_array, start, stop); + current_array = slice_canonical_array(¤t_array, start, stop).unwrap(); ( Action::Slice(start..stop), @@ -104,7 +104,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction { } let indices = random_vec_in_range(u, 0, current_array.len() - 1)?; - current_array = take_canonical_array(¤t_array, &indices); + current_array = take_canonical_array(¤t_array, &indices).unwrap(); let indices_array = indices .iter() .map(|i| *i as u64) @@ -129,7 +129,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction { return Err(EmptyChoose); } - let sorted = sort_canonical_array(¤t_array); + let sorted = sort_canonical_array(¤t_array).unwrap(); let side = if u.arbitrary()? { SearchSortedSide::Left @@ -147,7 +147,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction { let mask = (0..current_array.len()) .map(|_| bool::arbitrary(u)) .collect::>>()?; - current_array = filter_canonical_array(¤t_array, &mask); + current_array = filter_canonical_array(¤t_array, &mask).unwrap(); ( Action::Filter(Mask::from_iter(mask)), ExpectedValue::Array(current_array.clone()), diff --git a/fuzz/src/search_sorted.rs b/fuzz/src/search_sorted.rs index 5026e8e9e5..2bf5e11df3 100644 --- a/fuzz/src/search_sorted.rs +++ b/fuzz/src/search_sorted.rs @@ -1,4 +1,5 @@ use std::cmp::Ordering; +use std::fmt::Debug; use vortex_array::accessor::ArrayAccessor; use vortex_array::compute::{ @@ -13,18 +14,10 @@ use vortex_scalar::Scalar; struct SearchNullableSlice(Vec>); -impl IndexOrd> for SearchNullableSlice { +impl IndexOrd> for SearchNullableSlice { fn index_cmp(&self, idx: usize, elem: &Option) -> Option { - match elem { - None => unreachable!("Can't search for None"), - Some(v) => { - // SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds - match unsafe { self.0.get_unchecked(idx) } { - None => Some(Ordering::Greater), - Some(i) => i.partial_cmp(v), - } - } - } + // SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds + unsafe { self.0.get_unchecked(idx) }.partial_cmp(elem) } } @@ -43,7 +36,7 @@ impl IndexOrd> for SearchPrimitiveSlice { Some(v) => { // SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds match unsafe { self.0.get_unchecked(idx) } { - None => Some(Ordering::Greater), + None => Some(Ordering::Less), Some(i) => Some(i.total_compare(*v)), } } @@ -64,7 +57,7 @@ pub fn search_sorted_canonical_array( ) -> VortexResult { match array.dtype() { DType::Bool(_) => { - let bool_array = array.clone().into_bool().unwrap(); + let bool_array = array.clone().into_bool()?; let validity = bool_array.logical_validity()?.to_boolean_buffer(); let opt_values = bool_array .boolean_buffer() @@ -72,11 +65,11 @@ pub fn search_sorted_canonical_array( .zip(validity.iter()) .map(|(b, v)| v.then_some(b)) .collect::>(); - let to_find = scalar.try_into().unwrap(); + let to_find = scalar.try_into()?; Ok(SearchNullableSlice(opt_values).search_sorted(&Some(to_find), side)) } DType::Primitive(p, _) => { - let primitive_array = array.clone().into_primitive().unwrap(); + let primitive_array = array.clone().into_primitive()?; let validity = primitive_array.logical_validity()?.to_boolean_buffer(); match_each_native_ptype!(p, |$P| { let opt_values = primitive_array @@ -86,37 +79,32 @@ pub fn search_sorted_canonical_array( .zip(validity.iter()) .map(|(b, v)| v.then_some(b)) .collect::>(); - let to_find: $P = scalar.try_into().unwrap(); + let to_find: $P = scalar.try_into()?; Ok(SearchPrimitiveSlice(opt_values).search_sorted(&Some(to_find), side)) }) } DType::Utf8(_) | DType::Binary(_) => { - let utf8 = array.clone().into_varbinview().unwrap(); - let opt_values = utf8 - .with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::>()) - .unwrap(); + let utf8 = array.clone().into_varbinview()?; + let opt_values = + utf8.with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::>())?; let to_find = if matches!(array.dtype(), DType::Utf8(_)) { - BufferString::try_from(scalar) - .unwrap() - .as_str() - .as_bytes() - .to_vec() + BufferString::try_from(scalar)?.as_str().as_bytes().to_vec() } else { - ByteBuffer::try_from(scalar).unwrap().to_vec() + ByteBuffer::try_from(scalar)?.to_vec() }; Ok(SearchNullableSlice(opt_values).search_sorted(&Some(to_find), side)) } DType::Struct(..) => { let scalar_vals = (0..array.len()) - .map(|i| scalar_at(array, i).unwrap()) - .collect::>(); - Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side)) + .map(|i| scalar_at(array, i)) + .collect::>>()?; + Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side)) } DType::List(..) => { let scalar_vals = (0..array.len()) - .map(|i| scalar_at(array, i).unwrap()) - .collect::>(); - Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side)) + .map(|i| scalar_at(array, i)) + .collect::>>()?; + Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side)) } _ => unreachable!("Not a canonical array"), } diff --git a/fuzz/src/slice.rs b/fuzz/src/slice.rs index b06796a2cb..d5fe38f84c 100644 --- a/fuzz/src/slice.rs +++ b/fuzz/src/slice.rs @@ -5,14 +5,15 @@ use vortex_array::validity::{ArrayValidity, Validity}; use vortex_array::variants::{PrimitiveArrayTrait, StructArrayTrait}; use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; use vortex_dtype::{match_each_native_ptype, DType, NativePType}; -use vortex_error::VortexExpect; +use vortex_error::VortexResult; -pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> ArrayData { +pub fn slice_canonical_array( + array: &ArrayData, + start: usize, + stop: usize, +) -> VortexResult { let validity = if array.dtype().is_nullable() { - let bool_buff = array - .logical_validity() - .vortex_expect("Failed to compute validity") - .to_boolean_buffer(); + let bool_buff = array.logical_validity()?.to_boolean_buffer(); Validity::from(bool_buff.slice(start, stop - start)) } else { @@ -21,59 +22,55 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar match array.dtype() { DType::Bool(_) => { - let bool_array = array.clone().into_bool().unwrap(); + let bool_array = array.clone().into_bool()?; let sliced_bools = bool_array.boolean_buffer().slice(start, stop - start); - BoolArray::try_new(sliced_bools, validity) - .vortex_expect("Validity length cannot mismatch") - .into_array() + BoolArray::try_new(sliced_bools, validity).map(|a| a.into_array()) } DType::Primitive(p, _) => { - let primitive_array = array.clone().into_primitive().unwrap(); + let primitive_array = array.clone().into_primitive()?; match_each_native_ptype!(p, |$P| { - PrimitiveArray::new(primitive_array.buffer::<$P>().slice(start..stop), validity).into_array() + Ok(PrimitiveArray::new(primitive_array.buffer::<$P>().slice(start..stop), validity).into_array()) }) } DType::Utf8(_) | DType::Binary(_) => { - let utf8 = array.clone().into_varbinview().unwrap(); - let values = utf8 - .with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::>()) - .unwrap(); - VarBinViewArray::from_iter(values[start..stop].iter().cloned(), array.dtype().clone()) - .into_array() + let utf8 = array.clone().into_varbinview()?; + let values = + utf8.with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::>())?; + Ok(VarBinViewArray::from_iter( + values[start..stop].iter().cloned(), + array.dtype().clone(), + ) + .into_array()) } DType::Struct(..) => { - let struct_array = array.clone().into_struct().unwrap(); + let struct_array = array.clone().into_struct()?; let sliced_children = struct_array .children() .map(|c| slice_canonical_array(&c, start, stop)) - .collect::>(); + .collect::>>()?; StructArray::try_new( struct_array.names().clone(), sliced_children, stop - start, validity, ) - .unwrap() - .into_array() + .map(|a| a.into_array()) } DType::List(..) => { - let list_array = array.clone().into_list().unwrap(); - let offsets = slice_canonical_array(&list_array.offsets(), start, stop + 1) - .into_primitive() - .unwrap(); + let list_array = array.clone().into_list()?; + let offsets = + slice_canonical_array(&list_array.offsets(), start, stop + 1)?.into_primitive()?; let elements = slice_canonical_array( &list_array.elements(), offsets.get_as_cast::(0) as usize, offsets.get_as_cast::(offsets.len() - 1) as usize, - ); + )?; let offsets = match_each_native_ptype!(offsets.ptype(), |$P| { shift_offsets::<$P>(offsets) }) .into_array(); - ListArray::try_new(elements, offsets, validity) - .unwrap() - .into_array() + ListArray::try_new(elements, offsets, validity).map(|a| a.into_array()) } _ => unreachable!("Not a canonical array"), } diff --git a/fuzz/src/sort.rs b/fuzz/src/sort.rs index c4532d3a55..0db3429899 100644 --- a/fuzz/src/sort.rs +++ b/fuzz/src/sort.rs @@ -6,14 +6,14 @@ use vortex_array::compute::scalar_at; use vortex_array::validity::ArrayValidity; use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant}; use vortex_dtype::{match_each_native_ptype, DType, NativePType}; -use vortex_error::VortexExpect; +use vortex_error::{VortexExpect, VortexResult}; use crate::take::take_canonical_array; -pub fn sort_canonical_array(array: &ArrayData) -> ArrayData { +pub fn sort_canonical_array(array: &ArrayData) -> VortexResult { match array.dtype() { DType::Bool(_) => { - let bool_array = array.clone().into_bool().unwrap(); + let bool_array = array.clone().into_bool()?; let mut opt_values = bool_array .boolean_buffer() .iter() @@ -27,10 +27,10 @@ pub fn sort_canonical_array(array: &ArrayData) -> ArrayData { .map(|(b, v)| v.then_some(b)) .collect::>(); opt_values.sort(); - BoolArray::from_iter(opt_values).into_array() + Ok(BoolArray::from_iter(opt_values).into_array()) } DType::Primitive(p, _) => { - let primitive_array = array.clone().into_primitive().unwrap(); + let primitive_array = array.clone().into_primitive()?; match_each_native_ptype!(p, |$P| { let mut opt_values = primitive_array .as_slice::<$P>() @@ -38,24 +38,22 @@ pub fn sort_canonical_array(array: &ArrayData) -> ArrayData { .copied() .zip( primitive_array - .logical_validity() - .unwrap() + .logical_validity()? .to_boolean_buffer() .iter(), ) .map(|(p, v)| v.then_some(p)) .collect::>(); sort_primitive_slice(&mut opt_values); - PrimitiveArray::from_option_iter(opt_values).into_array() + Ok(PrimitiveArray::from_option_iter(opt_values).into_array()) }) } DType::Utf8(_) | DType::Binary(_) => { - let utf8 = array.clone().into_varbinview().unwrap(); - let mut opt_values = utf8 - .with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::>()) - .unwrap(); + let utf8 = array.clone().into_varbinview()?; + let mut opt_values = + utf8.with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::>())?; opt_values.sort(); - VarBinViewArray::from_iter(opt_values, array.dtype().clone()).into_array() + Ok(VarBinViewArray::from_iter(opt_values, array.dtype().clone()).into_array()) } DType::Struct(..) => { let mut sort_indices = (0..array.len()).collect::>(); diff --git a/fuzz/src/take.rs b/fuzz/src/take.rs index a9ebedb08b..d489c823ce 100644 --- a/fuzz/src/take.rs +++ b/fuzz/src/take.rs @@ -8,13 +8,12 @@ use vortex_array::variants::StructArrayTrait; use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant}; use vortex_buffer::Buffer; use vortex_dtype::{match_each_native_ptype, DType, NativePType}; -use vortex_error::VortexExpect; +use vortex_error::VortexResult; -pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { +pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> VortexResult { let validity = if array.dtype().is_nullable() { let validity_idx = array - .logical_validity() - .unwrap() + .logical_validity()? .to_boolean_buffer() .iter() .collect::>(); @@ -26,35 +25,33 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { match array.dtype() { DType::Bool(_) => { - let bool_array = array.clone().into_bool().unwrap(); + let bool_array = array.clone().into_bool()?; let vec_values = bool_array.boolean_buffer().iter().collect::>(); BoolArray::try_new(indices.iter().map(|i| vec_values[*i]).collect(), validity) - .vortex_expect("Validity length cannot mismatch") - .into_array() + .map(|a| a.into_array()) } DType::Primitive(p, _) => { - let primitive_array = array.clone().into_primitive().unwrap(); + let primitive_array = array.clone().into_primitive()?; match_each_native_ptype!(p, |$P| { - take_primitive::<$P>(primitive_array, validity, indices) + Ok(take_primitive::<$P>(primitive_array, validity, indices)) }) } DType::Utf8(_) | DType::Binary(_) => { - let utf8 = array.clone().into_varbinview().unwrap(); - let values = utf8 - .with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::>()) - .unwrap(); - VarBinViewArray::from_iter( + let utf8 = array.clone().into_varbinview()?; + let values = + utf8.with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::>())?; + Ok(VarBinViewArray::from_iter( indices.iter().map(|i| values[*i].clone()), array.dtype().clone(), ) - .into_array() + .into_array()) } DType::Struct(..) => { - let struct_array = array.clone().into_struct().unwrap(); + let struct_array = array.clone().into_struct()?; let taken_children = struct_array .children() .map(|c| take_canonical_array(&c, indices)) - .collect::>(); + .collect::>>()?; StructArray::try_new( struct_array.names().clone(), @@ -62,17 +59,14 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { indices.len(), validity, ) - .unwrap() - .into_array() + .map(|a| a.into_array()) } DType::List(..) => { let mut builder = builder_with_capacity(array.dtype(), indices.len()); for idx in indices { - builder - .append_scalar(&scalar_at(array, *idx).unwrap()) - .unwrap(); + builder.append_scalar(&scalar_at(array, *idx)?)?; } - builder.finish().unwrap() + builder.finish() } _ => unreachable!("Not a canonical array"), }