Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fuzzer cleanup #2107

Merged
merged 3 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions fuzz/fuzz_targets/array_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
}
Action::SearchSorted(s, side) => {
// TODO(robert): Ideally we'd preserve the encoding perfectly but this is close enough
let mut sorted = sort_canonical_array(&current_array);
let mut sorted = sort_canonical_array(&current_array).unwrap();
if !HashSet::from([
&PrimitiveEncoding as EncodingRef,
&VarBinEncoding,
Expand Down Expand Up @@ -85,8 +85,8 @@ fn assert_search_sorted(
) {
let search_result = search_sorted(&array, s.clone(), side).unwrap();
assert_eq!(
search_result,
expected,
search_result,
"Expected to find {s}({}) at {expected} in {} from {side} but instead found it at {search_result} in step {step}",
s.dtype(),
array.tree_display()
Expand Down
52 changes: 33 additions & 19 deletions fuzz/fuzz_targets/file_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use libfuzzer_sys::{fuzz_target, Corpus};
use vortex_array::array::ChunkedArray;
use vortex_array::compute::{compare, Operator};
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant, IntoCanonical};
use vortex_dtype::Nullability;
use vortex_dtype::DType;
use vortex_file::{Scan, VortexOpenOptions, VortexWriteOptions};
use vortex_sampling_compressor::ALL_ENCODINGS_CONTEXT;

Expand All @@ -18,7 +18,7 @@ fuzz_target!(|array_data: ArrayData| -> Corpus {
return Corpus::Reject;
}

if array_data.dtype().nullability() != Nullability::NonNullable {
if has_nullable_struct(array_data.dtype()) {
return Corpus::Reject;
}

Expand Down Expand Up @@ -68,25 +68,31 @@ fuzz_target!(|array_data: ArrayData| -> Corpus {
Corpus::Keep
});

fn compare_struct(lhs: ArrayData, rhs: ArrayData) {
assert!(lhs.dtype().eq_ignore_nullability(rhs.dtype()));
assert_eq!(lhs.len(), rhs.len(), "Arrays length isn't preserved");
fn compare_struct(expected: ArrayData, actual: ArrayData) {
assert_eq!(
expected.dtype(),
actual.dtype(),
"DTypes aren't preserved expected {}, actual {}",
expected.dtype(),
actual.dtype()
);
assert_eq!(
expected.len(),
actual.len(),
"Arrays length isn't preserved expected: {}, actual: {}",
expected.len(),
actual.len()
);

if lhs.dtype().as_struct().unwrap().names().len() == 0
&& lhs.dtype().as_struct().unwrap().names().len()
== rhs.dtype().as_struct().unwrap().names().len()
if expected.dtype().as_struct().unwrap().names().len() == 0
&& expected.dtype().as_struct().unwrap().names().len()
== actual.dtype().as_struct().unwrap().names().len()
{
return;
}

if let Some(st) = lhs.dtype().as_struct() {
if st.names().len() == 0 {
return;
}
}

let arrow_lhs = lhs.clone().into_arrow().unwrap();
let arrow_rhs = rhs.clone().into_arrow().unwrap();
let arrow_lhs = expected.clone().into_arrow().unwrap();
let arrow_rhs = actual.clone().into_arrow().unwrap();

let cmp_fn = make_comparator(&arrow_lhs, &arrow_rhs, SortOptions::default()).unwrap();

Expand All @@ -99,8 +105,16 @@ fn compare_struct(lhs: ArrayData, rhs: ArrayData) {
assert_eq!(
bool_builder.finish().count_set_bits(),
arrow_lhs.len(),
"\nLHS: {}RHS: {}",
lhs.tree_display(),
rhs.tree_display()
"\nEXPECTED: {}ACTUAL: {}",
expected.tree_display(),
actual.tree_display()
);
}

fn has_nullable_struct(dtype: &DType) -> bool {
dtype.is_nullable()
|| dtype
.as_struct()
.map(|sdt| sdt.dtypes().any(|dtype| has_nullable_struct(&dtype)))
.unwrap_or(false)
}
46 changes: 20 additions & 26 deletions fuzz/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,16 @@ use vortex_array::variants::StructArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_buffer::Buffer;
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_error::VortexExpect;
use vortex_error::VortexResult;

use crate::take::take_canonical_array;

pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> VortexResult<ArrayData> {
let validity = if array.dtype().is_nullable() {
let validity_buff = array
.logical_validity()
.vortex_expect("Failed to get logical validity")
.logical_validity()?
.into_array()
.into_bool()
.unwrap()
.into_bool()?
.boolean_buffer();
Validity::from_iter(
filter
Expand All @@ -31,7 +29,7 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {

match array.dtype() {
DType::Bool(_) => {
let bool_array = array.clone().into_bool().unwrap();
let bool_array = array.clone().into_bool()?;
BoolArray::try_new(
BooleanBuffer::from_iter(
filter
Expand All @@ -42,12 +40,11 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
),
validity,
)
.vortex_expect("Validity length cannot mismatch")
.into_array()
.map(|a| a.into_array())
}
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
let primitive_array = array.clone().into_primitive().unwrap();
PrimitiveArray::new(
let primitive_array = array.clone().into_primitive()?;
Ok(PrimitiveArray::new(
filter
.iter()
.zip(primitive_array.as_slice::<$P>().iter().copied())
Expand All @@ -56,35 +53,32 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
.collect::<Buffer<_>>(),
validity,
)
.into_array()
.into_array())
}),
DType::Utf8(_) | DType::Binary(_) => {
let utf8 = array.clone().into_varbinview().unwrap();
let values = utf8
.with_iterator(|iter| {
iter.zip(filter.iter())
.filter(|(_, f)| **f)
.map(|(v, _)| v.map(|u| u.to_vec()))
.collect::<Vec<_>>()
})
.unwrap();
VarBinViewArray::from_iter(values, array.dtype().clone()).into_array()
let utf8 = array.clone().into_varbinview()?;
let values = utf8.with_iterator(|iter| {
iter.zip(filter.iter())
.filter(|(_, f)| **f)
.map(|(v, _)| v.map(|u| u.to_vec()))
.collect::<Vec<_>>()
})?;
Ok(VarBinViewArray::from_iter(values, array.dtype().clone()).into_array())
}
DType::Struct(..) => {
let struct_array = array.clone().into_struct().unwrap();
let struct_array = array.clone().into_struct()?;
let filtered_children = struct_array
.children()
.map(|c| filter_canonical_array(&c, filter))
.collect::<Vec<_>>();
.collect::<VortexResult<Vec<_>>>()?;

StructArray::try_new(
struct_array.names().clone(),
filtered_children,
filter.iter().filter(|b| **b).map(|b| *b as usize).sum(),
validity,
)
.unwrap()
.into_array()
.map(|a| a.into_array())
}
DType::List(..) => {
let mut indices = Vec::new();
Expand Down
8 changes: 4 additions & 4 deletions fuzz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
1 => {
let start = u.choose_index(current_array.len())?;
let stop = u.int_in_range(start..=current_array.len())?;
current_array = slice_canonical_array(&current_array, start, stop);
current_array = slice_canonical_array(&current_array, start, stop).unwrap();

(
Action::Slice(start..stop),
Expand All @@ -104,7 +104,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
}

let indices = random_vec_in_range(u, 0, current_array.len() - 1)?;
current_array = take_canonical_array(&current_array, &indices);
current_array = take_canonical_array(&current_array, &indices).unwrap();
let indices_array = indices
.iter()
.map(|i| *i as u64)
Expand All @@ -129,7 +129,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
return Err(EmptyChoose);
}

let sorted = sort_canonical_array(&current_array);
let sorted = sort_canonical_array(&current_array).unwrap();

let side = if u.arbitrary()? {
SearchSortedSide::Left
Expand All @@ -147,7 +147,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
let mask = (0..current_array.len())
.map(|_| bool::arbitrary(u))
.collect::<Result<Vec<_>>>()?;
current_array = filter_canonical_array(&current_array, &mask);
current_array = filter_canonical_array(&current_array, &mask).unwrap();
(
Action::Filter(Mask::from_iter(mask)),
ExpectedValue::Array(current_array.clone()),
Expand Down
52 changes: 20 additions & 32 deletions fuzz/src/search_sorted.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::cmp::Ordering;
use std::fmt::Debug;

use vortex_array::accessor::ArrayAccessor;
use vortex_array::compute::{
Expand All @@ -13,18 +14,10 @@ use vortex_scalar::Scalar;

struct SearchNullableSlice<T>(Vec<Option<T>>);

impl<T: PartialOrd> IndexOrd<Option<T>> for SearchNullableSlice<T> {
impl<T: PartialOrd + Debug> IndexOrd<Option<T>> for SearchNullableSlice<T> {
fn index_cmp(&self, idx: usize, elem: &Option<T>) -> Option<Ordering> {
match elem {
None => unreachable!("Can't search for None"),
Some(v) => {
// SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds
match unsafe { self.0.get_unchecked(idx) } {
None => Some(Ordering::Greater),
Some(i) => i.partial_cmp(v),
}
}
}
// SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds
unsafe { self.0.get_unchecked(idx) }.partial_cmp(elem)
}
}

Expand All @@ -43,7 +36,7 @@ impl<T: NativePType> IndexOrd<Option<T>> for SearchPrimitiveSlice<T> {
Some(v) => {
// SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds
match unsafe { self.0.get_unchecked(idx) } {
None => Some(Ordering::Greater),
None => Some(Ordering::Less),
Some(i) => Some(i.total_compare(*v)),
}
}
Expand All @@ -64,19 +57,19 @@ pub fn search_sorted_canonical_array(
) -> VortexResult<SearchResult> {
match array.dtype() {
DType::Bool(_) => {
let bool_array = array.clone().into_bool().unwrap();
let bool_array = array.clone().into_bool()?;
let validity = bool_array.logical_validity()?.to_boolean_buffer();
let opt_values = bool_array
.boolean_buffer()
.iter()
.zip(validity.iter())
.map(|(b, v)| v.then_some(b))
.collect::<Vec<_>>();
let to_find = scalar.try_into().unwrap();
let to_find = scalar.try_into()?;
Ok(SearchNullableSlice(opt_values).search_sorted(&Some(to_find), side))
}
DType::Primitive(p, _) => {
let primitive_array = array.clone().into_primitive().unwrap();
let primitive_array = array.clone().into_primitive()?;
let validity = primitive_array.logical_validity()?.to_boolean_buffer();
match_each_native_ptype!(p, |$P| {
let opt_values = primitive_array
Expand All @@ -86,37 +79,32 @@ pub fn search_sorted_canonical_array(
.zip(validity.iter())
.map(|(b, v)| v.then_some(b))
.collect::<Vec<_>>();
let to_find: $P = scalar.try_into().unwrap();
let to_find: $P = scalar.try_into()?;
Ok(SearchPrimitiveSlice(opt_values).search_sorted(&Some(to_find), side))
})
}
DType::Utf8(_) | DType::Binary(_) => {
let utf8 = array.clone().into_varbinview().unwrap();
let opt_values = utf8
.with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::<Vec<_>>())
.unwrap();
let utf8 = array.clone().into_varbinview()?;
let opt_values =
utf8.with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::<Vec<_>>())?;
let to_find = if matches!(array.dtype(), DType::Utf8(_)) {
BufferString::try_from(scalar)
.unwrap()
.as_str()
.as_bytes()
.to_vec()
BufferString::try_from(scalar)?.as_str().as_bytes().to_vec()
} else {
ByteBuffer::try_from(scalar).unwrap().to_vec()
ByteBuffer::try_from(scalar)?.to_vec()
};
Ok(SearchNullableSlice(opt_values).search_sorted(&Some(to_find), side))
}
DType::Struct(..) => {
let scalar_vals = (0..array.len())
.map(|i| scalar_at(array, i).unwrap())
.collect::<Vec<_>>();
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side))
.map(|i| scalar_at(array, i))
.collect::<VortexResult<Vec<_>>>()?;
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side))
}
DType::List(..) => {
let scalar_vals = (0..array.len())
.map(|i| scalar_at(array, i).unwrap())
.collect::<Vec<_>>();
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side))
.map(|i| scalar_at(array, i))
.collect::<VortexResult<Vec<_>>>()?;
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side))
}
_ => unreachable!("Not a canonical array"),
}
Expand Down
Loading
Loading