Skip to content

Commit

Permalink
NullArray + statsset cleanup (#350)
Browse files Browse the repository at this point in the history
Add first-class NullArray that maps back/forth with Arrow.

Cleans up StatsSet with all-nulls stats set a bit
  • Loading branch information
a10y authored Jun 12, 2024
1 parent 3f8282b commit 8d87deb
Show file tree
Hide file tree
Showing 13 changed files with 267 additions and 117 deletions.
19 changes: 2 additions & 17 deletions vortex-array/src/array/bool/stats.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use std::collections::HashMap;

use arrow_buffer::BooleanBuffer;
use vortex_dtype::{DType, Nullability};
use vortex_error::VortexResult;
use vortex_scalar::Scalar;

use crate::array::bool::BoolArray;
use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet};
use crate::validity::{ArrayValidity, LogicalValidity};
use crate::{ArrayTrait, IntoArray};
use crate::{ArrayDType, ArrayTrait, IntoArray};

impl ArrayStatisticsCompute for BoolArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
Expand All @@ -18,7 +16,7 @@ impl ArrayStatisticsCompute for BoolArray {

match self.logical_validity() {
LogicalValidity::AllValid(_) => self.boolean_buffer().compute_statistics(stat),
LogicalValidity::AllInvalid(v) => all_null_stats(v),
LogicalValidity::AllInvalid(v) => Ok(StatsSet::nulls(v, self.dtype())),
LogicalValidity::Array(a) => NullableBools(
&self.boolean_buffer(),
&a.into_array().flatten_bool()?.boolean_buffer(),
Expand All @@ -28,19 +26,6 @@ impl ArrayStatisticsCompute for BoolArray {
}
}

/// Builds the statistics set for a boolean array whose `len` entries are all null.
///
/// Min and max are null nullable-bool scalars, the array is reported as constant and
/// sorted with a single run, the null count equals `len`, and the true count is zero.
fn all_null_stats(len: usize) -> VortexResult<StatsSet> {
    // Both extrema are the same null scalar; build it on demand for each entry.
    let null_bool = || Scalar::null(DType::Bool(Nullability::Nullable));

    let entries = [
        (Stat::Min, null_bool()),
        (Stat::Max, null_bool()),
        (Stat::IsConstant, true.into()),
        (Stat::IsSorted, true.into()),
        // Repeated nulls are only strictly sorted when there are fewer than two of them.
        (Stat::IsStrictSorted, (len < 2).into()),
        (Stat::RunCount, 1.into()),
        (Stat::NullCount, len.into()),
        (Stat::TrueCount, 0.into()),
    ];

    let stats = HashMap::from(entries);
    Ok(StatsSet::from(stats))
}

/// Borrowed pair of boolean buffers used to compute statistics for nullable booleans.
///
/// In the `BoolArray` statistics code the first buffer comes from `boolean_buffer()`
/// (the values) and the second from the flattened logical-validity array.
struct NullableBools<'a>(&'a BooleanBuffer, &'a BooleanBuffer);

impl ArrayStatisticsCompute for NullableBools<'_> {
Expand Down
7 changes: 3 additions & 4 deletions vortex-array/src/array/chunked/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@ use arrow_buffer::{BooleanBuffer, MutableBuffer, ScalarBuffer};
use itertools::Itertools;
use vortex_dtype::{match_each_native_ptype, DType, Nullability, PType, StructDType};
use vortex_error::{vortex_bail, ErrString, VortexResult};
use vortex_scalar::Scalar;

use crate::accessor::ArrayAccessor;
use crate::array::bool::BoolArray;
use crate::array::chunked::ChunkedArray;
use crate::array::constant::ConstantArray;
use crate::array::extension::ExtensionArray;
use crate::array::null::NullArray;
use crate::array::primitive::PrimitiveArray;
use crate::array::r#struct::StructArray;
use crate::array::varbin::builder::VarBinBuilder;
Expand Down Expand Up @@ -73,8 +72,8 @@ pub(crate) fn try_flatten_chunks(chunks: Vec<Array>, dtype: DType) -> VortexResu
}
DType::Null => {
let len = chunks.iter().map(|chunk| chunk.len()).sum();
let const_array = ConstantArray::new(Scalar::null(DType::Null), len);
Ok(Flattened::Null(const_array))
let null_array = NullArray::new(len);
Ok(Flattened::Null(null_array))
}
}
}
Expand Down
50 changes: 0 additions & 50 deletions vortex-array/src/array/constant/as_arrow.rs

This file was deleted.

1 change: 0 additions & 1 deletion vortex-array/src/array/constant/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use crate::impl_encoding;
use crate::stats::Stat;
use crate::validity::{ArrayValidity, LogicalValidity};
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor};
mod as_arrow;
mod compute;
mod flatten;
mod stats;
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub mod chunked;
pub mod constant;
pub mod datetime;
pub mod extension;
pub mod null;
pub mod primitive;
pub mod sparse;
pub mod r#struct;
Expand Down
48 changes: 48 additions & 0 deletions vortex-array/src/array/null/as_arrow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//! Implementation of the [AsArrowArray] trait for [NullArray], which represents
//! [DType::Null] values.
use std::sync::Arc;

use arrow_array::{ArrayRef as ArrowArrayRef, NullArray as ArrowNullArray};
use vortex_error::VortexResult;

use crate::array::null::NullArray;
use crate::compute::as_arrow::AsArrowArray;
use crate::ArrayTrait;

impl AsArrowArray for NullArray {
    /// Produces an Arrow null array with the same length as this array.
    fn as_arrow(&self) -> VortexResult<ArrowArrayRef> {
        let len = self.len();
        Ok(Arc::new(ArrowNullArray::new(len)))
    }
}

#[cfg(test)]
mod test {
    use arrow_array::{Array, NullArray as ArrowNullArray};

    use crate::array::null::NullArray;
    use crate::arrow::FromArrowArray;
    use crate::compute::as_arrow::AsArrowArray;
    use crate::validity::{ArrayValidity, LogicalValidity};
    use crate::{ArrayData, ArrayTrait, IntoArray};

    /// Arrow nulls -> Vortex `NullArray` -> Arrow nulls keeps the length, reports
    /// all-invalid validity, and compares equal to the starting Arrow array.
    #[test]
    fn test_round_trip() {
        let arrow_nulls = ArrowNullArray::new(10);

        // Arrow -> Vortex.
        let as_vortex = ArrayData::from_arrow(&arrow_nulls, true).into_array();
        let vortex_nulls = NullArray::try_from(as_vortex).unwrap();

        assert_eq!(vortex_nulls.len(), 10);
        let validity = vortex_nulls.logical_validity();
        assert!(matches!(validity, LogicalValidity::AllInvalid(10)));

        // Vortex -> Arrow.
        let to_arrow = vortex_nulls.as_arrow().unwrap();
        let round_tripped = to_arrow
            .as_any()
            .downcast_ref::<ArrowNullArray>()
            .unwrap();
        assert_eq!(*round_tripped, arrow_nulls);
    }
}
101 changes: 101 additions & 0 deletions vortex-array/src/array/null/compute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
use vortex_dtype::{match_each_integer_ptype, DType};
use vortex_error::VortexResult;
use vortex_scalar::Scalar;

use crate::array::null::NullArray;
use crate::compute::scalar_at::ScalarAtFn;
use crate::compute::slice::SliceFn;
use crate::compute::take::TakeFn;
use crate::compute::ArrayCompute;
use crate::{Array, ArrayTrait, IntoArray};

/// Registers the compute kernels that `NullArray` implements itself:
/// `take`, `slice` and `scalar_at`.
impl ArrayCompute for NullArray {
    /// `NullArray` is its own [TakeFn].
    fn take(&self) -> Option<&dyn TakeFn> {
        Some(self)
    }

    /// `NullArray` is its own [SliceFn].
    fn slice(&self) -> Option<&dyn SliceFn> {
        Some(self)
    }

    /// `NullArray` is its own [ScalarAtFn].
    fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
        Some(self)
    }
}

impl SliceFn for NullArray {
    /// Returns a new `NullArray` covering the half-open range `start..stop`.
    ///
    /// The result has length `stop - start`.
    ///
    /// # Panics
    ///
    /// Panics if `stop` is greater than the array length or if `start` is greater
    /// than `stop`.
    fn slice(&self, start: usize, stop: usize) -> VortexResult<Array> {
        // `stop` is exclusive, so `stop == self.len()` is a valid slice up to the end.
        assert!(stop <= self.len(), "cannot slice past end of the array");
        // Guard the subtraction below against underflow.
        assert!(start <= stop, "slice start cannot be greater than stop");
        Ok(NullArray::new(stop - start).into_array())
    }
}

impl ScalarAtFn for NullArray {
    /// Returns the null scalar of dtype `DType::Null` for any in-bounds `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than the array length.
    fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
        let len = self.len();
        assert!(index < len, "cannot index past end of the array");

        let null_scalar = Scalar::null(DType::Null);
        Ok(null_scalar)
    }
}

impl TakeFn for NullArray {
    /// Returns a `NullArray` with one entry per element of `indices`.
    ///
    /// # Panics
    ///
    /// Panics if any index, cast to `usize`, is not less than the array length.
    fn take(&self, indices: &Array) -> VortexResult<Array> {
        let indices = indices.clone().flatten_primitive()?;
        let len = self.len();

        // Every index must be in bounds, even though the output is all nulls.
        match_each_integer_ptype!(indices.ptype(), |$T| {
            let in_bounds = indices
                .scalar_buffer::<$T>()
                .iter()
                .all(|&idx| (idx as usize) < len);
            assert!(in_bounds, "cannot take past end of the array");
        });

        let taken = NullArray::new(indices.len());
        Ok(taken.into_array())
    }
}

#[cfg(test)]
mod test {
    use vortex_dtype::DType;

    use crate::array::null::NullArray;
    use crate::compute::scalar_at::scalar_at;
    use crate::compute::slice::slice;
    use crate::compute::take::take;
    use crate::validity::{ArrayValidity, LogicalValidity};
    use crate::{ArrayTrait, IntoArray};

    /// Slicing the first four of ten nulls yields four nulls.
    #[test]
    fn test_slice_nulls() {
        let source = NullArray::new(10).into_array();

        let sliced_array = slice(&source, 0, 4).unwrap();
        let sliced = NullArray::try_from(sliced_array).unwrap();

        assert_eq!(sliced.len(), 4);
        let validity = sliced.logical_validity();
        assert!(matches!(validity, LogicalValidity::AllInvalid(4)));
    }

    /// Taking five indices from ten nulls yields five nulls.
    #[test]
    fn test_take_nulls() {
        let source = NullArray::new(10).into_array();
        let indices = vec![0u64, 2, 4, 6, 8].into_array();

        let taken_array = take(&source, &indices).unwrap();
        let taken = NullArray::try_from(taken_array).unwrap();

        assert_eq!(taken.len(), 5);
        let validity = taken.logical_validity();
        assert!(matches!(validity, LogicalValidity::AllInvalid(5)));
    }

    /// `scalar_at` on a null array returns a null scalar whose dtype is `DType::Null`.
    #[test]
    fn test_scalar_at_nulls() {
        let source = NullArray::new(10).into_array();

        let scalar = scalar_at(&source, 0).unwrap();

        assert!(scalar.is_null());
        assert_eq!(scalar.dtype().clone(), DType::Null);
    }
}
68 changes: 68 additions & 0 deletions vortex-array/src/array/null/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use serde::{Deserialize, Serialize};

use crate::stats::{ArrayStatisticsCompute, Stat};
use crate::validity::{ArrayValidity, LogicalValidity, Validity};
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor};
use crate::{impl_encoding, ArrayFlatten};

mod as_arrow;
mod compute;

impl_encoding!("vortex.null", Null);

/// Serializable metadata for a `NullArray`.
///
/// The only field is the array's length.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NullMetadata {
// Number of null entries in the array.
len: usize,
}

impl NullArray {
    /// Creates an array of `len` nulls with dtype `DType::Null`.
    ///
    /// The array is built with an empty children slice and a statistics set
    /// pre-populated by `StatsSet::nulls`.
    ///
    /// # Panics
    ///
    /// Panics if `try_from_parts` returns an error.
    pub fn new(len: usize) -> Self {
        let metadata = NullMetadata { len };
        let stats = StatsSet::nulls(len, &DType::Null);

        Self::try_from_parts(DType::Null, metadata, Arc::new([]), stats)
            .expect("NullArray::new cannot fail")
    }
}

impl ArrayFlatten for NullArray {
    /// Wraps the array, unchanged, in `Flattened::Null`.
    fn flatten(self) -> VortexResult<Flattened> {
        let flattened = Flattened::Null(self);
        Ok(flattened)
    }
}

impl ArrayValidity for NullArray {
    /// Reports the whole array as invalid, with the array's length.
    fn logical_validity(&self) -> LogicalValidity {
        let len = self.len();
        LogicalValidity::AllInvalid(len)
    }

    /// Always `false`, regardless of the index.
    fn is_valid(&self, _index: usize) -> bool {
        false
    }
}

impl ArrayStatisticsCompute for NullArray {
    /// Returns the all-nulls statistics set for this array's length, whichever
    /// statistic was requested.
    fn compute_statistics(&self, _stat: Stat) -> VortexResult<StatsSet> {
        let len = self.len();
        let stats = StatsSet::nulls(len, &DType::Null);
        Ok(stats)
    }
}

impl AcceptArrayVisitor for NullArray {
    /// Visits a single item: an all-invalid validity.
    fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
        let validity = Validity::AllInvalid;
        visitor.visit_validity(&validity)
    }
}

impl ArrayTrait for NullArray {
    /// Always reports zero bytes.
    fn nbytes(&self) -> usize {
        0
    }

    /// Number of nulls in the array, read from the metadata.
    fn len(&self) -> usize {
        let metadata = self.metadata();
        metadata.len
    }
}

// Empty impl: `NullEncoding` overrides nothing and relies on the trait's default methods.
impl EncodingCompression for NullEncoding {}
Loading

0 comments on commit 8d87deb

Please sign in to comment.