Skip to content

Commit

Permalink
dear god
Browse files Browse the repository at this point in the history
  • Loading branch information
a10y committed Jan 15, 2025
1 parent 4cc2c0d commit 7031faa
Show file tree
Hide file tree
Showing 118 changed files with 920 additions and 349 deletions.
4 changes: 2 additions & 2 deletions bench-vortex/benches/bytes_at.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use parquet::data_type::AsBytes;
use vortex::array::{VarBinArray, VarBinViewArray};
use vortex::buffer::{buffer, ByteBuffer};
use vortex::dtype::{DType, Nullability};
use vortex::dtypes::DTYPE_STRING_NONNULL;
use vortex::ipc::iterator::{ArrayIteratorIPC, SyncIPCReader};
use vortex::iter::ArrayIteratorExt;
use vortex::validity::Validity;
Expand All @@ -17,7 +17,7 @@ fn array_data_fixture() -> VarBinArray {
VarBinArray::try_new(
buffer![0i32, 5i32, 10i32, 15i32, 20i32].into_array(),
ByteBuffer::copy_from(b"helloworldhelloworld".as_bytes()),
DType::Utf8(Nullability::NonNullable),
DTYPE_STRING_NONNULL.clone(),
Validity::NonNullable,
)
.unwrap()
Expand Down
3 changes: 2 additions & 1 deletion bench-vortex/src/bin/notimplemented.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use vortex::array::{
use vortex::buffer::buffer;
use vortex::datetime_dtype::{TemporalMetadata, TimeUnit, TIME_ID};
use vortex::dtype::{DType, ExtDType, Nullability, PType};
use vortex::dtypes::DTYPE_BOOL_NONNULL;
use vortex::encodings::alp::{ALPArray, Exponents, RDEncoder};
use vortex::encodings::bytebool::ByteBoolArray;
use vortex::encodings::datetime_parts::DateTimePartsArray;
Expand Down Expand Up @@ -79,7 +80,7 @@ fn enc_impls() -> Vec<ArrayData> {
BoolArray::from_iter([false]).into_array(),
BoolArray::from_iter([true]).into_array(),
],
DType::Bool(Nullability::NonNullable),
DTYPE_BOOL_NONNULL.clone(),
)
.unwrap()
.into_array(),
Expand Down
4 changes: 3 additions & 1 deletion bench-vortex/src/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,9 @@ pub async fn register_vortex_files(
let name: Arc<str> = field.name().as_str().into();
let dtype = types_map[&name].clone();
let chunks = arrays_map.remove(&name).unwrap();
let chunked_child = ChunkedArray::try_new(chunks, dtype).unwrap();
// TODO(aduffy): fix extra clone
let chunked_child =
ChunkedArray::try_new(chunks, Arc::new(dtype)).unwrap();

(name, chunked_child.into_array())
})
Expand Down
3 changes: 2 additions & 1 deletion bench-vortex/src/data_downloads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::fs::File;
use std::future::Future;
use std::io::{Read, Write};
use std::path::PathBuf;
use std::sync::Arc;

use arrow_array::RecordBatchReader;
use bzip2::read::BzDecoder;
Expand Down Expand Up @@ -49,7 +50,7 @@ pub fn data_vortex_uncompressed(fname_out: &str, downloaded_data: PathBuf) -> Pa
.into_iter()
.map(|batch_result| ArrayData::try_from(batch_result.unwrap()).unwrap())
.collect(),
dtype,
Arc::new(dtype),
)
.unwrap()
.into_array();
Expand Down
2 changes: 1 addition & 1 deletion bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ pub fn fetch_taxi_data() -> ArrayData {
.map(ArrayData::try_from)
.map(Result::unwrap)
.collect_vec(),
DType::from_arrow(schema),
Arc::new(DType::from_arrow(schema)),
)
.unwrap()
.into_array()
Expand Down
2 changes: 1 addition & 1 deletion bench-vortex/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ pub fn read_parquet_to_vortex<P: AsRef<Path>>(parquet_path: P) -> VortexResult<C
.map(|batch_result| batch_result.unwrap())
.map(ArrayData::try_from)
.collect::<VortexResult<Vec<_>>>()?;
ChunkedArray::try_new(chunks, dtype)
ChunkedArray::try_new(chunks, Arc::new(dtype))
}

pub fn compress_parquet_to_vortex(parquet_path: &Path) -> VortexResult<ArrayData> {
Expand Down
2 changes: 1 addition & 1 deletion bench-vortex/src/tpch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ async fn register_vortex_file(
let name: Arc<str> = field.name().as_str().into();
let dtype = types_map[&name].clone();
let chunks = arrays_map.remove(&name).unwrap();
let mut chunked_child = ChunkedArray::try_new(chunks, dtype).unwrap();
let mut chunked_child = ChunkedArray::try_new(chunks, Arc::new(dtype)).unwrap();
if !enable_compression {
chunked_child = chunked_child
.rechunk(TARGET_BLOCK_BYTESIZE, TARGET_BLOCK_SIZE)
Expand Down
5 changes: 3 additions & 2 deletions bench-vortex/src/vortex_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub async fn vortex_chunk_sizes(path: PathBuf) -> VortexResult<CompressionRunSta
let file = File::open(path.as_path())?;
let total_compressed_size = file.metadata()?.size();
let vortex = open_vortex(path.as_path()).await?;
let DType::Struct(st, _) = vortex.dtype() else {
let DType::Struct(st, _) = vortex.dtype().as_ref() else {
unreachable!()
};

Expand All @@ -32,7 +32,8 @@ pub async fn vortex_chunk_sizes(path: PathBuf) -> VortexResult<CompressionRunSta
}

let stats = CompressionRunStats {
schema: chunked_array.dtype().clone(),
// TODO(aduffy): fix extra clone
schema: chunked_array.dtype().as_ref().clone(),
file_type: FileType::Vortex,
total_compressed_size: Some(total_compressed_size),
compressed_sizes,
Expand Down
21 changes: 11 additions & 10 deletions encodings/alp/src/alp/array.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::fmt::{Debug, Display};
use std::sync::Arc;

use serde::{Deserialize, Serialize};
use vortex_array::array::PrimitiveArray;
Expand All @@ -9,8 +10,8 @@ use vortex_array::validity::{ArrayValidity, LogicalValidity, ValidityVTable};
use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
impl_encoding, primitive_dtype, primitive_dtype_ref, ArrayDType, ArrayData, ArrayLen,
ArrayTrait, Canonical, IntoArrayData, IntoCanonical,
};
use vortex_dtype::{DType, PType};
use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult};
Expand All @@ -37,9 +38,9 @@ impl ALPArray {
exponents: Exponents,
patches: Option<Patches>,
) -> VortexResult<Self> {
let dtype = match encoded.dtype() {
DType::Primitive(PType::I32, nullability) => DType::Primitive(PType::F32, *nullability),
DType::Primitive(PType::I64, nullability) => DType::Primitive(PType::F64, *nullability),
let dtype = match encoded.dtype().as_ref() {
DType::Primitive(PType::I32, nullability) => primitive_dtype!(PType::F32, *nullability),
DType::Primitive(PType::I64, nullability) => primitive_dtype!(PType::F64, *nullability),
d => vortex_bail!(MismatchedTypes: "int32 or int64", d),
};

Expand Down Expand Up @@ -76,7 +77,7 @@ impl ALPArray {

pub fn encoded(&self) -> ArrayData {
self.as_ref()
.child(0, &self.encoded_dtype(), self.len())
.child(0, self.encoded_dtype(), self.len())
.vortex_expect("Missing encoded child in ALPArray")
}

Expand All @@ -100,13 +101,13 @@ impl ALPArray {
}

#[inline]
fn encoded_dtype(&self) -> DType {
match self.dtype() {
fn encoded_dtype(&self) -> &Arc<DType> {
match self.dtype().as_ref() {
DType::Primitive(PType::F32, _) => {
DType::Primitive(PType::I32, self.dtype().nullability())
primitive_dtype_ref!(PType::I32, self.dtype().nullability())
}
DType::Primitive(PType::F64, _) => {
DType::Primitive(PType::I64, self.dtype().nullability())
primitive_dtype_ref!(PType::I64, self.dtype().nullability())
}
d => vortex_panic!(MismatchedTypes: "f32 or f64", d),
}
Expand Down
34 changes: 17 additions & 17 deletions encodings/alp/src/alp_rd/array.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::fmt::{Debug, Display};
use std::sync::Arc;

use serde::{Deserialize, Serialize};
use vortex_array::array::PrimitiveArray;
Expand All @@ -8,7 +9,8 @@ use vortex_array::stats::{StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity, ValidityVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical,
impl_encoding, primitive_dtype_ref, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical,
IntoCanonical,
};
use vortex_dtype::{DType, Nullability, PType};
use vortex_error::{vortex_bail, VortexExpect, VortexResult};
Expand All @@ -34,7 +36,7 @@ impl Display for ALPRDMetadata {

impl ALPRDArray {
pub fn try_new(
dtype: DType,
dtype: Arc<DType>,
left_parts: ArrayData,
left_parts_dict: impl AsRef<[u16]>,
right_parts: ArrayData,
Expand Down Expand Up @@ -123,27 +125,25 @@ impl ALPRDArray {

/// The dtype of the left parts of the array.
#[inline]
fn left_parts_dtype(&self) -> DType {
DType::Primitive(self.metadata().left_parts_ptype, self.dtype().nullability())
fn left_parts_dtype(&self) -> &Arc<DType> {
primitive_dtype_ref!(self.metadata().left_parts_ptype, self.dtype().nullability())
}

/// The dtype of the right parts of the array.
#[inline]
fn right_parts_dtype(&self) -> DType {
DType::Primitive(
if self.is_f32() {
PType::U32
} else {
PType::U64
},
Nullability::NonNullable,
)
fn right_parts_dtype(&self) -> &Arc<DType> {
let ptype = if self.is_f32() {
PType::U32
} else {
PType::U64
};
primitive_dtype_ref!(ptype, Nullability::NonNullable)
}

/// The dtype of the patches of the left parts of the array.
#[inline]
fn left_parts_patches_dtype(&self) -> DType {
DType::Primitive(self.metadata().left_parts_ptype, Nullability::NonNullable)
fn left_parts_patches_dtype(&self) -> &Arc<DType> {
primitive_dtype_ref!(self.metadata().left_parts_ptype, Nullability::NonNullable)
}

/// The leftmost (most significant) bits of the floating point values stored in the array.
Expand All @@ -169,10 +169,10 @@ impl ALPRDArray {
Patches::new(
self.len(),
self.as_ref()
.child(2, &metadata.indices_dtype(), metadata.len())
.child(2, metadata.indices_dtype(), metadata.len())
.vortex_expect("ALPRDArray: patch indices"),
self.as_ref()
.child(3, &self.left_parts_patches_dtype(), metadata.len())
.child(3, self.left_parts_patches_dtype(), metadata.len())
.vortex_expect("ALPRDArray: patch values"),
)
})
Expand Down
6 changes: 3 additions & 3 deletions encodings/alp/src/alp_rd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ use itertools::Itertools;
use num_traits::{Float, One, PrimInt};
use vortex_array::aliases::hash_map::HashMap;
use vortex_array::array::PrimitiveArray;
use vortex_array::{ArrayDType, IntoArrayData, IntoArrayVariant};
use vortex_array::{primitive_dtype, ArrayDType, IntoArrayData, IntoArrayVariant};
use vortex_buffer::{Buffer, BufferMut};
use vortex_dtype::{match_each_integer_ptype, DType, NativePType};
use vortex_dtype::{match_each_integer_ptype, NativePType};
use vortex_error::{vortex_bail, VortexExpect, VortexResult, VortexUnwrap};
use vortex_fastlanes::bitpack_encode_unchecked;

Expand Down Expand Up @@ -240,7 +240,7 @@ impl RDEncoder {
});

ALPRDArray::try_new(
DType::Primitive(T::PTYPE, packed_left.dtype().nullability()),
primitive_dtype!(T::PTYPE, packed_left.dtype().nullability()),
packed_left,
&self.codes,
packed_right,
Expand Down
10 changes: 6 additions & 4 deletions encodings/bytebool/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@ use std::sync::Arc;
use arrow_buffer::BooleanBuffer;
use serde::{Deserialize, Serialize};
use vortex_array::array::BoolArray;
use vortex_array::dtypes::DTYPE_BOOL_NONNULL;
use vortex_array::encoding::ids;
use vortex_array::stats::StatsSet;
use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable};
use vortex_array::variants::{BoolArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical};
use vortex_array::{
bool_dtype, impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical,
};
use vortex_buffer::ByteBuffer;
use vortex_dtype::DType;
use vortex_error::{VortexExpect as _, VortexResult};

impl_encoding!("vortex.bytebool", ids::BYTE_BOOL, ByteBool);
Expand All @@ -31,7 +33,7 @@ impl ByteBoolArray {
pub fn validity(&self) -> Validity {
self.metadata().validity.to_validity(|| {
self.as_ref()
.child(0, &Validity::DTYPE, self.len())
.child(0, &DTYPE_BOOL_NONNULL, self.len())
.vortex_expect("ByteBoolArray: accessing validity child")
})
}
Expand All @@ -41,7 +43,7 @@ impl ByteBoolArray {

ArrayData::try_new_owned(
&ByteBoolEncoding,
DType::Bool(validity.nullability()),
bool_dtype!(validity.nullability()),
length,
Arc::new(ByteBoolMetadata {
validity: validity.to_metadata(length)?,
Expand Down
24 changes: 17 additions & 7 deletions encodings/datetime-parts/src/array.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::fmt::{Debug, Display};
use std::sync::Arc;

use serde::{Deserialize, Serialize};
use vortex_array::array::StructArray;
Expand All @@ -9,10 +10,10 @@ use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityV
use vortex_array::variants::{ExtensionArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
impl_encoding, primitive_dtype_ref, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical,
IntoArrayData, IntoCanonical,
};
use vortex_dtype::{DType, PType};
use vortex_dtype::{DType, Nullability, PType};
use vortex_error::{vortex_bail, VortexExpect as _, VortexResult, VortexUnwrap};

use crate::compute::decode_to_temporal;
Expand Down Expand Up @@ -72,7 +73,8 @@ impl DateTimePartsArray {
};

Self::try_from_parts(
dtype,
// TODO(aduffy): fix cloning
Arc::new(dtype),
length,
metadata,
[days, seconds, subsecond].into(),
Expand All @@ -84,21 +86,29 @@ impl DateTimePartsArray {
self.as_ref()
.child(
0,
&DType::Primitive(self.metadata().days_ptype, self.dtype().nullability()),
primitive_dtype_ref!(self.metadata().days_ptype, self.dtype().nullability()),
self.len(),
)
.vortex_expect("DatetimePartsArray missing days array")
}

pub fn seconds(&self) -> ArrayData {
self.as_ref()
.child(1, &self.metadata().seconds_ptype.into(), self.len())
.child(
1,
primitive_dtype_ref!(self.metadata().seconds_ptype, Nullability::NonNullable),
self.len(),
)
.vortex_expect("DatetimePartsArray missing seconds array")
}

pub fn subsecond(&self) -> ArrayData {
self.as_ref()
.child(2, &self.metadata().subseconds_ptype.into(), self.len())
.child(
2,
primitive_dtype_ref!(self.metadata().subseconds_ptype, Nullability::NonNullable),
self.len(),
)
.vortex_expect("DatetimePartsArray missing subsecond array")
}

Expand Down
3 changes: 2 additions & 1 deletion encodings/datetime-parts/src/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use crate::{DateTimePartsArray, DateTimePartsEncoding};
impl FilterFn<DateTimePartsArray> for DateTimePartsEncoding {
fn filter(&self, array: &DateTimePartsArray, mask: &FilterMask) -> VortexResult<ArrayData> {
Ok(DateTimePartsArray::try_new(
array.dtype().clone(),
// TODO(aduffy): fix cloning
array.dtype().as_ref().clone(),
filter(array.days().as_ref(), mask)?,
filter(array.seconds().as_ref(), mask)?,
filter(array.subsecond().as_ref(), mask)?,
Expand Down
Loading

0 comments on commit 7031faa

Please sign in to comment.