diff --git a/vortex-array/src/array/constant/canonical.rs b/vortex-array/src/array/constant/canonical.rs index db3c45be6f..18fa42c01b 100644 --- a/vortex-array/src/array/constant/canonical.rs +++ b/vortex-array/src/array/constant/canonical.rs @@ -136,16 +136,12 @@ mod tests { let const_array = ConstantArray::new("four".to_string(), 4); // Check all values correct. - let canonical = const_array - .into_canonical() - .unwrap() - .into_varbinview() - .unwrap(); + let canonical = const_array.into_varbinview().unwrap(); assert_eq!(canonical.len(), 4); for i in 0..=3 { - assert_eq!(scalar_at(&canonical, i).unwrap(), "four".into(),); + assert_eq!(scalar_at(&canonical, i).unwrap(), "four".into()); } } diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index dcffa6e6e3..2fc4d7bba0 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -8,15 +8,11 @@ use arrow_array::types::{BinaryViewType, ByteViewType, StringViewType}; use arrow_array::{ArrayRef, BinaryViewArray, GenericByteViewArray, StringViewArray}; use arrow_buffer::ScalarBuffer; use itertools::Itertools; -use rkyv::from_bytes; use static_assertions::{assert_eq_align, assert_eq_size}; use vortex_buffer::{Alignment, Buffer, ByteBuffer}; use vortex_dtype::DType; -use vortex_error::{ - vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect, VortexResult, VortexUnwrap, -}; +use vortex_error::{vortex_bail, vortex_panic, VortexExpect, VortexResult, VortexUnwrap}; -use crate::array::{StructArray, StructMetadata, VarBinMetadata}; use crate::arrow::FromArrowArray; use crate::encoding::ids; use crate::stats::StatsSet; @@ -275,7 +271,8 @@ impl VarBinViewArray { /// Will return a bytebuffer pointing to the underlying data without performing a copy #[inline] pub fn bytes_at(&self, index: usize) -> ByteBuffer { - let view = self.views()[index]; + let views = self.views(); + let view = &views[index]; // Expect this to be the common case: strings > 12 bytes. if !view.is_inlined() { let view_ref = view.as_view(); @@ -283,12 +280,10 @@ impl VarBinViewArray { .slice(view_ref.to_range()) } else { // Return access to the range of bytes around it. - let view_byte_start = index * size_of::() + 4; - let view_byte_end = view_byte_start + view.len() as usize; - self.0 - .byte_buffer(0) - .vortex_expect("Must have views buffer") - .slice_with_alignment(view_byte_start..view_byte_end, Alignment::new(1)) + views + .clone() + .into_byte_buffer() + .slice_ref(view.as_inlined().value()) } } diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index bf8c45a928..17a98aaba0 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -1,6 +1,8 @@ use std::any::type_name; +use std::cmp::Ordering; use std::collections::Bound; use std::fmt::{Debug, Formatter}; +use std::hash::{Hash, Hasher}; use std::ops::{Deref, RangeBounds}; use bytes::{Buf, Bytes}; @@ -10,7 +12,7 @@ use crate::debug::TruncatedDebug; use crate::{Alignment, BufferMut, ByteBuffer}; /// An immutable buffer of items of `T`. -#[derive(Clone, PartialEq, Eq, PartialOrd, Hash)] +#[derive(Clone)] pub struct Buffer { pub(crate) bytes: Bytes, pub(crate) length: usize, @@ -18,6 +20,32 @@ pub struct Buffer { pub(crate) _marker: std::marker::PhantomData, } +impl PartialEq for Buffer { + fn eq(&self, other: &Self) -> bool { + self.bytes == other.bytes + } +} + +impl Eq for Buffer {} + +impl Ord for Buffer { + fn cmp(&self, other: &Self) -> Ordering { + self.bytes.cmp(&other.bytes) + } +} + +impl PartialOrd for Buffer { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.bytes.cmp(&other.bytes)) + } +} + +impl Hash for Buffer { + fn hash(&self, state: &mut H) { + self.bytes.as_ref().hash(state) + } +} + impl Buffer { /// Returns a new `Buffer` copied from the provided `Vec`, `&[T]`, etc. /// @@ -234,6 +262,52 @@ impl Buffer { } } + /// Returns a slice of self that is equivalent to the given subset. + /// + /// When processing the buffer you will often end up with &\[T\] that is a subset + /// of the underlying buffer. This function turns the slice into a slice of the buffer + /// it has been taken from. + /// + /// # Panics: + /// Requires that the given sub slice is in fact contained within the Bytes buffer; otherwise this function will panic. + #[inline(always)] + pub fn slice_ref(&self, subset: &[T]) -> Self { + self.slice_ref_with_alignment(subset, Alignment::of::()) + } + + /// Returns a slice of self that is equivalent to the given subset. + /// + /// When processing the buffer you will often end up with &\[T\] that is a subset + /// of the underlying buffer. This function turns the slice into a slice of the buffer + /// it has been taken from. + /// + /// # Panics: + /// Requires that the given sub slice is in fact contained within the Bytes buffer; otherwise this function will panic. + /// Also requires that the given alignment aligns to the type of slice and is smaller or equal to the buffers alignment + pub fn slice_ref_with_alignment(&self, subset: &[T], alignment: Alignment) -> Self { + if !alignment.is_aligned_to(Alignment::of::()) { + vortex_panic!("slice_ref alignment must at least align to type T") + } + + if !self.alignment.is_aligned_to(alignment) { + vortex_panic!("slice_ref subset alignment must at least align to the buffer alignment") + } + + if subset.as_ptr().align_offset(*alignment) != 0 { + vortex_panic!("slice_ref subset must be aligned to {:?}", alignment); + } + + let subset_u8 = + unsafe { std::slice::from_raw_parts(subset.as_ptr().cast(), size_of_val(subset)) }; + + Self { + bytes: self.bytes.slice_ref(subset_u8), + length: subset.len(), + alignment, + _marker: Default::default(), + } + } + /// Returns the underlying aligned buffer. pub fn into_inner(self) -> Bytes { self.bytes