Skip to content

Commit

Permalink
Add Buffer::slice_ref (#2072)
Browse files Browse the repository at this point in the history
And fix PartialEq, PartialOrd, Hash for Buffer

fix #2009
  • Loading branch information
robert3005 authored Jan 28, 2025
1 parent bae2e8e commit 4d5fdaf
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 19 deletions.
8 changes: 2 additions & 6 deletions vortex-array/src/array/constant/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,12 @@ mod tests {
let const_array = ConstantArray::new("four".to_string(), 4);

// Check all values correct.
let canonical = const_array
.into_canonical()
.unwrap()
.into_varbinview()
.unwrap();
let canonical = const_array.into_varbinview().unwrap();

assert_eq!(canonical.len(), 4);

for i in 0..=3 {
assert_eq!(scalar_at(&canonical, i).unwrap(), "four".into(),);
assert_eq!(scalar_at(&canonical, i).unwrap(), "four".into());
}
}

Expand Down
19 changes: 7 additions & 12 deletions vortex-array/src/array/varbinview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,11 @@ use arrow_array::types::{BinaryViewType, ByteViewType, StringViewType};
use arrow_array::{ArrayRef, BinaryViewArray, GenericByteViewArray, StringViewArray};
use arrow_buffer::ScalarBuffer;
use itertools::Itertools;
use rkyv::from_bytes;
use static_assertions::{assert_eq_align, assert_eq_size};
use vortex_buffer::{Alignment, Buffer, ByteBuffer};
use vortex_dtype::DType;
use vortex_error::{
vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect, VortexResult, VortexUnwrap,
};
use vortex_error::{vortex_bail, vortex_panic, VortexExpect, VortexResult, VortexUnwrap};

use crate::array::{StructArray, StructMetadata, VarBinMetadata};
use crate::arrow::FromArrowArray;
use crate::encoding::ids;
use crate::stats::StatsSet;
Expand Down Expand Up @@ -275,20 +271,19 @@ impl VarBinViewArray {
/// Returns the bytes of the value at `index` as a [`ByteBuffer`].
///
/// Will return a bytebuffer pointing to the underlying data without performing a copy
#[inline]
pub fn bytes_at(&self, index: usize) -> ByteBuffer {
let view = self.views()[index];
let views = self.views();
let view = &views[index];
// Expect this to be the common case: strings > 12 bytes.
if !view.is_inlined() {
// Non-inlined views name a data buffer (by index) and a byte range inside it.
let view_ref = view.as_view();
self.buffer(view_ref.buffer_index() as usize)
.slice(view_ref.to_range())
} else {
// Return access to the range of bytes around it.
let view_byte_start = index * size_of::<BinaryView>() + 4;
let view_byte_end = view_byte_start + view.len() as usize;
self.0
.byte_buffer(0)
.vortex_expect("Must have views buffer")
.slice_with_alignment(view_byte_start..view_byte_end, Alignment::new(1))
// Inlined values live inside the view itself, so the result is a slice of the
// views buffer located via the inlined value's own byte slice.
views
.clone()
.into_byte_buffer()
.slice_ref(view.as_inlined().value())
}
}

Expand Down
76 changes: 75 additions & 1 deletion vortex-buffer/src/buffer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::any::type_name;
use std::cmp::Ordering;
use std::collections::Bound;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::{Deref, RangeBounds};

use bytes::{Buf, Bytes};
Expand All @@ -10,14 +12,40 @@ use crate::debug::TruncatedDebug;
use crate::{Alignment, BufferMut, ByteBuffer};

/// An immutable buffer of items of `T`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash)]
#[derive(Clone)]
pub struct Buffer<T> {
/// The underlying byte storage holding the buffer's items.
pub(crate) bytes: Bytes,
/// Number of `T` items in the buffer (an element count, not a byte count).
pub(crate) length: usize,
/// The alignment recorded for this buffer's bytes.
pub(crate) alignment: Alignment,
/// Ties the buffer to its item type `T` without storing a `T`.
pub(crate) _marker: std::marker::PhantomData<T>,
}

impl<T> PartialEq for Buffer<T> {
    /// Buffers are equal when their underlying bytes are equal.
    ///
    /// Only `bytes` is compared; `length` and `alignment` do not take part, and no
    /// `PartialEq` bound on `T` is required.
    fn eq(&self, rhs: &Self) -> bool {
        PartialEq::eq(&self.bytes, &rhs.bytes)
    }
}

// Marker impl: `PartialEq::eq` for `Buffer<T>` compares only `bytes`, so no bound on `T` is needed.
impl<T> Eq for Buffer<T> {}

impl<T> Ord for Buffer<T> {
    /// Orders buffers by comparing their underlying bytes.
    ///
    /// Only `bytes` is compared; `length` and `alignment` do not take part.
    fn cmp(&self, rhs: &Self) -> Ordering {
        Ord::cmp(&self.bytes, &rhs.bytes)
    }
}

impl<T> PartialOrd for Buffer<T> {
    /// Always returns `Some`, delegating to the total order defined by [`Ord::cmp`].
    ///
    /// Delegating (rather than repeating the comparison) keeps `PartialOrd` and `Ord`
    /// consistent by construction if the ordering ever changes.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl<T> Hash for Buffer<T> {
    /// Feeds the buffer's underlying bytes, viewed as a byte slice, into `state`.
    ///
    /// Only `bytes` is hashed, matching what `PartialEq::eq` compares.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let raw: &[u8] = self.bytes.as_ref();
        raw.hash(state)
    }
}

impl<T> Buffer<T> {
/// Returns a new `Buffer<T>` copied from the provided `Vec<T>`, `&[T]`, etc.
///
Expand Down Expand Up @@ -234,6 +262,52 @@ impl<T> Buffer<T> {
}
}

/// Returns a slice of `self` that covers exactly the elements of `subset`.
///
/// While processing a buffer you frequently end up holding a `&[T]` that borrows part of
/// it. This method converts such a borrowed sub-slice back into a `Buffer` over the same
/// memory, using the alignment of `T` for the result.
///
/// # Panics
///
/// Panics if `subset` is not contained within the underlying `Bytes` buffer.
#[inline(always)]
pub fn slice_ref(&self, subset: &[T]) -> Self {
    let type_alignment = Alignment::of::<T>();
    self.slice_ref_with_alignment(subset, type_alignment)
}

/// Returns a slice of `self` that covers exactly the elements of `subset`, tagged with the
/// given `alignment`.
///
/// While processing a buffer you frequently end up holding a `&[T]` that borrows part of
/// it. This method converts such a borrowed sub-slice back into a `Buffer` over the same
/// memory. The returned buffer has `subset.len()` items and records `alignment` as its
/// alignment.
///
/// # Panics
///
/// Panics if any of the following holds:
/// - `alignment` does not align to the alignment of `T`;
/// - `alignment` is larger than the buffer's own alignment (it must be smaller or equal);
/// - the start address of `subset` is not a multiple of `alignment`;
/// - `subset` is not contained within the underlying `Bytes` buffer.
pub fn slice_ref_with_alignment(&self, subset: &[T], alignment: Alignment) -> Self {
    if !alignment.is_aligned_to(Alignment::of::<T>()) {
        vortex_panic!("slice_ref alignment must at least align to type T")
    }

    if !self.alignment.is_aligned_to(alignment) {
        vortex_panic!("slice_ref subset alignment must at least align to the buffer alignment")
    }

    // Test the address directly instead of `align_offset(..) != 0`: the standard library
    // permits `align_offset` to return `usize::MAX` at any time and documents that only
    // performance, never correctness, may depend on its result.
    if (subset.as_ptr() as usize) % *alignment != 0 {
        vortex_panic!("slice_ref subset must be aligned to {:?}", alignment);
    }

    // SAFETY: `subset` is a live `&[T]`, so its pointer is non-null and valid for reads of
    // `size_of_val(subset)` bytes for the duration of this borrow, and `u8` has alignment 1.
    // The byte view is only handed to `Bytes::slice_ref` to locate the sub-range.
    let subset_u8 =
        unsafe { std::slice::from_raw_parts(subset.as_ptr().cast::<u8>(), size_of_val(subset)) };

    Self {
        // `Bytes::slice_ref` panics if `subset_u8` is not contained within `self.bytes`.
        bytes: self.bytes.slice_ref(subset_u8),
        length: subset.len(),
        alignment,
        _marker: Default::default(),
    }
}

/// Returns the underlying aligned buffer.
pub fn into_inner(self) -> Bytes {
self.bytes
Expand Down

0 comments on commit 4d5fdaf

Please sign in to comment.