Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rust): Add general metadata structure to ChunkedArray #16399

Merged
4 commits merged on May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions crates/polars-core/src/chunked_array/builder/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,7 @@ impl ChunkedBuilder<bool, BooleanType> for BooleanChunkedBuilder {

fn finish(mut self) -> BooleanChunked {
let arr = self.array_builder.as_box();

let mut ca = ChunkedArray {
field: Arc::new(self.field),
chunks: vec![arr],
phantom: PhantomData,
bit_settings: Default::default(),
length: 0,
null_count: 0,
};
ca.compute_len();
ca
ChunkedArray::new_with_compute_len(Arc::new(self.field), vec![arr])
}

fn shrink_to_fit(&mut self) {
Expand Down
8 changes: 1 addition & 7 deletions crates/polars-core/src/chunked_array/builder/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,7 @@ pub trait ListBuilderTrait {
fn finish(&mut self) -> ListChunked {
let arr = self.inner_array();

let mut ca = ListChunked {
field: Arc::new(self.field().clone()),
chunks: vec![arr],
phantom: PhantomData,
..Default::default()
};
ca.compute_len();
let mut ca = ListChunked::new_with_compute_len(Arc::new(self.field().clone()), vec![arr]);
if self.fast_explode() {
ca.set_fast_explode()
}
Expand Down
1 change: 0 additions & 1 deletion crates/polars-core/src/chunked_array/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ mod null;
mod primitive;
mod string;

use std::marker::PhantomData;
use std::sync::Arc;

use arrow::array::*;
Expand Down
11 changes: 1 addition & 10 deletions crates/polars-core/src/chunked_array/builder/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,7 @@ where

fn finish(mut self) -> ChunkedArray<T> {
let arr = self.array_builder.as_box();
let mut ca = ChunkedArray {
field: Arc::new(self.field),
chunks: vec![arr],
phantom: PhantomData,
bit_settings: Default::default(),
length: 0,
null_count: 0,
};
ca.compute_len();
ca
ChunkedArray::new_with_compute_len(Arc::new(self.field), vec![arr])
}

fn shrink_to_fit(&mut self) {
Expand Down
24 changes: 2 additions & 22 deletions crates/polars-core/src/chunked_array/builder/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,32 +52,12 @@ impl<T: ViewType + ?Sized> BinViewChunkedBuilder<T> {
impl StringChunkedBuilder {
pub fn finish(mut self) -> StringChunked {
let arr = self.chunk_builder.as_box();

let mut ca = ChunkedArray {
field: self.field,
chunks: vec![arr],
phantom: PhantomData,
bit_settings: Default::default(),
length: 0,
null_count: 0,
};
ca.compute_len();
ca
ChunkedArray::new_with_compute_len(self.field, vec![arr])
}
}
impl BinaryChunkedBuilder {
pub fn finish(mut self) -> BinaryChunked {
let arr = self.chunk_builder.as_box();

let mut ca = ChunkedArray {
field: self.field,
chunks: vec![arr],
phantom: PhantomData,
bit_settings: Default::default(),
length: 0,
null_count: 0,
};
ca.compute_len();
ca
ChunkedArray::new_with_compute_len(self.field, vec![arr])
}
}
16 changes: 14 additions & 2 deletions crates/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,13 @@ impl BinaryChunked {
.map(|arr| arr.to_utf8view_unchecked().boxed())
.collect();
let field = Arc::new(Field::new(self.name(), DataType::String));
StringChunked::from_chunks_and_metadata(chunks, field, self.bit_settings, true, true)
StringChunked::from_chunks_and_metadata(
chunks,
field,
Arc::new(self.effective_metadata().cast()),
true,
true,
)
}
}

Expand All @@ -318,7 +324,13 @@ impl StringChunked {
.collect();
let field = Arc::new(Field::new(self.name(), DataType::Binary));
unsafe {
BinaryChunked::from_chunks_and_metadata(chunks, field, self.bit_settings, true, true)
BinaryChunked::from_chunks_and_metadata(
chunks,
field,
Arc::new(self.effective_metadata().cast()),
true,
true,
)
}
}
}
Expand Down
75 changes: 18 additions & 57 deletions crates/polars-core/src/chunked_array/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,14 +165,12 @@ where
})
.collect();

ChunkedArray {
ChunkedArray::new_with_dims(
field,
chunks,
phantom: PhantomData,
bit_settings: Default::default(),
length: length.try_into().expect(LENGTH_LIMIT_MSG),
null_count: null_count as IdxSize,
}
length.try_into().expect(LENGTH_LIMIT_MSG),
null_count as IdxSize,
)
}

/// Create a new [`ChunkedArray`] from existing chunks.
Expand All @@ -192,17 +190,7 @@ where
/// # Safety
/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
pub unsafe fn with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
let field = self.field.clone();
let mut out = ChunkedArray {
field,
chunks,
phantom: PhantomData,
bit_settings: Default::default(),
length: 0,
null_count: 0,
};
out.compute_len();
out
ChunkedArray::new_with_compute_len(self.field.clone(), chunks)
}

/// Create a new [`ChunkedArray`] from existing chunks.
Expand All @@ -223,16 +211,7 @@ where
}
}
let field = Arc::new(Field::new(name, dtype));
let mut out = ChunkedArray {
field,
chunks,
phantom: PhantomData,
bit_settings: Default::default(),
length: 0,
null_count: 0,
};
out.compute_len();
out
ChunkedArray::new_with_compute_len(field, chunks)
}

/// Create a new ChunkedArray from self, where the chunks are replaced.
Expand All @@ -242,25 +221,23 @@ where
pub(crate) unsafe fn from_chunks_and_metadata(
chunks: Vec<ArrayRef>,
field: Arc<Field>,
bit_settings: Settings,
metadata: Arc<Metadata<T>>,
keep_sorted: bool,
keep_fast_explode: bool,
) -> Self {
let mut out = ChunkedArray {
field,
chunks,
phantom: PhantomData,
bit_settings,
length: 0,
null_count: 0,
};
out.compute_len();
let mut out = ChunkedArray::new_with_compute_len(field, chunks);

let mut md = metadata;
if !keep_sorted {
out.set_sorted_flag(IsSorted::Not);
let inner = Arc::make_mut(&mut md);
inner.set_sorted_flag(IsSorted::Not);
}
if !keep_fast_explode {
out.unset_fast_explode_list()
let inner = Arc::make_mut(&mut md);
inner.set_fast_explode_list(false);
}
out.md = Some(md);

out
}

Expand All @@ -270,16 +247,7 @@ where
dtype: DataType,
) -> Self {
let field = Arc::new(Field::new(name, dtype));
let mut out = ChunkedArray {
field,
chunks,
phantom: PhantomData,
bit_settings: Default::default(),
length: 0,
null_count: 0,
};
out.compute_len();
out
ChunkedArray::new_with_compute_len(field, chunks)
}

pub fn full_null_like(ca: &Self, length: usize) -> Self {
Expand All @@ -300,14 +268,7 @@ where
/// Create a new ChunkedArray from a Vec and a validity mask.
pub fn from_vec_validity(name: &str, values: Vec<T::Native>, buffer: Option<Bitmap>) -> Self {
let arr = to_array::<T>(values, buffer);
let mut out = ChunkedArray {
field: Arc::new(Field::new(name, T::get_dtype())),
chunks: vec![arr],
phantom: PhantomData,
..Default::default()
};
out.compute_len();
out
ChunkedArray::new_with_compute_len(Arc::new(Field::new(name, T::get_dtype())), vec![arr])
}

/// Create a temporary [`ChunkedArray`] from a slice.
Expand Down
16 changes: 4 additions & 12 deletions crates/polars-core/src/chunked_array/from_iterator.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
//! Implementations of upstream traits for [`ChunkedArray<T>`]
use std::borrow::{Borrow, Cow};
#[cfg(feature = "object")]
use std::marker::PhantomData;

#[cfg(feature = "object")]
use arrow::bitmap::{Bitmap, MutableBitmap};
Expand Down Expand Up @@ -278,15 +276,9 @@ impl<T: PolarsObject> FromIterator<Option<T>> for ObjectChunked<T> {
offset: 0,
len,
});
let mut out = ChunkedArray {
field: Arc::new(Field::new("", get_object_type::<T>())),
chunks: vec![arr],
phantom: PhantomData,
bit_settings: Default::default(),
length: 0,
null_count: 0,
};
out.compute_len();
out
ChunkedArray::new_with_compute_len(
Arc::new(Field::new("", get_object_type::<T>())),
vec![arr],
)
}
}
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<'a, I: Iterator<Item = Option<ArrayBox>>> Iterator for AmortizedListIter<'a
unsafe { *self.inner.as_mut() = array_ref };

// last iteration could have set the sorted flag (e.g. in compute_len)
self.series_container.clear_settings();
self.series_container.clear_flags();
// make sure that the length is correct
self.series_container._get_inner_mut().compute_len();

Expand Down Expand Up @@ -151,7 +151,7 @@ impl ListChunked {
vec![inner_values.clone()],
&iter_dtype,
);
s.clear_settings();
s.clear_flags();
Box::pin(s)
};

Expand Down
7 changes: 3 additions & 4 deletions crates/polars-core/src/chunked_array/list/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! Special list utility methods
pub(super) mod iterator;

use crate::chunked_array::Settings;
use crate::prelude::*;

impl ListChunked {
Expand All @@ -19,14 +18,14 @@ impl ListChunked {
field.coerce(DataType::List(Box::new(dtype)));
}
pub fn set_fast_explode(&mut self) {
self.bit_settings.insert(Settings::FAST_EXPLODE_LIST)
Arc::make_mut(self.metadata_mut()).set_fast_explode_list(true);
}
pub(crate) fn unset_fast_explode(&mut self) {
self.bit_settings.remove(Settings::FAST_EXPLODE_LIST)
Arc::make_mut(self.metadata_mut()).set_fast_explode_list(false);
}

pub fn _can_fast_explode(&self) -> bool {
self.bit_settings.contains(Settings::FAST_EXPLODE_LIST)
self.effective_metadata().get_fast_explode_list()
}

/// Set the logical type of the [`ListChunked`].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use polars_utils::sync::SyncPtr;
pub use revmap::*;

use super::*;
use crate::chunked_array::Settings;
use crate::chunked_array::metadata::MetadataFlags;
use crate::prelude::*;
use crate::series::IsSorted;
use crate::using_string_cache;
Expand Down Expand Up @@ -173,12 +173,12 @@ impl CategoricalChunked {
}
}

pub(crate) fn get_flags(&self) -> Settings {
pub(crate) fn get_flags(&self) -> MetadataFlags {
self.physical().get_flags()
}

/// Set flags for the Chunked Array
pub(crate) fn set_flags(&mut self, mut flags: Settings) {
pub(crate) fn set_flags(&mut self, mut flags: MetadataFlags) {
// We should not set the sorted flag if we are sorting in lexical order
if self.uses_lexical_ordering() {
flags.set_sorted_flag(IsSorted::Not)
Expand Down
Loading