Skip to content

Commit

Permalink
Add: joined and embedded ranges.
Browse files Browse the repository at this point in the history
Fix: Python build with new scans #43
Fix: retrieving the gist of document fields.
  • Loading branch information
ashvardanian committed Sep 5, 2022
1 parent 5224cbf commit 6b59392
Show file tree
Hide file tree
Showing 12 changed files with 355 additions and 368 deletions.
6 changes: 3 additions & 3 deletions include/ukv/cpp/members_range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ class pairs_stream_t {

ukv_key_t next_min_key_ = std::numeric_limits<ukv_key_t>::min();
indexed_range_gt<ukv_key_t*> fetched_keys_;
joined_values_t values_view;
joined_bins_t values_view;
std::size_t fetched_offset_ = 0;

status_t prefetch() noexcept {
Expand Down Expand Up @@ -228,14 +228,14 @@ class pairs_stream_t {
ukv_options_default_k,
&found_vals,
&found_offs,
&found_lens,
nullptr,
nullptr,
arena_read_.member_ptr(),
status.member_ptr());
if (!status)
return status;

values_view = joined_values_t {found_vals, found_offs, found_lens, count};
values_view = joined_bins_t {found_vals, found_offs, count};
next_min_key_ = count <= read_ahead_ ? ukv_key_unknown_k : fetched_keys_[count - 1] + 1;
return {};
}
Expand Down
12 changes: 7 additions & 5 deletions include/ukv/cpp/members_ref.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class members_ref_gt {
using keys_extractor_t = places_arg_extractor_gt<locations_plain_t>;
static constexpr bool is_one_k = keys_extractor_t::is_one_k;

using value_t = std::conditional_t<is_one_k, value_view_t, joined_values_t>;
using value_t = std::conditional_t<is_one_k, value_view_t, embedded_bins_t>;
using present_t = std::conditional_t<is_one_k, bool, strided_range_gt<bool>>;
using length_t = std::conditional_t<is_one_k, ukv_val_len_t, indexed_range_gt<ukv_val_len_t*>>;

Expand Down Expand Up @@ -218,7 +218,7 @@ class members_ref_gt {
* @brief Find the names of all unique fields in requested documents.
* ! Applies only to document collections and when fields are not present in locations!
*/
expected_gt<strings_tape_iterator_t> gist(bool track = false) noexcept;
expected_gt<joined_strs_t> gist(bool track = false) noexcept;

/**
* @brief For N documents and M fields gather (N * M) responses.
Expand Down Expand Up @@ -328,7 +328,7 @@ expected_gt<expected_at> members_ref_gt<locations_at>::any_get(ukv_options_t opt
if constexpr (is_one_k)
return value_view_t {found_values + *found_offsets, *found_lengths};
else
return joined_values_t {found_values, found_offsets, found_lengths, count};
return embedded_bins_t {found_values, found_offsets, found_lengths, count};
}
}

Expand Down Expand Up @@ -396,10 +396,11 @@ status_t members_ref_gt<locations_at>::any_assign(contents_arg_at&& vals_ref, uk
}

template <typename locations_at>
expected_gt<strings_tape_iterator_t> members_ref_gt<locations_at>::gist(bool track) noexcept {
expected_gt<joined_strs_t> members_ref_gt<locations_at>::gist(bool track) noexcept {

status_t status;
ukv_size_t found_count = 0;
ukv_val_len_t* found_offsets = nullptr;
ukv_str_view_t found_strings = nullptr;

auto options = track ? ukv_option_read_track_k : ukv_options_default_k;
Expand All @@ -418,11 +419,12 @@ expected_gt<strings_tape_iterator_t> members_ref_gt<locations_at>::gist(bool tra
keys.stride(),
options,
&found_count,
&found_offsets,
&found_strings,
arena_,
status.member_ptr());

strings_tape_iterator_t view {found_count, found_strings};
joined_strs_t view {found_strings, found_offsets, found_count};
return {std::move(status), std::move(view)};
}

Expand Down
150 changes: 126 additions & 24 deletions include/ukv/cpp/ranges.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
* @date 4 Jul 2022
*
* @brief Smart Pointers, Monads and Range-like templates for C++ bindings.
* > "Strided": defines the number of bytes to jump until next entry, instead of `sizeof`.
* > "Joined": Indexes variable-length objects using just base pointer and N+1 offsets,
* assuming the next entry starts right after the previous one without gaps.
* > "Embedded": Extends "Joined" ranges by storing an explicit length for every object.
* In that case the order of elements is irrelevant and we need just N offsets & N lengths.
*/

#pragma once
Expand Down Expand Up @@ -316,60 +321,157 @@ struct range_gt {
* @brief A read-only iterator for values packed into a
* contiguous memory range. Doesn't own underlying memory.
*/
class joined_values_iterator_t {
template <typename chunk_at>
class joined_chunks_iterator_gt {
// Iterator over variable-length chunks addressed by a base pointer and an
// array of offsets: the current chunk starts at `contents_ + offsets_[0]`
// and spans `offsets_[1] - offsets_[0]` elements. Stores raw pointers only.

// NOTE(review): `contents_` is declared twice in this view (here as
// `ukv_val_ptr_t`, below as `element_t*`); presumably this line is the
// pre-change side of the diff - confirm.
ukv_val_ptr_t contents_ = nullptr;
using chunk_t = chunk_at;
// Element type of a chunk, taken from `chunk_at::value_type`.
using element_t = typename chunk_t::value_type;

element_t* contents_ = nullptr;
ukv_val_len_t* offsets_ = nullptr;

public:
// NOTE(review): tagged as random-access, but only `++`, postfix `++`/`--`,
// `*`, `[]`, `==` and `!=` are defined in this view.
using iterator_category = std::random_access_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = chunk_t;
using pointer = void;
using reference = void;

joined_chunks_iterator_gt(element_t* vals, ukv_val_len_t* offs) noexcept : contents_(vals), offsets_(offs) {}

// Moves to the next chunk by advancing the offsets pointer; `contents_` stays fixed.
joined_chunks_iterator_gt& operator++() noexcept {
++offsets_;
return *this;
}

// NOTE(review): both postfix operators are `const` and return the neighbouring
// position without modifying `*this`, unlike conventional postfix semantics
// (return old value, then advance) - confirm this is intended.
joined_chunks_iterator_gt operator++(int) const noexcept { return {contents_, offsets_ + 1}; }
joined_chunks_iterator_gt operator--(int) const noexcept { return {contents_, offsets_ - 1}; }
// Dereferencing reads `offsets_[1]`, so one offset past the current entry must be readable.
chunk_t operator*() const noexcept { return {contents_ + offsets_[0], offsets_[1] - offsets_[0]}; }
// Chunk `i` positions ahead of the current one; reads `offsets_[i]` and `offsets_[i + 1]`.
chunk_t operator[](std::size_t i) const noexcept {
return {contents_ + offsets_[i], offsets_[i + 1] - offsets_[i]};
}

// Equality compares the offsets pointer only; `contents_` is not considered.
bool operator==(joined_chunks_iterator_gt const& other) const noexcept { return offsets_ == other.offsets_; }
bool operator!=(joined_chunks_iterator_gt const& other) const noexcept { return offsets_ != other.offsets_; }
};

// Instantiations yielding `std::string_view` and `value_view_t` chunks respectively.
using joined_strs_iterator_t = joined_chunks_iterator_gt<std::string_view>;
using joined_bins_iterator_t = joined_chunks_iterator_gt<value_view_t>;

template <typename chunk_at>
class joined_chunks_gt {

using chunk_t = chunk_at;
using element_t = typename chunk_t::value_type;

element_t* contents_ = nullptr;
ukv_val_len_t* offsets_ = nullptr;
ukv_size_t count_ = 0;

public:
using value_type = chunk_t;

joined_chunks_gt() = default;

template <typename same_size_at>
joined_chunks_gt(same_size_at* vals, ukv_val_len_t* offs, ukv_size_t elements) noexcept
: contents_((element_t*)(vals)), offsets_(offs), count_(elements) {
static_assert(sizeof(same_size_at) == sizeof(element_t));
}

joined_chunks_iterator_gt<chunk_at> begin() const noexcept { return {contents_, offsets_}; }
joined_chunks_iterator_gt<chunk_at> end() const noexcept { return {contents_, offsets_ + count_}; }
std::size_t size() const noexcept { return count_; }
chunk_t operator[](std::size_t i) const noexcept {
return {contents_ + offsets_[i], offsets_[i + 1] - offsets_[i]};
}

ukv_val_len_t* offsets() const noexcept { return offsets_; }
element_t* contents() const noexcept { return contents_; }
};

using joined_strs_t = joined_chunks_gt<std::string_view>;
using joined_bins_t = joined_chunks_gt<value_view_t>;

/**
* @brief A read-only iterator for values packed into a
* contiguous memory range. Doesn't own underlying memory.
* Every entry is described by its own offset and length:
* dereferencing yields `{contents_ + *offsets_, *lengths_}`.
*/
template <typename chunk_at>
class embedded_chunks_iterator_gt {
// NOTE(review): several declarations below appear twice in this view, once
// under the `joined_values_iterator_t` name (prefixed with `inline`) and once
// under `embedded_chunks_iterator_gt`; presumably both sides of a diff - confirm.

using chunk_t = chunk_at;
// Element type of a chunk, taken from `chunk_at::value_type`.
using element_t = typename chunk_t::value_type;

element_t* contents_ = nullptr;
ukv_val_len_t* offsets_ = nullptr;
ukv_val_len_t* lengths_ = nullptr;

public:
using iterator_category = std::random_access_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = value_view_t;
using value_type = chunk_t;
using pointer = void;
using reference = void;

// Argument order is (contents, offsets, lengths).
inline joined_values_iterator_t(ukv_val_ptr_t vals, ukv_val_len_t* offs, ukv_val_len_t* lens) noexcept
embedded_chunks_iterator_gt(element_t* vals, ukv_val_len_t* offs, ukv_val_len_t* lens) noexcept
: contents_(vals), offsets_(offs), lengths_(lens) {}

// Prefix increment advances the lengths and offsets pointers together.
inline joined_values_iterator_t& operator++() noexcept {
embedded_chunks_iterator_gt& operator++() noexcept {
++lengths_;
++offsets_;
return *this;
}

// NOTE(review): the postfix operators pass `(contents_, lengths_ +/- 1, offsets_ +/- 1)`
// to a constructor declared as `(vals, offs, lens)`, so the returned iterator has its
// offsets and lengths pointers swapped - looks like a bug, confirm.
// NOTE(review): they are also `const` and leave `*this` unchanged, unlike
// conventional postfix semantics - confirm this is intended.
inline joined_values_iterator_t operator++(int) const noexcept { return {contents_, lengths_ + 1, offsets_ + 1}; }
inline joined_values_iterator_t operator--(int) const noexcept { return {contents_, lengths_ - 1, offsets_ - 1}; }
inline value_view_t operator*() const noexcept { return {contents_ + *offsets_, *lengths_}; }
inline value_view_t operator[](std::size_t i) const noexcept { return {contents_ + offsets_[i], lengths_[i]}; }
embedded_chunks_iterator_gt operator++(int) const noexcept { return {contents_, lengths_ + 1, offsets_ + 1}; }
embedded_chunks_iterator_gt operator--(int) const noexcept { return {contents_, lengths_ - 1, offsets_ - 1}; }
chunk_t operator*() const noexcept { return {contents_ + *offsets_, *lengths_}; }
chunk_t operator[](std::size_t i) const noexcept { return {contents_ + offsets_[i], lengths_[i]}; }

// Equality compares the lengths pointer only; `contents_` and `offsets_` are not considered.
inline bool operator==(joined_values_iterator_t const& other) const noexcept { return lengths_ == other.lengths_; }
inline bool operator!=(joined_values_iterator_t const& other) const noexcept { return lengths_ != other.lengths_; }
bool operator==(embedded_chunks_iterator_gt const& other) const noexcept { return lengths_ == other.lengths_; }
bool operator!=(embedded_chunks_iterator_gt const& other) const noexcept { return lengths_ != other.lengths_; }
};

class joined_values_t {
ukv_val_ptr_t contents_ = nullptr;
// Instantiations yielding `std::string_view` and `value_view_t` chunks respectively.
using embedded_strs_iterator_t = embedded_chunks_iterator_gt<std::string_view>;
using embedded_bins_iterator_t = embedded_chunks_iterator_gt<value_view_t>;

template <typename chunk_at>
class embedded_chunks_gt {
// View over `count_` chunks, where entry `i` is `{contents_ + offsets_[i], lengths_[i]}`.
// Stores raw pointers only.
// NOTE(review): several members below appear twice in this view, once in the
// `joined_values_t` form (prefixed with `inline`) and once in the templated form;
// presumably both sides of a diff - confirm.

using chunk_t = chunk_at;
// Element type of a chunk, taken from `chunk_at::value_type`.
using element_t = typename chunk_t::value_type;

element_t* contents_ = nullptr;
ukv_val_len_t* offsets_ = nullptr;
ukv_val_len_t* lengths_ = nullptr;
ukv_size_t count_ = 0;

public:
using value_type = value_view_t;
using value_type = chunk_t;

inline joined_values_t() = default;
inline joined_values_t(ukv_val_ptr_t vals, ukv_val_len_t* offs, ukv_val_len_t* lens, ukv_size_t elements) noexcept
: contents_(vals), offsets_(offs), lengths_(lens), count_(elements) {}
// Default state: null pointers and zero entries.
embedded_chunks_gt() = default;

inline joined_values_iterator_t begin() const noexcept { return {contents_, offsets_, lengths_}; }
inline joined_values_iterator_t end() const noexcept { return {contents_, offsets_ + count_, lengths_ + count_}; }
inline std::size_t size() const noexcept { return count_; }
inline value_view_t operator[](std::size_t i) const noexcept { return {contents_ + offsets_[i], lengths_[i]}; }
// Accepts any pointee type of the same size as `element_t` (checked at compile
// time) and reinterprets the base pointer with a C-style cast.
template <typename same_size_at>
embedded_chunks_gt(same_size_at* vals, ukv_val_len_t* offs, ukv_val_len_t* lens, ukv_size_t elements) noexcept
: contents_((element_t*)(vals)), offsets_(offs), lengths_(lens), count_(elements) {
static_assert(sizeof(same_size_at) == sizeof(element_t));
}

inline ukv_val_len_t* offsets() const noexcept { return offsets_; }
inline ukv_val_len_t* lengths() const noexcept { return lengths_; }
inline ukv_val_ptr_t contents() const noexcept { return contents_; }
// `end()` is `begin()` with both the offsets and lengths pointers advanced by `count_`.
embedded_chunks_iterator_gt<chunk_at> begin() const noexcept { return {contents_, offsets_, lengths_}; }
embedded_chunks_iterator_gt<chunk_at> end() const noexcept {
return {contents_, offsets_ + count_, lengths_ + count_};
}
std::size_t size() const noexcept { return count_; }
// No bounds check: reads `offsets_[i]` and `lengths_[i]` directly.
chunk_t operator[](std::size_t i) const noexcept { return {contents_ + offsets_[i], lengths_[i]}; }

// Raw accessors to the underlying arrays.
ukv_val_len_t* offsets() const noexcept { return offsets_; }
ukv_val_len_t* lengths() const noexcept { return lengths_; }
element_t* contents() const noexcept { return contents_; }
};

// Instantiations yielding `std::string_view` and `value_view_t` chunks respectively.
using embedded_strs_t = embedded_chunks_gt<std::string_view>;
using embedded_bins_t = embedded_chunks_gt<value_view_t>;

/**
* @brief Iterates through a predetermined number of NULL-delimited
* strings joined one after another in continuous memory.
Expand Down
6 changes: 4 additions & 2 deletions include/ukv/cpp/ranges_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,10 @@ using edges_view_t = edges_range_gt<ukv_key_t const>;

template <typename tuples_at>
auto edges(tuples_at&& tuples) noexcept {
using value_type = typename std::remove_reference_t<tuples_at>::value_type;
using result_t = std::conditional_t<std::is_const_v<value_type>, edges_view_t, edges_span_t>;
using tuples_t = std::remove_reference_t<tuples_at>;
using element_t = typename tuples_t::value_type;
constexpr bool immutable_k = std::is_const_v<element_t> || std::is_const_v<tuples_t>;
using result_t = std::conditional_t<immutable_k, edges_view_t, edges_span_t>;
auto ptr = std::data(tuples);
auto count = std::size(tuples);
return result_t(ptr, ptr + count);
Expand Down
Loading

0 comments on commit 6b59392

Please sign in to comment.