Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add start, stop, and temporal_policy to FeatureVector and FeatureVectorArray #549

Merged
merged 5 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion apis/python/src/tiledb/vector_search/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1661,7 +1661,12 @@ def ingest_type_erased(

ctx = vspy.Ctx(config)
data = vspy.FeatureVectorArray(
ctx, parts_array_uri, ids_array_uri, 0, to_temporal_policy(index_timestamp)
ctx,
parts_array_uri,
ids_array_uri,
0,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For better readability, it would be nice to follow the pattern bellow for such arguments

 data = vspy.FeatureVectorArray(
            ctx,
            parts_array_uri,
            ids_array_uri,
            /* first_col */ 0,
            /* last_col */ 0,
            to_temporal_policy(index_timestamp),
)

Copy link
Collaborator Author

@jparismorgan jparismorgan Oct 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, thanks, done (though note it's a little different than this b/c a Python comment).

0,
to_temporal_policy(index_timestamp),
)
if index_type == "VAMANA":
index = vspy.IndexVamana(ctx, index_group_uri)
Expand Down
27 changes: 21 additions & 6 deletions apis/python/src/tiledb/vector_search/type_erased_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,22 @@ void init_type_erased_module(py::module_& m) {

py::class_<FeatureVector>(m, "FeatureVector", py::buffer_protocol())
.def(
py::init<const tiledb::Context&, const std::string&>(),
py::keep_alive<1, 2>() // IndexIVFFlat should keep ctx alive.
)
"__init__",
[](FeatureVector& instance,
const tiledb::Context& ctx,
const std::string& uri,
size_t first_col,
size_t last_col,
std::optional<TemporalPolicy> temporal_policy) {
new (&instance)
FeatureVector(ctx, uri, first_col, last_col, temporal_policy);
},
py::keep_alive<1, 2>(),
py::arg("ctx"),
py::arg("uri"),
py::arg("first_col") = 0,
py::arg("first_col") = 0,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

last_col

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, done.

py::arg("temporal_policy") = std::nullopt)
.def(py::init<size_t, const std::string&>())
.def(py::init<size_t, void*, const std::string&>())
.def("dimensions", &FeatureVector::dimensions)
Expand Down Expand Up @@ -244,16 +257,18 @@ void init_type_erased_module(py::module_& m) {
const tiledb::Context& ctx,
const std::string& uri,
const std::string& ids_uri,
size_t num_vectors,
size_t first_col,
size_t last_col,
std::optional<TemporalPolicy> temporal_policy) {
new (&instance) FeatureVectorArray(
ctx, uri, ids_uri, num_vectors, temporal_policy);
ctx, uri, ids_uri, first_col, last_col, temporal_policy);
},
py::keep_alive<1, 2>(), // FeatureVectorArray should keep ctx alive.
py::arg("ctx"),
py::arg("uri"),
py::arg("ids_uri") = "",
py::arg("num_vectors") = 0,
py::arg("first_col") = 0,
py::arg("first_col") = 0,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

last_col

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, done.

py::arg("temporal_policy") = std::nullopt)
.def(py::init<size_t, size_t, const std::string&, const std::string&>())
.def("dimensions", &FeatureVectorArray::dimensions)
Expand Down
46 changes: 34 additions & 12 deletions src/include/api/feature_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,20 @@ class FeatureVector {
* @param ctx
* @param uri
*/
FeatureVector(const tiledb::Context& ctx, const std::string& uri) {
auto array = tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_READ);
FeatureVector(
const tiledb::Context& ctx,
const std::string& uri,
size_t start = 0,
size_t end = 0,
std::optional<TemporalPolicy> temporal_policy_input = std::nullopt) {
auto temporal_policy = temporal_policy_input.value_or(TemporalPolicy{});
auto array = tiledb_helpers::open_array(
tdb_func__, ctx, uri, TILEDB_READ, temporal_policy);

feature_type_ = get_array_datatype(*array);
array->close();

tdb_vector_from_datatype(ctx, uri);
tdb_vector_from_datatype(ctx, uri, start, end, temporal_policy);
}

/*
Expand Down Expand Up @@ -126,25 +133,35 @@ class FeatureVector {
* Dispatch to the appropriate concrete class based on the datatype.
*/
void tdb_vector_from_datatype(
const tiledb::Context& ctx, const std::string& uri) {
const tiledb::Context& ctx,
const std::string& uri,
size_t start,
size_t end,
TemporalPolicy temporal_policy) {
switch (feature_type_) {
case TILEDB_FLOAT32:
vector_ = std::make_unique<vector_impl<tdbVector<float>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<float>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_INT8:
vector_ = std::make_unique<vector_impl<tdbVector<int8_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<int8_t>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_UINT8:
vector_ = std::make_unique<vector_impl<tdbVector<uint8_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<uint8_t>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_INT32:
vector_ = std::make_unique<vector_impl<tdbVector<int32_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<int32_t>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_UINT32:
vector_ = std::make_unique<vector_impl<tdbVector<uint32_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<uint32_t>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_UINT64:
vector_ = std::make_unique<vector_impl<tdbVector<uint64_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<uint64_t>>>(
ctx, uri, start, end, temporal_policy);
break;
default:
throw std::runtime_error("Unsupported attribute type");
Expand Down Expand Up @@ -202,8 +219,13 @@ class FeatureVector {
explicit vector_impl(size_t size)
: vector_(size) {
}
vector_impl(const tiledb::Context& ctx, const std::string& uri)
: vector_(ctx, uri) {
vector_impl(
const tiledb::Context& ctx,
const std::string& uri,
size_t start,
size_t end,
TemporalPolicy temporal_policy)
: vector_(ctx, uri, start, end, temporal_policy) {
}
//[[nodiscard]] void* data() override {
// return _cpo::data(vector_);
Expand Down
Loading
Loading