diff --git a/apis/python/src/tiledb/vector_search/ingestion.py b/apis/python/src/tiledb/vector_search/ingestion.py index 847cb772d..6131c0732 100644 --- a/apis/python/src/tiledb/vector_search/ingestion.py +++ b/apis/python/src/tiledb/vector_search/ingestion.py @@ -1190,7 +1190,6 @@ def centralised_kmeans( with tiledb.scope_ctx(ctx_or_config=config): logger = setup(config, verbose) group = tiledb.Group(index_group_uri) - centroids_uri = group[CENTROIDS_ARRAY_NAME].uri if training_sample_size >= partitions: if training_source_uri: if training_source_type is None: @@ -1261,6 +1260,7 @@ def centralised_kmeans( # raise ValueError(f"We have a training_sample_size of {training_sample_size} but {partitions} partitions - training_sample_size must be >= partitions") centroids = np.random.rand(dimensions, partitions) + centroids_uri = group[CENTROIDS_ARRAY_NAME].uri logger.debug("Writing centroids to array %s", centroids_uri) with tiledb.open(centroids_uri, mode="w", timestamp=index_timestamp) as A: A[0:dimensions, 0:partitions] = centroids diff --git a/apis/python/src/tiledb/vector_search/type_erased_module.cc b/apis/python/src/tiledb/vector_search/type_erased_module.cc index 24bc32758..e323ef979 100644 --- a/apis/python/src/tiledb/vector_search/type_erased_module.cc +++ b/apis/python/src/tiledb/vector_search/type_erased_module.cc @@ -390,13 +390,13 @@ void init_type_erased_module(py::module_& m) { "__init__", [](IndexVamana& instance, const tiledb::Context& ctx, - const std::string& group_uri, + const std::string& index_uri, std::optional temporal_policy) { - new (&instance) IndexVamana(ctx, group_uri, temporal_policy); + new (&instance) IndexVamana(ctx, index_uri, temporal_policy); }, py::keep_alive<1, 2>(), // IndexVamana should keep ctx alive. py::arg("ctx"), - py::arg("group_uri"), + py::arg("index_uri"), py::arg("temporal_policy") = std::nullopt) .def( "__init__", @@ -432,14 +432,14 @@ void init_type_erased_module(py::module_& m) { "write_index", [](IndexVamana& index, const tiledb::Context& ctx, - const std::string& group_uri, + const std::string& index_uri, std::optional temporal_policy, const std::string& storage_version) { - index.write_index(ctx, group_uri, temporal_policy, storage_version); + index.write_index(ctx, index_uri, temporal_policy, storage_version); }, py::keep_alive<1, 2>(), // IndexVamana should keep ctx alive. py::arg("ctx"), - py::arg("group_uri"), + py::arg("index_uri"), py::arg("temporal_policy") = std::nullopt, py::arg("storage_version") = "") .def("feature_type_string", &IndexVamana::feature_type_string) @@ -450,13 +450,13 @@ void init_type_erased_module(py::module_& m) { .def_static( "clear_history", [](const tiledb::Context& ctx, - const std::string& group_uri, + const std::string& index_uri, uint64_t timestamp) { - IndexVamana::clear_history(ctx, group_uri, timestamp); + IndexVamana::clear_history(ctx, index_uri, timestamp); }, py::keep_alive<1, 2>(), // IndexVamana should keep ctx alive. py::arg("ctx"), - py::arg("group_uri"), + py::arg("index_uri"), py::arg("timestamp")); py::class_(m, "IndexIVFPQ") @@ -464,20 +464,20 @@ void init_type_erased_module(py::module_& m) { "__init__", [](IndexIVFPQ& instance, const tiledb::Context& ctx, - const std::string& group_uri, + const std::string& index_uri, IndexLoadStrategy index_load_strategy, size_t memory_budget, std::optional temporal_policy) { new (&instance) IndexIVFPQ( ctx, - group_uri, + index_uri, index_load_strategy, memory_budget, temporal_policy); }, py::keep_alive<1, 2>(), // IndexIVFPQ should keep ctx alive. py::arg("ctx"), - py::arg("group_uri"), + py::arg("index_uri"), py::arg("index_load_strategy") = IndexLoadStrategy::PQ_INDEX, py::arg("memory_budget") = 0, py::arg("temporal_policy") = std::nullopt) @@ -518,14 +518,14 @@ void init_type_erased_module(py::module_& m) { "write_index", [](IndexIVFPQ& index, const tiledb::Context& ctx, - const std::string& group_uri, + const std::string& index_uri, std::optional temporal_policy, const std::string& storage_version) { - index.write_index(ctx, group_uri, temporal_policy, storage_version); + index.write_index(ctx, index_uri, temporal_policy, storage_version); }, py::keep_alive<1, 2>(), // IndexIVFPQ should keep ctx alive. py::arg("ctx"), - py::arg("group_uri"), + py::arg("index_uri"), py::arg("temporal_policy") = std::nullopt, py::arg("storage_version") = "") .def("feature_type_string", &IndexIVFPQ::feature_type_string) @@ -537,13 +537,13 @@ void init_type_erased_module(py::module_& m) { .def_static( "clear_history", [](const tiledb::Context& ctx, - const std::string& group_uri, + const std::string& index_uri, uint64_t timestamp) { - IndexIVFPQ::clear_history(ctx, group_uri, timestamp); + IndexIVFPQ::clear_history(ctx, index_uri, timestamp); }, py::keep_alive<1, 2>(), // IndexIVFPQ should keep ctx alive. py::arg("ctx"), - py::arg("group_uri"), + py::arg("index_uri"), py::arg("timestamp")); py::class_(m, "IndexIVFFlat") diff --git a/src/include/api/ivf_pq_index.h b/src/include/api/ivf_pq_index.h index 20959c798..5b437a4a6 100644 --- a/src/include/api/ivf_pq_index.h +++ b/src/include/api/ivf_pq_index.h @@ -573,7 +573,7 @@ class IndexIVFPQ { } uint64_t nlist() const override { - return impl_index_.nlist(); + return impl_index_.partitions(); } uint32_t num_subspaces() const override { diff --git a/src/include/detail/linalg/tdb_io.h b/src/include/detail/linalg/tdb_io.h index d1807be66..4913b3177 100644 --- a/src/include/detail/linalg/tdb_io.h +++ b/src/include/detail/linalg/tdb_io.h @@ -373,6 +373,59 @@ std::vector read_vector( return read_vector_helper(ctx, uri, 0, 0, temporal_policy, true); } +/** + * Read the contents of a TileDB array into a std::vector. + * @tparam T Type of data element stored. + * @param ctx The TileDB context. + * @param uri The URI of the TileDB array. + * @param slices The slices to read. Each slice is a pair of [start, end] (i.e. + * they are inclusive). + * @param temporal_policy The temporal policy for the read. + * @return The vector of data. + */ +template +std::vector read_vector( + const tiledb::Context& ctx, + const std::string& uri, + const std::vector>& slices, + size_t total_slices_size, + TemporalPolicy temporal_policy = {}) { + if (total_slices_size == 0) { + return {}; + } + scoped_timer _{tdb_func__ + " " + std::string{uri}}; + + auto array_ = tiledb_helpers::open_array( + tdb_func__, ctx, uri, TILEDB_READ, temporal_policy); + auto schema_ = array_->schema(); + + const size_t idx = 0; + auto attr = schema_.attribute(idx); + + std::string attr_name = attr.name(); + + // Create a subarray that reads the array up to the specified subset. + tiledb::Subarray subarray(ctx, *array_); + for (const auto& slice : slices) { + subarray.add_range( + 0, static_cast(slice.first), static_cast(slice.second)); + } + + // @todo: use something non-initializing + std::vector data_(total_slices_size); + + tiledb::Query query(ctx, *array_); + query.set_subarray(subarray).set_data_buffer( + attr_name, data_.data(), total_slices_size); + tiledb_helpers::submit_query(tdb_func__, uri, query); + _memory_data.insert_entry(tdb_func__, total_slices_size * sizeof(T)); + + array_->close(); + assert(tiledb::Query::Status::COMPLETE == query.query_status()); + + return data_; +} + template auto sizes_to_indices(const std::vector& sizes) { std::vector indices(size(sizes) + 1); diff --git a/src/include/index/index_group.h b/src/include/index/index_group.h index 10d1b7468..f7ff07e42 100644 --- a/src/include/index/index_group.h +++ b/src/include/index/index_group.h @@ -318,6 +318,12 @@ class base_index_group { if (opened_for_ == TILEDB_WRITE) { set_dimensions(dimensions); } + if (empty(this->version_)) { + this->version_ = current_storage_version; + } + if (storage_formats.find(this->version_) == storage_formats.end()) { + throw std::runtime_error("Invalid storage version: " + this->version_); + } } /** diff --git a/src/include/index/ivf_flat_group.h b/src/include/index/ivf_flat_group.h index a70fdbccc..9fec52929 100644 --- a/src/include/index/ivf_flat_group.h +++ b/src/include/index/ivf_flat_group.h @@ -136,9 +136,6 @@ class ivf_flat_group : public base_index_group { } void create_default_impl() { - if (empty(this->version_)) { - this->version_ = current_storage_version; - } this->init_valid_array_names(); static const int32_t tile_size{ diff --git a/src/include/index/ivf_pq_group.h b/src/include/index/ivf_pq_group.h index b3ce93f0a..8d8b20b2e 100644 --- a/src/include/index/ivf_pq_group.h +++ b/src/include/index/ivf_pq_group.h @@ -245,9 +245,6 @@ class ivf_pq_group : public base_index_group { * Create a ready-to-use group with default arrays ****************************************************************************/ void create_default_impl() { - if (empty(this->version_)) { - this->version_ = current_storage_version; - } this->init_valid_array_names(); static const int32_t tile_size{ diff --git a/src/include/index/ivf_pq_index.h b/src/include/index/ivf_pq_index.h index 3bda21a80..f8c6034b5 100644 --- a/src/include/index/ivf_pq_index.h +++ b/src/include/index/ivf_pq_index.h @@ -1415,7 +1415,7 @@ class ivf_pq_index { return reassign_ratio_; } - uint64_t nlist() const { + uint64_t partitions() const { return num_partitions_; } diff --git a/src/include/index/vamana_group.h b/src/include/index/vamana_group.h index b9a5482f6..a80b0b65b 100644 --- a/src/include/index/vamana_group.h +++ b/src/include/index/vamana_group.h @@ -198,9 +198,6 @@ class vamana_index_group : public base_index_group { } void create_default_impl() { - if (empty(this->version_)) { - this->version_ = current_storage_version; - } this->init_valid_array_names(); static const int32_t tile_size{ diff --git a/src/include/test/unit_api_ivf_pq_index.cc b/src/include/test/unit_api_ivf_pq_index.cc index bc2cda8df..9936248d1 100644 --- a/src/include/test/unit_api_ivf_pq_index.cc +++ b/src/include/test/unit_api_ivf_pq_index.cc @@ -658,7 +658,7 @@ TEST_CASE("storage_version", "[api_ivf_pq_index]") { // Throw with the wrong version. CHECK_THROWS_WITH( index.write_index(ctx, index_uri, std::nullopt, "0.4"), - "Version mismatch. Requested 0.4 but found 0.3"); + "Invalid storage version: 0.4"); // Succeed without a version. index.write_index(ctx, index_uri); // Succeed with the same version. diff --git a/src/include/test/unit_api_vamana_index.cc b/src/include/test/unit_api_vamana_index.cc index 96c84e281..cfb52ce01 100644 --- a/src/include/test/unit_api_vamana_index.cc +++ b/src/include/test/unit_api_vamana_index.cc @@ -476,7 +476,7 @@ TEST_CASE("storage_version", "[api_vamana_index]") { // Throw with the wrong version. CHECK_THROWS_WITH( index.write_index(ctx, index_uri, std::nullopt, "0.4"), - "Version mismatch. Requested 0.4 but found 0.3"); + "Invalid storage version: 0.4"); // Succeed without a version. index.write_index(ctx, index_uri); // Succeed with the same version. diff --git a/src/include/test/unit_ivf_flat_group.cc b/src/include/test/unit_ivf_flat_group.cc index 1f503cb15..699a224e4 100644 --- a/src/include/test/unit_ivf_flat_group.cc +++ b/src/include/test/unit_ivf_flat_group.cc @@ -444,7 +444,7 @@ TEST_CASE("mismatched storage version", "[ivf_flat_group]") { TemporalPolicy{TimeTravel, 0}, "different_version", 10), - "Version mismatch. Requested different_version but found 0.3"); + "Invalid storage version: different_version"); } TEST_CASE("clear history", "[ivf_flat_group]") { diff --git a/src/include/test/unit_ivf_pq_group.cc b/src/include/test/unit_ivf_pq_group.cc index 925a9026e..c029bd1d4 100644 --- a/src/include/test/unit_ivf_pq_group.cc +++ b/src/include/test/unit_ivf_pq_group.cc @@ -601,7 +601,7 @@ TEST_CASE("mismatched storage version", "[ivf_pq_group]") { TemporalPolicy{TimeTravel, 0}, "different_version", 10), - "Version mismatch. Requested different_version but found 0.3"); + "Invalid storage version: different_version"); } TEST_CASE("clear history", "[ivf_pq_group]") { diff --git a/src/include/test/unit_ivf_pq_index.cc b/src/include/test/unit_ivf_pq_index.cc index 32768e04d..b726f2bd1 100644 --- a/src/include/test/unit_ivf_pq_index.cc +++ b/src/include/test/unit_ivf_pq_index.cc @@ -65,17 +65,6 @@ struct dummy_pq_index { } }; -void debug_flat_ivf_centroids(const auto& index) { - std::cout << "\nDebug Centroids:\n" << std::endl; - for (size_t j = 0; j < index.get_flat_ivf_centroids().num_rows(); ++j) { - for (size_t i = 0; i < index.get_flat_ivf_centroids().num_cols(); ++i) { - std::cout << index.get_flat_ivf_centroids()(j, i) << " "; - } - std::cout << std::endl; - } - std::cout << std::endl; -} - TEST_CASE("construct different types", "[ivf_pq_index]") { ivf_pq_index index1{}; ivf_pq_index index2{}; @@ -212,9 +201,6 @@ TEST_CASE("debug w/ sk", "[ivf_pq_index]") { 1e-4); index.set_flat_ivf_centroids(sklearn_centroids); index.train_ivf(training_data, kmeans_init::none); - if (debug) { - debug_flat_ivf_centroids(index); - } } SECTION("two iterations") { @@ -229,10 +215,6 @@ TEST_CASE("debug w/ sk", "[ivf_pq_index]") { 1e-4); index.set_flat_ivf_centroids(sklearn_centroids); index.train_ivf(training_data, kmeans_init::none); - if (debug) { - debug_flat_ivf_centroids(index); - } - // debug_centroids(index); } SECTION("five iterations") { @@ -247,10 +229,6 @@ TEST_CASE("debug w/ sk", "[ivf_pq_index]") { 1e-4); index.set_flat_ivf_centroids(sklearn_centroids); index.train_ivf(training_data, kmeans_init::none); - if (debug) { - debug_flat_ivf_centroids(index); - } - // debug_centroids(index); } SECTION("five iterations, perturbed") { @@ -272,10 +250,6 @@ TEST_CASE("debug w/ sk", "[ivf_pq_index]") { 1e-4); index.set_flat_ivf_centroids(sklearn_centroids); index.train_ivf(training_data, kmeans_init::none); - if (debug) { - debug_flat_ivf_centroids(index); - } - // debug_centroids(index); } SECTION("five iterations") { @@ -289,16 +263,12 @@ TEST_CASE("debug w/ sk", "[ivf_pq_index]") { 5, 1e-4); index.train_ivf(training_data, kmeans_init::random); - if (debug) { - debug_flat_ivf_centroids(index); - } - // debug_centroids(index); } } TEST_CASE("ivf_index write and read", "[ivf_pq_index]") { size_t dimension = 128; - size_t nlist = 100; + size_t partitions = 100; uint32_t num_subspaces = 16; uint32_t max_iterations = 4; size_t nprobe = 10; @@ -318,7 +288,7 @@ TEST_CASE("ivf_index write and read", "[ivf_pq_index]") { std::vector ids(num_vectors(training_set)); std::iota(begin(ids), end(ids), 0); auto idx = ivf_pq_index( - nlist, num_subspaces, max_iterations); + partitions, num_subspaces, max_iterations); CHECK(idx.num_vectors() == 0); idx.train_ivf(training_set, kmeans_init::kmeanspp); idx.add(training_set, ids); @@ -406,16 +376,16 @@ TEMPLATE_TEST_CASE( hypercube4(j + 9, i) = hypercube1(j, i); } } - SECTION("nlist = 1") { + SECTION("partitions = 1") { size_t k_nn = 6; - size_t nlist = 1; + size_t partitions = 1; auto ivf_idx2 = ivf_pq_index( - /*128,*/ nlist, 2, 4, 1.e-4); // dim nlist maxiter eps nthreads + /*128,*/ partitions, 2, 4, 1.e-4); // dim partitions maxiter eps nthreads ivf_idx2.train_ivf(hypercube2); ivf_idx2.add(hypercube2, ids); auto ivf_idx4 = ivf_pq_index( - /*128,*/ nlist, 2, 4, 1.e-4); + /*128,*/ partitions, 2, 4, 1.e-4); ivf_idx4.train_ivf(hypercube4); ivf_idx4.add(hypercube4, ids); @@ -477,12 +447,12 @@ TEMPLATE_TEST_CASE( TEST_CASE("build index and infinite query in place", "[ivf_pq_index]") { tiledb::Context ctx; - // size_t nlist = GENERATE(1, 100); - size_t nlist = 20; + // size_t partitions = GENERATE(1, 100); + size_t partitions = 20; using s = siftsmall_test_init_defaults; using index = ivf_pq_index; - auto init = siftsmall_test_init(ctx, nlist, 16); + auto init = siftsmall_test_init(ctx, partitions, 16); auto&& [nprobe, k_nn, nthreads, max_iterations, convergence_tolerance] = std::tie( @@ -581,9 +551,9 @@ TEST_CASE("query empty index", "[ivf_pq_index]") { tiledb::VFS vfs(ctx); size_t num_vectors = 0; uint64_t dimensions = 10; - size_t nlist = 1; + size_t partitions = 1; auto index = ivf_pq_index( - nlist, dimensions / 2); + partitions, dimensions / 2); auto queries = ColMajorMatrix{{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}}; @@ -599,7 +569,7 @@ TEST_CASE("query empty index", "[ivf_pq_index]") { // We can query an empty index. { size_t k_nn = 1; - auto&& [scores, ids] = index.query(queries, k_nn, nlist); + auto&& [scores, ids] = index.query(queries, k_nn, partitions); CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries)); CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries)); CHECK(_cpo::dimensions(scores) == k_nn); @@ -625,7 +595,7 @@ TEST_CASE("query empty index", "[ivf_pq_index]") { ctx, ivf_index_uri); CHECK(index_infinite.num_vectors() == num_vectors); size_t k_nn = 1; - auto&& [scores, ids] = index_infinite.query(queries, k_nn, nlist); + auto&& [scores, ids] = index_infinite.query(queries, k_nn, partitions); CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries)); CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries)); CHECK(_cpo::dimensions(scores) == k_nn); @@ -640,7 +610,7 @@ TEST_CASE("query empty index", "[ivf_pq_index]") { ctx, ivf_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); CHECK(index_finite.num_vectors() == num_vectors); size_t k_nn = 1; - auto&& [scores, ids] = index_finite.query(queries, k_nn, nlist, 9); + auto&& [scores, ids] = index_finite.query(queries, k_nn, partitions, 9); CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries)); CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries)); CHECK(_cpo::dimensions(scores) == k_nn); @@ -656,7 +626,7 @@ TEST_CASE("query simple", "[ivf_pq_index]") { size_t num_vectors = 4; uint64_t dimensions = 4; - size_t nlist = 1; + size_t partitions = 1; uint32_t num_subspaces = 2; uint32_t max_iterations = 1; float convergence_tolerance = 0.000025f; @@ -665,7 +635,7 @@ TEST_CASE("query simple", "[ivf_pq_index]") { using feature_type = float; using id_type = uint32_t; auto index = ivf_pq_index( - nlist, + partitions, num_subspaces, max_iterations, convergence_tolerance, @@ -676,7 +646,7 @@ TEST_CASE("query simple", "[ivf_pq_index]") { (std::filesystem::temp_directory_path() / "ivf_index").string(); CHECK(index.num_vectors() == 0); - CHECK(index.nlist() == nlist); + CHECK(index.partitions() == partitions); // We can train, add, query, and then write the index. { @@ -689,7 +659,7 @@ TEST_CASE("query simple", "[ivf_pq_index]") { CHECK(index.num_vectors() == ::num_vectors(training)); size_t k_nn = 1; - size_t nprobe = nlist; + size_t nprobe = partitions; for (int i = 1; i <= 4; ++i) { auto value = static_cast(i); auto queries = @@ -722,7 +692,7 @@ TEST_CASE("query simple", "[ivf_pq_index]") { CHECK(index2->num_vectors() == 4); size_t k_nn = 1; - size_t nprobe = nlist; + size_t nprobe = partitions; for (int i = 1; i <= 4; ++i) { auto value = static_cast(i); auto queries = @@ -744,13 +714,13 @@ TEST_CASE("k_factor", "[ivf_pq_index]") { size_t num_vectors = 500; uint64_t dimensions = 4; - size_t nlist = 4; + size_t partitions = 4; uint32_t num_subspaces = 1; uint32_t max_iterations = 1; float convergence_tolerance = 0.000025f; float reassign_ratio = 0.09f; - size_t nprobe = nlist; + size_t nprobe = partitions; size_t k_nn = 40; float k_factor = 2.f; size_t upper_bound = 350; @@ -759,7 +729,7 @@ TEST_CASE("k_factor", "[ivf_pq_index]") { using feature_type = float; using id_type = uint32_t; auto index = ivf_pq_index( - nlist, + partitions, num_subspaces, max_iterations, convergence_tolerance, @@ -769,7 +739,7 @@ TEST_CASE("k_factor", "[ivf_pq_index]") { auto ivf_index_uri = (std::filesystem::temp_directory_path() / "ivf_index").string(); CHECK(index.num_vectors() == 0); - CHECK(index.nlist() == nlist); + CHECK(index.partitions() == partitions); // We can train, add, query, and then write the index. std::vector ids(num_vectors); @@ -876,7 +846,7 @@ TEST_CASE("ivf_pq_index query index written twice", "[ivf_pq_index]") { auto id_type = "uint32"; auto partitioning_index_type = "uint32"; uint64_t dimensions = 3; - size_t n_list = 1; + size_t partitions = 1; uint32_t num_subspaces = 1; uint32_t max_iterations = 3; @@ -885,7 +855,7 @@ TEST_CASE("ivf_pq_index query index written twice", "[ivf_pq_index]") { auto index = ivf_pq_index< feature_type_type, id_type_type, - partitioning_index_type_type>(n_list, dimensions / 2); + partitioning_index_type_type>(partitions, dimensions / 2); auto data = ColMajorMatrixWithIds(dimensions, 0); index.train(data, data.raveled_ids()); @@ -932,7 +902,7 @@ TEST_CASE("ivf_pq_index query index written twice", "[ivf_pq_index]") { auto queries = ColMajorMatrix{ {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}}; - auto&& [scores, ids] = index->query(queries, 1, n_list, 5); + auto&& [scores, ids] = index->query(queries, 1, partitions, 5); CHECK(std::equal( scores.data(), scores.data() + 4, @@ -947,12 +917,12 @@ TEST_CASE("pq encoding has no error with <= 256 vectors", "[ivf_pq_index]") { using id_type = uint32_t; using partitioning_index_type = uint32_t; - size_t n_list = 16; + size_t partitions = 16; size_t dimensions = 300; uint32_t num_subspaces = dimensions; for (auto num_vectors : std::vector{255, 256, 257}) { auto index = ivf_pq_index( - n_list, num_subspaces); + partitions, num_subspaces); std::vector> vectors(num_vectors); std::vector vector_ids(num_vectors); for (int i = 0; i < num_vectors; ++i) { diff --git a/src/include/test/unit_tdb_io.cc b/src/include/test/unit_tdb_io.cc index 4b285e65d..c6dcafa36 100644 --- a/src/include/test/unit_tdb_io.cc +++ b/src/include/test/unit_tdb_io.cc @@ -275,3 +275,47 @@ TEST_CASE("create group", "[tdb_io]") { read_group.close(); } + +TEST_CASE("read vector slices", "[tdb_io]") { + tiledb::Context ctx; + std::string uri = + (std::filesystem::temp_directory_path() / "tmp_vector").string(); + + tiledb::VFS vfs(ctx); + if (vfs.is_dir(uri)) { + vfs.remove_dir(uri); + } + + size_t n = 100; + std::vector vector(n); + std::iota(begin(vector), end(vector), 0); + write_vector(ctx, vector, uri); + + // We can read the entire vector. + { + auto result = read_vector(ctx, uri); + CHECK(vector == result); + } + + // We can read slices of the vector. + { + std::vector> slices; + slices.push_back({0, 1}); // 2 elements. + slices.push_back({3, 3}); // 1 element. + slices.push_back({50, 60}); // 11 elements + size_t total_slices_size = 14; + + auto result_slice = read_vector(ctx, uri, slices, total_slices_size); + auto expected = + std::vector{0, 1, 3, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60}; + CHECK(result_slice == expected); + } + + // We don't crash with empty slices. + { + std::vector> slices; + size_t total_slices_size = 0; + auto result_slice = read_vector(ctx, uri, slices, total_slices_size); + CHECK(result_slice.empty()); + } +} diff --git a/src/include/test/unit_vamana_group.cc b/src/include/test/unit_vamana_group.cc index f3e5a976d..d5740f907 100644 --- a/src/include/test/unit_vamana_group.cc +++ b/src/include/test/unit_vamana_group.cc @@ -453,7 +453,7 @@ TEST_CASE("mismatched storage version", "[vamana_group]") { TemporalPolicy{TimeTravel, 0}, "different_version", 10), - "Version mismatch. Requested different_version but found 0.3"); + "Invalid storage version: different_version"); } TEST_CASE("clear history", "[vamana_group]") {