diff --git a/include/pisa/temporary_directory.hpp b/include/pisa/temporary_directory.hpp index 941cfa942..742ae08bc 100644 --- a/include/pisa/temporary_directory.hpp +++ b/include/pisa/temporary_directory.hpp @@ -21,7 +21,7 @@ struct TemporaryDirectory { ~TemporaryDirectory(); /** Returns the path to the created directory. */ - [[nodiscard]] auto path() -> std::filesystem::path const&; + [[nodiscard]] auto path() const -> std::filesystem::path const&; private: std::filesystem::path dir_; diff --git a/src/temporary_directory.cpp b/src/temporary_directory.cpp index a9b134da6..798dce56b 100644 --- a/src/temporary_directory.cpp +++ b/src/temporary_directory.cpp @@ -37,7 +37,7 @@ TemporaryDirectory::~TemporaryDirectory() { spdlog::debug("Removed a tmp dir {}", dir_.c_str()); } -auto TemporaryDirectory::path() -> std::filesystem::path const& { +auto TemporaryDirectory::path() const -> std::filesystem::path const& { return dir_; } diff --git a/test/test_compress.cpp b/test/test_compress.cpp index 5f14e05ef..5fa188bdc 100644 --- a/test/test_compress.cpp +++ b/test/test_compress.cpp @@ -1,15 +1,45 @@ #define CATCH_CONFIG_MAIN #include "catch2/catch.hpp" +#include +#include + +#include + +#include "forward_index_builder.hpp" +#include "invert.hpp" +#include "parser.hpp" #include "pisa/compress.hpp" #include "pisa/scorer/scorer.hpp" #include "pisa/wand_data.hpp" #include "pisa_config.hpp" #include "temporary_directory.hpp" +#include "text_analyzer.hpp" +#include "token_filter.hpp" +#include "tokenizer.hpp" #include "type_safe.hpp" #include "wand_utils.hpp" +void build_index(pisa::TemporaryDirectory const& tmp) { + auto fwd_base_path = (tmp.path() / "tiny.fwd"); + auto inv_base_path = (tmp.path() / "tiny.inv"); + { + std::ifstream is(PISA_SOURCE_DIR "/test/test_data/tiny/tiny.plaintext"); + pisa::Forward_Index_Builder builder; + auto analyzer = + std::make_shared(std::make_unique()); + analyzer->emplace_token_filter(); + builder.build( + is, fwd_base_path.string(), pisa::record_parser("plaintext", is), analyzer, 10, 2 + ); + } + pisa::invert::invert_forward_index(fwd_base_path.string(), inv_base_path.string(), {}); +} + TEST_CASE("Compress index", "[index][compress]") { + pisa::TemporaryDirectory tmp; + build_index(tmp); + std::string encoding = GENERATE( "ef", "single", @@ -30,9 +60,8 @@ TEST_CASE("Compress index", "[index][compress]") { bool in_memory = GENERATE(true, false); CAPTURE(in_memory); - pisa::TemporaryDirectory tmp; pisa::compress( - PISA_SOURCE_DIR "/test/test_data/test_collection", + tmp.path() / "tiny.inv", std::nullopt, // no wand encoding, (tmp.path() / encoding).string(), @@ -44,16 +73,18 @@ TEST_CASE("Compress index", "[index][compress]") { } TEST_CASE("Compress quantized index", "[index][compress]") { - auto input = PISA_SOURCE_DIR "/test/test_data/test_collection"; + pisa::TemporaryDirectory tmp; + build_index(tmp); std::string scorer = GENERATE("bm25", "qld"); CAPTURE(scorer); auto scorer_params = ScorerParams(scorer); + auto inv_path = (tmp.path() / "tiny.inv").string(); + auto wand_path = (tmp.path() / fmt::format("tiny.wand.{}", scorer)).string(); - pisa::TemporaryDirectory tmp; pisa::create_wand_data( - (tmp.path() / "wand").string(), - input, + wand_path, + (tmp.path() / "tiny.inv").string(), pisa::FixedBlock(64), scorer_params, false, @@ -83,8 +114,8 @@ TEST_CASE("Compress quantized index", "[index][compress]") { CAPTURE(in_memory); pisa::compress( - input, - (tmp.path() / "wand").string(), + inv_path, + wand_path, encoding, (tmp.path() / encoding).string(), scorer_params, diff --git a/test/test_data/tiny/tiny.plaintext b/test/test_data/tiny/tiny.plaintext new file mode 100644 index 000000000..970f60126 --- /dev/null +++ b/test/test_data/tiny/tiny.plaintext @@ -0,0 +1,9 @@ +tiny1 Lorem ipsum odor amet, consectetuer adipiscing elit. Molestie luctus rhoncus; conubia scelerisque tellus eros ullamcorper aliquam. Malesuada dolor felis tempus ullamcorper nibh himenaeos. Cursus senectus senectus varius felis amet porta aliquam dis amet? Donec at porttitor ad dolor congue fringilla maximus tincidunt potenti. Ornare congue quis tristique, quam elit nisi. Quisque lectus mollis pharetra viverra, habitasse nunc odio arcu. + +tiny2 Molestie aptent ligula duis facilisis himenaeos; vestibulum ultricies. Odio nam id hendrerit torquent auctor. Proin penatibus felis feugiat adipiscing dictumst mollis. Habitant faucibus amet cursus et ad; imperdiet condimentum sapien. Vestibulum curabitur litora inceptos nibh ridiculus dolor. Eu auctor lobortis nibh lacus consectetur volutpat. Turpis pulvinar fermentum tellus nec non eleifend. Proin proin fringilla fusce lacinia lectus sed placerat. + +tiny3 Et leo nulla ex neque mus commodo nec rutrum. Tortor scelerisque litora gravida hendrerit taciti ultrices. Eleifend hac dis egestas ipsum ante! Nunc habitasse aliquam convallis mattis dictum nostra eros eleifend. Vivamus himenaeos adipiscing nunc tincidunt duis porttitor ipsum vitae. Maximus lacus elementum; torquent gravida est porta aliquet volutpat. Fusce semper phasellus, imperdiet convallis nec facilisi sociosqu feugiat mattis. + +tiny4 Suspendisse scelerisque himenaeos maecenas taciti dictum. Nisi felis non pharetra tristique hac. Finibus consectetur cursus molestie curabitur pharetra rutrum blandit. Ligula maximus purus pellentesque convallis ullamcorper primis tristique. Justo netus facilisis at finibus torquent. Ante id turpis semper facilisis orci suspendisse aptent fusce. Consequat venenatis mattis suscipit scelerisque maximus, duis at. + +tiny5 Cursus accumsan nec in erat dignissim. Etiam litora tristique ligula pellentesque orci. Cursus nisl leo posuere finibus vel at tortor ante. In odio montes et nisi mi efficitur quis condimentum. Etiam finibus semper scelerisque efficitur phasellus suspendisse habitant. Facilisi vehicula blandit nostra enim leo. Gravida commodo ligula metus ullamcorper fermentum parturient. Consequat nostra pretium eget quis massa efficitur fusce purus. Ipsum integer sodales fames mollis laoreet natoque.