From 4aaa615df0c1622ee43f274850f0fab49ddd8c96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B8=D0=BB=D1=8F=D0=BD=20=D0=9F=D0=B0=D0=BB=D0=B0?= =?UTF-8?q?=D1=83=D0=B7=D0=BE=D0=B2?= Date: Sat, 9 Nov 2024 13:23:28 +0200 Subject: [PATCH 1/4] xapian_wrap:CyrusMetadataCompactor: add override --- imap/xapian_wrap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap/xapian_wrap.cpp b/imap/xapian_wrap.cpp index d537bb6111..8e0a4adfdc 100644 --- a/imap/xapian_wrap.cpp +++ b/imap/xapian_wrap.cpp @@ -452,7 +452,7 @@ class CyrusMetadataCompactor : public Xapian::Compactor std::string resolve_duplicate_metadata(const std::string &key, size_t num_tags, - const std::string tags[]) + const std::string tags[]) override { if (key.rfind("cyrusid.", 0) == 0) { uint8_t indexlevel = parse_indexlevel(tags[0]); From ac92be0d880de89b5df71642b076d558b551d581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B8=D0=BB=D1=8F=D0=BD=20=D0=9F=D0=B0=D0=BB=D0=B0?= =?UTF-8?q?=D1=83=D0=B7=D0=BE=D0=B2?= Date: Mon, 11 Nov 2024 15:06:24 +0200 Subject: [PATCH 2/4] xapian_wrap: skip move assignment by using emplace_back() --- imap/xapian_wrap.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/imap/xapian_wrap.cpp b/imap/xapian_wrap.cpp index 8e0a4adfdc..8cf239afc1 100644 --- a/imap/xapian_wrap.cpp +++ b/imap/xapian_wrap.cpp @@ -1154,7 +1154,7 @@ static int add_text_part(xapian_dbw_t *dbw, const struct buf *part, enum search_ } else if (partnum == SEARCH_PART_SUBJECT) { // Keep subject text to index by language later. - dbw->subjects->push_back(buf_cstring(part)); + dbw->subjects->emplace_back(buf_cstring(part)); } #endif /* HAVE_CLD2 */ } @@ -1805,9 +1805,9 @@ static Xapian::Query *query_new_email(const xapian_db_t *db, } if (!domain_queries.empty()) { - queries.push_back(Xapian::Query(Xapian::Query::OP_OR, + queries.emplace_back(Xapian::Query::OP_OR, domain_queries.begin(), - domain_queries.end())); + domain_queries.end()); } free(utf8_domain); @@ -2560,8 +2560,7 @@ EXPORTED int xapian_filter(const char *dest, const char **sources, // Open databases and aggregate database-level metadata. while (*sources) { thispath = *sources++; - const Xapian::Database srcdb {thispath}; - srcdbs.push_back(srcdb); + srcdbs.emplace_back(thispath); } // Copy all matching documents. From 52605a9026b3a7af399d16291bc7a689cf4d1c73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B8=D0=BB=D1=8F=D0=BD=20=D0=9F=D0=B0=D0=BB=D0=B0?= =?UTF-8?q?=D1=83=D0=B7=D0=BE=D0=B2?= Date: Mon, 11 Nov 2024 21:47:17 +0200 Subject: [PATCH 3/4] xapian_wrap:parse_langcode() accept a copy of std::string as parameter --- imap/xapian_wrap.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/imap/xapian_wrap.cpp b/imap/xapian_wrap.cpp index 8cf239afc1..7dafb84835 100644 --- a/imap/xapian_wrap.cpp +++ b/imap/xapian_wrap.cpp @@ -245,9 +245,8 @@ static std::string format_doclangs(const std::set& doclangs) return val.str(); } -static std::string parse_langcode(const char *str) +static std::string parse_langcode(std::string lstr) { - std::string lstr(str); std::transform(lstr.begin(), lstr.end(), lstr.begin(), ::tolower); // accept syntax for two and three letter ISO 639 codes if (!(isalpha(lstr[0]) && isalpha(lstr[1]) && @@ -408,7 +407,7 @@ static std::string detect_language(const struct buf *part) else if (code == "xxx") { code = ""; } - iso_lang = parse_langcode(code.c_str()); + iso_lang = parse_langcode(code); } return iso_lang; From e83bca8977dcfd6b15dc0d7adb4337e64901b317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=B8=D0=BB=D1=8F=D0=BD=20=D0=9F=D0=B0=D0=BB=D0=B0?= =?UTF-8?q?=D1=83=D0=B7=D0=BE=D0=B2?= Date: Mon, 11 Nov 2024 20:50:34 +0200 Subject: [PATCH 4/4] xapian_wrap: convert struct xapian_snipgen to use objects instead of pointers --- imap/xapian_wrap.cpp | 79 +++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 49 deletions(-) diff --git a/imap/xapian_wrap.cpp b/imap/xapian_wrap.cpp index 7dafb84835..0b002c7d29 100644 --- a/imap/xapian_wrap.cpp +++ b/imap/xapian_wrap.cpp @@ -2304,32 +2304,28 @@ EXPORTED int xapian_query_run(const xapian_db_t *db, const xapian_query_t *qq, struct xapian_snipgen { - Xapian::Stem *default_stemmer; + Xapian::Stem default_stemmer{new CyrusSearchStemmer}; xapian_db_t *db; - Xapian::Database *memdb; - std::vector *loose_terms; - std::vector *queries; - char *cyrusid; - char doctype; - struct buf *buf; + Xapian::WritableDatabase memdb{std::string(), Xapian::DB_BACKEND_INMEMORY}; + std::vector loose_terms; + std::vector queries; + char *cyrusid = nullptr; + char doctype = 0; + struct buf *buf = buf_new(); const char *hi_start; const char *hi_end; const char *omit; - size_t max_len; + size_t max_len = (size_t) config_getint(IMAPOPT_SEARCH_SNIPPET_LENGTH); }; EXPORTED xapian_snipgen_t * xapian_snipgen_new(xapian_db_t *db, const struct search_snippet_markup *markup) { - xapian_snipgen_t *snipgen = (xapian_snipgen_t *)xzmalloc(sizeof(xapian_snipgen_t)); - snipgen->default_stemmer = new Xapian::Stem(new CyrusSearchStemmer); + xapian_snipgen *snipgen = new xapian_snipgen; snipgen->db = db; - snipgen->memdb = new Xapian::WritableDatabase(std::string(), Xapian::DB_BACKEND_INMEMORY); - snipgen->buf = buf_new(); snipgen->hi_start = markup->hi_start; snipgen->hi_end = markup->hi_end; snipgen->omit = markup->omit; - snipgen->max_len = (size_t) config_getint(IMAPOPT_SEARCH_SNIPPET_LENGTH); return snipgen; } @@ -2337,21 +2333,17 @@ xapian_snipgen_new(xapian_db_t *db, const struct search_snippet_markup *markup) EXPORTED void xapian_snipgen_free(xapian_snipgen_t *snipgen) { if (!snipgen) return; - delete snipgen->default_stemmer; - delete snipgen->loose_terms; - delete snipgen->queries; - delete snipgen->memdb; free(snipgen->cyrusid); buf_destroy(snipgen->buf); - free(snipgen); + delete snipgen; } -static Xapian::Query xapian_snipgen_build_query(xapian_snipgen_t *snipgen, Xapian::Stem& stemmer) +static Xapian::Query xapian_snipgen_build_query(xapian_snipgen_t *snipgen, const Xapian::Stem& stemmer) { Xapian::TermGenerator term_generator; Xapian::Query q; - if (snipgen->loose_terms) { + if (!snipgen->loose_terms.empty()) { /* Add loose query terms */ term_generator.set_stemmer(stemmer); #if defined(USE_XAPIAN_WORD_BREAKS) @@ -2365,16 +2357,16 @@ static Xapian::Query xapian_snipgen_build_query(xapian_snipgen_t *snipgen, Xapia ~Xapian::TermGenerator::FLAG_CJK_NGRAM); #endif - for(size_t i = 0; i < snipgen->loose_terms->size(); ++i) + for(const std::string& term : snipgen->loose_terms) { - term_generator.index_text(Xapian::Utf8Iterator((*snipgen->loose_terms)[i])); + term_generator.index_text(Xapian::Utf8Iterator(term)); } const Xapian::Document& doc = term_generator.get_document(); q = Xapian::Query(Xapian::Query::OP_OR, doc.termlist_begin(), doc.termlist_end()); } - if (snipgen->queries) { + if (!snipgen->queries.empty()) { /* Add phrase queries */ unsigned flags = Xapian::QueryParser::FLAG_PHRASE| Xapian::QueryParser::FLAG_WILDCARD| @@ -2387,8 +2379,8 @@ static Xapian::Query xapian_snipgen_build_query(xapian_snipgen_t *snipgen, Xapia #endif Xapian::QueryParser queryparser; queryparser.set_stemmer(stemmer); - for(size_t i = 0; i < snipgen->queries->size(); ++i) { - q |= queryparser.parse_query((*snipgen->queries)[i], flags);; + for(const std::string& query: snipgen->queries) { + q |= queryparser.parse_query(query, flags); } } @@ -2403,15 +2395,9 @@ EXPORTED int xapian_snipgen_add_match(xapian_snipgen_t *snipgen, (strchr(match, '*') != NULL)); if (is_query) { - if (!snipgen->queries) { - snipgen->queries = new std::vector; - } - snipgen->queries->push_back(match); + snipgen->queries.emplace_back(match); } else { - if (!snipgen->loose_terms) { - snipgen->loose_terms = new std::vector; - } - snipgen->loose_terms->push_back(match); + snipgen->loose_terms.emplace_back(match); } return 0; @@ -2430,16 +2416,16 @@ EXPORTED int xapian_snipgen_begin_doc(xapian_snipgen_t *snipgen, return 0; } -EXPORTED int xapian_snipgen_make_snippet(xapian_snipgen_t *snipgen, - const struct buf *part, - Xapian::Stem* stemmer) +static int xapian_snipgen_make_snippet(xapian_snipgen_t *snipgen, + const struct buf *part, + const Xapian::Stem& stemmer) { int r = 0; try { - std::string text {buf_base(part), buf_len(part)}; - Xapian::Enquire enquire(*snipgen->memdb); - Xapian::Query qq = xapian_snipgen_build_query(snipgen, *stemmer); + Xapian::Query qq = xapian_snipgen_build_query(snipgen, stemmer); if (qq.empty()) return 0; + std::string text {buf_base(part), buf_len(part)}; + Xapian::Enquire enquire(snipgen->memdb); enquire.set_query(qq); unsigned flags = Xapian::MSet::SNIPPET_EXHAUSTIVE | @@ -2452,7 +2438,7 @@ EXPORTED int xapian_snipgen_make_snippet(xapian_snipgen_t *snipgen, const std::string snippet = enquire.get_mset(0, 0).snippet(text, snipgen->max_len - buf_len(snipgen->buf), - *stemmer, flags, + stemmer, flags, snipgen->hi_start, snipgen->hi_end, snipgen->omit); @@ -2475,7 +2461,7 @@ EXPORTED int xapian_snipgen_doc_part(xapian_snipgen_t *snipgen, const struct buf *part) { // Ignore empty queries. - if (!snipgen->loose_terms && !snipgen->queries) return 0; + if (snipgen->loose_terms.empty() && snipgen->queries.empty()) return 0; // Don't exceed allowed snippet length. if (buf_len(snipgen->buf) >= snipgen->max_len) return 0; @@ -2498,8 +2484,7 @@ EXPORTED int xapian_snipgen_doc_part(xapian_snipgen_t *snipgen, for (const std::string& iso_lang : doclangs) { if (iso_lang != "en") { try { - Xapian::Stem stemmer = get_stemmer(iso_lang); - int r = xapian_snipgen_make_snippet(snipgen, part, &stemmer); + int r = xapian_snipgen_make_snippet(snipgen, part, get_stemmer(iso_lang)); if (!r && prev_size != buf_len(snipgen->buf)) { return 0; } @@ -2523,12 +2508,8 @@ EXPORTED int xapian_snipgen_end_doc(xapian_snipgen_t *snipgen, struct buf *buf) buf_copy(buf, snipgen->buf); buf_cstring(buf); buf_reset(snipgen->buf); - - delete snipgen->loose_terms; - snipgen->loose_terms = NULL; - - delete snipgen->queries; - snipgen->queries = NULL; + snipgen->loose_terms.clear(); + snipgen->queries.clear(); free(snipgen->cyrusid); snipgen->cyrusid = NULL;