Skip to content

Commit

Permalink
Merge pull request #1813 from UlrichB22/itemlist_perf
Browse files (browse the repository at this point in the history)
ItemList performance: mv regex handling to search_meta
  • Loading branch information
UlrichB22 authored Nov 30, 2024
2 parents 7ea02f3 + ab24a26 commit bd84149
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 18 deletions.
4 changes: 2 additions & 2 deletions src/moin/items/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ def build_index_query(self, startswith=None, selected_groups=None, isglobalindex

return query

def get_index(self, startswith=None, selected_groups=None):
def get_index(self, startswith=None, selected_groups=None, regex=None):
"""
Get index entries for descendents of the matching items
Expand All @@ -1378,7 +1378,7 @@ def get_index(self, startswith=None, selected_groups=None):
)
if not fqname.value.startswith(NAMESPACE_ALL + "/") and fqname.value != NAMESPACE_ALL:
query = Term(NAMESPACE, fqname.namespace) & query
revs = flaskg.storage.search_meta(query, idx_name=LATEST_REVS, sortedby=NAME_EXACT, limit=None)
revs = flaskg.storage.search_meta(query, idx_name=LATEST_REVS, sortedby=NAME_EXACT, limit=None, regex=regex)
return self.make_flat_index(revs, isglobalindex)


Expand Down
11 changes: 3 additions & 8 deletions src/moin/macros/ItemList.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,20 +133,15 @@ def macro(self, content, arguments, page_url, alternative):
err_msg = _("Item does not exist or read access blocked by ACLs: {0}").format(item)
return fail_message(err_msg, alternative)

# process subitems
children = get_item_names(item, startswith=startswith, skiptag=skiptag, tag=tag)
if regex:
try:
regex_re = re.compile(regex, re.IGNORECASE)
re.compile(regex, re.IGNORECASE)
except re.error as err:
err_msg = _("Error in regex {0!r}: {1}").format(regex, err)
return fail_message(err_msg, alternative)

newlist = []
for child in children:
if regex_re.search(child.fullname):
newlist.append(child)
children = newlist
children = get_item_names(item, startswith=startswith, skiptag=skiptag, tag=tag, regex=regex)

if not children:
return fail_message(_("No matching items were found"), alternative, severity="attention")

Expand Down
12 changes: 7 additions & 5 deletions src/moin/macros/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from moin.constants.keys import TAGS


def get_item_names(name="", startswith="", kind="files", skiptag="", tag=""):
def get_item_names(name="", startswith="", kind="files", skiptag="", tag="", regex=None):
"""
For the specified item, return the fullname of matching descendents.
Expand Down Expand Up @@ -49,7 +49,7 @@ def get_item_names(name="", startswith="", kind="files", skiptag="", tag=""):
item = Item.create(name)
except AccessDenied:
abort(403)
dirs, files = item.get_index(startswith)
dirs, files = item.get_index(startswith, regex=regex)
item_names = []
if not kind or kind == "files" or kind == "both":
for item in files:
Expand Down Expand Up @@ -213,8 +213,7 @@ def create_pagelink_list(self, pagenames, alternative, ordered=False, display="F
ItemTitle : Use the title from the first header in the linked page
"""

page_list = moin_page.list(attrib={moin_page.item_label_generate: ordered and "ordered" or "unordered"})

children = []
for pagename in pagenames:

fqname = pagename.fullname
Expand Down Expand Up @@ -245,7 +244,10 @@ def create_pagelink_list(self, pagenames, alternative, ordered=False, display="F
pagelink = moin_page.a(attrib={xlink.href: url}, children=[linkname])
item_body = moin_page.list_item_body(children=[pagelink])
item = moin_page.list_item(children=[item_body])
page_list.append(item)
children.append(item)
page_list = moin_page.list(
attrib={moin_page.item_label_generate: ordered and "ordered" or "unordered"}, children=children
)

return page_list

Expand Down
7 changes: 6 additions & 1 deletion src/moin/storage/middleware/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

import gc
import os
import re
import sys
import shutil
import time
Expand Down Expand Up @@ -889,14 +890,18 @@ def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw):
item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
yield item.get_revision(doc[REVID], doc=doc)

def search_meta(self, q, idx_name=LATEST_REVS, **kw):
def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw):
"""
Search with query q, yield Revision metadata from index.
"""
with self.ix[idx_name].searcher() as searcher:
# Note: callers must consume everything we yield, so the for loop
# ends and the "with" is left to close the index files.
if regex:
regex_re = re.compile(regex, re.IGNORECASE)
for hit in searcher.search(q, **kw):
if regex and not regex_re.search(hit[NAME][0]):
continue
meta = hit.fields()
yield meta

Expand Down
4 changes: 2 additions & 2 deletions src/moin/storage/middleware/protecting.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,15 @@ def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw):
if rev.allows(READ) or rev.allows(PUBREAD):
yield rev

def search_meta(self, q, idx_name=LATEST_REVS, **kw):
def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw):
"""
Yield an item's metadata, skipping any items where read permission is denied.
The intended use of this method is to return the current rev metadata for all
of the items in namespace subject to query restrictions. This is useful for reports
such as Global Index, Global Tags, Wanted Items, Orphaned Items, etc.
"""
for meta in self.indexer.search_meta(q, idx_name, **kw):
for meta in self.indexer.search_meta(q, idx_name, regex=regex, **kw):
meta[FQNAMES] = gen_fqnames(meta)
result = self.may_read_rev(meta)
if result:
Expand Down

0 comments on commit bd84149

Please sign in to comment.