Skip to content

Commit

Permalink
Merge pull request #1 from sdelements/feature/enable-whoosh-faceting
Browse files (browse the repository at this point in the history)
Copy of upstream PR#1366
lepsalex authored Feb 8, 2019
2 parents 802b0f6 + ec5fbe0 commit 1a1dcae
Showing 2 changed files with 38 additions and 15 deletions.
23 changes: 12 additions & 11 deletions docs/backend_support.rst
Original file line number Diff line number Diff line change
@@ -63,6 +63,7 @@ Whoosh
* Term Boosting
* Stored (non-indexed) fields
* Highlighting
* Faceting (no dates or queries)
* Requires: whoosh (2.0.0+)

Xapian
@@ -83,17 +84,17 @@ Xapian
Backend Support Matrix
======================

+----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
| Backend        | SearchQuerySet Support | Auto Query Building | More Like This | Term Boost | Faceting | Stored Fields | Highlighting | Spatial |
+================+========================+=====================+================+============+==========+===============+==============+=========+
| Solr           | Yes                    | Yes                 | Yes            | Yes        | Yes      | Yes           | Yes          | Yes     |
+----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
| ElasticSearch  | Yes                    | Yes                 | Yes            | Yes        | Yes      | Yes           | Yes          | Yes     |
+----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
| Whoosh         | Yes                    | Yes                 | Yes            | Yes        | No       | Yes           | Yes          | No      |
+----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
| Xapian         | Yes                    | Yes                 | Yes            | Yes        | Yes      | Yes           | Yes (plugin) | No      |
+----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
+----------------+------------------------+---------------------+----------------+------------+-------------+---------------+--------------+---------+
| Backend        | SearchQuerySet Support | Auto Query Building | More Like This | Term Boost | Faceting    | Stored Fields | Highlighting | Spatial |
+================+========================+=====================+================+============+=============+===============+==============+=========+
| Solr           | Yes                    | Yes                 | Yes            | Yes        | Yes         | Yes           | Yes          | Yes     |
+----------------+------------------------+---------------------+----------------+------------+-------------+---------------+--------------+---------+
| ElasticSearch  | Yes                    | Yes                 | Yes            | Yes        | Yes         | Yes           | Yes          | Yes     |
+----------------+------------------------+---------------------+----------------+------------+-------------+---------------+--------------+---------+
| Whoosh         | Yes                    | Yes                 | Yes            | Yes        | Yes (basic) | Yes           | Yes          | No      |
+----------------+------------------------+---------------------+----------------+------------+-------------+---------------+--------------+---------+
| Xapian         | Yes                    | Yes                 | Yes            | Yes        | Yes         | Yes           | Yes (plugin) | No      |
+----------------+------------------------+---------------------+----------------+------------+-------------+---------------+--------------+---------+


Unsupported Backends & Alternatives
30 changes: 26 additions & 4 deletions haystack/backends/whoosh_backend.py
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
import os
import re
import shutil
import operator
import threading
import warnings

@@ -68,7 +69,7 @@
from whoosh.qparser import QueryParser, FuzzyTermPlugin
from whoosh.searching import ResultsPage
from whoosh.writing import AsyncWriter

from whoosh.sorting import FieldFacet

DATETIME_REGEX = re.compile(
"^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$"
@@ -469,7 +470,7 @@ def search(
sort_by = sort_by_list

if facets is not None:
warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2)
facets = [FieldFacet(facet, allow_overlap=True) for facet in facets]

if date_facets is not None:
warnings.warn(
@@ -520,7 +521,7 @@ def search(
if len(recent_narrowed_results) <= 0:
return {"results": [], "hits": 0}

if narrowed_results:
if narrowed_results is not None:
narrowed_results.filter(recent_narrowed_results)
else:
narrowed_results = recent_narrowed_results
@@ -541,6 +542,7 @@ def search(
"pagelen": page_length,
"sortedby": sort_by,
"reverse": reverse,
'groupedby': facets,
}

# Handle the case where the results have been narrowed.
@@ -714,10 +716,30 @@ def _process_results(
if result_class is None:
result_class = SearchResult

facets = {}
spelling_suggestion = None
unified_index = connections[self.connection_alias].get_unified_index()
indexed_models = unified_index.get_indexed_models()

facets = {}

if len(raw_page.results.facet_names()):
facets = {
'fields': {},
'dates': {},
'queries': {},
}
for facet_fieldname in raw_page.results.facet_names():
# split up the list and filter out None-names so we can
# sort them in python3 without getting a type error
facet_items = []
facet_none = []
for name, value in raw_page.results.groups(facet_fieldname).items():
if name is not None:
facet_items.append((name, len(value)))
else:
facet_none.append((name, len(value)))
facet_items.sort(key=operator.itemgetter(1, 0), reverse=True)
facets['fields'][facet_fieldname] = facet_items + facet_none

for doc_offset, raw_result in enumerate(raw_page):
score = raw_page.score(doc_offset) or 0

0 comments on commit 1a1dcae

Please sign in to comment.