Skip to content

Commit

Permalink
Support for limiting max search sounds dynamically
Browse files Browse the repository at this point in the history
  • Loading branch information
ffont committed Jan 26, 2024
1 parent c2c228b commit b8a6e98
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 12 deletions.
4 changes: 3 additions & 1 deletion sounds/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,9 @@ def similar(request, username, sound_id):
else:
# Get similar sounds from solr
try:
results = get_search_engine().search_sounds(similar_to=sound.id)
results = get_search_engine().search_sounds(similar_to=sound.id,
similar_to_max_num_sounds=settings.NUM_SIMILAR_SOUNDS_PER_PAGE * settings.NUM_SIMILAR_SOUNDS_PAGES,
num_sounds=settings.NUM_SIMILAR_SOUNDS_PER_PAGE * settings.NUM_SIMILAR_SOUNDS_PAGES)
similarity_results = [(result['id'], result['score']) for result in results.docs]
except SearchEngineException:
# Search engine not available, return empty list
Expand Down
4 changes: 3 additions & 1 deletion utils/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', of
num_sounds=settings.SOUNDS_PER_PAGE, sort=settings.SEARCH_SOUNDS_SORT_OPTION_AUTOMATIC,
group_by_pack=False, num_sounds_per_pack_group=1, facets=None, only_sounds_with_pack=False,
only_sounds_within_ids=False, group_counts_as_one_in_facets=False,
simialr_to=None, similar_to_analyzer=None):
simialr_to=None, similar_to_max_num_sounds=settings.SEARCH_ENGINE_NUM_SIMILAR_SOUNDS_PER_QUERY,
similar_to_analyzer=settings.SEARCH_ENGINE_DEFAULT_SIMILARITY_ANALYZER):
"""Search for sounds that match specific criteria and return them in a SearchResults object
Args:
Expand Down Expand Up @@ -274,6 +275,7 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', of
search. Note that when this parameter is passed, some of the other parameters will be ignored
('textual_query', 'facets', 'group_by_pack', 'num_sounds_per_pack_group', 'group_counts_as_one_in_facets').
'query_filter' should still be usable, although this remains to be thoroughly tested.
similar_to_max_num_sounds (int, optional): max number of sounds to return in a similarity search query.
similar_to_analyzer (str, optional): analyzer name from which to select similarity vectors for similarity search.
It defaults to settings.SEARCH_ENGINE_DEFAULT_SIMILARITY_ANALYZER, but it could be changed to something else
if we want to use a different type of similarity vectors for a similarity search query.
Expand Down
20 changes: 10 additions & 10 deletions utils/search/backends/solr555pysolr.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,8 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', of
num_sounds=settings.SOUNDS_PER_PAGE, sort=settings.SEARCH_SOUNDS_SORT_OPTION_AUTOMATIC,
group_by_pack=False, num_sounds_per_pack_group=1, facets=None, only_sounds_with_pack=False,
only_sounds_within_ids=False, group_counts_as_one_in_facets=False,
similar_to=None, similar_to_analyzer=settings.SEARCH_ENGINE_DEFAULT_SIMILARITY_ANALYZER):
similar_to=None, similar_to_max_num_sounds=settings.SEARCH_ENGINE_NUM_SIMILAR_SOUNDS_PER_QUERY ,
similar_to_analyzer=settings.SEARCH_ENGINE_DEFAULT_SIMILARITY_ANALYZER):

query = SolrQuery()

Expand Down Expand Up @@ -539,7 +540,7 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', of
vector = vector_raw[0:config_options['vector_size']]

if vector is not None and vector_field_name is not None:
max_similar_sounds = settings.SEARCH_ENGINE_NUM_SIMILAR_SOUNDS_PER_QUERY # Max number of results for similarity search search. Filters are applied before the similarity search, so this number will usually be the total number of results (unless filters are more restrictive)
max_similar_sounds = similar_to_max_num_sounds # Max number of results for similarity search search. Filters are applied before the similarity search, so this number will usually be the total number of results (unless filters are more restrictive)
serialized_vector = ','.join([str(n) for n in vector])
query.set_query(f'{{!knn f={vector_field_name} topK={max_similar_sounds}}}[{serialized_vector}]')

Expand All @@ -557,20 +558,19 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', of
# Also if target is specified as a sound ID, remove it from the list
query_filter_modified.append(f'-_nest_parent_:{int(similar_to)}')
# Update the top_similar_sounds_as_filter so we compensate for the fact that we are removing the target sound from the results
top_similar_sounds_as_filter=top_similar_sounds_as_filter.replace(f'topK={settings.SEARCH_ENGINE_NUM_SIMILAR_SOUNDS_PER_QUERY}', f'topK={settings.SEARCH_ENGINE_NUM_SIMILAR_SOUNDS_PER_QUERY + 1}')
top_similar_sounds_as_filter=top_similar_sounds_as_filter.replace(f'topK={similar_to_max_num_sounds}', f'topK={similar_to_max_num_sounds + 1}')
except ValueError:
# Target is not a sound id, so we don't need to add the filter
pass

# Also add the NN query as a filter so we don't get past the first settings.SEARCH_ENGINE_NUM_SIMILAR_SOUNDS_PER_QUERY results when applying extra filters
# Also add the NN query as a filter so we don't get past the first similar_to_max_num_sounds results when applying extra filters
query_filter_modified += [top_similar_sounds_as_filter]

# Now add all "usual" filters
for part in query_filter.split('+'):
if part:
# Add extra query filters to the search query, but using the approptiate prefix to make sure they are applied to the root documents
modified_filter_part = f'{{!child of=\"content_type:{SOLR_DOC_CONTENT_TYPES["sound"]}\"}}' + part
query_filter_modified.append(modified_filter_part)
# Now add the usual filter, but wrap it in "child of" modifier so it filters on parent documents instead of child documents
if query_filter:
query_filter_modified.append(f'{{!child of=\"content_type:{SOLR_DOC_CONTENT_TYPES["sound"]}\"}}({query_filter})')

# Replace query_filter with the modified version
query_filter = query_filter_modified

# Set query options
Expand Down

0 comments on commit b8a6e98

Please sign in to comment.