diff --git a/DEVELOPERS.md b/DEVELOPERS.md
index 7e4a6aa4d..a3f001329 100644
--- a/DEVELOPERS.md
+++ b/DEVELOPERS.md
@@ -71,6 +71,7 @@ Currently, we only use the following custom permissions:
* `tickets.can_moderate` (in `Ticket` model, used to allow sound moderation)
* `forum.can_moderate_forum` (in `Post` model, used to allow forum moderation)
* `sounds.can_describe_in_bulk` (in `BulkUploadProgress` model, used to allow bulk upload for users who don't meet the other common requirements)
+* `accounts.can_beta_test` (in `Profile` model, used to show beta features, such as beta search options, to a user)
### URLs that include a username
@@ -131,6 +132,33 @@ creating `DeletedSound` objects in the `sounds-models.on_delete_sound` function
signal of the `Sound` model.
+### Adding new search options in the search page
+
+The available options for searching and filtering sounds in the search page are managed using a `SearchQueryProcessor`
+object (implemented in `/utils/search/search_query_processor.py`). The `SearchQueryProcessor` class is used to parse and
+process search query information from a Django `request` object, and compute a number of useful items for displaying search
+information in templates, constructing search URLs, and preparing search options to be passed to the backend search engine.
+
+To add a new option to the search page, a new member of a specific `SearchOption` class should be added to the `SearchQueryProcessor`
+class (see `SearchQueryProcessor` definition for examples). There are a number of already existing types of `SearchOption`s
+as you can see by looking at the search options which are already implemented in `SearchQueryProcessor`. If the newly added search
+option implies doing some calculations for determining the `query_params` to be sent to the `search_sounds` function of the search
+engine backend, this should be done in the `SearchQueryProcessor.as_query_params` method.
+
+Adding a new search option to `SearchQueryProcessor` will make the option work with the search engine backend and with search URLs,
+but it will NOT automatically add the option to the form in the search page. This will need to be done manually by adding the
+search option in the desired place in `templates/search/search.html` (see how other search options are implemented for inspiration;
+there is a `display_search_option` templatetag which will facilitate things in most cases).
+
+All this will add the search option to the user interface and send corresponding information to the search backend. For example,
+if the new search option should apply a filter on some `new_property` in the search backend, this will be handled by the `SearchQueryProcessor`.
+However, it is expected that this `new_property` has been added to the search engine schema and indexed properly, otherwise there
+will be errors when running the queries.
+
+Please have a look at the documentation of `SearchQueryProcessor` and the various `SearchOption` classes to get a better
+understanding of how all this works.
+
+
### Search Engine Backends
The way in which Freesound communicates with a search engine to search for sounds and forum posts is abstracted through
@@ -149,7 +177,6 @@ the implementation of a search backend. You can run it like:
Please read carefully the documentation of the management command to better understand how it works and how is it
doing the testing.
-
### Freesound analysis pipeline
In February 2022 we released a refactoring of the analysis pipeline that allows us to more easily incorporate new audio
diff --git a/accounts/migrations/0041_alter_profile_options.py b/accounts/migrations/0041_alter_profile_options.py
new file mode 100644
index 000000000..7a2cda03b
--- /dev/null
+++ b/accounts/migrations/0041_alter_profile_options.py
@@ -0,0 +1,17 @@
+# Generated by Django 3.2.23 on 2024-02-23 22:08
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('accounts', '0040_auto_20230328_1205'),
+ ]
+
+ operations = [
+ migrations.AlterModelOptions(
+ name='profile',
+ options={'ordering': ('-user__date_joined',), 'permissions': (('can_beta_test', 'Show beta features to that user.'),)},
+ ),
+ ]
diff --git a/accounts/models.py b/accounts/models.py
index a5272f023..6e1b18cfd 100644
--- a/accounts/models.py
+++ b/accounts/models.py
@@ -226,7 +226,7 @@ def get_user_sounds_in_search_url(self):
return f'{reverse("sounds-search")}?f=username:"{ self.user.username }"&s=Date+added+(newest+first)&g=0'
def get_user_packs_in_search_url(self):
- return f'{reverse("sounds-search")}?f=username:"{ self.user.username }"&s=Date+added+(newest+first)&g=1&only_p=1'
+ return f'{reverse("sounds-search")}?f=username:"{ self.user.username }"&s=Date+added+(newest+first)&g=1&dp=1'
def get_latest_packs_for_profile_page(self):
latest_pack_ids = Pack.objects.select_related().filter(user=self.user, num_sounds__gt=0).exclude(is_deleted=True) \
@@ -649,6 +649,9 @@ def get_stats_for_profile_page(self):
class Meta:
ordering = ('-user__date_joined', )
+ permissions = (
+ ("can_beta_test", "Show beta features to that user."),
+ )
class GdprAcceptance(models.Model):
diff --git a/accounts/tests/test_views.py b/accounts/tests/test_views.py
index 2c6cd9e6a..612d0884a 100644
--- a/accounts/tests/test_views.py
+++ b/accounts/tests/test_views.py
@@ -262,14 +262,14 @@ def test_sounds_response(self):
reverse('pack-downloaders', kwargs={'username': user.username, "pack_id": self.pack.id}) + '?ajax=1')
self.assertEqual(resp.status_code, 200)
- @mock.patch('search.views.perform_search_engine_query')
+ @mock.patch('tags.views.perform_search_engine_query')
def test_tags_response(self, perform_search_engine_query):
perform_search_engine_query.return_value = (create_fake_perform_search_engine_query_results_tags_mode(), None)
# 200 response on tags page access
resp = self.client.get(reverse('tags'))
self.assertEqual(resp.status_code, 200)
- self.assertEqual(resp.context['tags_mode'], True)
+ self.assertEqual(resp.context['sqp'].tags_mode_active(), True)
def test_packs_response(self):
# 302 response (note that since BW, there will be a redirect to the search page in between)
diff --git a/accounts/urls.py b/accounts/urls.py
index 564f2968c..7b42fc756 100644
--- a/accounts/urls.py
+++ b/accounts/urls.py
@@ -27,7 +27,7 @@
import bookmarks.views as bookmarks
import follow.views as follow
import apiv2.views as api
-from utils.urlpatterns import redirect_inline
+from utils.url import redirect_inline
diff --git a/clustering/__init__.py b/clustering/__init__.py
index f862c5e1b..e69de29bb 100644
--- a/clustering/__init__.py
+++ b/clustering/__init__.py
@@ -1,23 +0,0 @@
-#
-# Freesound is (c) MUSIC TECHNOLOGY GROUP, UNIVERSITAT POMPEU FABRA
-#
-# Freesound is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# Freesound is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-#
-# Authors:
-# See AUTHORS file.
-#
-
-# strings used for communicating the state of the clustering process
-CLUSTERING_RESULT_STATUS_PENDING = "pending"
-CLUSTERING_RESULT_STATUS_FAILED = "failed"
diff --git a/clustering/clustering.py b/clustering/clustering.py
index ccee9b738..c7f4891e8 100644
--- a/clustering/clustering.py
+++ b/clustering/clustering.py
@@ -33,21 +33,15 @@
import six
from time import time
-from . import clustering_settings as clust_settings
-
-# The following packages are only needed if the running process is configured to be a Celery worker.
-# We avoid importing them in appservers to avoid having to install unneeded dependencies.
-if settings.IS_CELERY_WORKER:
- import community as com
- import numpy as np
- import networkx as nx
- from networkx.readwrite import json_graph
- from networkx.algorithms.community import k_clique_communities, greedy_modularity_communities
- from sklearn import metrics
- from sklearn.feature_selection import mutual_info_classif
- from sklearn.neighbors import kneighbors_graph
-
- from .features_store import FeaturesStore
+import community as com
+import numpy as np
+import networkx as nx
+from networkx.readwrite import json_graph
+from networkx.algorithms.community import k_clique_communities, greedy_modularity_communities
+from sklearn import metrics
+from sklearn.feature_selection import mutual_info_classif
+from sklearn.neighbors import kneighbors_graph
+
logger = logging.getLogger('clustering')
@@ -65,8 +59,6 @@ class ClusteringEngine(object):
method. Moreover, a few unsued alternative methods for performing some intermediate steps are left
here for developement and research purpose.
"""
- def __init__(self):
- self.feature_store = FeaturesStore()
def _prepare_clustering_result_and_reference_features_for_evaluation(self, partition):
"""Formats the clustering classes and some reference features in order to then estimate how good is the
@@ -157,6 +149,9 @@ def _evaluation_metrics(self, partition):
"""
# we compute the evaluation metrics only if some reference features are available for evaluation
# we return None when they are not available not to break the following part of the code
+ '''
+ # NOTE: the following code is commented because the reference features are not available in the current version of the code
+ # If in the future we want to perform further evaluation, we should re-implement some of these functions
if clust_settings.REFERENCE_FEATURES in clust_settings.AVAILABLE_FEATURES:
reference_features, clusters = self._prepare_clustering_result_and_reference_features_for_evaluation(partition)
ami = np.average(mutual_info_classif(reference_features, clusters, discrete_features=True))
@@ -165,6 +160,8 @@ def _evaluation_metrics(self, partition):
return ami, ss, ci
else:
return None, None, None
+ '''
+ return None, None, None
def _ratio_intra_community_edges(self, graph, communities):
"""Computes the ratio of the number of intra-community (cluster) edges to the total number of edges in the cluster.
@@ -212,55 +209,13 @@ def _point_centralities(self, graph, communities):
node_community_centralities = {k: old_div(v,max(d.values())) for d in communities_centralities for k, v in d.items()}
return node_community_centralities
-
- def _save_results_to_file(self, query_params, features, graph_json, sound_ids, modularity,
- num_communities, ratio_intra_community_edges, ami, ss, ci, communities):
- """Saves a json file to disk containing the clustering results information listed below.
- This is used when developing the clustering method. The results and the evaluation metrics are made accessible
- for post-analysis.
-
- Args:
- query_params (str): string representing the query parameters submited by the user to the search engine.
- features (str): name of the features used for clustering.
- graph_json: (dict) NetworkX graph representation of sounds data in node-link format that is suitable for JSON
- serialization.
- sound_ids (List[Int]): list of the sound ids.
- modularity (float): modularity of the graph partition.
- num_communities (Int): number of communities (clusters).
- ratio_intra_community_edges (List[Float]): intra-community edges ratio.
- ami (Numpy.float): Average Mutual Information score.
- ss (Numpy.float): Silhouette Coefficient score.
- ci (Numpy.float): Calinski and Harabaz Index score.
- communities (List[List[Int]]): List storing Lists containing the Sound ids that are in each community (cluster).
- """
- if clust_settings.SAVE_RESULTS_FOLDER:
- result = {
- 'query_params' : query_params,
- 'sound_ids': sound_ids,
- 'num_clusters': num_communities,
- 'graph': graph_json,
- 'features': features,
- 'modularity': modularity,
- 'ratio_intra_community_edges': ratio_intra_community_edges,
- 'average_mutual_information': ami,
- 'silouhette_coeff': ss,
- 'calinski_harabaz_score': ci,
- 'communities': communities
- }
- with open(os.path.join(
- clust_settings.SAVE_RESULTS_FOLDER,
- f'{query_params}.json'
- ), 'w') as f:
- json.dump(result, f)
-
- def create_knn_graph(self, sound_ids_list, features=clust_settings.DEFAULT_FEATURES):
+ def create_knn_graph(self, sound_ids_list, similarity_vectors_map):
"""Creates a K-Nearest Neighbors Graph representation of the given sounds.
Args:
sound_ids_list (List[str]): list of sound ids.
- features (str): name of the features to be used for nearest neighbors computation.
- Available features are listed in the clustering settings file.
+ similarity_vectors_map (Dict{int:List[float]}): dictionary with the similarity feature vectors for each sound.
Returns:
(nx.Graph): NetworkX graph representation of sounds.
@@ -272,58 +227,21 @@ def create_knn_graph(self, sound_ids_list, features=clust_settings.DEFAULT_FEATU
# neighbors for small collections, while limiting it for larger collections, which ensures low-computational complexity.
k = int(np.ceil(np.log2(len(sound_ids_list))))
- sound_features, sound_ids_out = self.feature_store.return_features(sound_ids_list)
+ features = []
+ sound_ids_out = []
+ for sound_id, feature_vector in similarity_vectors_map.items():
+ features.append(feature_vector)
+ sound_ids_out.append(sound_id)
+ sound_features = np.array(features).astype('float32')
+
A = kneighbors_graph(sound_features, k)
for idx_from, (idx_to, distance) in enumerate(zip(A.indices, A.data)):
idx_from = int(idx_from / k)
- if distance < clust_settings.MAX_NEIGHBORS_DISTANCE:
+ if distance < settings.CLUSTERING_MAX_NEIGHBORS_DISTANCE:
graph.add_edge(sound_ids_out[idx_from], sound_ids_out[idx_to])
# Remove isolated nodes
graph.remove_nodes_from(list(nx.isolates(graph)))
-
- return graph
-
- def create_common_nn_graph(self, sound_ids_list, features=clust_settings.DEFAULT_FEATURES):
- """Creates a Common Nearest Neighbors Graph representation of the given sounds.
-
- Args:
- sound_ids_list (List[str]): list of sound ids.
- features (str): name of the features to be used for nearest neighbors computation.
- Available features are listed in the clustering settings file.
-
- Returns:
- (nx.Graph): NetworkX graph representation of sounds.
- """
- # first create a knn graph
- knn_graph = self.create_knn_graph(sound_ids_list, features=features)
-
- # create the common nn graph
- graph = nx.Graph()
- graph.add_nodes_from(knn_graph.nodes)
-
- for i, node_i in enumerate(knn_graph.nodes):
- for j, node_j in enumerate(knn_graph.nodes):
- if j > i:
- num_common_neighbors = len(set(knn_graph.neighbors(node_i)).intersection(knn_graph.neighbors(node_j)))
- if num_common_neighbors > 0:
- graph.add_edge(node_i, node_j, weight=num_common_neighbors)
-
- # keep only k most weighted edges
- k = int(np.ceil(np.log2(len(graph.nodes))))
- # we iterate through the node ids and get all its corresponding edges using graph[node]
- # there seem to be no way to get node_id & edges in the for loop.
- for node in graph.nodes:
- ordered_neighbors = sorted(list(six.iteritems(graph[node])), key=lambda x: x[1]['weight'], reverse=True)
- try:
- neighbors_to_remove = [neighbor_distance[0] for neighbor_distance in ordered_neighbors[k:]]
- graph.remove_edges_from([(node, neighbor) for neighbor in neighbors_to_remove])
- except IndexError:
- pass
-
- # Remove isolated nodes
- graph.remove_nodes_from(list(nx.isolates(graph)))
-
return graph
def cluster_graph(self, graph):
@@ -349,7 +267,7 @@ def cluster_graph(self, graph):
modularity = com.modularity(partition , graph)
return partition, num_communities, communities, modularity
-
+
def cluster_graph_overlap(self, graph, k=5):
"""Applies overlapping community detection in the given graph.
@@ -371,7 +289,7 @@ def cluster_graph_overlap(self, graph, k=5):
partition = {sound_id: cluster_id for cluster_id, cluster in enumerate(communities) for sound_id in cluster}
return partition, num_communities, communities, None
-
+
def remove_lowest_quality_cluster(self, graph, partition, communities, ratio_intra_community_edges):
"""Removes the lowest quality cluster in the given graph.
@@ -404,13 +322,13 @@ def remove_lowest_quality_cluster(self, graph, partition, communities, ratio_int
partition[snd] -= 1
return graph, partition, communities, ratio_intra_community_edges
- def cluster_points(self, query_params, features, sound_ids):
+ def cluster_points(self, query_params, sound_ids, similarity_vectors_map):
"""Applies clustering on the requested sounds using the given features name.
Args:
query_params (str): string representing the query parameters submited by the user to the search engine.
- features (str): name of the features used for clustering the sounds.
sound_ids (List[int]): list containing the ids of the sound to cluster.
+ similarity_vectors_map (Dict{int:List[float]}): dictionary with the similarity feature vectors for each sound.
Returns:
Dict: contains the resulting clustering classes and the graph in node-link format suitable for JSON serialization.
@@ -420,17 +338,17 @@ def cluster_points(self, query_params, features, sound_ids):
logger.info('Request clustering of {} points: {} ... from the query "{}"'
.format(len(sound_ids), ', '.join(sound_ids[:20]), json.dumps(query_params)))
- graph = self.create_knn_graph(sound_ids, features=features)
+ graph = self.create_knn_graph(sound_ids, similarity_vectors_map=similarity_vectors_map)
if len(graph.nodes) == 0: # the graph does not contain any node
- return {'error': False, 'result': None, 'graph': None}
+ return {'clusters': None, 'graph': None}
partition, num_communities, communities, modularity = self.cluster_graph(graph)
ratio_intra_community_edges = self._ratio_intra_community_edges(graph, communities)
# Discard low quality cluster if there are more than NUM_MAX_CLUSTERS clusters
- num_exceeding_clusters = num_communities - clust_settings.NUM_MAX_CLUSTERS
+ num_exceeding_clusters = num_communities - settings.CLUSTERING_NUM_MAX_CLUSTERS
if num_exceeding_clusters > 0:
for _ in range(num_exceeding_clusters):
graph, partition, communities, ratio_intra_community_edges = self.remove_lowest_quality_cluster(
@@ -459,8 +377,4 @@ def cluster_points(self, query_params, features, sound_ids):
# Export graph as json
graph_json = json_graph.node_link_data(graph)
- # Save results to file if SAVE_RESULTS_FOLDER is configured in clustering settings
- self._save_results_to_file(query_params, features, graph_json, sound_ids, modularity,
- num_communities, ratio_intra_community_edges, ami, ss, ci, communities)
-
- return {'error': False, 'result': communities, 'graph': graph_json}
+ return {'clusters': communities, 'graph': graph_json}
diff --git a/clustering/clustering_settings.py b/clustering/clustering_settings.py
deleted file mode 100644
index 2e63cccbc..000000000
--- a/clustering/clustering_settings.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#
-# Freesound is (c) MUSIC TECHNOLOGY GROUP, UNIVERSITAT POMPEU FABRA
-#
-# Freesound is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# Freesound is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-#
-# Authors:
-# See AUTHORS file.
-#
-
-
-# Directory where the Gaia dataset index files are located.
-INDEX_DIR = '/freesound-data/clustering_index/'
-
-# Configuration of the features used for clustering or evaluation.
-# We define here for each features a json file index file, and possible additonal info.
-# The minimum requirement is to have one available set of features and set it as the default features
-# used for clustering (see variable bellow).
-AVAILABLE_FEATURES = {
- # AudioSet Features (feature vector of the frame of max energy)
- 'AUDIOSET_FEATURES': {
- 'DATASET_FILE': 'AS_features_max_nrg.json'
- },
- # tag-based features used as reference features (Bag of Words - LDA)
- 'TAG_DERIVED_FEATURES': None,
-}
-
-# Default features used for clustering
-DEFAULT_FEATURES = 'AUDIOSET_FEATURES'
-
-# Key of AVAILABLE_FEATURES used for evaluating the clustering results
-# Typically tag-derived features
-REFERENCE_FEATURES = None
-
-# Maximum number of results to cluster
-MAX_RESULTS_FOR_CLUSTERING = 1000
-
-# Cache settings
-# One day timeout for keeping clustering results. The cache timer is reset when the clustering is
-# requested so that popular queries that are performed once a day minimum will always stay in cache
-# and won't be recomputed.
-CLUSTERING_CACHE_TIME = 24*60*60*1
-# One minute timeout for keeping the pending state. When a clustering is being performed async in a
-# Celery worker, we consider the clustering as pending for only 1 minute. This may be useful if a
-# worker task got stuck. There should be a settings in celery to stop a worker task if it is running
-# for too long.
-CLUSTERING_PENDING_CACHE_TIME = 60*1
-
-# Folder for saving the clustering results with evaluation (dev/debug/research purpose)
-SAVE_RESULTS_FOLDER = None
-
-# Limit of distance when creating Nearest Neighbors graph
-MAX_NEIGHBORS_DISTANCE = 20
-
-# Number of sound examples extracted per cluster for cluster facet sound preview
-NUM_SOUND_EXAMPLES_PER_CLUSTER_FACET = 7
-
-# Number of most common tags extracted per cluster for clustering facet name
-NUM_TAGS_SHOWN_PER_CLUSTER_FACET = 3
-
-# Number of maximum clusters to show to the user
-NUM_MAX_CLUSTERS = 8
diff --git a/clustering/features_store.py b/clustering/features_store.py
deleted file mode 100644
index 51915ce2d..000000000
--- a/clustering/features_store.py
+++ /dev/null
@@ -1,81 +0,0 @@
-#
-# Freesound is (c) MUSIC TECHNOLOGY GROUP, UNIVERSITAT POMPEU FABRA
-#
-# Freesound is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# Freesound is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-#
-# Authors:
-# See AUTHORS file.
-#
-
-from __future__ import absolute_import
-
-from builtins import zip
-from builtins import str
-from builtins import object
-import json
-import logging
-import os
-
-from . import clustering_settings as clust_settings
-import numpy as np
-import redis
-from django.conf import settings
-
-logger = logging.getLogger('clustering')
-
-
-class RedisStore(object):
- def __init__(self):
- self.r = redis.StrictRedis(
- host=settings.REDIS_HOST, port=settings.REDIS_PORT, db=settings.AUDIO_FEATURES_REDIS_STORE_ID)
-
- def set_feature(self, sound_id, feature):
- self.r.set(str(sound_id), json.dumps(feature))
-
- def get_feature(self, sound_id):
- feature = self.r.get(str(sound_id))
- if feature:
- return json.loads(feature)
-
- def set_features(self, d):
- self.r.mset({k: json.dumps(v) for k, v in d.items()})
-
- def get_features(self, sound_ids):
- return self.r.mget(sound_ids)
-
-
-class FeaturesStore(object):
- """Method for storing and retrieving audio features
- """
- def __init__(self):
- self.redis = RedisStore()
- self.__load_features()
-
- def __load_features(self):
- self.AS_features = json.load(open(os.path.join(
- clust_settings.INDEX_DIR,
- clust_settings.AVAILABLE_FEATURES[clust_settings.DEFAULT_FEATURES]['DATASET_FILE']
- ), 'r'))
- self.redis.set_features(self.AS_features)
-
- def return_features(self, sound_ids):
- features = []
- sound_ids_out = []
- output = self.redis.get_features(sound_ids)
- for sound_id, feature in zip(sound_ids, output):
- if feature:
- features.append(json.loads(feature))
- sound_ids_out.append(sound_id)
-
- return np.array(features).astype('float32'), sound_ids_out
diff --git a/clustering/interface.py b/clustering/interface.py
deleted file mode 100644
index ccbbca602..000000000
--- a/clustering/interface.py
+++ /dev/null
@@ -1,140 +0,0 @@
-#
-# Freesound is (c) MUSIC TECHNOLOGY GROUP, UNIVERSITAT POMPEU FABRA
-#
-# Freesound is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# Freesound is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-#
-# Authors:
-# See AUTHORS file.
-#
-from __future__ import absolute_import
-from builtins import str
-from django.conf import settings
-from django.core.cache import caches
-
-from .clustering_settings import DEFAULT_FEATURES, MAX_RESULTS_FOR_CLUSTERING
-from freesound.celery import app as celery_app
-from utils.encryption import create_hash
-from utils.search.search_sounds import perform_search_engine_query, search_prepare_parameters
-from . import CLUSTERING_RESULT_STATUS_PENDING, CLUSTERING_RESULT_STATUS_FAILED
-
-cache_clustering = caches["clustering"]
-
-
-def get_sound_ids_from_search_engine_query(query_params):
- """Performs Solr query and returns results as a list of sound ids.
-
- This method performs a single query to Solr with a very big page size argument so all results are
- returned at once. A very big page size will make the clustering take a lot of time to be performed.
- The number of results to retrieve is defined in the clustering settings file as MAX_RESULTS_FOR_CLUSTERING.
-
- Args:
- query_params (dict): contains the query parameters to replicate the user query.
-
- Returns
- List[int]: list containing the ids of the retrieved sounds.
- """
- # We set include_facets to False in order to reduce the amount of data that search engine will return.
- query_params.update({
- 'current_page': 1,
- 'num_sounds': MAX_RESULTS_FOR_CLUSTERING,
- })
- results, _ = perform_search_engine_query(query_params)
- resultids = [d.get("id") for d in results.docs]
-
- return resultids
-
-
-def cluster_sound_results(request, features=DEFAULT_FEATURES):
- """Performs clustering on the search results of the given search request with the requested features.
-
- This is the main entry to the clustering method. It will either get the clustering results from cache,
- or compute it (and store it in cache). When needed, the clustering will be performed async by a celery
- worker.
-
- Args:
- request (HttpRequest): request associated with the search query submitted by the user.
- features (str): name of the features to be used for clustering. The available features are defined in the
- clustering settings file.
-
- Returns:
- Dict: contains either the state of the clustering ('pending' or 'failed') or the resulting clustering classes
- and the graph in node-link format suitable for JSON serialization.
- """
- query_params, _, extra_vars = search_prepare_parameters(request)
- # We change filter_query to filter_query_non_facets in order to ensure that the clustering is always
- # done on the non faceted filtered results. Without that, people directly requesting a facet filtered
- # page would have a clustering performed on filtered results.
- query_params['query_filter'] = extra_vars['filter_query_non_facets']
- cache_key = 'cluster-results-{textual_query}-{query_filter}-{sort}-{group_by_pack}'\
- .format(**query_params).replace(' ', '')
- cache_key += f"-{str(query_params['query_fields'])}"
- cache_key += f'-{features}'
- cache_key_hashed = hash_cache_key(cache_key)
-
- # check if result is in cache
- result = cache_clustering.get(cache_key_hashed)
-
- if result and result not in (CLUSTERING_RESULT_STATUS_PENDING, CLUSTERING_RESULT_STATUS_FAILED):
- result.update({'finished': True, 'error': False})
- return result
-
- elif result == CLUSTERING_RESULT_STATUS_PENDING:
- return {'finished': False, 'error': False}
-
- elif result == CLUSTERING_RESULT_STATUS_FAILED:
- return {'finished': False, 'error': True}
-
- else:
- # if not in cache, query solr and perform clustering
- sound_ids = get_sound_ids_from_search_engine_query(query_params)
-
- # launch clustering with celery async task
- celery_app.send_task('cluster_sounds', kwargs={
- 'cache_key_hashed': cache_key_hashed,
- 'sound_ids': sound_ids,
- 'features': features
- }, queue='clustering')
-
- return {'finished': False, 'error': False}
-
-
-def get_ids_in_cluster(request, requested_cluster_id):
- """Get the sound ids in the requested cluster. Used for applying a filter by id when using a cluster facet.
- """
- try:
- requested_cluster_id = int(requested_cluster_id) - 1
-
- # results are cached in clustering_utilities, available features are defined in the clustering settings file.
- result = cluster_sound_results(request, features=DEFAULT_FEATURES)
- results = result['result']
-
- sounds_from_requested_cluster = results[int(requested_cluster_id)]
-
- except ValueError:
- return []
- except IndexError:
- return []
- except KeyError:
- # If the clustering is not in cache the 'result' key won't exist
- # This means that the clustering computation will be triggered asynchronously.
- # Moreover, the applied clustering filter will have no effect.
- # Somehow, we should inform the user that the clustering results were not available yet, and that
- # he should try again later to use a clustering facet.
- return []
-
- return sounds_from_requested_cluster
-
-
-def hash_cache_key(key):
- return create_hash(key, limit=32)
diff --git a/clustering/tasks.py b/clustering/tasks.py
index d9d79dc23..0558f882c 100644
--- a/clustering/tasks.py
+++ b/clustering/tasks.py
@@ -18,54 +18,27 @@
# See AUTHORS file.
#
-from __future__ import absolute_import
-
from django.conf import settings
-from django.core.cache import caches
from celery import shared_task
from celery import Task
-import logging
from .clustering import ClusteringEngine
-from .clustering_settings import CLUSTERING_CACHE_TIME, CLUSTERING_PENDING_CACHE_TIME
-from . import CLUSTERING_RESULT_STATUS_PENDING, CLUSTERING_RESULT_STATUS_FAILED
-
-logger = logging.getLogger('clustering')
-
-cache_clustering = caches["clustering"]
-
class ClusteringTask(Task):
""" Task Class used for defining the clustering engine only required in celery workers
"""
def __init__(self):
- if settings.IS_CELERY_WORKER:
- self.engine = ClusteringEngine()
+ self.engine = ClusteringEngine()
-@shared_task(name="cluster_sounds", base=ClusteringTask)
-def cluster_sounds(cache_key_hashed, sound_ids, features):
- """ Triggers the clustering of the sounds given as argument with the specified features.
-
- This is the task that is used for clustering the sounds of a search result asynchronously with Celery.
+@shared_task(name="cluster_sounds", base=ClusteringTask, queue=settings.CELERY_CLUSTERING_TASK_QUEUE_NAME)
+def cluster_sounds(cache_key, sound_ids, similarity_vectors_map=None):
+ """ Triggers the clustering of the sounds given as argument with the provided similarity vectors.
The clustering result is stored in cache using the hashed cache key built with the query parameters.
Args:
- cache_key_hashed (str): hashed key for storing/retrieving the results in cache.
+ cache_key (str): hashed key for storing/retrieving the results in cache.
sound_ids (List[int]): list containing the ids of the sound to cluster.
- features (str): name of the features used for clustering the sounds (defined in the clustering settings file).
+ similarity_vectors_map (Dict{int:List[float]}): dictionary with the similarity feature vectors for each sound.
"""
- # store pending state in cache
- cache_clustering.set(cache_key_hashed, CLUSTERING_RESULT_STATUS_PENDING, CLUSTERING_PENDING_CACHE_TIME)
-
- try:
- # perform clustering
- result = cluster_sounds.engine.cluster_points(cache_key_hashed, features, sound_ids)
-
- # store result in cache
- cache_clustering.set(cache_key_hashed, result, CLUSTERING_CACHE_TIME)
-
- except Exception as e:
- # delete pending state if exception raised during clustering
- cache_clustering.set(cache_key_hashed, CLUSTERING_RESULT_STATUS_FAILED, CLUSTERING_PENDING_CACHE_TIME)
- logger.info("Exception raised while clustering sounds", exc_info=True)
+ return cluster_sounds.engine.cluster_points(cache_key, sound_ids, similarity_vectors_map=similarity_vectors_map)
diff --git a/docker-compose.yml b/docker-compose.yml
index fe87562d6..ce59cd8bc 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -80,7 +80,7 @@ services:
context: ./
dockerfile: ./docker/Dockerfile.workers_web
init: true
- command: celery -A freesound worker --concurrency=2 -l info -Q async_tasks_queue,sound_processing_queue,sound_analysis_old_queue
+ command: celery -A freesound worker --concurrency=2 -l info -Q async_tasks_queue,sound_processing_queue,sound_analysis_old_queue,clustering_queue
volumes:
- .:/code
- ./freesound-data/:/freesound-data
@@ -149,25 +149,6 @@ services:
depends_on:
- rabbitmq
- # Clustering worker (not using the previous one as this has some specific requirements)
- worker_clustering:
- profiles: ["all"]
- build:
- context: ./
- dockerfile: ./docker/Dockerfile.clustering
- init: true
- command: celery -A freesound worker -l info -Q clustering
- volumes:
- - .:/code
- - ./freesound-data/:/freesound-data
- - ./freesound-data/clustering_index:/freesound-data/clustering_index
- depends_on:
- - rabbitmq
- - redis
- environment:
- - ENV_CELERY_WORKER=1
- - FS_USER_ID
-
# Similarity http server
similarity:
profiles: ["all"]
diff --git a/docker/Dockerfile.clustering b/docker/Dockerfile.clustering
index a04588cfe..c37511187 100644
--- a/docker/Dockerfile.clustering
+++ b/docker/Dockerfile.clustering
@@ -2,20 +2,10 @@ FROM freesound:2023-07
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
- libqt4-dev \
libyaml-dev \
- swig \
libsndfile1-dev \
&& rm -rf /var/lib/apt/lists/*
-# Gaia - https://github.com/MTG/gaia
-RUN git clone https://github.com/MTG/gaia.git /tmp/gaia \
- && cd /tmp/gaia \
- && git checkout v2.4.5 \
- && ./waf configure --with-python-bindings \
- && ./waf \
- && ./waf install \
- && cd / && rm -r /tmp/gaia
RUN mkdir /code
RUN mkdir /gaia_index
diff --git a/freesound/settings.py b/freesound/settings.py
index 8e1c33cb4..712b4227a 100644
--- a/freesound/settings.py
+++ b/freesound/settings.py
@@ -648,7 +648,6 @@
SEARCH_ENGINE_NUM_SIMILAR_SOUNDS_PER_QUERY = 500
USE_SEARCH_ENGINE_SIMILARITY = False
-SEARCH_ALLOW_DISPLAY_RESULTS_IN_MAP = True
MAX_SEARCH_RESULTS_IN_MAP_DISPLAY = 10000 # This is the maximum number of sounds that will be shown when using "display results in map" mode
# -------------------------------------------------------------------------------
@@ -736,16 +735,29 @@
# Search results clustering
# NOTE: celery configuration is set after the local settings import
-# Environment variables
-# '1' indicates that a process is running as a celery worker.
-# We get it from environment variable to avoid the need of a specific settings file for celery workers.
-# We enable the imports of clustering dependencies only in celery workers.
-IS_CELERY_WORKER = os.getenv('ENV_CELERY_WORKER', None) == "1"
+MAX_RESULTS_FOR_CLUSTERING = 1000
-# Determines whether to use or not the clustering feature.
-# Set to False by default (to be overwritten in local_settings.py)
-# When activated, Enables to do js calls & html clustering facets rendering
-ENABLE_SEARCH_RESULTS_CLUSTERING = False
+# One-day timeout for keeping clustering results. The cache timer is reset whenever the clustering is
+# requested, so popular queries that are performed at least once a day will always stay in cache
+# and won't be recomputed.
+CLUSTERING_CACHE_TIME = 24*60*60*1
+
+# Limit of distance when creating Nearest Neighbors graph
+CLUSTERING_MAX_NEIGHBORS_DISTANCE = 20
+
+# Number of sound examples extracted per cluster for cluster facet sound preview
+NUM_SOUND_EXAMPLES_PER_CLUSTER = 7
+
+# Number of most common tags extracted per cluster for clustering facet name
+NUM_TAGS_SHOWN_PER_CLUSTER = 3
+
+# Maximum number of clusters to show to the user
+CLUSTERING_NUM_MAX_CLUSTERS = 8
+
+# Timeout for returning clustering results to the user
+CLUSTERING_TASK_TIMEOUT = 30
+
+CLUSTERING_SIMILARITY_ANALYZER = FSDSINET_ANALYZER_NAME
# -------------------------------------------------------------------------------
# Rate limiting
@@ -887,6 +899,7 @@
CELERY_RESULT_SERIALIZER = 'json'
CELERY_ASYNC_TASKS_QUEUE_NAME = 'async_tasks_queue'
CELERY_SOUND_PROCESSING_QUEUE_NAME = 'sound_processing_queue'
+CELERY_CLUSTERING_TASK_QUEUE_NAME = 'clustering_queue'
# -------------------------------------------------------------------------------
diff --git a/freesound/static/bw-frontend/src/components/asyncSection.js b/freesound/static/bw-frontend/src/components/asyncSection.js
index 76343a4dc..7cdf54a16 100644
--- a/freesound/static/bw-frontend/src/components/asyncSection.js
+++ b/freesound/static/bw-frontend/src/components/asyncSection.js
@@ -15,11 +15,13 @@ const prepareAsyncSections = (container) => {
} else {
// Unexpected errors happened while processing request: show toast
showToast('Unexpected errors occurred while loading some of the content of this page. Please try again later...')
+ element.innerHTML = '';
}
};
req.onerror = () => {
- // Unexpected errors happened while processing request: show toast
+ // Unexpected errors happened while processing request: show toast and clear async element
showToast('Unexpected errors occurred while loading some of the content of this page. Please try again later...')
+ element.innerHTML = '';
};
// Send the form
diff --git a/freesound/static/bw-frontend/src/components/select.js b/freesound/static/bw-frontend/src/components/select.js
index 550b562a7..a519bf64f 100644
--- a/freesound/static/bw-frontend/src/components/select.js
+++ b/freesound/static/bw-frontend/src/components/select.js
@@ -121,6 +121,10 @@ function makeSelect(container) {
buttonElement.className = 'select-dropdown__button select-dropdown__button--' + i;
buttonElement.setAttribute('data-value', '');
buttonElement.setAttribute('type', 'button');
+ if (el.getAttribute('disabled') !== null){
+ buttonElement.setAttribute('disabled', 'disabled');
+ buttonElement.classList.add('opacity-020');
+ }
spanElement.className = 'select-dropdown select-dropdown--' + i;
iElement.className = 'zmdi bw-icon-chevron-up bw-select__chevron';
ulElement.className = 'select-dropdown__list select-dropdown__list--' + i;
diff --git a/freesound/static/bw-frontend/src/pages/search.js b/freesound/static/bw-frontend/src/pages/search.js
index 6f73a7178..cb2cb3756 100644
--- a/freesound/static/bw-frontend/src/pages/search.js
+++ b/freesound/static/bw-frontend/src/pages/search.js
@@ -4,10 +4,10 @@ import navbar from "../components/navbar";
// Main search input box behaviour
const searchInputBrowse = document.getElementById('search-input-browse');
-const tagsModeInput = document.getElementById('tags-mode');
-const tagsMode = tagsModeInput.value == '1';
const searchInputBrowsePlaceholder = searchInputBrowse.getAttribute("placeholder");
const removeSearchInputValueBrowse = document.getElementById('remove-content-search');
+const advancedSearchOptionsDiv = document.getElementById('advanced-search-options');
+const tagsMode = location.pathname.indexOf('/browse/tags/') > -1;
const updateRemoveSearchInputButtonVisibility = (searchInputElement) => {
if (searchInputElement.value.length) {
@@ -47,7 +47,7 @@ const searchFormIsVisible = () => {
let heroRect;
if (advancedSearchOptionsIsVisible()){
// If advanced search options is expanded, use that as heroRect to check if search form is visible
- heroRect = advanced_search_options_div.getBoundingClientRect()
+ heroRect = advancedSearchOptionsDiv.getBoundingClientRect()
} else {
if (!tagsMode){
heroRect = searchInputBrowse.getBoundingClientRect()
@@ -74,84 +74,35 @@ const checkShouldShowSearchInNavbar = throttle(() => {
window.addEventListener('scroll', checkShouldShowSearchInNavbar)
-/*
- ADVANCED SEARCH STUFF
- The functions below correspond to the javascript bits for handling the advanced search options
- The JS code is old and probably doing things in wrong ways (and more complex that it should)
- This should be completely refactored, but to avoid changes in backend and for compatibility between
- BeastWhoosh and Nightingale interfaces, we leave everything as is for now (just with small updates to
- avoid using JQuery).
-*/
+// Advanced search options behaviour
-var search_form_element = document.getElementById('search_form');
-var search_page_navbar_form = document.getElementById('search-page-navbar-form');
-var advanced_search_options_div = document.getElementById('advanced-search-options');
-var advanced_search_hidden_field = document.getElementById('advanced_search_hidden');
-var toggle_advanced_search_options_element = document.getElementById('toggle_advanced_search_options');
-var filter_query_element = document.getElementById('filter_query');
-var filter_duration_min_element = document.getElementById('filter_duration_min');
-var filter_duration_max_element = document.getElementById('filter_duration_max');
-var filter_is_geotagged_element = document.getElementById('filter_is_geotagged');
-var filter_in_remix_group_element = document.getElementById('filter_in_remix_group');
-var sort_by_element = document.getElementById('sort-by');
-var group_by_pack_element = document.getElementById('group_by_pack');
-var only_sounds_with_pack_element = document.getElementById('only_sounds_with_pack');
-var use_compact_mode_element = document.getElementById('use_compact_mode');
-var use_map_mode_element = document.getElementById('use_map_mode');
-
-function update_hidden_compact_mode_element() {
- var hiddenElement = document.getElementById('use_compact_mode_hidden');
- if (use_compact_mode_element.checked) {
- hiddenElement.value = "1";
- } else {
- hiddenElement.value = "0";
- }
-}
-
-update_hidden_compact_mode_element()
-use_compact_mode_element.addEventListener('change', function() {
- update_hidden_compact_mode_element()
-})
-
-function update_hidden_map_mode_element() {
- var hiddenElement = document.getElementById('use_map_mode_hidden');
- if (use_map_mode_element.checked) {
- hiddenElement.value = "1";
- } else {
- hiddenElement.value = "0";
- }
-}
-
-update_hidden_map_mode_element()
-use_map_mode_element.addEventListener('change', function() {
- update_hidden_map_mode_element()
-})
+const toggleAdvancedSearchOptionsElement = document.getElementById('toggle_advanced_search_options');
function advancedSearchOptionsIsVisible()
{
- return advanced_search_hidden_field.value === "1";
+ return advancedSearchOptionsDiv.dataset.visible === "1";
}
function updateToggleAdvancedSearchOptionsText()
{
if (advancedSearchOptionsIsVisible()){
- toggle_advanced_search_options_element.innerHTML = 'Hide advanced search options';
+ toggleAdvancedSearchOptionsElement.innerHTML = 'Hide advanced search options';
} else {
- toggle_advanced_search_options_element.innerHTML = 'Show advanced search options';
+ toggleAdvancedSearchOptionsElement.innerHTML = 'Show advanced search options';
}
}
function showAdvancedSearchOptions()
{
- advanced_search_hidden_field.value = "1";
- advanced_search_options_div.style.display = 'block';
+ advancedSearchOptionsDiv.dataset.visible = "1";
+ advancedSearchOptionsDiv.style.display = 'block';
updateToggleAdvancedSearchOptionsText();
}
function hideAdvancedSearchOptions()
{
- advanced_search_hidden_field.value = "0";
- advanced_search_options_div.style.display = 'none';
+ advancedSearchOptionsDiv.dataset.visible = "0";
+ advancedSearchOptionsDiv.style.display = 'none';
updateToggleAdvancedSearchOptionsText();
}
@@ -163,196 +114,75 @@ function toggleAdvancedSearchOptions(){
}
}
-toggle_advanced_search_options_element.addEventListener('click', toggleAdvancedSearchOptions);
+toggleAdvancedSearchOptionsElement.addEventListener('click', toggleAdvancedSearchOptions);
-function set_hidden_grouping_value(){
-
- var hiddenElement = document.getElementById('group_by_pack_hidden');
- if (group_by_pack_element.checked) {
- hiddenElement.value = "1";
- } else {
- hiddenElement.value = "";
- }
-}
+// Track changes in advanced search options
-function set_hidden_only_sounds_with_pack_value(){
- var element = document.getElementById('only_sounds_with_pack');
- var hiddenElement = document.getElementById('only_sounds_with_pack_hidden');
- if (element.checked) {
- hiddenElement.value = "1";
- } else {
- hiddenElement.value = "";
- }
-}
-
-// Return the value of a filter given its name
-// If filter has a range, optional "range" parameter must be set to "min or "max"
-function getFilterValue(name, range)
-{
- if (!range) { range = "min"}
-
- var filter_query_element = document.getElementById('filter_query');
- var value = filter_query_element.value;
- var position_value = value.search(name) + (name + ":").length
- if (value.search((name + ":")) !== -1)
- {
- if (value[position_value] === "[") // Is range (with spaces)
- {
- var aux_value = value.substring(position_value + 1)
- var position_end = position_value + aux_value.search("]") + 2
-
- var range_string = value.substring(position_value + 1, position_end -1) // Without [ ]
- var parts = range_string.split(" ")
- if (range === "min"){
- return parts[0]
- } else if (range === "max") {
- return parts[2]
- }
- }
- else if (value[position_value] === "\"") // Is string (with spaces)
- {
- aux_value = value.substring(position_value + 1)
- position_end = position_value + aux_value.search("\"") + 2
- return value.substring(position_value, position_end)
+let initialAdvancedSearchInputValues = undefined; // NOTE: this is filled out in onDocumentReady function
+const serializeAdvanceSearchOptionsInputsData = () => {
+ const values = [];
+ advancedSearchOptionsDiv.getElementsByTagName("input").forEach(inputElement => {
+ if (inputElement.type == "hidden"){
+ // Don't include hidden elements as only the visible items are necessary
+ } else if (inputElement.type == "checkbox"){
+ values.push(inputElement.checked);
+ } else {
+ values.push(inputElement.value);
}
- else // Is number or normal text (without spaces)
- {
- aux_value = value.substring(position_value + 1)
- if (aux_value.search(" ") !== -1){
- position_end = position_value + aux_value.search(" ") + 1
- } else {
- position_end = value.length
- }
- return value.substring(position_value, position_end)
- }
- } else {
- return ""
- }
+ });
+ return values.join(",");
}
-// Remove a filter given the full tag ex: type:aiff, pack:"pack name"
-function removeFilter(tag)
-{
- var filter_query_element = document.getElementById('filter_query');
- var value = filter_query_element.value;
- var cleaned = value.replace(tag + " ", "").replace(tag, "").trim();
- filter_query_element.value = cleaned;
+const advancedSearchOptionsHaveChangedSinceLastQuery = () => {
+ const currentAdvancedSearchInputValues = serializeAdvanceSearchOptionsInputsData();
+ return initialAdvancedSearchInputValues != currentAdvancedSearchInputValues;
}
-function onDocumentReady(){
- // Fill advanced search fields that were passed through the f parameter
- // Duration
-
- if (getFilterValue("duration","min") === ""){
- filter_duration_min_element.value = "0";
- } else {
- filter_duration_min_element.value = getFilterValue("duration","min");
- }
-
- if (getFilterValue("duration","max") === ""){
- filter_duration_max_element.value = "*";
- } else {
- filter_duration_max_element.value = getFilterValue("duration","max");
- }
-
- // Geotagged
- if (getFilterValue("is_geotagged") === "1"){
- filter_is_geotagged_element.checked = true;
- }
-
- // Remix filter
- if (getFilterValue("in_remix_group") === "1"){
- // NOTE we only check "is_remix" and don't check "was_remixed" because these will go together
- filter_in_remix_group_element.checked = true;
- }
-
- // Update the text of the button to toggle advanced search options panel
- updateToggleAdvancedSearchOptionsText();
-
- // Store values of advanced search filters so later we can check if they were modified
- initialAdvancedSearchInputValues = serializeAdvanceSearchOptionsInputsData();
+const onAdvancedSearchOptionsInputsChange = () => {
+ document.getElementById('avanced-search-apply-button').disabled = !advancedSearchOptionsHaveChangedSinceLastQuery();
}
-document.addEventListener('DOMContentLoaded', onDocumentReady);
-
-function addAdvancedSearchOptionsFilters()
-{
- // Remove previously existing advanced options filters (will be replaced by current ones)
- var existing_duration_filter = "duration:[" + getFilterValue("duration","min") + " TO " + getFilterValue("duration","max") + "]";
- removeFilter(existing_duration_filter);
- removeFilter("is_geotagged:1");
- removeFilter("in_remix_group:1");
-
- // if advanced options is activated add all updated filters
- if (advanced_search_hidden_field.value === "1")
- {
- // Create and add new filter with all the advanced options
- var filter = "";
-
- // Duration filter
- var duration_min = parseFloat(filter_duration_min_element.value);
- var duration_max = parseFloat(filter_duration_max_element.value);
-
- if ((duration_min >= 0.0) || (duration_max >= 0.0)) {
- var duration_filter = "";
- if ((duration_min >= 0.0) && (duration_max >= 0.0)) { // Both min and max have been set
- if (duration_max < duration_min) {
- // interchange values if duration_min > duration_max
- var duration_aux = duration_min;
- duration_min = duration_max;
- duration_max = duration_aux;
- }
- duration_filter = "duration:[" + duration_min + " TO " + duration_max + "]";
- } else if (duration_min >= 0.0) { // Only minimum has been set
- duration_filter = "duration:[" + duration_min + " TO *]";
- } else if (duration_max >= 0.0) { // Only maximum has been set
- duration_filter = "duration:[* TO " + duration_max + "]";
- }
- filter = filter + duration_filter;
- }
+advancedSearchOptionsDiv.getElementsByTagName("input").forEach(inputElement => {
+ inputElement.addEventListener('change', evt => {
+ onAdvancedSearchOptionsInputsChange();
+ });
+ inputElement.addEventListener('input', evt => {
+ onAdvancedSearchOptionsInputsChange();
+ });
+});
- // Is geotagged filter
- if (filter_is_geotagged_element.checked){
- if (filter !== ""){
- filter = filter + " ";
- }
- filter = filter + "is_geotagged:1";
- }
+// Other stuff: form submission, navbar search form, hidden checkboxes etc.
- // Is remix filter
- if (filter_in_remix_group_element.checked){
- if (filter !== ""){
- filter = filter + " ";
- }
- filter = filter + "in_remix_group:1";
- }
+var searchFormElement = document.getElementById('search_form');
- // Update general filter with the advanced options filter
- var value = filter_query_element.value;
- if (value !== ""){
- filter_query_element.value = value + " " + filter;
- } else {
- filter_query_element.value = filter;
- }
- }
+searchFormElement.getElementsByClassName('bw-checkbox').forEach(checkbox => {
+ const hiddenCheckbox = document.createElement('input');
+ hiddenCheckbox.type = 'hidden';
+ hiddenCheckbox.name = checkbox.name;
+ checkbox.name = ''; // remove name attribute so checkbox is not submitted (the hidden input will be submitted instead)
+ hiddenCheckbox.value = checkbox.checked ? '1' : '0';
+ checkbox.addEventListener('change', evt => { // Update hidden checkbox value when checkbox is changed
+ hiddenCheckbox.value = checkbox.checked ? '1' : '0';
+ });
+ checkbox.parentNode.appendChild(hiddenCheckbox);
+});
+
+// Make the search select element submit the form when changed
+var sortByElement = document.getElementById('id_sort_by');
+if (sortByElement !== null){
+ sortByElement.addEventListener('change', function() {
+ searchFormElement.submit();
+ })
}
-search_form_element.addEventListener('submit', function() {
- addAdvancedSearchOptionsFilters();
-})
-
-sort_by_element.addEventListener('change', function() {
- addAdvancedSearchOptionsFilters();
- search_form_element.submit();
-})
-
-group_by_pack_element.addEventListener('change', function() {
- set_hidden_grouping_value();
-})
-
-only_sounds_with_pack_element.addEventListener('change', function() {
- set_hidden_only_sounds_with_pack_value();
+// Make radio cluster elements submit the form when changed
+document.getElementsByName('cid').forEach(radio => {
+ radio.addEventListener('change', (evt) => {
+ setTimeout(() => {
+ searchFormElement.submit();
+ }, 100); // Give it a little time to update the radio widget before submitting
+ });
})
document.body.addEventListener('keydown', evt => {
@@ -360,60 +190,31 @@ document.body.addEventListener('keydown', evt => {
if(evt.keyCode === ENTER_KEY){
// If ENTER key is pressed and search form is visible, trigger form submission
if (searchFormIsVisible()){
- addAdvancedSearchOptionsFilters();
- search_form_element.submit();
+ searchFormElement.submit();
}
}
})
-if (search_page_navbar_form !== null){
- search_page_navbar_form.addEventListener('submit', function(evt){
+var searchPageNavbarForm = document.getElementById('search-page-navbar-form');
+if (searchPageNavbarForm !== null){
+ searchPageNavbarForm.addEventListener('submit', function(evt){
// Prevent default form submission
if (evt.preventDefault) evt.preventDefault();
// Copy input element contents to the main input element and do submission of the main form instead of the navbar one
const searchInputBrowseNavbar = document.getElementById('search-input-browse-navbar');
searchInputBrowse.value = searchInputBrowseNavbar.value;
- addAdvancedSearchOptionsFilters();
- search_form_element.submit();
+ searchFormElement.submit();
// It is also needed to return false to prevent default form submission
return false;
})
}
-// Enable/disable "apply adbanced search filters" when filters are modified
-
-const serializeAdvanceSearchOptionsInputsData = () => {
- const values = [];
- advanced_search_options_div.getElementsByTagName("input").forEach(inputElement => {
- if (inputElement.type == "hidden"){
- // Don't include hidden elements as only the visible items are necessary
- } else if (inputElement.type == "checkbox"){
- values.push(inputElement.checked);
- } else {
- values.push(inputElement.value);
- }
- });
- return values.join(",");
-}
-
-let initialAdvancedSearchInputValues = undefined; // NOTE: this is filled out in onDocumentReady function
-
-const advancedSearchOptionsHaveChangedSinceLastQuery = () => {
- const currentAdvancedSearchInputValues = serializeAdvanceSearchOptionsInputsData();
- return initialAdvancedSearchInputValues != currentAdvancedSearchInputValues;
-}
-
-const onAdvancedSearchOptionsInputsChange = () => {
- document.getElementById('avanced-search-apply-button').disabled = !advancedSearchOptionsHaveChangedSinceLastQuery();
+function onDocumentReady(){
+ // Update the text of the button to toggle advanced search options panel
+ updateToggleAdvancedSearchOptionsText();
+ // Store values of advanced search filters so later we can check if they were modified
+ initialAdvancedSearchInputValues = serializeAdvanceSearchOptionsInputsData();
}
-
-advanced_search_options_div.getElementsByTagName("input").forEach(inputElement => {
- inputElement.addEventListener('change', evt => {
- onAdvancedSearchOptionsInputsChange();
- });
- inputElement.addEventListener('input', evt => {
- onAdvancedSearchOptionsInputsChange();
- });
-});
\ No newline at end of file
+document.addEventListener('DOMContentLoaded', onDocumentReady);
\ No newline at end of file
diff --git a/freesound/static/bw-frontend/styles/pages/search.scss b/freesound/static/bw-frontend/styles/pages/search.scss
index 96aafed0c..37ca81a75 100644
--- a/freesound/static/bw-frontend/styles/pages/search.scss
+++ b/freesound/static/bw-frontend/styles/pages/search.scss
@@ -56,15 +56,6 @@
}
}
-.bw-search__advanced-search-filter-section {
-
- padding-top: $small-spacing;
-
- .bw-search__filter-section-name > span {
- font-size: 18px;
- }
-}
-
.bw-search__player-small {
flex: 0 0 120px;
}
@@ -145,7 +136,7 @@
margin-left: 20px;
}
-.bw-search__filter-duration {
+.bw-search__filter-range {
color: $navy-grey;
font-size: 14px;
@@ -153,13 +144,13 @@
margin-left: 12px;
}
- .bw-search_input-duration {
+ .bw-search_input-range {
padding: 16px 13px;
border: 1px solid $navy-light-grey;
background-color: $background-input;
border-radius: 5px;
font-size: 14px;
- max-width: 85px;
+ max-width: 65px;
&::-webkit-calendar-picker-indicator {
display: none;
@@ -172,6 +163,23 @@
}
}
+.bw-search_input {
+ border: 1px solid $border-input;
+ color: $black;
+ background-color: $background-input;
+ padding: 10px 20px;
+ border-radius: 5px;
+
+ &::placeholder {
+ color: $navy-light-grey;
+ }
+
+ &:focus {
+ background-color: $white;
+ border: 1px solid $black;
+ }
+}
+
.browse__search-overview-sorter {
display: flex;
align-items: center;
diff --git a/freesound/urls.py b/freesound/urls.py
index 26ad58917..11152e57b 100644
--- a/freesound/urls.py
+++ b/freesound/urls.py
@@ -99,7 +99,7 @@
path('contact/', support.views.contact, name="contact"),
path('search/', search.views.search, name='sounds-search'),
- path('clustering_facet/', search.views.clustering_facet, name='clustering-facet'),
+ path('search/clusters_section/', search.views.clusters_section, name='clusters-section'),
path('clustered_graph/', search.views.clustered_graph, name='clustered-graph-json'),
path('query_suggestions/', search.views.query_suggestions, name='query-suggestions'),
diff --git a/geotags/tests.py b/geotags/tests.py
index 30778961a..c3eaa009b 100644
--- a/geotags/tests.py
+++ b/geotags/tests.py
@@ -94,5 +94,5 @@ def test_browse_geotags_case_insensitive(self):
def test_browse_geotags_for_query(self):
resp = self.client.get(reverse('geotags-query') + f'?q=barcelona')
- check_values = {'query_description': 'barcelona'}
+ check_values = {'query_description': '"barcelona"'}
self.check_context(resp.context, check_values)
diff --git a/geotags/views.py b/geotags/views.py
index ffe0ccfa2..208faa279 100644
--- a/geotags/views.py
+++ b/geotags/views.py
@@ -34,9 +34,9 @@
from django.views.decorators.clickjacking import xframe_options_exempt
from accounts.models import Profile
-from search.views import search_prepare_parameters
from sounds.models import Sound, Pack
from utils.logging_filters import get_client_ip
+from utils.search.search_query_processor import SearchQueryProcessor
from utils.search.search_sounds import perform_search_engine_query
from utils.username import redirect_if_old_username_or_404, raise_404_if_user_is_deleted
@@ -48,27 +48,6 @@ def log_map_load(map_type, num_geotags, request):
'map_type': map_type, 'num_geotags': num_geotags, 'ip': get_client_ip(request)}))
-def update_query_params_for_map_query(query_params, preserve_facets=False):
- # Force is_geotagged filter to be present
- if query_params['query_filter']:
- if 'is_geotagged' not in query_params['query_filter']:
- query_params['query_filter'] = query_params['query_filter'] + ' is_geotagged:1'
- else:
- query_params['query_filter'] = 'is_geotagged:1'
- # Force one single page with "all" results, and don't group by pack
- query_params.update({
- 'current_page': 1,
- 'num_sounds': settings.MAX_SEARCH_RESULTS_IN_MAP_DISPLAY,
- 'group_by_pack': False,
- 'only_sounds_with_pack': False,
- 'field_list': ['id', 'score', 'geotag']
- })
- if not preserve_facets:
- # No need to compute facets for the bytearray, but it might be needed for the main query
- if 'facets' in query_params:
- del query_params['facets']
-
-
def generate_bytearray(sound_queryset_or_list):
# sounds as bytearray
packed_sounds = io.BytesIO()
@@ -169,8 +148,11 @@ def geotags_for_query_barray(request):
results_docs = cache.get(results_cache_key)
else:
# Otherwise, perform a search query to get the results
- query_params, _, _ = search_prepare_parameters(request)
- update_query_params_for_map_query(query_params)
+ sqp = SearchQueryProcessor(request)
+ query_params = sqp.as_query_params()
+ if 'facets' in query_params:
+ # No need to compute facets for bytearray query
+ del query_params['facets']
results, _ = perform_search_engine_query(query_params)
results_docs = results.docs
@@ -283,20 +265,6 @@ def for_pack(request, username, pack_id):
def for_query(request):
tvars = _get_geotags_query_params(request)
request_parameters_string = request.get_full_path().split('?')[-1]
- q = request.GET.get('q', None)
- if q == '':
- q = None
- f = request.GET.get('f', None)
- query_description = ''
- if q is None and f is None:
- query_description = 'Empty query'
- elif q is not None and f is not None:
- query_description = f'{q} (some filters applied)'
- else:
- if q is not None:
- query_description = q
- if f is not None:
- query_description = f'Empty query with some filters applied'
tvars.update({
'tag': None,
'username': None,
@@ -305,7 +273,7 @@ def for_query(request):
'query_params': request_parameters_string,
'query_params_encoded': urllib.parse.quote(request_parameters_string),
'query_search_page_url': reverse('sounds-search') + f'?{request_parameters_string}',
- 'query_description': query_description,
+ 'query_description': SearchQueryProcessor(request).get_textual_description(),
'url': reverse('geotags-for-query-barray') + f'?{request_parameters_string}',
})
return render(request, 'geotags/geotags.html', tvars)
diff --git a/requirements.in b/requirements.in
index c1b929c73..d2b0d8080 100644
--- a/requirements.in
+++ b/requirements.in
@@ -14,7 +14,6 @@ django-extensions==3.1.5
django-modeladmin-reorder==0.3.1
django-multiupload==0.6.1
django-oauth-toolkit==2.2.0
-oauthlib
django-object-actions==4.1.0
django-ratelimit==3.0.1
django-recaptcha==3.0.0
@@ -32,24 +31,29 @@ future~=0.18.2
graypy==0.2.12
gunicorn==21.2.0
ipython==8.14.0
+jinja2==3.0.3 # This version needed for sphinx to not raise errors
+luqum==0.13.0
mapbox==0.18.1
markdown==3.4.1
-networkx==1.5
+networkx==3.2.1
numpy==1.24.3
+oauthlib
+openpyxl==3.1.0 # for reading .xlsx files (but not .xls)
Pillow==9.5.0
pip-tools==7.1.0
psycopg2-binary==2.9.6
PyJWT==2.6.0
pyparsing==2.4.7
-pysolr==3.10.0b1
pysndfile==1.4.4
+pysolr==3.10.0b1
+python-louvain==0.16 # community detection in clustering
pytz==2023.3
PyYAML==6.0.1
redis==3.2.0
+scikit-learn==1.4.1.post1 # clustering
+scipy==1.12.0 # clustering
sentry-sdk[django]~=1.31
Sphinx==1.6.3
stripe==2.28.1
xlrd==2.0.1 # for reading .xls files (but not .xlsx)
-openpyxl==3.1.0 # for reading .xlsx files (but not .xls)
zenpy==1.1.3
-jinja2==3.0.3 # This version needed for sphinx to not raise errors
diff --git a/requirements.txt b/requirements.txt
index 37b90ce6f..cfad9dab8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
-# pip-compile
+# pip-compile requirements.in
#
akismet==1.0.1
# via -r requirements.in
@@ -10,8 +10,6 @@ alabaster==0.7.13
# via sphinx
amqp==5.2.0
# via kombu
-appnope==0.1.3
- # via ipython
asgiref==3.7.2
# via django
asttokens==2.4.1
@@ -176,10 +174,14 @@ jmespath==1.0.1
# via
# boto3
# botocore
+joblib==1.3.2
+ # via scikit-learn
jwcrypto==1.5.1
# via django-oauth-toolkit
kombu==5.3.4
# via celery
+luqum==0.13.0
+ # via -r requirements.in
mapbox==0.18.1
# via -r requirements.in
markdown==3.4.1
@@ -190,12 +192,17 @@ matplotlib-inline==0.1.6
# via ipython
msgpack==1.0.7
# via cachecontrol
-networkx==1.5
- # via -r requirements.in
+networkx==3.2.1
+ # via
+ # -r requirements.in
+ # python-louvain
numpy==1.24.3
# via
# -r requirements.in
# pysndfile
+ # python-louvain
+ # scikit-learn
+ # scipy
oauthlib==3.2.2
# via
# -r requirements.in
@@ -220,6 +227,8 @@ pillow==9.5.0
# via -r requirements.in
pip-tools==7.1.0
# via -r requirements.in
+ply==3.11
+ # via luqum
polyline==2.0.1
# via mapbox
prompt-toolkit==3.0.43
@@ -259,6 +268,8 @@ python-dateutil==2.8.2
# freezegun
# mapbox
# zenpy
+python-louvain==0.16
+ # via -r requirements.in
pytz==2023.3
# via
# -r requirements.in
@@ -284,6 +295,12 @@ requests==2.31.0
# zenpy
s3transfer==0.6.2
# via boto3
+scikit-learn==1.4.1.post1
+ # via -r requirements.in
+scipy==1.12.0
+ # via
+ # -r requirements.in
+ # scikit-learn
sentry-sdk[django]==1.39.1
# via -r requirements.in
sgmllib3k==1.0.0
@@ -314,6 +331,8 @@ stack-data==0.6.3
# via ipython
stripe==2.28.1
# via -r requirements.in
+threadpoolctl==3.3.0
+ # via scikit-learn
toml==0.10.2
# via autopep8
tomli==2.0.1
diff --git a/requirements_clustering.txt b/requirements_clustering.txt
deleted file mode 100644
index 31dc09c66..000000000
--- a/requirements_clustering.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-networkx==2.2
-python-louvain==0.13
-scikit-learn==0.19.1
-scipy==0.18.1
-six
diff --git a/search/templatetags/search.py b/search/templatetags/search.py
index b847f8a7a..541851ea1 100644
--- a/search/templatetags/search.py
+++ b/search/templatetags/search.py
@@ -21,18 +21,30 @@
from django import template
from django.conf import settings
-from urllib.parse import quote_plus
from sounds.models import License
+from utils.search import search_query_processor_options
from utils.tags import annotate_tags
register = template.Library()
@register.inclusion_tag('search/facet.html', takes_context=True)
-def display_facet(context, flt, facet, facet_type, title=""):
- facet = annotate_tags([dict(name=f[0], count=f[1]) for f in facet if f[0] != "0"],
- sort="name", small_size=0.7, large_size=2.0)
+def display_facet(context, facet_name):
+ sqp = context['sqp']
+ facets = context['facets']
+ facet_type = {'tag': 'cloud', 'username': 'cloud'}.get(facet_name, 'list')
+ facet_title = {
+ 'tag': 'Related tags',
+ 'username': 'Related users',
+ 'grouping_pack': 'Packs',
+ 'license': 'Licenses'
+ }.get(facet_name, facet_name.capitalize())
+ if facet_name in facets:
+ facet = annotate_tags([dict(value=f[0], count=f[1]) for f in facets[facet_name] if f[0] != "0"],
+ sort="value", small_size=0.7, large_size=2.0)
+ else:
+ facet = []
# If the filter is grouping_pack and there are elements which do not contain the character "_" means that
# these sounds do not belong to any pack (as grouping pack values should by "packId_packName" if there is a pack
@@ -41,81 +53,80 @@ def display_facet(context, flt, facet, facet_type, title=""):
# the element name is a single number that does not contain the character "_"
# We add the extra Free Cultural Works license facet
- if flt == 'license':
+ if facet_name == 'license':
fcw_count = 0
only_fcw_in_facet = True
for element in facet:
- if element['name'].lower() == 'attribution' or element['name'].lower() == 'creative commons 0':
+ if element['value'].lower() == 'attribution' or element['value'].lower() == 'creative commons 0':
fcw_count += element['count']
else:
only_fcw_in_facet = False
if fcw_count and not only_fcw_in_facet:
facet.append({
- 'name': settings.FCW_FILTER_VALUE,
+ 'value': settings.FCW_FILTER_VALUE,
'count': fcw_count,
'size': 1.0,
})
-
- filtered_facet = []
- filter_query = quote_plus(context['filter_query'])
+
+ # Remove "no pack" elements from pack facet (no pack elements are those in which "grouping pack" only has the sound id and not any pack id/name)
+ if facet_name == "grouping_pack":
+ facet = [element for element in facet if '_' in element['value']]
+
for element in facet:
- if flt == "grouping_pack":
- if element['name'].count("_") > 0:
- # We also modify the display name to remove the id
- element['display_name'] = element['name'][element['name'].find("_")+1:]
- else:
- # If facet element belongs to "grouping pack" filter but does not have the "_" character in it, it
- # means this corresponds to the "no pack" grouping which we don't want to show as a facet element.
- continue
- elif element['name'] == settings.FCW_FILTER_VALUE:
- element['display_name'] = "Approved for Free Cultural Works"
- elif flt == 'license':
+ # Set display values (the values as they'll be shown in the UI)
+ if facet_name == "grouping_pack":
+ # Modify the display name to remove the pack id
+ element['display_value'] = element['value'][element['value'].find("_")+1:]
+ elif element['value'] == settings.FCW_FILTER_VALUE:
+ element['display_value'] = "Approved for Free Cultural Works"
+ elif facet_name == 'license':
# License field in solr is case insensitive and will return facet names in lowercase.
# We need to properly capitalize them to use official CC license names.
- element['display_name'] = element['name'].title().replace('Noncommercial', 'NonCommercial')
+ element['display_value'] = element['value'].title().replace('Noncommercial', 'NonCommercial')
else:
- element['display_name'] = element['name']
+ # In all other cases, use the value as is for display purposes
+ element['display_value'] = element['value']
- if element['name'] == settings.FCW_FILTER_VALUE:
- # If adding the FCW filter (which has more complex logic) don't wrap the filter in " as it breaks the syntax parsing
- element['params'] = f"{filter_query} {flt}:{quote_plus(element['name'])}"
+ # Set the URL to add facet values as filters
+ if element["value"].startswith('('):
+ # If the filter value is a "complex" operation, don't wrap it in quotes
+ filter_str = f'{facet_name}:{element["value"]}'
+ elif element["value"].isdigit():
+ # If the filter value is a digit, also don't wrap it in quotes
+ filter_str = f'{facet_name}:{element["value"]}'
else:
- element['params'] = f"{filter_query} {flt}:\"{quote_plus(element['name'])}\""
-
- element['id'] = f"{flt}--{quote_plus(element['name'])}"
- element['add_filter_url'] = '.?advanced={}&g={}&only_p={}&q={}&f={}&s={}&w={}'.format(
- context['advanced'],
- context['group_by_pack_in_request'],
- context['only_sounds_with_pack'],
- context['search_query'],
- element['params'],
- context['sort'] if context['sort'] is not None else '',
- context['weights'] or ''
- )
- if context['similar_to'] is not None:
- element['add_filter_url'] += '&similar_to={}'.format(context['similar_to'])
- if context['use_map_mode'] == True:
- element['add_filter_url'] += '&mm=1'
- filtered_facet.append(element)
-
- # We sort the facets by count. Also, we apply an opacity filter on "could" type pacets
- if filtered_facet:
- filtered_facet = sorted(filtered_facet, key=lambda x: x['count'], reverse=True)
- max_count = max([element['count'] for element in filtered_facet])
- for element in filtered_facet:
+ # Otherwise wrap in quotes
+ filter_str = f'{facet_name}:"{element["value"]}"'
+ element['add_filter_url'] = sqp.get_url(add_filters=[filter_str])
+
+ # We sort the facets by count. Also, we apply an opacity filter on "cloud" type facets
+ if facet:
+ facet = sorted(facet, key=lambda x: x['count'], reverse=True)
+ max_count = max([element['count'] for element in facet])
+ for element in facet:
element['weight'] = element['count'] / max_count
# We also add icons to license facets
- if flt == 'license':
- for element in filtered_facet:
- if element['name'] != settings.FCW_FILTER_VALUE:
- element['icon'] = License.bw_cc_icon_name_from_license_name(element['display_name'])
+ if facet_name == 'license':
+ for element in facet:
+ if element['value'] != settings.FCW_FILTER_VALUE:
+ element['icon'] = License.bw_cc_icon_name_from_license_name(element['display_value'])
else:
element['icon'] = 'fcw'
- context.update({
- "facet": filtered_facet,
- "type": facet_type,
- "filter": flt,
- "title": title
- })
- return context
+
+ return {'type': facet_type, 'title': facet_title, 'facet': facet}
+
+
+@register.inclusion_tag('search/search_option.html', takes_context=True)
+def display_search_option(context, option_name, widget=None):
+ sqp = context['sqp']
+ option = sqp.options[option_name]
+ if widget is None:
+ # If a widget is not provided as a parameter, use a sensible default
+ widget = {
+ search_query_processor_options.SearchOptionBool: 'checkbox',
+ search_query_processor_options.SearchOptionStr: 'text',
+ search_query_processor_options.SearchOptionChoice: 'select',
+ }.get(type(option), 'text')
+ label = option.label if option.label else option_name.capitalize().replace('_', ' ')
+ return {'option': option, 'option_name': option_name, 'label': label, 'widget': widget}
\ No newline at end of file
diff --git a/search/tests.py b/search/tests.py
index 388a55337..227b07523 100644
--- a/search/tests.py
+++ b/search/tests.py
@@ -18,14 +18,19 @@
# See AUTHORS file.
#
+from django.contrib.auth.models import User
from django.core.cache import cache
-from django.test import TestCase
+from django.conf import settings
+from django.test import TestCase, RequestFactory
from django.test.utils import skipIf, override_settings
from django.urls import reverse
+from utils.search import search_query_processor
from sounds.models import Sound
from utils.search import SearchResults, SearchResultsPaginator
from utils.test_helpers import create_user_and_sounds
+from utils.url import ComparableUrl
from unittest import mock
+from django.contrib.auth.models import AnonymousUser
def create_fake_search_engine_results():
@@ -90,7 +95,7 @@ def return_successful_clustering_results(sound_id_1, sound_id_2, sound_id_3, sou
'multigraph': False
},
'finished': True,
- 'result': [
+ 'clusters': [
[
sound_id_1,
sound_id_2
@@ -100,12 +105,12 @@ def return_successful_clustering_results(sound_id_1, sound_id_2, sound_id_3, sou
sound_id_4
],
],
- 'error':False
+ 'cluster_ids': [23, 24],
+ 'cluster_names': ['tag1 tag2 tag3', 'tag1 tag2 tag3'],
+ 'example_sounds_data': [['a'], ['b', 'c']],
}
-pending_clustering_results = {'finished': False, 'error': False}
-
-failed_clustering_results = {'finished': False, 'error': True}
+failed_clustering_results = None
def create_fake_perform_search_engine_query_response(num_results=15):
@@ -163,12 +168,12 @@ def test_search_page_num_queries(self, perform_search_engine_query):
# Now check number of queries when displaying results as packs (i.e., searching for packs)
cache.clear()
with self.assertNumQueries(6):
- self.client.get(reverse('sounds-search') + '?only_p=1')
+ self.client.get(reverse('sounds-search') + '?dp=1')
# Also check packs when displaying in grid mode
cache.clear()
with self.assertNumQueries(6):
- self.client.get(reverse('sounds-search') + '?only_p=1&cm=1')
+ self.client.get(reverse('sounds-search') + '?dp=1&cm=1')
with override_settings(USE_SEARCH_ENGINE_SIMILARITY=False):
# When not using search engine similarity, there'll be one less query performed as similarity state is retrieved directly from sound object
@@ -176,31 +181,12 @@ def test_search_page_num_queries(self, perform_search_engine_query):
# Now check number of queries when displaying results as packs (i.e., searching for packs)
cache.clear()
with self.assertNumQueries(5):
- self.client.get(reverse('sounds-search') + '?only_p=1')
+ self.client.get(reverse('sounds-search') + '?dp=1')
# Also check packs when displaying in grid mode
cache.clear()
with self.assertNumQueries(5):
- self.client.get(reverse('sounds-search') + '?only_p=1&cm=1')
-
- @mock.patch('search.views.perform_search_engine_query')
- def test_search_page_with_filters(self, perform_search_engine_query):
- perform_search_engine_query.return_value = self.perform_search_engine_query_response
-
- # 200 response on sound search page access
- resp = self.client.get(reverse('sounds-search'), {"f": 'grouping_pack:"Clutter" tag:"acoustic-guitar"'})
- self.assertEqual(resp.status_code, 200)
-
- # In this case we check if a non valid filter is applied it should be ignored.
- # grouping_pack it shouldn't be in filter_query_split, since is a not valid filter
- self.assertEqual(resp.context['filter_query_split'][0]['name'], 'tag:"acoustic-guitar"')
- self.assertEqual(len(resp.context['filter_query_split']), 1)
-
- resp = self.client.get(reverse('sounds-search'), {"f": 'grouping_pack:"19894_Clutter" tag:"acoustic-guitar"'})
- # Now we check if two valid filters are applied, then they are present in filter_query_split
- # Which means they are going to be displayed
- self.assertEqual(resp.status_code, 200)
- self.assertEqual(len(resp.context['filter_query_split']), 2)
+ self.client.get(reverse('sounds-search') + '?dp=1&cm=1')
class SearchResultClustering(TestCase):
@@ -217,44 +203,34 @@ def setUp(self):
self.sound_id_preview_urls = sound_id_preview_urls
self.successful_clustering_results = return_successful_clustering_results(*sound_ids)
- self.pending_clustering_results = pending_clustering_results
+ self.num_sounds_clustering_results = [2, 2]
self.failed_clustering_results = failed_clustering_results
- @skipIf(True, "Clustering not yet enabled in BW")
- @mock.patch('search.views.cluster_sound_results')
- def test_successful_search_result_clustering_view(self, cluster_sound_results):
- cluster_sound_results.return_value = self.successful_clustering_results
- resp = self.client.get(reverse('clustering-facet'))
+ @mock.patch('search.views.get_num_sounds_per_cluster')
+ @mock.patch('search.views.get_clusters_for_query')
+ def test_successful_search_result_clustering_view(self, get_clusters_for_query, get_num_sounds_per_cluster):
+ get_clusters_for_query.return_value = self.successful_clustering_results
+ get_num_sounds_per_cluster.return_value = self.num_sounds_clustering_results
+ resp = self.client.get(reverse('clusters-section'))
# 200 status code & use of clustering facets template
self.assertEqual(resp.status_code, 200)
- self.assertTemplateUsed(resp, 'search/clustering_facet.html')
+ self.assertTemplateUsed(resp, 'search/clustering_results.html')
# check cluster's content
- # 2 sounds per clusters
- # 3 most used tags in the cluster 'tag1 tag2 tag3'
- # context variable cluster_id_num_results_tags_sound_examples: [(, , , ), ...]
- self.assertEqual(resp.context['cluster_id_num_results_tags_sound_examples'], [
- (0, 2, 'tag1 tag2 tag3', self.sound_id_preview_urls[:2]),
- (1, 2, 'tag1 tag2 tag3', self.sound_id_preview_urls[2:])
+ self.assertEqual(resp.context['clusters_data'], [
+ (23, 2, 'tag1 tag2 tag3', ['a']),
+ (24, 2, 'tag1 tag2 tag3', ['b', 'c'])
])
- @skipIf(True, "Clustering not yet enabled in BW")
- @mock.patch('search.views.cluster_sound_results')
- def test_pending_search_result_clustering_view(self, cluster_sound_results):
- cluster_sound_results.return_value = self.pending_clustering_results
- resp = self.client.get(reverse('clustering-facet'))
-
- # 200 status code & JSON response content
- self.assertEqual(resp.status_code, 200)
- self.assertJSONEqual(resp.content, {'status': 'pending'})
-
- @skipIf(True, "Clustering not yet enabled in BW")
- @mock.patch('search.views.cluster_sound_results')
- def test_failed_search_result_clustering_view(self, cluster_sound_results):
- cluster_sound_results.return_value = self.failed_clustering_results
- resp = self.client.get(reverse('clustering-facet'))
+ @mock.patch('search.views.get_num_sounds_per_cluster')
+ @mock.patch('search.views.get_clusters_for_query')
+ def test_failed_search_result_clustering_view(self, get_clusters_for_query, get_num_sounds_per_cluster):
+ get_clusters_for_query.return_value = self.failed_clustering_results
+ get_num_sounds_per_cluster.return_value = self.num_sounds_clustering_results
+ resp = self.client.get(reverse('clusters-section'))
# 200 status code & JSON response content
self.assertEqual(resp.status_code, 200)
- self.assertJSONEqual(resp.content, {'status': 'failed'})
+ self.assertTemplateUsed(resp, 'search/clustering_results.html')
+ self.assertEqual(resp.context['clusters_data'], None)
diff --git a/search/views.py b/search/views.py
index 2ea1cfea6..e49896c21 100644
--- a/search/views.py
+++ b/search/views.py
@@ -21,10 +21,7 @@
import datetime
import json
import logging
-import re
-import uuid
import sentry_sdk
-from collections import defaultdict, Counter
from django.core.cache import cache
from django.conf import settings
@@ -34,146 +31,70 @@
import forum
import sounds
-import geotags
-from clustering.clustering_settings import DEFAULT_FEATURES, NUM_SOUND_EXAMPLES_PER_CLUSTER_FACET, \
- NUM_TAGS_SHOWN_PER_CLUSTER_FACET
-from clustering.interface import cluster_sound_results, get_sound_ids_from_search_engine_query
from forum.models import Post
from utils.encryption import create_hash
+from utils.clustering_utilities import get_clusters_for_query, get_num_sounds_per_cluster, \
+ cluster_data_is_fully_available, get_clustering_data_for_graph_display
from utils.logging_filters import get_client_ip
from utils.ratelimit import key_for_ratelimiting, rate_per_ip
-from utils.search.search_sounds import perform_search_engine_query, search_prepare_parameters, \
- split_filter_query, should_use_compact_mode, contains_active_advanced_search_filters
-from utils.search import get_search_engine, SearchEngineException, SearchResultsPaginator
+from utils.search import get_search_engine, SearchEngineException, SearchResultsPaginator, search_query_processor
+from utils.search.search_sounds import perform_search_engine_query, allow_beta_search_features
+
search_logger = logging.getLogger("search")
-def search_view_helper(request, tags_mode=False):
- query_params, advanced_search_params_dict, extra_vars = search_prepare_parameters(request)
+def search_view_helper(request):
+ # Process request data with the SearchQueryProcessor
+ sqp = search_query_processor.SearchQueryProcessor(request)
- # Check if there was a filter parsing error
- if extra_vars['parsing_error']:
- search_logger.info(f"Query filter parsing error. filter: {request.GET.get('f', '')}")
- extra_vars.update({'error_text': 'There was an error while searching, is your query correct?'})
- return extra_vars
+ # Check if there was a filter parsing error and return error if so
+ if sqp.errors:
+ search_logger.info(f"Errors in SearchQueryProcessor: {sqp.errors}")
+ return {'error_text': 'There was an error while searching, is your query correct?'}
- # Get the url query params for later sending it to the clustering engine (this is only used with the clustering feature)
- url_query_params_string = request.META['QUERY_STRING']
+ # Update compact mode preference if user has explicitly specified a different value than the preference
+ if request.user.is_authenticated:
+ option = sqp.options['grid_mode']
+ if option.set_in_request:
+ request_preference = option.value
+ user_preference = request.user.profile.use_compact_mode
+ if request_preference != user_preference:
+ request.user.profile.use_compact_mode = request_preference
+ request.user.profile.save()
- # Get a "split" version of the filter which is used to display filters in UI and for some other checks (see below)
- filter_query_split = split_filter_query(query_params['query_filter'], extra_vars['parsed_filters'], extra_vars['cluster_id'])
-
- # Get tags taht are being used in filters (this is used later to remove them from the facet and also for tags mode)
- tags_in_filter = []
- for filter_data in filter_query_split:
- if filter_data['name'].startswith('tag:'):
- tag = filter_data['name'].replace('tag:', '')
- if tag.startswith('"'):
- # If tag name has quotes, remove them
- tag = tag[1:-1]
- tags_in_filter.append(tag)
-
- # Process tags mode stuff
- initial_tagcloud = None
- if tags_mode:
- # In tags mode, we increase the size of the tags facet so we include more related tags
- query_params['facets'][settings.SEARCH_SOUNDS_FIELD_TAGS]['limit'] = 50
-
- # If no tags are in filter, we are "starting" tag-based browsing so display the initial tagcloud
- if not tags_in_filter:
- initial_tagcloud = cache.get('initial_tagcloud')
- if initial_tagcloud is None:
- # If tagcloud is not cached, make a query to retrieve it and save it to cache
- results, _ = perform_search_engine_query(dict(
- textual_query='',
- query_filter= "*:*",
- num_sounds=1,
- facets={settings.SEARCH_SOUNDS_FIELD_TAGS: {'limit': 100}},
- group_by_pack=True,
- group_counts_as_one_in_facets=False,
- ))
- initial_tagcloud = [dict(name=f[0], count=f[1], browse_url=reverse('tags', args=[f[0]])) for f in results.facets["tag"]]
- cache.set('initial_tagcloud', initial_tagcloud, 60 * 60 * 12) # cache for 12 hours
- return {
- 'tags_mode': True,
- 'tags_in_filter': tags_in_filter,
- 'initial_tagcloud': initial_tagcloud,
- }
-
- # In the tvars section we pass the original group_by_pack value to avoid it being set to false if there is a pack filter (see search_prepare_parameters)
- # This is so that we keep track of the original setting of group_by_pack before the filter was applied, and so that if the pack filter is removed, we can
- # automatically revert to the previous group_by_pack setting. Also, we compute "disable_group_by_pack_option" so that when we have changed the real
- # group_by_pack because there is a pack filter, we can grey out the option in the search form. Similar thing we do for only_sounds_with_pack as also
- # it does not make sense when filtering by pack
- group_by_pack_in_request = request.GET.get("g", "1") == "1"
- only_sounds_with_pack_in_request = request.GET.get("only_p", "0") == "1"
- disable_group_by_pack_option = 'pack:' in query_params['query_filter'] or only_sounds_with_pack_in_request
- disable_only_sounds_by_pack_option= 'pack:' in query_params['query_filter']
- only_sounds_with_pack = "1" if query_params['only_sounds_with_pack'] else ""
- if only_sounds_with_pack:
- # If displaying search results as packs, include 3 sounds per pack group in the results so we can display these sounds as selected sounds in the
- # display_pack templatetag
- query_params['num_sounds_per_pack_group'] = 3
-
- # Parpare variables for map view
- disable_display_results_in_grid_option = False
- map_bytearray_url = ''
- use_map_mode = settings.SEARCH_ALLOW_DISPLAY_RESULTS_IN_MAP and request.GET.get("mm", "0") == "1"
- map_mode_query_results_cache_key = None
+ # Prepare variables for map view (prepare some URLs for loading sounds and providing links to map)
open_in_map_url = None
- if use_map_mode:
- # Prepare some URLs for loading sounds and providing links to map
+ map_mode_query_results_cache_key = None
+ map_bytearray_url = ''
+ if sqp.map_mode:
current_query_params = request.get_full_path().split("?")[-1]
open_in_map_url = reverse('geotags-query') + f'?{current_query_params}'
map_mode_query_results_cache_key = f'map-query-results-{create_hash(current_query_params, 10)}'
map_bytearray_url = reverse('geotags-for-query-barray') + f'?key={map_mode_query_results_cache_key}'
- # Update some query parameters and options to adapt to map mode
- disable_group_by_pack_option = True
- disable_only_sounds_by_pack_option = True
- disable_display_results_in_grid_option = True
- geotags.views.update_query_params_for_map_query(query_params, preserve_facets=True)
-
- tvars = {
- 'error_text': None,
- 'filter_query': query_params['query_filter'],
- 'filter_query_split': filter_query_split,
- 'search_query': query_params['textual_query'],
- 'similar_to': query_params['similar_to'],
- 'group_by_pack_in_request': "1" if group_by_pack_in_request else "",
- 'disable_group_by_pack_option': disable_group_by_pack_option,
- 'only_sounds_with_pack': only_sounds_with_pack,
- 'only_sounds_with_pack_in_request': "1" if only_sounds_with_pack_in_request else "",
- 'disable_only_sounds_by_pack_option': disable_only_sounds_by_pack_option,
- 'use_compact_mode': should_use_compact_mode(request),
- 'disable_display_results_in_grid_option': disable_display_results_in_grid_option,
- 'advanced': extra_vars['advanced'],
- 'sort': query_params['sort'],
- 'sort_options': [(option, option) for option in settings.SEARCH_SOUNDS_SORT_OPTIONS_WEB],
- 'filter_query_link_more_when_grouping_packs': extra_vars['filter_query_link_more_when_grouping_packs'],
- 'current_page': query_params['current_page'],
- 'url_query_params_string': url_query_params_string,
- 'cluster_id': extra_vars['cluster_id'],
- 'clustering_on': settings.ENABLE_SEARCH_RESULTS_CLUSTERING,
- 'weights': extra_vars['raw_weights_parameter'],
- 'initial_tagcloud': initial_tagcloud,
- 'tags_mode': tags_mode,
- 'tags_in_filter': tags_in_filter,
- 'has_advanced_search_settings_set': contains_active_advanced_search_filters(request, query_params, extra_vars),
- 'advanced_search_closed_on_load': settings.ADVANCED_SEARCH_MENU_ALWAYS_CLOSED_ON_PAGE_LOAD,
- 'allow_map_mode': settings.SEARCH_ALLOW_DISPLAY_RESULTS_IN_MAP,
- 'use_map_mode': use_map_mode,
- 'map_bytearray_url': map_bytearray_url,
- 'open_in_map_url': open_in_map_url,
- 'max_search_results_map_mode': settings.MAX_SEARCH_RESULTS_IN_MAP_DISPLAY
- }
- tvars.update(advanced_search_params_dict)
+ # Prepare variables for clustering
+ get_clusters_url = None
+ clusters_data = None
+ if sqp.compute_clusters_active() and allow_beta_search_features(request):
+ if cluster_data_is_fully_available(sqp):
+ # If clustering data for the current query is fully available, we can get it directly
+ clusters_data = _get_clusters_data_helper(sqp)
+ else:
+ # Otherwise pass the url where the cluster data will be fetched asynchronously from
+ get_clusters_url = reverse('clusters-section') + f'?{request.get_full_path().split("?")[-1]}'
+
+ # If in tags mode and no tags in filter, return before making the query as we'll make
+ # the initial tagcloud in tags.views.tags view and no need to make any further query here
+ if sqp.tags_mode_active() and not sqp.get_tags_in_filters():
+ return {'sqp': sqp} # sqp will be needed in tags.views.tags view
- try:
+ # Run the query and post-process the results
+ try:
+ query_params = sqp.as_query_params()
results, paginator = perform_search_engine_query(query_params)
- if not use_map_mode:
- if not only_sounds_with_pack:
+ if not sqp.map_mode_active():
+ if not sqp.display_as_packs_active():
resultids = [d.get("id") for d in results.docs]
resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
allsounds = {}
@@ -186,6 +107,11 @@ def search_view_helper(request, tags_mode=False):
docs = [doc for doc in results.docs if doc["id"] in allsounds]
for d in docs:
d["sound"] = allsounds[d["id"]]
+
+ # Add URLs to "more from this pack" in the result object so these are easily accessible in the template
+ for d in docs:
+ if d.get("n_more_in_group") and d["sound"].pack_id is not None:
+ d["more_from_this_pack_url"] = sqp.get_url(add_filters=[f'grouping_pack:"{d["sound"].pack_id}_{d["sound"].pack_name}"'])
else:
resultspackids = []
sound_ids_for_pack_id = {}
@@ -204,6 +130,7 @@ def search_view_helper(request, tags_mode=False):
docs = [d for d in results.docs if int(d.get("group_name").split('_')[0]) in allpacks]
for d in docs:
d["pack"] = allpacks[int(d.get("group_name").split('_')[0])]
+ d["more_from_this_pack_url"] = sqp.get_url(add_filters=[f'grouping_pack:"{d["pack"].id}_{d["pack"].name}"'])
else:
# In map we configure the search query to already return geotags data. Here we collect all this data
# and save it to the cache so we can collect it in the 'geotags_for_query_barray' view which prepares
@@ -221,173 +148,92 @@ def search_view_helper(request, tags_mode=False):
'username': request.user.username,
'page': query_params['current_page'],
'sort': query_params['sort'],
- 'group_by_pack': query_params['group_by_pack'],
- 'advanced': json.dumps(advanced_search_params_dict) if extra_vars['advanced'] == "1" else "",
+ 'url': sqp.get_url(),
+ 'tags_mode': sqp.tags_mode_active(),
'query_time': results.q_time
}))
# For the facets of fields that could have mulitple values (i.e. currently, only "tags" facet), make
- # sure to remove the filters for the corresponding facet field thar are already active (so we remove
+ # sure to remove the filters for the corresponding facet field that are already active (so we remove
# redundant information)
- if tags_in_filter:
- if 'tag' in results.facets:
- results.facets['tag'] = [(tag, count) for tag, count in results.facets['tag'] if tag not in tags_in_filter]
-
- tvars.update({
+ if 'tag' in results.facets:
+ results.facets['tag'] = [(tag, count) for tag, count in results.facets['tag'] if tag not in sqp.get_tags_in_filters()]
+
+ # Compile template variables
+ return {
+ 'sqp': sqp,
+ 'error_text': None,
+ 'current_page': query_params['current_page'],
+ 'has_advanced_search_settings_set': sqp.contains_active_advanced_search_options(),
+ 'advanced_search_closed_on_load': settings.ADVANCED_SEARCH_MENU_ALWAYS_CLOSED_ON_PAGE_LOAD,
+ 'map_bytearray_url': map_bytearray_url,
+ 'open_in_map_url': open_in_map_url,
+ 'max_search_results_map_mode': settings.MAX_SEARCH_RESULTS_IN_MAP_DISPLAY,
+ 'get_clusters_url': get_clusters_url,
+ 'clusters_data': clusters_data,
'paginator': paginator,
'page': paginator.page(query_params['current_page']),
'docs': docs,
'facets': results.facets,
'non_grouped_number_of_results': results.non_grouped_number_of_results,
- })
+ 'show_beta_search_options': allow_beta_search_features(request),
+ }
except SearchEngineException as e:
search_logger.info(f'Search error: query: {str(query_params)} error {e}')
sentry_sdk.capture_exception(e) # Manually capture exception so it has mroe info and Sentry can organize it properly
- tvars.update({'error_text': 'There was an error while searching, is your query correct?'})
+ return {'error_text': 'There was an error while searching, is your query correct?'}
except Exception as e:
search_logger.info(f'Could probably not connect to Solr - {e}')
sentry_sdk.capture_exception(e) # Manually capture exception so it has mroe info and Sentry can organize it properly
- tvars.update({'error_text': 'The search server could not be reached, please try again later.'})
-
- return tvars
+ return {'error_text': 'The search server could not be reached, please try again later.'}
@ratelimit(key=key_for_ratelimiting, rate=rate_per_ip, group=settings.RATELIMIT_SEARCH_GROUP, block=True)
def search(request):
+ """Render 'search/search.html' with the template variables returned by search_view_helper."""
- tvars = search_view_helper(request, tags_mode=False)
- template = 'search/search.html' if request.GET.get("ajax", "") != "1" else 'search/search_ajax.html'
- return render(request, template, tvars)
+ tvars = search_view_helper(request)
+ return render(request, 'search/search.html', tvars)
-def clustering_facet(request):
- """Triggers the computation of the clustering, returns the state of processing or the clustering facet.
- """
- # pass the url query params for later sending it to the clustering engine
- url_query_params_string = request.META['QUERY_STRING']
- # remove existing cluster facet filter from the params since the returned cluster facets will include
- # their correspondinng cluster_id query parameter (done in the template)
- url_query_params_string = re.sub(r"(&cluster_id=[0-9]*)", "", url_query_params_string)
-
- result = cluster_sound_results(request, features=DEFAULT_FEATURES)
-
- # check if computation is finished. If not, send computation state.
- if result['finished']:
- if result['result'] is not None:
- results = result['result']
- num_clusters = len(results)
- else:
- return JsonResponse({'status': 'failed'}, safe=False)
- elif result['error']:
- return JsonResponse({'status': 'failed'}, safe=False)
- else:
- return JsonResponse({'status': 'pending'}, safe=False)
-
- # check if facet filters are present in the search query
- # if yes, filter sounds from clusters
- query_params, _, extra_vars = search_prepare_parameters(request)
- if extra_vars['has_facet_filter']:
- sound_ids_filtered = get_sound_ids_from_search_engine_query(query_params)
- results = [[sound_id for sound_id in cluster if int(sound_id) in sound_ids_filtered]
- for cluster in results]
-
- num_sounds_per_cluster = [len(cluster) for cluster in results]
- partition = {sound_id: cluster_id for cluster_id, cluster in enumerate(results) for sound_id in cluster}
-
- # label clusters using most occuring tags
- sound_instances = sounds.models.Sound.objects.bulk_query_id(list(map(int, list(partition.keys()))))
- sound_tags = {sound.id: sound.tag_array for sound in sound_instances}
- cluster_tags = defaultdict(list)
-
- # extract tags for each clusters and do not use query terms for labeling clusters
- query_terms = {t.lower() for t in request.GET.get('q', '').split(' ')}
- for sound_id, tags in sound_tags.items():
- cluster_tags[partition[str(sound_id)]] += [t.lower() for t in tags if t.lower() not in query_terms]
-
- # count 3 most occuring tags
- # we iterate with range(len(results)) to ensure that we get the right order when iterating through the dict
- cluster_most_occuring_tags = [
- [tag for tag, _ in Counter(cluster_tags[cluster_id]).most_common(NUM_TAGS_SHOWN_PER_CLUSTER_FACET)]
- if cluster_tags[cluster_id] else []
- for cluster_id in range(len(results))
- ]
- most_occuring_tags_formatted = [
- ' '.join(sorted(most_occuring_tags))
- for most_occuring_tags in cluster_most_occuring_tags
- ]
-
- # extract sound examples for each cluster
- sound_ids_examples_per_cluster = [
- list(map(int, cluster_sound_ids[:NUM_SOUND_EXAMPLES_PER_CLUSTER_FACET]))
- for cluster_sound_ids in results
- ]
- sound_ids_examples = [item for sublist in sound_ids_examples_per_cluster for item in sublist]
- sound_urls = {
- sound.id: sound.locations()['preview']['LQ']['ogg']['url']
- for sound in sound_instances
- if sound.id in sound_ids_examples
- }
- sound_url_examples_per_cluster = [
- [(sound_id, sound_urls[sound_id]) for sound_id in cluster_sound_ids]
- for cluster_sound_ids in sound_ids_examples_per_cluster
- ]
-
- return render(request, 'search/clustering_facet.html', {
- 'results': partition,
- 'url_query_params_string': url_query_params_string,
- 'cluster_id_num_results_tags_sound_examples': list(zip(
- list(range(num_clusters)),
- num_sounds_per_cluster,
- most_occuring_tags_formatted,
- sound_url_examples_per_cluster
- )),
- })
+def _get_clusters_data_helper(sqp):
+ """Summarise the clusters computed for the query described by `sqp`.
+
+ Returns None when get_clusters_for_query returns None; otherwise a list of
+ (cluster ID, number of sounds, cluster name, example sounds data) tuples.
+ """
+ # Get main cluster data
+ results = get_clusters_for_query(sqp)
+ if results is None:
+ return None
+
+ # Get the number of sounds per cluster
+ # This number depends on the facet filters which are applied AFTER the main clustering.
+ # See get_num_sounds_per_cluster for more details.
+ num_sounds_per_cluster = get_num_sounds_per_cluster(sqp, results['clusters'])
+
+ # Return a list with information for each cluster
+ # Note that this information DOES NOT include the actual sound IDs per cluster.
+ # zip() stops at its shortest input, so any key missing from results (defaulting to []) yields an empty list.
+ return list(zip(
+ results.get('cluster_ids', []), # cluster ID
+ num_sounds_per_cluster, # Num sounds
+ results.get('cluster_names', []), # Cluster name
+ results.get('example_sounds_data', []) # Example sounds
+ ))
+
+
+def clusters_section(request):
+ """Render 'search/clustering_results.html' with the clusters data for the requested search.
+
+ When no clusters data could be obtained, the template receives clusters_data=None and no 'sqp'.
+ """
+ sqp = search_query_processor.SearchQueryProcessor(request)
+ clusters_data = _get_clusters_data_helper(sqp)
+ if clusters_data is None:
+ return render(request, 'search/clustering_results.html', {'clusters_data': None})
+ return render(request, 'search/clustering_results.html', {'sqp': sqp, 'clusters_data': clusters_data})
def clustered_graph(request):
"""Returns the clustered sound graph representation of the search results.
"""
- result = cluster_sound_results(request, features=DEFAULT_FEATURES)
- graph = result['graph']
-
- # check if facet filters are present in the search query
- # if yes, filter nodes and links from the graph
- query_params, _, extra_vars = search_prepare_parameters(request)
- if extra_vars['has_facet_filter']:
- nodes = graph['nodes']
- links = graph['links']
- graph['nodes'] = []
- graph['links'] = []
- sound_ids_filtered = get_sound_ids_from_search_engine_query(query_params)
- for node in nodes:
- if int(node['id']) in sound_ids_filtered:
- graph['nodes'].append(node)
- for link in links:
- if int(link['source']) in sound_ids_filtered and int(link['target']) in sound_ids_filtered:
- graph['links'].append(link)
-
- results = sounds.models.Sound.objects.bulk_query_id([int(node['id']) for node in graph['nodes']])
-
- sound_metadata = {}
- for sound in results:
- sound_locations = sound.locations()
- sound_metadata.update(
- {sound.id: (
- sound_locations['preview']['LQ']['ogg']['url'],
- sound.original_filename,
- ' '.join(sound.tag_array),
- reverse("sound", args=(sound.username, sound.id)),
- sound_locations['display']['wave']['M']['url'],
- )}
- )
-
- for node in graph['nodes']:
- node['url'] = sound_metadata[int(node['id'])][0]
- node['name'] = sound_metadata[int(node['id'])][1]
- node['tags'] = sound_metadata[int(node['id'])][2]
- node['sound_page_url'] = sound_metadata[int(node['id'])][3]
- node['image_url'] = sound_metadata[int(node['id'])][4]
-
+ # TODO: this view is currently not used in the new UI, but we could add a modal in the
+ # clustering section to show results in a graph.
+ sqp = search_query_processor.SearchQueryProcessor(request)
+ results = get_clusters_for_query(sqp)
+ if results is None:
+ # No clustering results: return the error payload here, otherwise results['graph'] below fails on None.
+ return JsonResponse(json.dumps({'error': True}), safe=False)
+ graph = get_clustering_data_for_graph_display(sqp, results['graph'])
return JsonResponse(json.dumps(graph), safe=False)
diff --git a/sounds/management/commands/create_remix_groups.py b/sounds/management/commands/create_remix_groups.py
index 69de15789..503c5b517 100644
--- a/sounds/management/commands/create_remix_groups.py
+++ b/sounds/management/commands/create_remix_groups.py
@@ -24,7 +24,7 @@
from django.core.management.base import BaseCommand
from django.db import connection
-from networkx import nx
+import networkx as nx
from sounds.models import Sound, RemixGroup
@@ -63,14 +63,15 @@ def handle(self, *args, **options):
dg = _create_nodes(dg)
# 4) Find weakly connected components (single direction)
- subgraphs = nx.weakly_connected_component_subgraphs(dg)
+ subgraphs = nx.weakly_connected_components(dg)
# 5) delete all remixgroup objects to recalculate
RemixGroup.objects.all().delete()
# 6) Loop through all connected graphs in the dataset and create the groups
n_groups_created = 0
- for sg in subgraphs:
+ for sg_nodes in subgraphs:
+ sg = dg.subgraph(sg_nodes).copy()
_create_and_save_remixgroup(sg, RemixGroup())
n_groups_created += 1
@@ -80,19 +81,19 @@ def handle(self, *args, **options):
def _create_nodes(dg):
+ """Attach metadata of the matching Sound to every node of `dg` and return `dg`.
+
+ Each node identifier is used as a Sound id (one database lookup per node).
+ """
for node in dg.nodes():
sound = Sound.objects.get(id=node)
- dg.add_node(node, {'date': sound.created,
- 'nodeName': sound.original_filename,
- 'username': sound.user.username,
- 'sound_url_mp3': sound.locations()['preview']['LQ']['mp3']['url'],
- 'sound_url_ogg': sound.locations()['preview']['LQ']['ogg']['url'],
- 'waveform_url': sound.locations()['display']['wave']['M']['url']})
+ # Node attributes are passed to add_node as keyword arguments (dict unpacked with **).
+ dg.add_node(node, **{'date': sound.created,
+ 'nodeName': sound.original_filename,
+ 'username': sound.user.username,
+ 'sound_url_mp3': sound.locations()['preview']['LQ']['mp3']['url'],
+ 'sound_url_ogg': sound.locations()['preview']['LQ']['ogg']['url'],
+ 'waveform_url': sound.locations()['display']['wave']['M']['url']})
return dg
def _create_and_save_remixgroup(sg, remixgroup):
# print ' ========================================= '
# add to list the subgraphs(connected components) with the extra data
- node_list = sg.nodes(data=True)
+ node_list = list(sg.nodes(data=True))
# pp(node_list)
# sort by date (holds all subgraph nodes sorted by date)
@@ -111,7 +112,7 @@ def _create_and_save_remixgroup(sg, remixgroup):
links = []
remixgroup.save() # need to save to have primary key before ManyToMany
# FIXME: no idea why nx.weakly_connected_components(sg) return list in list...
- remixgroup.sounds.set(set(nx.weakly_connected_components(sg)[0]))
+ remixgroup.sounds.set(max(nx.weakly_connected_components(sg), key=len))
for sound in remixgroup.sounds.all():
sound.invalidate_template_caches()
@@ -141,5 +142,5 @@ def _create_and_save_remixgroup(sg, remixgroup):
"\"nodes\": " + json.dumps(nodes) + "," \
"\"links\": " + json.dumps(links) + "}"
- remixgroup.networkx_data = json.dumps(dict(nodes=sg.nodes(), edges=sg.edges()))
+ remixgroup.networkx_data = json.dumps(dict(nodes=list(sg.nodes()), edges=list(sg.edges())))
remixgroup.save()
diff --git a/sounds/views.py b/sounds/views.py
index 385415a1b..552c4f705 100644
--- a/sounds/views.py
+++ b/sounds/views.py
@@ -149,7 +149,7 @@ def random(request):
def packs(request):
+ """Redirect to the 'sounds-search' URL with a preset query string (s=newest first, g=1, dp=1)."""
- return HttpResponseRedirect(reverse('sounds-search') + '?s=Date+added+(newest+first)&g=1&only_p=1')
+ return HttpResponseRedirect(reverse('sounds-search') + '?s=Date+added+(newest+first)&g=1&dp=1')
def front_page(request):
diff --git a/tags/templatetags/tags.py b/tags/templatetags/tags.py
index e9463f1d9..8118f3114 100644
--- a/tags/templatetags/tags.py
+++ b/tags/templatetags/tags.py
@@ -41,7 +41,7 @@ def join_tags_include(list, include):
@register.inclusion_tag('molecules/bw_follow_tags_widget.html', takes_context=True)
def bw_follow_tags_widget(context):
request = context['request']
- slash_tag = "/".join(context['tags_in_filter'])
+ slash_tag = "/".join(context['sqp'].get_tags_in_filters())
follow_tags_url = ''
unfollow_tags_url = ''
show_unfollow_button = False
diff --git a/tags/views.py b/tags/views.py
index ed80ceac2..444f60f89 100644
--- a/tags/views.py
+++ b/tags/views.py
@@ -20,12 +20,15 @@
import logging
+from django.conf import settings
+from django.core.cache import cache
from django.http import Http404, HttpResponsePermanentRedirect, HttpResponseRedirect
from django.shortcuts import render
from django.urls import reverse
from search.views import search_view_helper
from tags.models import Tag, FS1Tag
+from utils.search.search_sounds import perform_search_engine_query
search_logger = logging.getLogger("search")
@@ -51,8 +54,26 @@ def tags(request, multiple_tags=None):
return HttpResponseRedirect(f"{reverse('tags')}?f={search_filter}")
else:
- # Share same view code as for the search view, but set "tags mode" on
- tvars = search_view_helper(request, tags_mode=True)
+ # Share same view code as for the search view, but "tags mode" will be on
+ tvars = search_view_helper(request)
+
+ # If there are no tags in filter, get initial tagcloud and add it to tvars
+ if 'sqp' in tvars and not tvars['sqp'].get_tags_in_filters():
+ initial_tagcloud = cache.get('initial_tagcloud')
+ if initial_tagcloud is None:
+ # If tagcloud is not cached, make a query to retrieve it and save it to cache
+ results, _ = perform_search_engine_query(dict(
+ textual_query='',
+ query_filter= "*:*",
+ num_sounds=1,
+ facets={settings.SEARCH_SOUNDS_FIELD_TAGS: {'limit': 200}},
+ group_by_pack=True,
+ group_counts_as_one_in_facets=False,
+ ))
+ initial_tagcloud = [dict(name=f[0], count=f[1], browse_url=reverse('tags', args=[f[0]])) for f in results.facets["tag"]]
+ cache.set('initial_tagcloud', initial_tagcloud, 60 * 60 * 12) # cache for 12 hours
+ tvars.update({'initial_tagcloud': initial_tagcloud})
+
return render(request, 'search/search.html', tvars)
diff --git a/templates/molecules/navbar_search_page.html b/templates/molecules/navbar_search_page.html
index f75a526f4..6b8d05705 100644
--- a/templates/molecules/navbar_search_page.html
+++ b/templates/molecules/navbar_search_page.html
@@ -11,7 +11,7 @@
-
+
diff --git a/templates/search/clustering_results.html b/templates/search/clustering_results.html
new file mode 100644
index 000000000..166324a4a
--- /dev/null
+++ b/templates/search/clustering_results.html
@@ -0,0 +1,12 @@
+{% if clusters_data and clusters_data|length > 0 %}
+
+
+ {% for cluster_id, num_sounds, cluster_name, sound_examples in clusters_data %}
+
+
+
{{ num_sounds }} sound{{ num_sounds|pluralize }}
+
+ {% endfor %}
+
+
+{% endif %}
\ No newline at end of file
diff --git a/templates/search/facet.html b/templates/search/facet.html
index e1e8adc82..91af229c6 100644
--- a/templates/search/facet.html
+++ b/templates/search/facet.html
@@ -1,50 +1,26 @@
{% load bw_templatetags %}
-
+{% if facet and facet|length > 1%}