FE changes to support location autocomplete within the NL Search bar (#…

…4649) All FE & BE changes to add a dropdown on the NL search bar for autocompleting location search. See screencast: https://screencast.googleplex.com/cast/NDkxMjUyMjc3MDUxMzkyMHxkZjg3ZDUxMC05MA Also adds a webdriver test to verify the presence of the suggestion results.
datacommonsorg · Oct 7, 2024 · 8c74151 · 8c74151
1 parent d76d970
commit 8c74151
Show file tree

Hide file tree

Showing 18 changed files with 917 additions and 71 deletions.
diff --git a/import b/import
diff --git a/mixer b/mixer
diff --git a/server/__init__.py b/server/__init__.py
@@ -209,6 +209,10 @@ def register_routes_common(app):
   from server.routes.shared_api import stats as shared_stats
   app.register_blueprint(shared_stats.bp)
 
+  from server.routes.shared_api.autocomplete import \
+      autocomplete as shared_autocomplete
+  app.register_blueprint(shared_autocomplete.bp)
+
   from server.routes.shared_api import variable as shared_variable
   app.register_blueprint(shared_variable.bp)
 

diff --git a/server/routes/shared_api/autocomplete/autocomplete.py b/server/routes/shared_api/autocomplete/autocomplete.py
@@ -0,0 +1,63 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+
+from flask import Blueprint
+from flask import request
+
+from server.routes.shared_api.autocomplete import helpers
+from server.routes.shared_api.place import findplacedcid
+
+# TODO(gmechali): Add Stat Var search.
+
+# Blueprint for the autocomplete API; its routes are registered under the /api prefix.
+bp = Blueprint("autocomplete", __name__, url_prefix='/api')
+
+
+@bp.route('/autocomplete')
+def autocomplete():
+  """Predicts the user query for location only, using the Google Maps prediction API.
+
+  Reads the `query` (user input) and `hl` (language) request arguments, asks
+  the helpers for Maps predictions on sub-queries of the input, resolves the
+  predicted place ids through findplacedcid, and keeps only the predictions
+  whose place id was resolved.
+
+  Returns:
+      Json object with a `predictions` list. Each entry carries `name`,
+      `match_type`, `matched_query` and `dcid`.
+  """
+  lang = request.args.get('hl')
+  # Default to an empty query so that a request without `query` produces an
+  # empty prediction list instead of failing while splitting None.
+  query = request.args.get('query', '')
+
+  # Extract subqueries from the user input.
+  queries = helpers.find_queries(query)
+
+  # Send requests to the Google Maps Predictions API.
+  prediction_responses = helpers.predict(queries, lang)
+
+  place_ids = [prediction['place_id'] for prediction in prediction_responses]
+
+  # Lookup keyed by place id, parsed from findplacedcid's JSON response. It
+  # stays an empty mapping when there is nothing to resolve.
+  place_id_to_dcid = {}
+  if place_ids:
+    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
+
+  final_predictions = []
+  # TODO(gmechali): See if we can use typed dataclasses here.
+  for prediction in prediction_responses:
+    place_id = prediction['place_id']
+    # Predictions whose place id could not be resolved are dropped.
+    if place_id not in place_id_to_dcid:
+      continue
+    final_predictions.append({
+        'name': prediction['description'],
+        'match_type': 'location_search',
+        'matched_query': prediction['matched_query'],
+        'dcid': place_id_to_dcid[place_id],
+    })
+
+  return {'predictions': final_predictions}
diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
@@ -0,0 +1,167 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import re
+from typing import Dict, List
+from urllib.parse import urlencode
+
+from flask import current_app
+import requests
+
+# Base URL (ending in "?") to which execute_maps_request appends the urlencoded parameters.
+MAPS_API_URL = "https://maps.googleapis.com/maps/api/place/autocomplete/json?"
+# find_queries only emits sub-queries that have at least this many characters.
+MIN_CHARACTERS_PER_QUERY = 3
+# Upper bound on the number of sub-queries find_queries returns.
+MAX_NUM_OF_QUERIES = 4
+# predict stops collecting (and stops issuing Maps requests) once it holds this many unique predictions.
+RESPONSE_COUNT_LIMIT = 10
+# predict truncates its result to this many predictions before returning.
+DISPLAYED_RESPONSE_COUNT_LIMIT = 5
+
+
+def find_queries(user_query: str) -> List[str]:
+  """Extracts subqueries to send to the Google Maps Predictions API from the entire user input.
+
+  Sub-queries are suffixes of the input built word by word from the end, e.g.
+  "population of calif" yields ["population of calif", "of calif", "calif"].
+
+  Args:
+      user_query: the raw text typed by the user. An empty or missing (None)
+        value yields no sub-queries.
+
+  Returns:
+      List[str]: containing all subqueries to execute, longest first. Holds at
+      most MAX_NUM_OF_QUERIES entries, each at least MIN_CHARACTERS_PER_QUERY
+      characters long.
+  """
+  if not user_query:
+    return []
+
+  # str.split() without arguments splits on runs of whitespace and ignores
+  # leading/trailing whitespace, so no empty "words" leak into the
+  # sub-queries (which would produce queries with a stray leading space).
+  words_in_query = user_query.split()
+
+  queries = []
+  cumulative = ""
+  for word in reversed(words_in_query):
+    # Extract at most MAX_NUM_OF_QUERIES subqueries.
+    if len(queries) >= MAX_NUM_OF_QUERIES:
+      break
+
+    # Prepend the current word for the next subquery.
+    cumulative = word + " " + cumulative if cumulative else word
+
+    # Only send queries that are long enough to be meaningful.
+    if len(cumulative) >= MIN_CHARACTERS_PER_QUERY:
+      queries.append(cumulative)
+
+  # Start by running the longer queries.
+  queries.reverse()
+  return queries
+
+
+def execute_maps_request(query: str, language: str) -> Dict:
+  """Execute a request to the Google Maps Prediction API for a given query.
+
+  Args:
+      query: the text to get predictions for; sent as the `input` parameter.
+      language: language code sent as the `language` parameter. When empty or
+        None the parameter is left out of the request.
+
+  Returns:
+      Json object containing the google maps prediction response.
+  """
+  request_obj = {
+      'types': "(regions)",
+      'key': current_app.config['MAPS_API_KEY'],
+      'input': query,
+  }
+  # Only forward the language when one was supplied; urlencode would otherwise
+  # serialize a missing value as the literal string "None".
+  if language:
+    request_obj['language'] = language
+
+  # Bound the wait so that an unresponsive backend cannot hang this request
+  # indefinitely (requests applies no timeout by default).
+  response = requests.post(MAPS_API_URL + urlencode(request_obj),
+                           json={},
+                           timeout=10)
+  return json.loads(response.text)
+
+
+def get_match_score(name: str, match_string: str) -> float:
+  """Scores how well the words of match_string line up with the words of name.
+
+  Both strings are split on whitespace and compared case-insensitively. Each
+  word of match_string is checked against the words of name in order: an exact
+  match adds 1, a substring match adds 0.5, and every word of name passed over
+  before a match (or all of them when nothing matches) subtracts 1.
+
+  Returns:
+    The accumulated score.
+  """
+  whitespace = re.compile(r'\s+')
+  name_tokens = [token.lower() for token in whitespace.split(name)]
+  query_tokens = [token.lower() for token in whitespace.split(match_string)]
+
+  total = 0
+  for query_token in query_tokens:
+    for name_token in name_tokens:
+      if query_token == name_token:
+        total += 1
+        break
+      if query_token in name_token:
+        total += 0.5
+        break
+      # No match against this word of the name: penalize and keep looking.
+      total -= 1
+
+  return total
+
+
+def find_best_match(name: str, string1: str, string2: str) -> str:
+  """Picks whichever of string1 and string2 scores higher against name.
+
+  Used to choose the best "matched_query" when the same response is found for
+  several queries. For example, with name "California, USA", string1
+  "Of Calif" and string2 "Calif", the result is "Calif".
+
+  Returns:
+    string2 when it scores strictly higher than string1, otherwise string1
+    (so ties go to string1).
+  """
+  first_score = get_match_score(name, string1)
+  second_score = get_match_score(name, string2)
+  return string2 if second_score > first_score else string1
+
+
+def predict(queries: List[str], lang: str) -> List[Dict]:
+  """Runs the Maps prediction requests and merges their results.
+
+  Queries are issued in order. A prediction is kept the first time its
+  place_id is seen and tagged with the query that produced it. When a later
+  query returns the same place again, that query is remembered and the
+  best-matching one becomes the prediction's final `matched_query`.
+
+  Returns:
+      List of json objects containing the deduped predictions, limited to
+      DISPLAYED_RESPONSE_COUNT_LIMIT entries.
+  """
+  unique_predictions = []
+  seen_place_ids = set()
+  # place_id -> best matching query among the queries that returned the place again.
+  repeat_queries = {}
+
+  for query in queries:
+    maps_response = execute_maps_request(query, lang)
+
+    for prediction in maps_response['predictions']:
+      prediction['matched_query'] = query
+      place_id = prediction['place_id']
+
+      if place_id not in seen_place_ids:
+        seen_place_ids.add(place_id)
+        unique_predictions.append(prediction)
+      elif place_id in repeat_queries:
+        # Seen more than twice: keep the better of the remembered query and this one.
+        repeat_queries[place_id] = find_best_match(prediction['description'],
+                                                   repeat_queries[place_id],
+                                                   query)
+      else:
+        # First repeat of this place: remember the query that produced it.
+        repeat_queries[place_id] = query
+
+      if len(unique_predictions) >= RESPONSE_COUNT_LIMIT:
+        # Enough results: stop going through this query's predictions.
+        break
+
+    if len(unique_predictions) >= RESPONSE_COUNT_LIMIT:
+      # Enough results: do not issue further requests to the Maps API.
+      break
+
+  displayed = unique_predictions[:DISPLAYED_RESPONSE_COUNT_LIMIT]
+  for prediction in displayed:
+    place_id = prediction['place_id']
+    if place_id in repeat_queries:
+      prediction['matched_query'] = find_best_match(
+          prediction['description'], prediction['matched_query'],
+          repeat_queries[place_id])
+
+  return displayed
diff --git a/server/routes/shared_api/place.py b/server/routes/shared_api/place.py
@@ -676,14 +676,7 @@ def descendent_names():
   return Response(json.dumps(result), 200, mimetype='application/json')
 
 
-@bp.route('/placeid2dcid')
-def placeid2dcid():
-  """API endpoint to get dcid based on place id.
-
-  This is to use together with the Google Maps Autocomplete API:
-  https://developers.google.com/places/web-service/autocomplete.
-  """
-  place_ids = request.args.getlist("placeIds")
+def findplacedcid(place_ids):
   if not place_ids:
     return 'error: must provide `placeIds` field', 400
   resp = fetch.resolve_id(place_ids, "placeId", "dcid")
@@ -697,6 +690,16 @@ def placeid2dcid():
   return Response(json.dumps(result), 200, mimetype='application/json')
 
 
+@bp.route('/placeid2dcid')
+def placeid2dcid():
+  """API endpoint that returns dcids for the place ids given as `placeIds`.
+
+  Intended for use together with the Google Maps Autocomplete API:
+  https://developers.google.com/places/web-service/autocomplete.
+  """
+  # The lookup itself lives in findplacedcid so it can be called directly.
+  return findplacedcid(request.args.getlist("placeIds"))
+
+
 @bp.route('/coords2places')
 def coords2places():
   """API endpoint to get place name and dcid based on latitude/longitude

diff --git a/server/tests/routes/api/autocomplete_test.py b/server/tests/routes/api/autocomplete_test.py
@@ -0,0 +1,67 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import unittest
+from unittest.mock import patch
+
+import server.tests.routes.api.mock_data as mock_data
+from web_app import app
+
+
+class TestAutocomplete(unittest.TestCase):
+  """Tests for /api/autocomplete with the Maps predictions and id resolution mocked."""
+
+  lang = 'en'
+
+  def run_autocomplete_query(self, query: str, lang: str):
+    """Issues a GET to /api/autocomplete with `query` and `hl` set to the given values."""
+    # Hand the parameters to the test client as query_string so they are
+    # url-encoded and actually carry the argument values; `${...}` placeholders
+    # inside a Python string are never interpolated.
+    return app.test_client().get("/api/autocomplete",
+                                 query_string={
+                                     'query': query,
+                                     'hl': lang
+                                 })
+
+  @patch('server.routes.shared_api.autocomplete.helpers.predict')
+  @patch('server.routes.shared_api.place.fetch.resolve_id')
+  def test_empty_query(self, mock_resolve_ids, mock_predict):
+    """An empty query returns a 200 with no predictions."""
+
+    def resolve_ids_side_effect(nodes, in_prop, out_prop):
+      return []
+
+    def mock_predict_effect(query, lang):
+      return []
+
+    mock_resolve_ids.side_effect = resolve_ids_side_effect
+    mock_predict.side_effect = mock_predict_effect
+
+    response = self.run_autocomplete_query('', self.lang)
+    self.assertEqual(response.status_code, 200)
+
+    response_dict = json.loads(response.data.decode("utf-8"))
+    self.assertEqual(len(response_dict["predictions"]), 0)
+
+  @patch('server.routes.shared_api.autocomplete.helpers.predict')
+  @patch('server.routes.shared_api.place.fetch.resolve_id')
+  def test_single_word_query(self, mock_resolve_ids, mock_predict):
+    """A single-word query returns every mocked prediction that resolves to a dcid."""
+
+    def resolve_ids_side_effect(nodes, in_prop, out_prop):
+      return mock_data.RESOLVE_IDS_VALUES
+
+    def mock_predict_effect(query, lang):
+      return mock_data.MAPS_PREDICTIONS_VALUES
+
+    mock_resolve_ids.side_effect = resolve_ids_side_effect
+    mock_predict.side_effect = mock_predict_effect
+
+    response = self.run_autocomplete_query('Calif', self.lang)
+
+    self.assertEqual(response.status_code, 200)
+
+    response_dict = json.loads(response.data.decode("utf-8"))
+    self.assertEqual(len(response_dict["predictions"]), 5)
diff --git a/server/tests/routes/api/mock_data.py b/server/tests/routes/api/mock_data.py
@@ -415,3 +415,43 @@
         }
     }
 }
+
+# Value returned by the mocked `fetch.resolve_id` in the autocomplete tests:
+# each place id maps to a list holding one entry with its dcid.
+RESOLVE_IDS_VALUES = {
+    'ChIJPV4oX_65j4ARVW8IJ6IJUYs': [{
+        'dcid': 'geoId/4210768'
+    }],
+    'ChIJPV4oX_65j4ARVW8IJ6IJUYs1': [{
+        'dcid': 'geoId/4210769'
+    }],
+    'ChIJPV4oX_65j4ARVW8IJ6IJUYs2': [{
+        'dcid': 'geoId/4210770'
+    }],
+    'ChIJPV4oX_65j4ARVW8IJ6IJUYs3': [{
+        'dcid': 'geoId/4210771'
+    }],
+    'ChIJPV4oX_65j4ARVW8IJ6IJUYs4': [{
+        'dcid': 'geoId/4210772'
+    }]
+}
+
+# Value returned by the mocked `helpers.predict` in the autocomplete tests.
+# The place ids are the same five used as keys in RESOLVE_IDS_VALUES.
+MAPS_PREDICTIONS_VALUES = [{
+    'description': 'California, USA',
+    'place_id': 'ChIJPV4oX_65j4ARVW8IJ6IJUYs',
+    'matched_query': 'calif'
+}, {
+    'description': 'Califon, NJ, USA',
+    'place_id': 'ChIJPV4oX_65j4ARVW8IJ6IJUYs1',
+    'matched_query': 'calif'
+}, {
+    'description': 'California, MD, USA',
+    'place_id': 'ChIJPV4oX_65j4ARVW8IJ6IJUYs2',
+    'matched_query': 'calif'
+}, {
+    'description': 'California City, CA, USA',
+    'place_id': 'ChIJPV4oX_65j4ARVW8IJ6IJUYs3',
+    'matched_query': 'calif'
+}, {
+    'description': 'California, PA, USA',
+    'place_id': 'ChIJPV4oX_65j4ARVW8IJ6IJUYs4',
+    'matched_query': 'calif'
+}]
+62 −0		run_test.sh
+18 −0		simple/sample/README.md
+84 −0		simple/sample/input/config.json
+15 −0		simple/sample/input/countries.csv
+3 −0		simple/sample/input/geoids.csv
+3 −0		simple/sample/input/latlng.csv
+51 −0		simple/sample/input/latlng_events.csv
+15 −0		simple/sample/input/powerplants.csv
+3 −0		simple/sample/input/s2cells.csv
+3 −0		simple/sample/input/wikidataids.csv
+23 −0		simple/sample/main_dc_output/countries.csv
+5 −0		simple/sample/main_dc_output/geoids.csv
+5 −0		simple/sample/main_dc_output/latlng.csv
+42 −0		simple/sample/main_dc_output/latlng_events.csv
+6 −0		simple/sample/main_dc_output/nl/sentences.csv
+6 −0		simple/sample/main_dc_output/observations.tmcf
+21 −0		simple/sample/main_dc_output/powerplants.csv
+15 −0		simple/sample/main_dc_output/process/debug_resolve_countries.csv
+3 −0		simple/sample/main_dc_output/process/debug_resolve_geoids.csv
+3 −0		simple/sample/main_dc_output/process/debug_resolve_latlng.csv
+51 −0		simple/sample/main_dc_output/process/debug_resolve_latlng_events.csv
+15 −0		simple/sample/main_dc_output/process/debug_resolve_powerplants.csv
+3 −0		simple/sample/main_dc_output/process/debug_resolve_s2cells.csv
+3 −0		simple/sample/main_dc_output/process/debug_resolve_wikidataids.csv
+42 −0		simple/sample/main_dc_output/process/report.json
+5 −0		simple/sample/main_dc_output/s2cells.csv
+62 −0		simple/sample/main_dc_output/schema.mcf
+5 −0		simple/sample/main_dc_output/wikidataids.csv
+6 −0		simple/sample/output/nl/sentences.csv
+15 −0		simple/sample/output/process/debug_resolve_countries.csv
+3 −0		simple/sample/output/process/debug_resolve_geoids.csv
+3 −0		simple/sample/output/process/debug_resolve_latlng.csv
+51 −0		simple/sample/output/process/debug_resolve_latlng_events.csv
+15 −0		simple/sample/output/process/debug_resolve_powerplants.csv
+3 −0		simple/sample/output/process/debug_resolve_s2cells.csv
+3 −0		simple/sample/output/process/debug_resolve_wikidataids.csv
+42 −0		simple/sample/output/process/report.json
+2 −0		simple/sample/output/tables/imports.csv
+100 −0		simple/sample/output/tables/observations.csv
+440 −0		simple/sample/output/tables/triples.csv
+22 −1		simple/stats/cache.py
+0 −4		simple/stats/config.py
+0 −69		simple/stats/data.py
+23 −75		simple/stats/db.py
+22 −13		simple/stats/observations_importer.py
+0 −10		simple/stats/schema_constants.py
+0 −48		simple/stats/util.py
+1 −6		simple/stats/variable_per_row_importer.py
+2 −2		simple/tests/stats/cache_test.py
+0 −11		simple/tests/stats/data_test.py
+9 −15		simple/tests/stats/db_test.py
+0 −3		simple/tests/stats/entities_importer_test.py
+15 −19		simple/tests/stats/observations_importer_test.py
+13 −0		simple/tests/stats/schema_test.py
+0 −1		simple/tests/stats/test_data/db/expected/observations.csv
+4 −4		simple/tests/stats/test_data/events_importer/expected/countryalpha3codes.observations.db.csv
+6 −6		simple/tests/stats/test_data/events_importer/expected/idcolumns.observations.db.csv
+7 −0		simple/tests/stats/test_data/observations_importer/expected/countryalpha3codes.db.csv
+0 −7		simple/tests/stats/test_data/observations_importer/expected/countryalpha3codes/observations.db.csv
+0 −7		simple/tests/stats/test_data/observations_importer/expected/obs_props/observations.db.csv
+0 −0		simple/tests/stats/test_data/observations_importer/input/countryalpha3codes.csv
+0 −7		simple/tests/stats/test_data/observations_importer/input/countryalpha3codes/config.json
+0 −12		simple/tests/stats/test_data/observations_importer/input/obs_props/config.json
+0 −5		simple/tests/stats/test_data/observations_importer/input/obs_props/input.csv
+31 −31		simple/tests/stats/test_data/runner/expected/config_driven/observations.db.csv
+31 −31		simple/tests/stats/test_data/runner/expected/config_with_wildcards/observations.db.csv
+5 −5		simple/tests/stats/test_data/runner/expected/generate_svg_hierarchy/observations.db.csv
+31 −31		simple/tests/stats/test_data/runner/expected/input_dir_driven/observations.db.csv
+9 −9		simple/tests/stats/test_data/runner/expected/remote_entity_types/observations.db.csv
+5 −5		simple/tests/stats/test_data/runner/expected/sv_nl_sentences/observations.db.csv
+5 −5		simple/tests/stats/test_data/runner/expected/topic_nl_sentences/observations.db.csv
+7 −0		simple/tests/stats/test_data/variable_per_row_importer/expected/custom_column_names.db.csv
+0 −7		simple/tests/stats/test_data/variable_per_row_importer/expected/custom_column_names/observations.db.csv
+7 −0		simple/tests/stats/test_data/variable_per_row_importer/expected/default_column_names.db.csv
+0 −7		simple/tests/stats/test_data/variable_per_row_importer/expected/default_column_names/observations.db.csv
+7 −0		simple/tests/stats/test_data/variable_per_row_importer/expected/namespace_prefixes.db.csv
+0 −7		simple/tests/stats/test_data/variable_per_row_importer/expected/namespace_prefixes/observations.db.csv
+0 −7		simple/tests/stats/test_data/variable_per_row_importer/expected/obs_props/observations.db.csv
+0 −0		simple/tests/stats/test_data/variable_per_row_importer/input/custom_column_names.csv
+0 −11		simple/tests/stats/test_data/variable_per_row_importer/input/custom_column_names/config.json
+0 −0		simple/tests/stats/test_data/variable_per_row_importer/input/default_column_names.csv
+0 −7		simple/tests/stats/test_data/variable_per_row_importer/input/default_column_names/config.json
+0 −0		simple/tests/stats/test_data/variable_per_row_importer/input/namespace_prefixes.csv
+0 −7		simple/tests/stats/test_data/variable_per_row_importer/input/namespace_prefixes/config.json
+0 −11		simple/tests/stats/test_data/variable_per_row_importer/input/obs_props/config.json
+0 −7		simple/tests/stats/test_data/variable_per_row_importer/input/obs_props/input.csv
+2 −3		simple/tests/stats/test_util.py
+0 −34		simple/tests/stats/util_test.py
+18 −20		simple/tests/stats/variable_per_row_importer_test.py