Skip to content

Commit

Permalink
FE changes to support location autocomplete within the NL Search bar (#4649)
Browse files Browse the repository at this point in the history

All FE & BE changes to add a dropdown on the NL search bar for
autocompleting location search.
See screencast:
https://screencast.googleplex.com/cast/NDkxMjUyMjc3MDUxMzkyMHxkZjg3ZDUxMC05MA
Also adds a webdriver test to verify the presence of the suggestion
results.
  • Loading branch information
gmechali authored Oct 7, 2024
1 parent d76d970 commit 8c74151
Show file tree
Hide file tree
Showing 18 changed files with 917 additions and 71 deletions.
2 changes: 1 addition & 1 deletion import
Submodule import updated 89 files
+62 −0 run_test.sh
+18 −0 simple/sample/README.md
+84 −0 simple/sample/input/config.json
+15 −0 simple/sample/input/countries.csv
+3 −0 simple/sample/input/geoids.csv
+3 −0 simple/sample/input/latlng.csv
+51 −0 simple/sample/input/latlng_events.csv
+15 −0 simple/sample/input/powerplants.csv
+3 −0 simple/sample/input/s2cells.csv
+3 −0 simple/sample/input/wikidataids.csv
+23 −0 simple/sample/main_dc_output/countries.csv
+5 −0 simple/sample/main_dc_output/geoids.csv
+5 −0 simple/sample/main_dc_output/latlng.csv
+42 −0 simple/sample/main_dc_output/latlng_events.csv
+6 −0 simple/sample/main_dc_output/nl/sentences.csv
+6 −0 simple/sample/main_dc_output/observations.tmcf
+21 −0 simple/sample/main_dc_output/powerplants.csv
+15 −0 simple/sample/main_dc_output/process/debug_resolve_countries.csv
+3 −0 simple/sample/main_dc_output/process/debug_resolve_geoids.csv
+3 −0 simple/sample/main_dc_output/process/debug_resolve_latlng.csv
+51 −0 simple/sample/main_dc_output/process/debug_resolve_latlng_events.csv
+15 −0 simple/sample/main_dc_output/process/debug_resolve_powerplants.csv
+3 −0 simple/sample/main_dc_output/process/debug_resolve_s2cells.csv
+3 −0 simple/sample/main_dc_output/process/debug_resolve_wikidataids.csv
+42 −0 simple/sample/main_dc_output/process/report.json
+5 −0 simple/sample/main_dc_output/s2cells.csv
+62 −0 simple/sample/main_dc_output/schema.mcf
+5 −0 simple/sample/main_dc_output/wikidataids.csv
+6 −0 simple/sample/output/nl/sentences.csv
+15 −0 simple/sample/output/process/debug_resolve_countries.csv
+3 −0 simple/sample/output/process/debug_resolve_geoids.csv
+3 −0 simple/sample/output/process/debug_resolve_latlng.csv
+51 −0 simple/sample/output/process/debug_resolve_latlng_events.csv
+15 −0 simple/sample/output/process/debug_resolve_powerplants.csv
+3 −0 simple/sample/output/process/debug_resolve_s2cells.csv
+3 −0 simple/sample/output/process/debug_resolve_wikidataids.csv
+42 −0 simple/sample/output/process/report.json
+2 −0 simple/sample/output/tables/imports.csv
+100 −0 simple/sample/output/tables/observations.csv
+440 −0 simple/sample/output/tables/triples.csv
+22 −1 simple/stats/cache.py
+0 −4 simple/stats/config.py
+0 −69 simple/stats/data.py
+23 −75 simple/stats/db.py
+22 −13 simple/stats/observations_importer.py
+0 −10 simple/stats/schema_constants.py
+0 −48 simple/stats/util.py
+1 −6 simple/stats/variable_per_row_importer.py
+2 −2 simple/tests/stats/cache_test.py
+0 −11 simple/tests/stats/data_test.py
+9 −15 simple/tests/stats/db_test.py
+0 −3 simple/tests/stats/entities_importer_test.py
+15 −19 simple/tests/stats/observations_importer_test.py
+13 −0 simple/tests/stats/schema_test.py
+0 −1 simple/tests/stats/test_data/db/expected/observations.csv
+4 −4 simple/tests/stats/test_data/events_importer/expected/countryalpha3codes.observations.db.csv
+6 −6 simple/tests/stats/test_data/events_importer/expected/idcolumns.observations.db.csv
+7 −0 simple/tests/stats/test_data/observations_importer/expected/countryalpha3codes.db.csv
+0 −7 simple/tests/stats/test_data/observations_importer/expected/countryalpha3codes/observations.db.csv
+0 −7 simple/tests/stats/test_data/observations_importer/expected/obs_props/observations.db.csv
+0 −0 simple/tests/stats/test_data/observations_importer/input/countryalpha3codes.csv
+0 −7 simple/tests/stats/test_data/observations_importer/input/countryalpha3codes/config.json
+0 −12 simple/tests/stats/test_data/observations_importer/input/obs_props/config.json
+0 −5 simple/tests/stats/test_data/observations_importer/input/obs_props/input.csv
+31 −31 simple/tests/stats/test_data/runner/expected/config_driven/observations.db.csv
+31 −31 simple/tests/stats/test_data/runner/expected/config_with_wildcards/observations.db.csv
+5 −5 simple/tests/stats/test_data/runner/expected/generate_svg_hierarchy/observations.db.csv
+31 −31 simple/tests/stats/test_data/runner/expected/input_dir_driven/observations.db.csv
+9 −9 simple/tests/stats/test_data/runner/expected/remote_entity_types/observations.db.csv
+5 −5 simple/tests/stats/test_data/runner/expected/sv_nl_sentences/observations.db.csv
+5 −5 simple/tests/stats/test_data/runner/expected/topic_nl_sentences/observations.db.csv
+7 −0 simple/tests/stats/test_data/variable_per_row_importer/expected/custom_column_names.db.csv
+0 −7 simple/tests/stats/test_data/variable_per_row_importer/expected/custom_column_names/observations.db.csv
+7 −0 simple/tests/stats/test_data/variable_per_row_importer/expected/default_column_names.db.csv
+0 −7 simple/tests/stats/test_data/variable_per_row_importer/expected/default_column_names/observations.db.csv
+7 −0 simple/tests/stats/test_data/variable_per_row_importer/expected/namespace_prefixes.db.csv
+0 −7 simple/tests/stats/test_data/variable_per_row_importer/expected/namespace_prefixes/observations.db.csv
+0 −7 simple/tests/stats/test_data/variable_per_row_importer/expected/obs_props/observations.db.csv
+0 −0 simple/tests/stats/test_data/variable_per_row_importer/input/custom_column_names.csv
+0 −11 simple/tests/stats/test_data/variable_per_row_importer/input/custom_column_names/config.json
+0 −0 simple/tests/stats/test_data/variable_per_row_importer/input/default_column_names.csv
+0 −7 simple/tests/stats/test_data/variable_per_row_importer/input/default_column_names/config.json
+0 −0 simple/tests/stats/test_data/variable_per_row_importer/input/namespace_prefixes.csv
+0 −7 simple/tests/stats/test_data/variable_per_row_importer/input/namespace_prefixes/config.json
+0 −11 simple/tests/stats/test_data/variable_per_row_importer/input/obs_props/config.json
+0 −7 simple/tests/stats/test_data/variable_per_row_importer/input/obs_props/input.csv
+2 −3 simple/tests/stats/test_util.py
+0 −34 simple/tests/stats/util_test.py
+18 −20 simple/tests/stats/variable_per_row_importer_test.py
2 changes: 1 addition & 1 deletion mixer
Submodule mixer updated 141 files
4 changes: 4 additions & 0 deletions server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ def register_routes_common(app):
from server.routes.shared_api import stats as shared_stats
app.register_blueprint(shared_stats.bp)

from server.routes.shared_api.autocomplete import \
autocomplete as shared_autocomplete
app.register_blueprint(shared_autocomplete.bp)

from server.routes.shared_api import variable as shared_variable
app.register_blueprint(shared_variable.bp)

Expand Down
63 changes: 63 additions & 0 deletions server/routes/shared_api/autocomplete/autocomplete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json

from flask import Blueprint
from flask import request

from server.routes.shared_api.autocomplete import helpers
from server.routes.shared_api.place import findplacedcid

# TODO(gmechali): Add Stat Var search.

# Define blueprint
bp = Blueprint("autocomplete", __name__, url_prefix='/api')


@bp.route('/autocomplete')
def autocomplete():
    """Predicts locations for the user query via the Google Maps prediction API.

    Reads the `query` (user input) and `hl` (language) request arguments,
    asks the helpers module for Maps predictions on the subqueries of the
    input, and attaches a `dcid` to every prediction whose place id is found
    in the result of `findplacedcid`.

    Returns:
      Dict of the form {'predictions': [...]}. Each prediction carries 'name',
      'match_type', 'matched_query' and, when the place id was resolved,
      'dcid'. The list is empty when no `query` argument is supplied.
    """
    lang = request.args.get('hl')
    # Default to an empty string: a missing `query` argument would otherwise be
    # handed to find_queries as None and make its whitespace split raise.
    query = request.args.get('query', '')

    # Extract subqueries from the user input.
    queries = helpers.find_queries(query)

    # Send requests to the Google Maps Predictions API.
    prediction_responses = helpers.predict(queries, lang)

    place_ids = [prediction['place_id'] for prediction in prediction_responses]

    # Looked up by place id below, so the empty default must be a mapping.
    # findplacedcid is only called with a non-empty list because it answers an
    # empty one with an error tuple rather than a Response object.
    place_id_to_dcid = {}
    if place_ids:
        place_id_to_dcid = json.loads(findplacedcid(place_ids).data)

    final_predictions = []
    # TODO(gmechali): See if we can use typed dataclasses here.
    for prediction in prediction_responses:
        current_prediction = {
            'name': prediction['description'],
            'match_type': 'location_search',
            'matched_query': prediction['matched_query'],
        }
        place_id = prediction['place_id']
        if place_id in place_id_to_dcid:
            current_prediction['dcid'] = place_id_to_dcid[place_id]
        final_predictions.append(current_prediction)

    return {'predictions': final_predictions}
167 changes: 167 additions & 0 deletions server/routes/shared_api/autocomplete/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import re
from typing import Dict, List
from urllib.parse import urlencode

from flask import current_app
import requests

# Endpoint of the Google Maps Place Autocomplete web service. The URL-encoded
# request parameters are appended directly after the trailing '?'.
MAPS_API_URL = "https://maps.googleapis.com/maps/api/place/autocomplete/json?"
# Subqueries shorter than this many characters are not sent to the Maps API.
MIN_CHARACTERS_PER_QUERY = 3
# Upper bound on the number of subqueries extracted from one user input.
MAX_NUM_OF_QUERIES = 4
# Once this many unique predictions are collected, no further predictions are
# read and no further Maps requests are issued.
RESPONSE_COUNT_LIMIT = 10
# Number of predictions actually returned to the caller.
DISPLAYED_RESPONSE_COUNT_LIMIT = 5


def find_queries(user_query: str) -> List[str]:
    """Extracts the subqueries to send to the Google Maps Predictions API.

    The subqueries are the trailing word sequences of the user input: the last
    word, the last two words, and so on. For "population of calif" this gives
    "population of calif", "of calif" and "calif".

    Args:
      user_query: The entire text typed by the user. None or a blank string
        yields no subqueries.

    Returns:
      List[str]: the subqueries to execute, longest first. At most
      MAX_NUM_OF_QUERIES entries, each at least MIN_CHARACTERS_PER_QUERY
      characters long.
    """
    # str.split() without a separator splits on runs of whitespace and drops
    # the empty strings that leading/trailing whitespace would otherwise
    # produce. An empty leading word used to create a spurious extra subquery
    # such as " calif" next to "calif".
    words_in_query = (user_query or "").split()

    queries = []
    cumulative = ""
    for word in reversed(words_in_query):
        # Extract at most MAX_NUM_OF_QUERIES subqueries.
        if len(queries) >= MAX_NUM_OF_QUERIES:
            break

        # Prepend the current word for the next subquery.
        cumulative = word + " " + cumulative if cumulative else word

        # Only send queries MIN_CHARACTERS_PER_QUERY characters or longer.
        if len(cumulative) >= MIN_CHARACTERS_PER_QUERY:
            queries.append(cumulative)

    # Start by running the longer queries.
    queries.reverse()
    return queries


def execute_maps_request(query: str,
                         language: str,
                         timeout: float = 10.0) -> Dict:
    """Executes a request to the Google Maps Prediction API for a given query.

    Args:
      query: The text to get place predictions for.
      language: Language code for the results. When empty or None the
        parameter is left out of the request instead of being sent as the
        literal string "None".
      timeout: Seconds to wait for the Maps API before giving up, so that an
        unresponsive upstream cannot block the request indefinitely.

    Returns:
      Json object containing the google maps prediction response.

    Raises:
      requests.exceptions.Timeout: if the Maps API does not answer in time.
    """
    request_obj = {
        'types': "(regions)",
        'key': current_app.config['MAPS_API_KEY'],
        'input': query,
    }
    # urlencode() stringifies values, so a None language would be transmitted
    # as "language=None"; only include it when one was actually provided.
    if language:
        request_obj['language'] = language

    response = requests.post(MAPS_API_URL + urlencode(request_obj),
                             json={},
                             timeout=timeout)
    return json.loads(response.text)


def get_match_score(name: str, match_string: str) -> float:
    """Computes a 'score' based on the matching words in two strings.

    Both strings are split on whitespace and their words are compared
    case-insensitively. Each word of match_string is checked against the
    words of name in order.

    Args:
      name: The text to match against.
      match_string: The candidate text whose words are looked up in name.

    Returns:
      Float score."""
    rgx = re.compile(r'\s+')
    words_in_name = re.split(rgx, name)
    words_in_str1 = re.split(rgx, match_string)

    score = 0
    for str1_word in words_in_str1:
        str1_word = str1_word.lower()
        for name_word in words_in_name:
            name_word = name_word.lower()
            if str1_word == name_word:
                # Exact word match: full credit, stop scanning name.
                score += 1
                break
            elif str1_word in name_word:
                # The word is a substring of a name word: half credit.
                score += 0.5
                break
            else:
                # NOTE(review): this else is read as the last arm of the
                # if/elif chain, i.e. one penalty point per name word that
                # does not match. The source this was taken from had lost its
                # indentation, so it may instead belong to the inner for loop
                # (for-else) - confirm against the original file.
                score -= 1

    return score


def find_best_match(name: str, string1: str, string2: str) -> str:
    """Picks whichever of string1 and string2 matches name better.

    The comparison relies on the simple word-based score computed by
    get_match_score. It is used to choose the best "matched_query" when the
    same response is found for several subqueries, for example:
      name: "California, USA"
      string1: "Of Calif"
      string2: "Calif"
    should return "Calif" as the better match.

    Returns:
      string2 when its score is strictly higher, otherwise string1.
    """
    first_score = get_match_score(name, string1)
    second_score = get_match_score(name, string2)

    # Ties are resolved in favour of string1.
    return string2 if second_score > first_score else string1


def predict(queries: List[str], lang: str) -> List[Dict]:
    """Runs the Maps prediction requests and merges their results.

    Queries are executed in the given order. Predictions are de-duplicated by
    place id, collection stops once RESPONSE_COUNT_LIMIT unique predictions
    have been gathered, and only the first DISPLAYED_RESPONSE_COUNT_LIMIT of
    them are returned. Every returned prediction gets a 'matched_query' entry
    naming the query that matched it best.

    Returns:
      List of json objects containing predictions from all queries issued
      after deduping.
    """
    unique_predictions = []
    seen_place_ids = set()
    # place id -> best matching query among the repeated sightings of a place.
    best_duplicate_query = {}

    for query in queries:
        for pred in execute_maps_request(query, lang)['predictions']:
            pred['matched_query'] = query
            place_id = pred['place_id']

            if place_id not in seen_place_ids:
                seen_place_ids.add(place_id)
                unique_predictions.append(pred)
            elif place_id in best_duplicate_query:
                # Seen as a duplicate before: keep the better of the two.
                best_duplicate_query[place_id] = find_best_match(
                    pred['description'], best_duplicate_query[place_id], query)
            else:
                # First duplicate of this place: remember its query.
                best_duplicate_query[place_id] = query

            if len(unique_predictions) >= RESPONSE_COUNT_LIMIT:
                # Enough results, do not look at the remaining predictions.
                break

        if len(unique_predictions) >= RESPONSE_COUNT_LIMIT:
            # Enough results, do not issue further requests to the maps api.
            break

    displayed = unique_predictions[:DISPLAYED_RESPONSE_COUNT_LIMIT]
    for pred in displayed:
        place_id = pred['place_id']
        if place_id in best_duplicate_query:
            # Compare the first sighting's query with the best duplicate query.
            pred["matched_query"] = find_best_match(
                pred['description'], pred['matched_query'],
                best_duplicate_query[place_id])

    return displayed
19 changes: 11 additions & 8 deletions server/routes/shared_api/place.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,14 +676,7 @@ def descendent_names():
return Response(json.dumps(result), 200, mimetype='application/json')


@bp.route('/placeid2dcid')
def placeid2dcid():
"""API endpoint to get dcid based on place id.
This is to use together with the Google Maps Autocomplete API:
https://developers.google.com/places/web-service/autocomplete.
"""
place_ids = request.args.getlist("placeIds")
def findplacedcid(place_ids):
if not place_ids:
return 'error: must provide `placeIds` field', 400
resp = fetch.resolve_id(place_ids, "placeId", "dcid")
Expand All @@ -697,6 +690,16 @@ def placeid2dcid():
return Response(json.dumps(result), 200, mimetype='application/json')


@bp.route('/placeid2dcid')
def placeid2dcid():
    """API endpoint to get dcid based on place id.

    Forwards every `placeIds` request argument to findplacedcid and returns
    its result unchanged.

    This is to use together with the Google Maps Autocomplete API:
    https://developers.google.com/places/web-service/autocomplete.
    """
    return findplacedcid(request.args.getlist("placeIds"))


@bp.route('/coords2places')
def coords2places():
"""API endpoint to get place name and dcid based on latitude/longitude
Expand Down
67 changes: 67 additions & 0 deletions server/tests/routes/api/autocomplete_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import unittest
from unittest.mock import patch

import server.tests.routes.api.mock_data as mock_data
from web_app import app


class TestAutocomplete(unittest.TestCase):
    """Tests for the /api/autocomplete endpoint.

    The Maps prediction helper and the place id resolution call are patched,
    so no network access is needed.
    """

    def run_autocomplete_query(self, query: str, lang: str):
        """Issues GET /api/autocomplete with the given query and language.

        The parameters are passed via `query_string` so they are really sent
        (and URL-encoded). The previous hard-coded URL used JavaScript
        template syntax inside a plain string and therefore always sent the
        literal placeholder text instead of the arguments.
        """
        return app.test_client().get('/api/autocomplete',
                                     query_string={
                                         'query': query,
                                         'hl': lang
                                     })

    lang = 'en'

    @patch('server.routes.shared_api.autocomplete.helpers.predict')
    @patch('server.routes.shared_api.place.fetch.resolve_id')
    def test_empty_query(self, mock_resolve_ids, mock_predict):
        """An empty query yields a 200 response without predictions."""
        mock_resolve_ids.return_value = []
        mock_predict.return_value = []

        response = self.run_autocomplete_query('', 'en')
        self.assertEqual(response.status_code, 200)

        response_dict = json.loads(response.data.decode("utf-8"))
        self.assertEqual(len(response_dict["predictions"]), 0)

    @patch('server.routes.shared_api.autocomplete.helpers.predict')
    @patch('server.routes.shared_api.place.fetch.resolve_id')
    def test_single_word_query(self, mock_resolve_ids, mock_predict):
        """A single word query returns the five mocked predictions."""
        mock_resolve_ids.return_value = mock_data.RESOLVE_IDS_VALUES
        mock_predict.return_value = mock_data.MAPS_PREDICTIONS_VALUES

        response = self.run_autocomplete_query('Calif', 'en')

        self.assertEqual(response.status_code, 200)

        response_dict = json.loads(response.data.decode("utf-8"))
        self.assertEqual(len(response_dict["predictions"]), 5)
40 changes: 40 additions & 0 deletions server/tests/routes/api/mock_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,3 +415,43 @@
}
}
}

# Canned place id resolution result used by the autocomplete tests as the
# return value of the patched fetch.resolve_id. The keys are the place ids
# listed in MAPS_PREDICTIONS_VALUES.
RESOLVE_IDS_VALUES = {
    'ChIJPV4oX_65j4ARVW8IJ6IJUYs': [{'dcid': 'geoId/4210768'}],
    'ChIJPV4oX_65j4ARVW8IJ6IJUYs1': [{'dcid': 'geoId/4210769'}],
    'ChIJPV4oX_65j4ARVW8IJ6IJUYs2': [{'dcid': 'geoId/4210770'}],
    'ChIJPV4oX_65j4ARVW8IJ6IJUYs3': [{'dcid': 'geoId/4210771'}],
    'ChIJPV4oX_65j4ARVW8IJ6IJUYs4': [{'dcid': 'geoId/4210772'}],
}

# Canned prediction list used by the autocomplete tests as the return value of
# the patched helpers.predict: five places, all matched by the query 'calif'.
MAPS_PREDICTIONS_VALUES = [
    dict(description='California, USA',
         place_id='ChIJPV4oX_65j4ARVW8IJ6IJUYs',
         matched_query='calif'),
    dict(description='Califon, NJ, USA',
         place_id='ChIJPV4oX_65j4ARVW8IJ6IJUYs1',
         matched_query='calif'),
    dict(description='California, MD, USA',
         place_id='ChIJPV4oX_65j4ARVW8IJ6IJUYs2',
         matched_query='calif'),
    dict(description='California City, CA, USA',
         place_id='ChIJPV4oX_65j4ARVW8IJ6IJUYs3',
         matched_query='calif'),
    dict(description='California, PA, USA',
         place_id='ChIJPV4oX_65j4ARVW8IJ6IJUYs4',
         matched_query='calif'),
]
Loading

0 comments on commit 8c74151

Please sign in to comment.