From 357655f51706e5210240c2ae193ab5a2eaab9fea Mon Sep 17 00:00:00 2001 From: NNicklas Boserup Date: Thu, 2 Sep 2021 11:05:44 +0200 Subject: [PATCH 1/5] Implement unique valid values endpoint --- terracotta/drivers/base.py | 4 ++ terracotta/drivers/mysql.py | 29 +++++++++++++ terracotta/drivers/sqlite.py | 29 +++++++++++++ terracotta/handlers/valid_values.py | 21 ++++++++++ terracotta/server/flask_api.py | 2 + terracotta/server/valid_values.py | 63 +++++++++++++++++++++++++++++ 6 files changed, 148 insertions(+) create mode 100644 terracotta/handlers/valid_values.py create mode 100644 terracotta/server/valid_values.py diff --git a/terracotta/drivers/base.py b/terracotta/drivers/base.py index 37cd738b..188635db 100644 --- a/terracotta/drivers/base.py +++ b/terracotta/drivers/base.py @@ -81,6 +81,10 @@ def get_keys(self) -> OrderedDict: """ pass + @abstractmethod + def get_valid_values(self, where: Mapping[str, Union[str, List[str]]]) -> Dict[str, List[str]]: + pass + @abstractmethod def get_datasets(self, where: Mapping[str, Union[str, List[str]]] = None, page: int = 0, limit: int = None) -> Dict[Tuple[str, ...], Any]: diff --git a/terracotta/drivers/mysql.py b/terracotta/drivers/mysql.py index 18ff9ecd..211b9cc5 100644 --- a/terracotta/drivers/mysql.py +++ b/terracotta/drivers/mysql.py @@ -332,6 +332,35 @@ def _get_keys(self) -> OrderedDict: return out + @requires_connection + @convert_exceptions('Could not retrieve valid key values') + def get_valid_values(self, where: Mapping[str, Union[str, List[str]]]) -> Dict[str, List[str]]: + cursor = self._cursor + + if not all(key in self.key_names for key in where.keys()): + raise exceptions.InvalidKeyError('Encountered unrecognized keys in where clause') + + conditions = [] + values = [] + for key, value in where.items(): + if isinstance(value, str): + value = [value] + values.extend(value) + conditions.append(' OR '.join([f'{key}=%s'] * len(value))) + where_fragment = ' AND '.join([f'({condition})' for condition in conditions]) + where_fragment = ' WHERE ' + where_fragment if where_fragment else '' + + valid_values = {key: [val] if isinstance(val, str) else val for key, val in where.items()} + + for key in set(self.key_names) - set(where.keys()): + cursor.execute( + f'SELECT DISTINCT {key} FROM datasets {where_fragment}', + values + ) + valid_values[key] = list([row[key] for row in cursor.fetchall()]) + + return valid_values + @trace('get_datasets') @requires_connection @convert_exceptions('Could not retrieve datasets') diff --git a/terracotta/drivers/sqlite.py b/terracotta/drivers/sqlite.py index 0cfd68cc..9253e821 100644 --- a/terracotta/drivers/sqlite.py +++ b/terracotta/drivers/sqlite.py @@ -230,6 +230,35 @@ def get_keys(self) -> OrderedDict: out[row['key']] = row['description'] return out + @requires_connection + @convert_exceptions('Could not retrieve valid key values') + def get_valid_values(self, where: Mapping[str, Union[str, List[str]]]) -> Dict[str, List[str]]: + conn = self._connection + + if not all(key in self.key_names for key in where.keys()): + raise exceptions.InvalidKeyError('Encountered unrecognized keys in where clause') + + conditions = [] + values = [] + for key, value in where.items(): + if isinstance(value, str): + value = [value] + values.extend(value) + conditions.append(' OR '.join([f'{key}=?'] * len(value))) + where_fragment = ' AND '.join([f'({condition})' for condition in conditions]) + where_fragment = ' WHERE ' + where_fragment if where_fragment else '' + + valid_values = {key: [val] if isinstance(val, str) else val for key, val in where.items()} + + for key in set(self.key_names) - set(where.keys()): + rows = conn.execute( + f'SELECT DISTINCT {key} FROM datasets {where_fragment}', + values + ) + valid_values[key] = list([row[key] for row in rows]) + + return valid_values + @trace('get_datasets') @requires_connection @convert_exceptions('Could not retrieve datasets') diff --git a/terracotta/handlers/valid_values.py b/terracotta/handlers/valid_values.py new file mode 100644 index 00000000..3807580e --- /dev/null +++ b/terracotta/handlers/valid_values.py @@ -0,0 +1,21 @@ +"""handlers/valid_values.py + +Handle /valid_values API endpoint. +""" + +from typing import Dict, Mapping, List, Union + +from terracotta import get_settings, get_driver +from terracotta.profile import trace + + +@trace('valid_values_handler') +def valid_values(some_keys: Mapping[str, Union[str, List[str]]] = None) -> Dict[str, List[str]]: + """List all available valid values""" + settings = get_settings() + driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER) + + with driver.connect(): + valid_values = driver.get_valid_values(some_keys or {}) + + return valid_values diff --git a/terracotta/server/flask_api.py b/terracotta/server/flask_api.py index 94311258..6c4195c3 100644 --- a/terracotta/server/flask_api.py +++ b/terracotta/server/flask_api.py @@ -69,6 +69,7 @@ def create_app(debug: bool = False, profile: bool = False) -> Flask: from terracotta import get_settings import terracotta.server.datasets import terracotta.server.keys + import terracotta.server.valid_values import terracotta.server.colormap import terracotta.server.metadata import terracotta.server.rgb @@ -97,6 +98,7 @@ def create_app(debug: bool = False, profile: bool = False) -> Flask: with new_app.test_request_context(): SPEC.path(view=terracotta.server.datasets.get_datasets) SPEC.path(view=terracotta.server.keys.get_keys) + SPEC.path(view=terracotta.server.valid_values.get_valid_values) SPEC.path(view=terracotta.server.colormap.get_colormap) SPEC.path(view=terracotta.server.metadata.get_metadata) SPEC.path(view=terracotta.server.rgb.get_rgb) diff --git a/terracotta/server/valid_values.py b/terracotta/server/valid_values.py new file mode 100644 index 00000000..c33442b5 --- /dev/null +++ b/terracotta/server/valid_values.py @@ -0,0 +1,63 @@ +"""server/valid_values.py + +Flask route to handle /valid_values calls. +""" + +from typing import Any, Dict, List, Union +from flask import request, jsonify, Response +from marshmallow import Schema, fields, INCLUDE, post_load +import re + +from terracotta.server.flask_api import METADATA_API + + +class KeyValueOptionSchema(Schema): + class Meta: + unknown = INCLUDE + + # placeholder values to document keys + key1 = fields.String(example='value1', description='Value of key1', dump_only=True) + key2 = fields.String(example='value2', description='Value of key2', dump_only=True) + + @post_load + def list_items(self, data: Dict[str, Any], **kwargs: Any) -> Dict[str, Union[str, List[str]]]: + # Create lists of values supplied as stringified lists + for key, value in data.items(): + if isinstance(value, str) and re.match(r'^\[.*\]$', value): + data[key] = value[1:-1].split(',') + return data + + +@METADATA_API.route('/valid_values', methods=['GET']) +def get_valid_values() -> Response: + """Get all valid values combinations (possibly when given a value for some keys) + --- + get: + summary: /datasets + description: + Get keys of all available datasets that match given key constraint. + Constraints may be combined freely. Returns all known datasets if no query parameters + are given. + parameters: + - in: query + schema: DatasetOptionSchema + responses: + 200: + description: All available key combinations + schema: + type: array + items: DatasetSchema + 400: + description: Query parameters contain unrecognized keys + """ + from terracotta.handlers.valid_values import valid_values + option_schema = KeyValueOptionSchema() + options = option_schema.load(request.args) + + keys = options or None + + payload = { + 'valid_values': valid_values(keys) + } + + return jsonify(payload) From da319a6210db2bff7a77bb033cbb3db69ba364c6 Mon Sep 17 00:00:00 2001 From: NNicklas Boserup Date: Thu, 2 Sep 2021 11:06:16 +0200 Subject: [PATCH 2/5] Add tests for valid_values --- tests/drivers/test_raster_drivers.py | 34 ++++++++++++++++++++++++ tests/handlers/test_valid_values.py | 10 +++++++ tests/server/test_flask_api.py | 39 ++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 tests/handlers/test_valid_values.py diff --git a/tests/drivers/test_raster_drivers.py b/tests/drivers/test_raster_drivers.py index c3abaac2..b853d879 100644 --- a/tests/drivers/test_raster_drivers.py +++ b/tests/drivers/test_raster_drivers.py @@ -46,6 +46,40 @@ def test_path_override(driver_path, provider, raster_file): assert bogus_path in exc.value +@pytest.mark.parametrize('provider', DRIVERS) +def test_valid_values(driver_path, provider, raster_file): + from terracotta import drivers, exceptions + db = drivers.get_driver(driver_path, provider=provider) + keys = ('some', 'keynames') + + db.create(keys) + db.insert(['some', 'value'], str(raster_file)) + db.insert(['some', 'other_value'], str(raster_file)) + db.insert({'some': 'a', 'keynames': 'third_value'}, str(raster_file)) + + data = db.get_valid_values({}) + assert len(data) == 2 + assert len(data['some']) == 2 + assert len(data['keynames']) == 3 + + data = db.get_valid_values(where=dict(some='some')) + assert len(data) == 2 + assert data['some'] == ['some'] + assert set(data['keynames']) == set(['value', 'other_value']) + + data = db.get_valid_values(where=dict(some='some', keynames='value')) + assert set(data.keys()) == set(['some', 'keynames']) + assert data['some'] == ['some'] + assert data['keynames'] == ['value'] + + data = db.get_valid_values(where=dict(some='unknown')) + assert data == {'some': ['unknown'], 'keynames': []} + + with pytest.raises(exceptions.InvalidKeyError) as exc: + db.get_valid_values(where=dict(unknown='foo')) + assert 'unrecognized keys' in str(exc.value) + + @pytest.mark.parametrize('provider', DRIVERS) def test_where(driver_path, provider, raster_file): from terracotta import drivers, exceptions diff --git a/tests/handlers/test_valid_values.py b/tests/handlers/test_valid_values.py new file mode 100644 index 00000000..1017a575 --- /dev/null +++ b/tests/handlers/test_valid_values.py @@ -0,0 +1,10 @@ + +def test_valid_values_handler(testdb, use_testdb): + import terracotta + from terracotta.handlers import valid_values + + driver = terracotta.get_driver(str(testdb)) + + handler_response = valid_values.valid_values({}) + assert handler_response + assert set(handler_response.keys()) == set(driver.key_names) diff --git a/tests/server/test_flask_api.py b/tests/server/test_flask_api.py index 0ee0f9b6..e337a430 100644 --- a/tests/server/test_flask_api.py +++ b/tests/server/test_flask_api.py @@ -44,6 +44,45 @@ def test_get_metadata_nonexisting(client, use_testdb): assert rv.status_code == 404 +def test_get_valid_values(client, use_testdb): + rv = client.get('/valid_values') + assert rv.status_code == 200 + valid_values = json.loads(rv.data, object_pairs_hook=OrderedDict)['valid_values'] + assert len(valid_values) == 3 + assert len(valid_values['key1']) == 2 + assert 'val11' in valid_values['key1'] and 'val21' in valid_values['key1'] + assert valid_values['akey'] == ['x'] + + +def test_get_valid_values_selective(client, use_testdb): + rv = client.get('/valid_values?key1=val21') + assert rv.status_code == 200 + valid_values = json.loads(rv.data, object_pairs_hook=OrderedDict)['valid_values'] + assert len(valid_values) == 3 + assert valid_values['key1'] == ['val21'] + assert len(valid_values['key2']) == 3 + assert 'val22' in valid_values['key2'] and 'val23' in valid_values['key2'] + assert valid_values['akey'] == ['x'] + + rv = client.get('/valid_values?key1=[val21]') + assert rv.status_code == 200 + valid_values = json.loads(rv.data, object_pairs_hook=OrderedDict)['valid_values'] + assert len(valid_values) == 3 + assert valid_values['key1'] == ['val21'] + assert len(valid_values['key2']) == 3 + assert 'val22' in valid_values['key2'] and 'val23' in valid_values['key2'] + assert valid_values['akey'] == ['x'] + + rv = client.get('/valid_values?key1=val21&key2=[val23,val24]') + assert rv.status_code == 200 + valid_values = json.loads(rv.data, object_pairs_hook=OrderedDict)['valid_values'] + assert len(valid_values) == 3 + assert valid_values['key1'] == ['val21'] + assert len(valid_values['key2']) == 2 + assert 'val23' in valid_values['key2'] and 'val24' in valid_values['key2'] + assert valid_values['akey'] == ['x'] + + def test_get_datasets(client, use_testdb): rv = client.get('/datasets') assert rv.status_code == 200 From e68f3106fd4c2eed66714ae341d2187597e349a4 Mon Sep 17 00:00:00 2001 From: NNicklas Boserup Date: Thu, 2 Sep 2021 11:31:01 +0200 Subject: [PATCH 3/5] Use schemas and update documentation --- terracotta/server/valid_values.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/terracotta/server/valid_values.py b/terracotta/server/valid_values.py index c33442b5..e4d92f03 100644 --- a/terracotta/server/valid_values.py +++ b/terracotta/server/valid_values.py @@ -28,6 +28,15 @@ def list_items(self, data: Dict[str, Any], **kwargs: Any) -> Dict[str, Union[str return data +class KeyValueSchema(Schema): + valid_values = fields.Dict( + key=fields.String(example='key1'), + values=fields.List(fields.String(example='value1')), + required=True, + description='Array containing all available key combinations' + ) + + @METADATA_API.route('/valid_values', methods=['GET']) def get_valid_values() -> Response: """Get all valid values combinations (possibly when given a value for some keys) @@ -35,18 +44,18 @@ def get_valid_values() -> Response: get: summary: /datasets description: - Get keys of all available datasets that match given key constraint. - Constraints may be combined freely. Returns all known datasets if no query parameters + Get uniwue key values of all available datasets that match given key constraint. + Constraints may be combined freely. Returns all valid key values if no query parameters are given. parameters: - in: query - schema: DatasetOptionSchema + schema: KeyValueOptionSchema responses: 200: description: All available key combinations schema: type: array - items: DatasetSchema + items: KeyValueSchema 400: description: Query parameters contain unrecognized keys """ @@ -60,4 +69,5 @@ def get_valid_values() -> Response: 'valid_values': valid_values(keys) } - return jsonify(payload) + schema = KeyValueSchema() + return jsonify(schema.load(payload)) From 1d54c026fb3a72796bbe5d89c3f3a8a67adb52b0 Mon Sep 17 00:00:00 2001 From: NNicklas Boserup Date: Thu, 2 Sep 2021 11:32:11 +0200 Subject: [PATCH 4/5] Correct documentation --- terracotta/server/valid_values.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/terracotta/server/valid_values.py b/terracotta/server/valid_values.py index e4d92f03..b95fb30b 100644 --- a/terracotta/server/valid_values.py +++ b/terracotta/server/valid_values.py @@ -54,8 +54,7 @@ def get_valid_values() -> Response: 200: description: All available key combinations schema: - type: array - items: KeyValueSchema + type: KeyValueSchema 400: description: Query parameters contain unrecognized keys """ From 5c930dc02dd83a42d957150c5447cc4be6b39e3c Mon Sep 17 00:00:00 2001 From: NNicklas Boserup Date: Thu, 2 Sep 2021 11:33:07 +0200 Subject: [PATCH 5/5] Fix spelling in documentation --- terracotta/server/valid_values.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terracotta/server/valid_values.py b/terracotta/server/valid_values.py index b95fb30b..f19e4b2a 100644 --- a/terracotta/server/valid_values.py +++ b/terracotta/server/valid_values.py @@ -44,7 +44,7 @@ def get_valid_values() -> Response: get: summary: /datasets description: - Get uniwue key values of all available datasets that match given key constraint. + Get unique key values of all available datasets that match given key constraint. Constraints may be combined freely. Returns all valid key values if no query parameters are given. parameters: @@ -52,7 +52,7 @@ def get_valid_values() -> Response: schema: KeyValueOptionSchema responses: 200: - description: All available key combinations + description: All available key value combinations schema: type: KeyValueSchema 400: