Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trino experiments #864

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ohsome_quality_api/api/request_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Field,
field_validator,
)
from pydantic.json_schema import SkipJsonSchema

from ohsome_quality_api.attributes.definitions import AttributeEnum
from ohsome_quality_api.topics.definitions import TopicEnum, get_topic_preset
Expand Down Expand Up @@ -68,6 +69,8 @@ class IndicatorRequest(BaseBpolys):
alias="topic",
)
include_figure: bool = True
# feature flag to use SQL queries against Trino instead of ohsome API
trino: SkipJsonSchema[bool] = False

@field_validator("topic")
@classmethod
Expand Down
1 change: 1 addition & 0 deletions ohsome_quality_api/api/response_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class BaseResponse(BaseConfig):
attribution: dict[str, str] = {"url": ATTRIBUTION_URL}


# TODO: add sql_filter
class TopicMetadata(BaseConfig):
name: str
description: str
Expand Down
70 changes: 56 additions & 14 deletions ohsome_quality_api/indicators/attribute_completeness/indicator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json
import logging
import os
from string import Template

import dateutil.parser
import dateutil
import plotly.graph_objects as go
from geojson import Feature

Expand All @@ -12,6 +14,10 @@
from ohsome_quality_api.indicators.base import BaseIndicator
from ohsome_quality_api.ohsome import client as ohsome_client
from ohsome_quality_api.topics.models import BaseTopic as Topic
from ohsome_quality_api.trino import client as trino_client
from ohsome_quality_api.utils.helper_geo import get_bounding_box

WORKING_DIR = os.path.dirname(os.path.abspath(__file__))


class AttributeCompleteness(BaseIndicator):
Expand Down Expand Up @@ -46,8 +52,9 @@ def __init__(
attribute_keys: list[str] | None = None,
attribute_filter: str | None = None,
attribute_title: str | None = None,
trino: bool = False, # Feature flag to use SQL instead of ohsome API queries
) -> None:
super().__init__(topic=topic, feature=feature)
super().__init__(topic=topic, feature=feature, trino=trino)
self.threshold_yellow = 0.75
self.threshold_red = 0.25
self.attribute_keys = attribute_keys
Expand All @@ -56,7 +63,9 @@ def __init__(
self.absolute_value_1 = None
self.absolute_value_2 = None
self.description = None
if self.attribute_keys:
if self.trino:
self.attribute_filter = attribute_filter
elif self.attribute_keys:
self.attribute_filter = build_attribute_filter(
self.attribute_keys,
self.topic.key,
Expand All @@ -74,17 +83,50 @@ def __init__(
)

async def preprocess(self) -> None:
# Get attribute filter
response = await ohsome_client.query(
self.topic,
self.feature,
attribute_filter=self.attribute_filter,
)
timestamp = response["ratioResult"][0]["timestamp"]
self.result.timestamp_osm = dateutil.parser.isoparse(timestamp)
self.result.value = response["ratioResult"][0]["ratio"]
self.absolute_value_1 = response["ratioResult"][0]["value"]
self.absolute_value_2 = response["ratioResult"][0]["value2"]
if self.trino:
filter = self.topic.sql_filter
file_path = os.path.join(WORKING_DIR, "query.sql")
with open(file_path, "r") as file:
template = file.read()

bounding_box = get_bounding_box(self.feature)
geometry = json.dumps(self.feature["geometry"])

sql = template.format(
bounding_box=bounding_box,
geometry=geometry,
filter=filter,
)
query = await trino_client.query(sql)
results = await trino_client.fetch(query)
# TODO: Check for None
self.absolute_value_1 = results[0][0]

filter = self.topic.sql_filter + " AND " + self.attribute_filter
sql = template.format(
bounding_box=bounding_box,
geometry=geometry,
filter=filter,
)
query = await trino_client.query(sql)
results = await trino_client.fetch(query)
self.absolute_value_2 = results[0][0]

# timestamp = response["ratioResult"][0]["timestamp"]
# self.result.timestamp_osm = dateutil.parser.isoparse(timestamp)
self.result.value = self.absolute_value_2 / self.absolute_value_1
else:
# Get attribute filter
response = await ohsome_client.query(
self.topic,
self.feature,
attribute_filter=self.attribute_filter,
)
timestamp = response["ratioResult"][0]["timestamp"]
self.result.timestamp_osm = dateutil.parser.isoparse(timestamp)
self.result.value = response["ratioResult"][0]["ratio"]
self.absolute_value_1 = response["ratioResult"][0]["value"]
self.absolute_value_2 = response["ratioResult"][0]["value2"]

def calculate(self) -> None:
# result (ratio) can be NaN if no features matching filter1
Expand Down
20 changes: 20 additions & 0 deletions ohsome_quality_api/indicators/attribute_completeness/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
WITH bpoly AS (
SELECT
to_geometry (from_geojson_geometry ('{geometry}')) AS geometry
)
SELECT
Sum(
CASE WHEN ST_Within (ST_GeometryFromText (contributions.geometry), bpoly.geometry) THEN
length
ELSE
Cast(ST_Length (ST_Intersection (ST_GeometryFromText (contributions.geometry), bpoly.geometry)) AS integer)
END) AS length
FROM
bpoly,
sotm2024_iceberg.geo_sort.contributions AS contributions
WHERE
status = 'latest'
AND ST_Intersects (bpoly.geometry, ST_GeometryFromText (contributions.geometry))
AND {filter}
AND (bbox.xmax <= {bounding_box.lon_max} AND bbox.xmin >= {bounding_box.lon_min})
AND (bbox.ymax <= {bounding_box.lat_max} AND bbox.ymin >= {bounding_box.lat_min})
7 changes: 6 additions & 1 deletion ohsome_quality_api/indicators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(
self,
topic: Topic,
feature: Feature,
trino: bool,
) -> None:
self.metadata: IndicatorMetadata = get_indicator(
camel_to_hyphen(type(self).__name__)
Expand All @@ -39,6 +40,7 @@ def __init__(
self.result: Result = Result(
description=self.templates.label_description["undefined"],
)
self.trino: bool = False
self._get_default_figure()

def as_dict(self, include_data: bool = False, exclude_label: bool = False) -> dict:
Expand All @@ -50,7 +52,10 @@ def as_dict(self, include_data: bool = False, exclude_label: bool = False) -> di
"metadata": self.metadata.model_dump(by_alias=True),
"topic": self.topic.model_dump(
by_alias=True,
exclude={"ratio_filter"},
exclude={
"ratio_filter",
"sql_filter",
}, # TODO: do not exclude SQL filter
),
"result": result,
**self.feature.properties,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@ def __init__(
self,
topic: BaseTopic,
feature: Feature,
trino: bool = False,
) -> None:
super().__init__(
topic=topic,
feature=feature,
)
super().__init__(topic=topic, feature=feature, trino=trino)
# The result is the ratio of area within coverage (between 0-1) or an empty list
#
# TODO: Evaluate thresholds
Expand Down
3 changes: 2 additions & 1 deletion ohsome_quality_api/indicators/currentness/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ def __init__(
self,
topic: Topic,
feature: Feature,
trino: bool = False,
) -> None:
super().__init__(topic=topic, feature=feature)
super().__init__(topic=topic, feature=feature, trino=trino)
# thresholds for binning in months
self.up_to_date, self.out_of_date, self.th_source = load_thresholds(
self.topic.key
Expand Down
4 changes: 2 additions & 2 deletions ohsome_quality_api/indicators/density/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@


class Density(BaseIndicator):
def __init__(self, topic: Topic, feature: Feature) -> None:
super().__init__(topic=topic, feature=feature)
def __init__(self, topic: Topic, feature: Feature, trino: bool = False) -> None:
super().__init__(topic=topic, feature=feature, trino=trino)
self.threshold_yellow = 30
self.threshold_red = 10
self.area_sqkm = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ def __init__(
self,
topic: BaseTopic,
feature: Feature,
trino: bool = False,
time_range: str = "2008-01-01//P1M",
) -> None:
super().__init__(topic=topic, feature=feature)
super().__init__(topic=topic, feature=feature, trino=trino)

self.time_range: str = time_range

Expand Down
4 changes: 2 additions & 2 deletions ohsome_quality_api/indicators/minimal/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@


class Minimal(BaseIndicator):
def __init__(self, topic: Topic, feature: Feature) -> None:
super().__init__(topic=topic, feature=feature)
def __init__(self, topic: Topic, feature: Feature, trino: bool = False) -> None:
super().__init__(topic=topic, feature=feature, trino=trino)
self.count = 0

async def preprocess(self) -> None:
Expand Down
7 changes: 2 additions & 5 deletions ohsome_quality_api/indicators/road_comparison/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,11 @@ class RoadComparison(BaseIndicator):
to the total length of reference roads.
"""

def __init__(
self,
topic: BaseTopic,
feature: Feature,
) -> None:
def __init__(self, topic: BaseTopic, feature: Feature, trino: bool = False) -> None:
super().__init__(
topic=topic,
feature=feature,
trino=trino,
)
# TODO: Evaluate thresholds
self.th_high = 0.85 # Above or equal to this value label should be green
Expand Down
4 changes: 4 additions & 0 deletions ohsome_quality_api/oqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ async def create_indicator(
bpolys: FeatureCollection,
topic: TopicData | TopicDefinition,
include_figure: bool = True,
trino: bool = False,
*args,
**kwargs,
) -> list[Indicator]:
Expand Down Expand Up @@ -46,6 +47,7 @@ async def create_indicator(
feature,
topic,
include_figure,
trino,
*args,
**kwargs,
)
Expand All @@ -58,6 +60,7 @@ async def _create_indicator(
feature: Feature,
topic: Topic,
include_figure: bool = True,
trino: bool = False,
*args,
**kwargs,
) -> Indicator:
Expand All @@ -71,6 +74,7 @@ async def _create_indicator(
indicator = indicator_class(
topic,
feature,
trino=trino,
*args,
**kwargs,
)
Expand Down
2 changes: 1 addition & 1 deletion ohsome_quality_api/topics/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def load_topic_presets() -> dict[str, TopicDefinition]:
raw = yaml.safe_load(f)
topics = {}
for k, v in raw.items():
v["filter"] = v.pop("filter")
v["filter"] = v.pop("filter") # TODO
v["key"] = k
topics[k] = TopicDefinition(**v)
return topics
Expand Down
1 change: 1 addition & 0 deletions ohsome_quality_api/topics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class TopicDefinition(BaseTopic):
projects: list[ProjectEnum]
source: str | None = None
ratio_filter: str | None = None
sql_filter: str | None = None


class TopicData(BaseTopic):
Expand Down
7 changes: 7 additions & 0 deletions ohsome_quality_api/topics/presets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ car-roads:
aggregation_type: length
filter: >-
highway in (motorway, trunk, primary, secondary, tertiary, residential, service, living_street, trunk_link, motorway_link, primary_link, secondary_link, tertiary_link) and type:way
sql_filter: >-
osm_type = 'way'
AND element_at (contributions.tags, 'highway') IS NOT NULL
AND contributions.tags['highway'] IN ('motorway', 'trunk',
'motorway_link', 'trunk_link', 'primary', 'primary_link', 'secondary',
'secondary_link', 'tertiary', 'tertiary_link', 'unclassified',
'residential')
indicators:
- mapping-saturation
- attribute-completeness
Expand Down
Loading