Skip to content

Commit

Permalink
test: psycopg vs asyncpg performance
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasschaub committed Feb 5, 2025
1 parent e0cae09 commit 77ac5cb
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 17 deletions.
2 changes: 1 addition & 1 deletion ohsome_quality_api/geodatabase/get_matched_roads.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
WITH bpoly AS (
SELECT
-- split multipolygon into list of polygons for more efficient processing
(ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom
(ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom
)
SELECT
SUM(cr.covered),
Expand Down
2 changes: 1 addition & 1 deletion ohsome_quality_api/geodatabase/select_building_area.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
WITH bpoly AS (
SELECT
-- split multipolygon into list of polygons for more efficient processing
(ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom
(ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom
)
SELECT
SUM({table_name}.area) as area
Expand Down
20 changes: 18 additions & 2 deletions ohsome_quality_api/indicators/building_comparison/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def format_sources(self):


# alru needs hashable type, therefore, use string instead of Feature
@alru_cache
# @alru_cache
async def get_reference_building_area(feature_str: str, table_name: str) -> float:
"""Get the building area for a AoI from the EUBUCCO dataset."""
# TODO: https://github.com/GIScience/ohsome-quality-api/issues/746
Expand All @@ -295,11 +295,27 @@ async def get_reference_building_area(feature_str: str, table_name: str) -> floa
geom = geojson.dumps(feature.geometry)
async with await psycopg.AsyncConnection.connect(dns) as con:
async with con.cursor() as cur:
await cur.execute(query.format(table_name=table_name), (geom,))
await cur.execute(query.format(table_name=table_name, geom=geom))
res = await cur.fetchone()
return res[0] or 0.0


async def get_reference_building_area_asyncpg(
    feature_str: str, table_name: str
) -> float:
    """Get the reference building area for an AoI using asyncpg.

    Counterpart of `get_reference_building_area`: runs the same
    `select_building_area.sql` query, but through the connection helper of
    the geodatabase client instead of a psycopg connection.

    Args:
        feature_str: GeoJSON Feature serialized as a string.
        table_name: Name of the table to query. It is interpolated into the
            query text (identifiers cannot be bound as parameters), so it
            must come from a trusted source.

    Returns:
        The first column of the result row (the summed area), or 0.0 if
        that value is NULL/falsy.
    """
    # Kept as a function-local import, as in the original implementation.
    from ohsome_quality_api.geodatabase.client import get_connection

    file_path = os.path.join(db_client.WORKING_DIR, "select_building_area.sql")
    with open(file_path, "r") as file:
        query = file.read()
    feature = geojson.loads(feature_str)
    geom = geojson.dumps(feature.geometry)

    # The SQL file carries a quoted `'{geom}'` format field. Swap it for a
    # bound parameter so the geometry (derived from caller input) is never
    # spliced into the SQL text. The explicit cast lets the server resolve
    # the parameter type for ST_GeomFromGeoJSON.
    query = query.replace("'{geom}'", "$1::text").format(table_name=table_name)

    async with get_connection() as conn:
        result = await conn.fetchrow(query, geom)
    return result[0] or 0.0


def load_datasets_metadata() -> dict:
file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
with open(file_path, "r") as f:
Expand Down
24 changes: 21 additions & 3 deletions ohsome_quality_api/indicators/road_comparison/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def format_sources(self):


# alru needs hashable type, therefore, use string instead of Feature
@alru_cache
# @alru_cache
async def get_matched_roadlengths(
feature_str: str,
table_name: str,
Expand All @@ -290,13 +290,31 @@ async def get_matched_roadlengths(
await cur.execute(
query.format(
table_name=table_name,
),
(geom,),
geom=geom,
)
)
res = await cur.fetchone()
return res[0], res[1]


async def get_matched_roadlengths_async(
    feature_str: str,
    table_name: str,
) -> tuple[float, float]:
    """Get the matched road lengths for an AoI using asyncpg.

    Counterpart of `get_matched_roadlengths`: runs the same
    `get_matched_roads.sql` query, but through the connection helper of the
    geodatabase client instead of a psycopg connection.

    Args:
        feature_str: GeoJSON Feature serialized as a string.
        table_name: Name of the table to query. Spaces are replaced by
            underscores. The name is interpolated into the query text
            (identifiers cannot be bound as parameters), so it must come
            from a trusted source.

    Returns:
        The first two columns of the result row.
    """
    # Kept as a function-local import, as in the original implementation.
    from ohsome_quality_api.geodatabase.client import get_connection

    file_path = os.path.join(db_client.WORKING_DIR, "get_matched_roads.sql")
    with open(file_path, "r") as file:
        query = file.read()
    feature = geojson.loads(feature_str)
    geom = geojson.dumps(feature.geometry)
    table_name = table_name.replace(" ", "_")

    # The SQL file carries a quoted `'{geom}'` format field. Swap it for a
    # bound parameter so the geometry (derived from caller input) is never
    # spliced into the SQL text. The explicit cast lets the server resolve
    # the parameter type for ST_GeomFromGeoJSON.
    query = query.replace("'{geom}'", "$1::text").format(table_name=table_name)

    async with get_connection() as conn:
        result = await conn.fetchrow(query, geom)
    return result[0], result[1]


# Alias following the `_asyncpg` suffix used by the building comparison
# counterpart (`get_reference_building_area_asyncpg`).
get_matched_roadlengths_asyncpg = get_matched_roadlengths_async


def load_datasets_metadata() -> dict:
file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
with open(file_path, "r") as f:
Expand Down
34 changes: 26 additions & 8 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ requests = "^2.32.0"
PyYAML = "^6.0"
toml = "^0.10.2"
httpx = "^0.23.0"
asyncpg = "^0.30"
vcrpy = "^4.1.1"
python-dateutil = "^2.8.2"
scipy = "^1.9.3"
Expand All @@ -46,10 +45,10 @@ plotly = "^5.16.1"
psycopg = {extras = ["binary"], version = "^3.1"}
async-lru = "^2.0.4"
approvaltests = "^12.1.0"
asyncpg = "^0.30.0"

[tool.poetry.dev-dependencies]
pre-commit = "^3.2.1"
pytest = "^7.2.2"
pytest-cov = "^4.0.0"
pytest-mock = "^3.11.1"

Expand All @@ -58,6 +57,8 @@ pytest-mock = "^3.11.1"

[tool.poetry.group.dev.dependencies]
ruff = "^0.7.3"
pytest = "^8.3.4"
pytest-asyncio = "^0.25.3"

[build-system]
requires = ["poetry-core"]
Expand All @@ -83,3 +84,4 @@ select = [
[tool.pytest.ini_options]
testpaths = ["tests"]
filterwarnings = ["ignore::DeprecationWarning"]
addopts = "-s" # show print statements
32 changes: 32 additions & 0 deletions tests/integrationtests/indicators/test_building_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,35 @@ def test_create_figure_building_area_zero(
assert isinstance(indicator.result.figure, dict)
assert indicator.result.figure["data"][0]["type"] == "pie"
pgo.Figure(indicator.result.figure)


@pytest.mark.asyncio
async def test_compare_database_libraries_execution_time(feature_germany_berlin):
    """Compare result and run time of the psycopg and asyncpg code paths.

    For each reference dataset both implementations must return the same
    building area, and their run times must not differ by more than one
    second.
    """
    import time

    import geojson

    from ohsome_quality_api.indicators.building_comparison.indicator import (
        get_reference_building_area,
        get_reference_building_area_asyncpg,
    )

    # Serialize once, outside the timed regions, so only the database round
    # trip of each library is measured.
    feature_str = geojson.dumps(feature_germany_berlin)

    for dataset in ("eubucco", "microsoft_buildings"):
        # perf_counter is monotonic and therefore suited for measuring
        # durations; time.time() can jump when the system clock is adjusted.
        start_psycopg = time.perf_counter()
        result_psycopg = await get_reference_building_area(feature_str, dataset)
        time_psycopg = time.perf_counter() - start_psycopg
        print(time_psycopg)  # ~4-5 sec

        start_asyncpg = time.perf_counter()
        result_asyncpg = await get_reference_building_area_asyncpg(
            feature_str,
            dataset,
        )
        time_asyncpg = time.perf_counter() - start_asyncpg
        print(time_asyncpg)  # ~4-5 sec

        assert result_psycopg == result_asyncpg
        # allow a difference of 1 second
        assert time_psycopg == pytest.approx(time_asyncpg, abs=1)
31 changes: 31 additions & 0 deletions tests/integrationtests/indicators/test_road_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,34 @@ def test_get_matched_roadlengths():
assert (1502620657, 1969546917) == asyncio.run(
get_matched_roadlengths(json.dumps(polygon), "microsoft_roads_midpoint")
)


@pytest.mark.asyncio
async def test_compare_database_libraries_execution_time(feature_germany_berlin):
    """Compare result and run time of the psycopg and asyncpg code paths.

    Both implementations must return the same matched road lengths, and
    their run times must not differ by more than one second.
    """
    import time

    import geojson

    # The indicator module defines the asyncpg variant under the name
    # `get_matched_roadlengths_async`; importing any other name raises an
    # ImportError before the test body runs.
    from ohsome_quality_api.indicators.road_comparison.indicator import (
        get_matched_roadlengths,
        get_matched_roadlengths_async,
    )

    # Serialize once, outside the timed regions, so only the database round
    # trip of each library is measured.
    feature_str = geojson.dumps(feature_germany_berlin)
    table_name = "microsoft_roads_midpoint"

    # perf_counter is monotonic and therefore suited for measuring
    # durations; time.time() can jump when the system clock is adjusted.
    start_psycopg = time.perf_counter()
    result_psycopg = await get_matched_roadlengths(feature_str, table_name)
    time_psycopg = time.perf_counter() - start_psycopg
    print(time_psycopg)  # ~4-5 sec

    start_asyncpg = time.perf_counter()
    result_asyncpg = await get_matched_roadlengths_async(feature_str, table_name)
    time_asyncpg = time.perf_counter() - start_asyncpg
    print(time_asyncpg)  # ~4-5 sec

    assert result_psycopg == result_asyncpg
    # allow a difference of 1 second
    assert time_psycopg == pytest.approx(time_asyncpg, abs=1)

0 comments on commit 77ac5cb

Please sign in to comment.