test: psycopg vs asyncpg performance

GIScience · Feb 5, 2025 · 77ac5cb · 77ac5cb
1 parent e0cae09
commit 77ac5cb
Show file tree

Hide file tree

Showing 8 changed files with 134 additions and 17 deletions.
diff --git a/ohsome_quality_api/geodatabase/get_matched_roads.sql b/ohsome_quality_api/geodatabase/get_matched_roads.sql
@@ -1,7 +1,7 @@
 WITH bpoly AS (
     SELECT
         -- split multipolygon into a list of polygons for more efficient processing
-        (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom
+        (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom
 )
 SELECT
     SUM(cr.covered),

diff --git a/ohsome_quality_api/geodatabase/select_building_area.sql b/ohsome_quality_api/geodatabase/select_building_area.sql
@@ -1,7 +1,7 @@
 WITH bpoly AS (
     SELECT
         -- split multipolygon into a list of polygons for more efficient processing
-        (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom
+        (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom
 )
 SELECT
     SUM({table_name}.area) as area

diff --git a/ohsome_quality_api/indicators/building_comparison/indicator.py b/ohsome_quality_api/indicators/building_comparison/indicator.py
@@ -277,7 +277,7 @@ def format_sources(self):
 
 
 # alru needs hashable type, therefore, use string instead of Feature
-@alru_cache
+# @alru_cache
 async def get_reference_building_area(feature_str: str, table_name: str) -> float:
     """Get the building area for a AoI from the EUBUCCO dataset."""
     # TODO: https://github.com/GIScience/ohsome-quality-api/issues/746
@@ -295,11 +295,27 @@ async def get_reference_building_area(feature_str: str, table_name: str) -> floa
     geom = geojson.dumps(feature.geometry)
     async with await psycopg.AsyncConnection.connect(dns) as con:
         async with con.cursor() as cur:
-            await cur.execute(query.format(table_name=table_name), (geom,))
+            await cur.execute(query.format(table_name=table_name, geom=geom))
             res = await cur.fetchone()
     return res[0] or 0.0
 
 
+async def get_reference_building_area_asyncpg(
+    feature_str: str, table_name: str
+) -> float:
+    """Get the reference building area for an AoI via `get_connection`.
+
+    Runs `select_building_area.sql` and returns the first column of the
+    fetched row.
+
+    Args:
+        feature_str: GeoJSON Feature serialized as a string.
+        table_name: Name substituted for `{table_name}` in the SQL file.
+            NOTE(review): interpolated into the SQL text; presumably an
+            internal dataset identifier and never user input -- confirm.
+
+    Returns:
+        The first column of the result row, or 0.0 if that value is falsy.
+    """
+    file_path = os.path.join(db_client.WORKING_DIR, "select_building_area.sql")
+    with open(file_path, "r") as file:
+        query = file.read()
+    feature = geojson.loads(feature_str)
+    geom = geojson.dumps(feature.geometry)
+    # Bind the geometry as a query argument instead of splicing it into the
+    # SQL text: a single quote inside the GeoJSON would otherwise terminate
+    # the string literal (SQL injection). The SQL file spells the placeholder
+    # as '{geom}'; it is swapped for the positional parameter $1 here.
+    # NOTE(review): assumes get_connection yields an asyncpg connection.
+    query = query.replace("'{geom}'", "$1::text").format(table_name=table_name)
+
+    from ohsome_quality_api.geodatabase.client import get_connection
+
+    async with get_connection() as conn:
+        result = await conn.fetchrow(query, geom)
+    return result[0] or 0.0
+
+
 def load_datasets_metadata() -> dict:
     file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
     with open(file_path, "r") as f:

diff --git a/ohsome_quality_api/indicators/road_comparison/indicator.py b/ohsome_quality_api/indicators/road_comparison/indicator.py
@@ -267,7 +267,7 @@ def format_sources(self):
 
 
 # alru needs hashable type, therefore, use string instead of Feature
-@alru_cache
+# @alru_cache
 async def get_matched_roadlengths(
     feature_str: str,
     table_name: str,
@@ -290,13 +290,31 @@ async def get_matched_roadlengths(
             await cur.execute(
                 query.format(
                     table_name=table_name,
-                ),
-                (geom,),
+                    geom=geom,
+                )
             )
             res = await cur.fetchone()
     return res[0], res[1]
 
 
+async def get_matched_roadlengths_async(
+    feature_str: str,
+    table_name: str,
+) -> tuple[float, float]:
+    """Get the first two result columns of `get_matched_roads.sql` for an AoI.
+
+    Runs the query through the connection returned by `get_connection`.
+
+    Args:
+        feature_str: GeoJSON Feature serialized as a string.
+        table_name: Name substituted for `{table_name}` in the SQL file;
+            spaces are replaced by underscores first.
+            NOTE(review): interpolated into the SQL text; presumably an
+            internal dataset identifier and never user input -- confirm.
+
+    Returns:
+        The first and second column of the result row.
+    """
+    file_path = os.path.join(db_client.WORKING_DIR, "get_matched_roads.sql")
+    with open(file_path, "r") as file:
+        query = file.read()
+    feature = geojson.loads(feature_str)
+    geom = geojson.dumps(feature.geometry)
+    table_name = table_name.replace(" ", "_")
+    # Bind the geometry as a query argument instead of splicing it into the
+    # SQL text: a single quote inside the GeoJSON would otherwise terminate
+    # the string literal (SQL injection). The SQL file spells the placeholder
+    # as '{geom}'; it is swapped for the positional parameter $1 here.
+    # NOTE(review): assumes get_connection yields an asyncpg connection.
+    query = query.replace("'{geom}'", "$1::text").format(table_name=table_name)
+
+    from ohsome_quality_api.geodatabase.client import get_connection
+
+    async with get_connection() as conn:
+        result = await conn.fetchrow(query, geom)
+    return result[0], result[1]
+
+
 def load_datasets_metadata() -> dict:
     file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
     with open(file_path, "r") as f:

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -31,7 +31,6 @@ requests = "^2.32.0"
 PyYAML = "^6.0"
 toml = "^0.10.2"
 httpx = "^0.23.0"
-asyncpg = "^0.30"
 vcrpy = "^4.1.1"
 python-dateutil = "^2.8.2"
 scipy = "^1.9.3"
@@ -46,10 +45,10 @@ plotly = "^5.16.1"
 psycopg = {extras = ["binary"], version = "^3.1"}
 async-lru = "^2.0.4"
 approvaltests = "^12.1.0"
+asyncpg = "^0.30.0"
 
 [tool.poetry.dev-dependencies]
 pre-commit = "^3.2.1"
-pytest = "^7.2.2"
 pytest-cov = "^4.0.0"
 pytest-mock = "^3.11.1"
 
@@ -58,6 +57,8 @@ pytest-mock = "^3.11.1"
 
 [tool.poetry.group.dev.dependencies]
 ruff = "^0.7.3"
+pytest = "^8.3.4"
+pytest-asyncio = "^0.25.3"
 
 [build-system]
 requires = ["poetry-core"]
@@ -83,3 +84,4 @@ select = [
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 filterwarnings = ["ignore::DeprecationWarning"]
+addopts = "-s"  # show print statements
diff --git a/tests/integrationtests/indicators/test_building_comparison.py b/tests/integrationtests/indicators/test_building_comparison.py
@@ -364,3 +364,35 @@ def test_create_figure_building_area_zero(
         assert isinstance(indicator.result.figure, dict)
         assert indicator.result.figure["data"][0]["type"] == "pie"
         pgo.Figure(indicator.result.figure)
+
+
+@pytest.mark.asyncio
+async def test_compare_database_libraries_execution_time(feature_germany_berlin):
+    """Compare result and elapsed time of the psycopg and asyncpg queries."""
+    import time
+
+    import geojson
+
+    from ohsome_quality_api.indicators.building_comparison.indicator import (
+        get_reference_building_area,
+        get_reference_building_area_asyncpg,
+    )
+
+    # Serialize once, outside the timed sections, so that both measurements
+    # cover only the call under test.
+    feature_str = geojson.dumps(feature_germany_berlin)
+
+    for dataset in ("eubucco", "microsoft_buildings"):
+        # perf_counter is monotonic; time.time can jump if the system clock
+        # is adjusted while the query runs.
+        start_psycopg = time.perf_counter()
+        result_psycopg = await get_reference_building_area(feature_str, dataset)
+        time_psycopg = time.perf_counter() - start_psycopg
+        print(time_psycopg)  # ~4-5 sec
+
+        start_asyncpg = time.perf_counter()
+        result_asyncpg = await get_reference_building_area_asyncpg(
+            feature_str,
+            dataset,
+        )
+        time_asyncpg = time.perf_counter() - start_asyncpg
+        print(time_asyncpg)  # ~4-5 sec
+
+        assert result_psycopg == result_asyncpg
+        # allow a difference of up to 1 second
+        assert time_psycopg == pytest.approx(time_asyncpg, abs=1)
diff --git a/tests/integrationtests/indicators/test_road_comparison.py b/tests/integrationtests/indicators/test_road_comparison.py
@@ -244,3 +244,34 @@ def test_get_matched_roadlengths():
     assert (1502620657, 1969546917) == asyncio.run(
         get_matched_roadlengths(json.dumps(polygon), "microsoft_roads_midpoint")
     )
+
+
+@pytest.mark.asyncio
+async def test_compare_database_libraries_execution_time(feature_germany_berlin):
+    """Compare result and elapsed time of the psycopg and asyncpg queries."""
+    import time
+
+    import geojson
+
+    # The asyncpg-based variant is defined as `get_matched_roadlengths_async`
+    # in the indicator module; importing a `..._asyncpg` name raises
+    # ImportError.
+    from ohsome_quality_api.indicators.road_comparison.indicator import (
+        get_matched_roadlengths,
+        get_matched_roadlengths_async,
+    )
+
+    # Serialize once, outside the timed sections, so that both measurements
+    # cover only the call under test.
+    feature_str = geojson.dumps(feature_germany_berlin)
+
+    # perf_counter is monotonic; time.time can jump if the system clock is
+    # adjusted while the query runs.
+    start_psycopg = time.perf_counter()
+    result_psycopg = await get_matched_roadlengths(
+        feature_str,
+        "microsoft_roads_midpoint",
+    )
+    time_psycopg = time.perf_counter() - start_psycopg
+    print(time_psycopg)  # ~4-5 sec
+
+    start_asyncpg = time.perf_counter()
+    result_asyncpg = await get_matched_roadlengths_async(
+        feature_str,
+        "microsoft_roads_midpoint",
+    )
+    time_asyncpg = time.perf_counter() - start_asyncpg
+    print(time_asyncpg)  # ~4-5 sec
+
+    assert result_psycopg == result_asyncpg
+    # allow a difference of up to 1 second
+    assert time_psycopg == pytest.approx(time_asyncpg, abs=1)