From eb66034e92033d1f5732def7c27272b071952aeb Mon Sep 17 00:00:00 2001 From: Sam <78538841+spwoodcock@users.noreply.github.com> Date: Wed, 25 Oct 2023 14:45:01 +0800 Subject: [PATCH] fix: simplify project features response & prep for flatgeobuf osm extracts (#932) * fix: simplify response when getting project features * refactor: update get_osm_extracts func for clarity --- src/backend/app/projects/project_crud.py | 56 ++++++++++++++++++---- src/backend/app/projects/project_routes.py | 14 +++--- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index ddd7a013df..a972d644ff 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -576,6 +576,12 @@ def remove_z_dimension(coord): def get_osm_extracts(boundary: str): + """Request an extract from raw-data-api and extract the file contents. + + - The query is posted to raw-data-api and job initiated for fetching the extract. + - The status of the job is polled every few seconds, until 'SUCCESS' is returned. + - The resulting zip file is downloaded, extracted, and data returned. + """ # Filters for osm extracts query = { "filters": { @@ -587,19 +593,27 @@ def get_osm_extracts(boundary: str): } } + # Boundary to extract data for json_boundary = json.loads(boundary) - if json_boundary.get("features", None) is not None: query["geometry"] = json_boundary # query["geometry"] = json_boundary["features"][0]["geometry"] - else: query["geometry"] = json_boundary + # Filename to generate + query["fileName"] = "extract" + # File format to generate + query["outputType"] = "geojson" + extract_filename = f'{query["fileName"]}.{query["outputType"]}' + log.debug(f"Setting data extract file name to: {extract_filename}") + + log.debug(f"Query for raw data api: {query}") base_url = settings.UNDERPASS_API_URL query_url = f"{base_url}/snapshot/" headers = {"accept": "application/json", "Content-Type": "application/json"} + # Send the request to raw data api result = requests.post(query_url, data=json.dumps(query), headers=headers) if result.status_code == 200: @@ -607,23 +621,26 @@ def get_osm_extracts(boundary: str): else: return False + # Check status of task (PENDING, or SUCCESS) task_url = f"{base_url}/tasks/status/{task_id}" - # extracts = requests.get(task_url) while True: result = requests.get(task_url, headers=headers) if result.json()["status"] == "PENDING": - time.sleep(1) + # Wait 2 seconds before polling again + time.sleep(2) elif result.json()["status"] == "SUCCESS": break + # TODO update code to generate fgb file format + # then input the download_url directly into our database + # (no need to download the file and extract) zip_url = result.json()["result"]["download_url"] - zip_url result = requests.get(zip_url, headers=headers) # result.content fp = BytesIO(result.content) zfp = zipfile.ZipFile(fp, "r") - zfp.extract("Export.geojson", "/tmp/") - data = json.loads(zfp.read("Export.geojson")) + zfp.extract(extract_filename, "/tmp/") + data = json.loads(zfp.read(extract_filename)) for feature in data["features"]: properties = feature["properties"] @@ -721,6 +738,10 @@ def split_polygon_into_tasks( db.add(db_task) db.commit() + # Get the data extract from raw-data-api + # Input into DbBuildings and DbOsmLines + # TODO update to use flatgeobuf file directly + # No need to store in our database if not has_data_extracts: data = get_osm_extracts(json.dumps(boundary_data)) if not data: @@ -1655,7 +1676,7 @@ def get_task_geometry(db: Session, project_id: int): async def get_project_features_geojson(db: Session, project_id: int): - # Get the geojson of those features for this task. + """Get a geojson of all features for a task.""" query = text( f"""SELECT jsonb_build_object( 'type', 'FeatureCollection', @@ -1676,6 +1697,25 @@ async def get_project_features_geojson(db: Session, project_id: int): result = db.execute(query) features = result.fetchone()[0] + # Simplify the geojson to send (strip project_id & task_id to reduce size) + # TODO coordinate with frontend to remove the first level geometry key + # Only return geojson with properties: + # {'type': 'feature', 'geometry': {...}, 'properties': {...}} + features = [ + { + "id": feature["id"], + "geometry": { + "id": feature["geometry"]["id"], + "type": feature["geometry"]["type"], + "geometry": feature["geometry"]["geometry"], + "properties": { + "id": feature["geometry"]["properties"]["id"], + "building": feature["geometry"]["properties"]["building"], + }, + }, + } + for feature in features + ] return features diff --git a/src/backend/app/projects/project_routes.py b/src/backend/app/projects/project_routes.py index b1966bff09..83875c5b92 100644 --- a/src/backend/app/projects/project_routes.py +++ b/src/backend/app/projects/project_routes.py @@ -673,16 +673,16 @@ def get_project_features( task_id: int = None, db: Session = Depends(database.get_db), ): - """Get api for fetching all the features of a project. + """Fetch all the features for a project. - This endpoint allows you to get all the features of a project. + The features are generated from raw-data-api - ## Request Body - - `project_id` (int): the project's id. Required. - - ## Response - - Returns a JSON object containing a list of features. + Args: + project_id (int): The project id. + task_id (int): The task id. + Returns: + feature(json): JSON object containing a list of features """ features = project_crud.get_project_features(db, project_id, task_id) return features