Skip to content

Commit

Permalink
Feature/issue 210 - Load large geometry polygons (#219)
Browse files Browse the repository at this point in the history
* add functions to handle null geometries and convert polygons to points

* update doi in docs

* fix fill null geometries

* fix tests and update changelog
  • Loading branch information
torimcd authored Aug 14, 2024
1 parent 5636520 commit 279f7bb
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 3 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Issue 201- Create table for tracking granule ingest status
- Issue 201 - Create table for tracking granule ingest status
- Issue 198 - Implement track ingest lambda function CMR and Hydrocron queries
- Issue 193 - Add new Dynamo table for prior lake data
### Changed
### Deprecated
### Removed
### Fixed
- Issue 210 - Features with large geometries cannot be loaded
### Security

## [1.3.0]
Expand Down
2 changes: 1 addition & 1 deletion docs/intro.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Hydrocron Documentation

[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11193245.svg)](https://doi.org/10.5281/zenodo.11193245)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11176233.svg)](https://doi.org/10.5281/zenodo.11176233)

Hydrocron is an API that repackages hydrology datasets from the Surface Water and Ocean Topography (SWOT) satellite into formats that make time-series analysis easier.

Expand Down
2 changes: 1 addition & 1 deletion docs/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ Data return size is limited to 6 MB. If your query response is larger than this

## Citation

Cite Hydrocron using the following DOI: [10.5281/zenodo.11193245](https://doi.org/10.5281/zenodo.11193245).
Cite Hydrocron using the following DOI: [10.5281/zenodo.11176233](https://doi.org/10.5281/zenodo.11176233).
48 changes: 48 additions & 0 deletions hydrocron/db/io/swot_shp.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely import Polygon

from hydrocron.utils import constants


Expand Down Expand Up @@ -69,6 +71,10 @@ def read_shapefile(filepath, obscure_data, columns, s3_resource=None):
with zipfile.ZipFile(filepath) as archive:
shp_xml_tree = ET.fromstring(archive.read(filename[:-4] + ".shp.xml"))

if 'LakeSP_Prior' in filename:
shp_file = handle_null_geometries(shp_file)
shp_file = convert_polygon_to_centerpoint(shp_file)

if obscure_data:
numeric_columns = shp_file[columns].select_dtypes(include=[np.number]).columns

Expand All @@ -89,6 +95,48 @@ def read_shapefile(filepath, obscure_data, columns, s3_resource=None):
return items


def handle_null_geometries(geodf):
    """
    Assign fill value polygon to any features that contain null geometries
    Parameters
    ----------
    geodf : geopandas.GeoDataFrame
        the geodataframe containing the unpacked shapefile features
    Returns
    -------
    geodf_no_nulls : geopandas.GeoDataFrame
        the same geodataframe object that was passed in, with every null
        entry of its 'geometry' column replaced by the fill polygon
    """
    fill_polygon = Polygon(constants.SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS)

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on the column selection: the in-place form operates on an intermediate
    # object, which pandas warns about and which does not update the parent
    # frame when Copy-on-Write is enabled.
    geodf['geometry'] = geodf['geometry'].fillna(value=fill_polygon)

    return geodf


def convert_polygon_to_centerpoint(geodf_polygon):
    """
    Replace each feature's polygon geometry with its centroid point, which
    reduces the size of lake features. The input geodataframe is modified
    in place and also returned.
    Parameters
    ----------
    geodf_polygon : geopandas.GeoDataFrame
        the geodataframe containing the unpacked shapefile features with polygon feature types
    Returns
    -------
    geodf_centerpoint : geopandas.GeoDataFrame
        the same geodataframe object, whose 'geometry' column now holds the
        calculated centerpoint geometries
    """
    # Compute all centroids first, then overwrite the geometry column.
    centerpoints = geodf_polygon['geometry'].centroid
    geodf_polygon['geometry'] = centerpoints

    return geodf_polygon


def parse_metadata_from_shpxml(xml_elem):
"""
Read the prior database (SWORD or PLD) version number from the shp.xml file
Expand Down
73 changes: 73 additions & 0 deletions hydrocron/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,57 @@
"area_total_units": "km^2"
}

# Expected polygon for one prior-lake test feature. "geometry" is a closed ring
# of coordinate pairs (the first and last vertices are identical); the pairs
# are presumably (longitude, latitude) -- confirm against the test shapefile.
# tests/test_io_swot_reach_node_shp.py builds a Polygon from this ring and
# compares its centroid with item 4596 read from the prior-lake test shapefile.
TEST_PLAKE_GEOM_DICT = {
"lake_id": "9130047472",
"geometry": (
(-50.14521191, 69.30222612), (-50.14550301, 69.30215475),
(-50.14612341, 69.30224091), (-50.14641609, 69.30216916),
(-50.14674362, 69.30232713), (-50.14677875, 69.30255678),
(-50.1471051, 69.30271506,), (-50.14713811, 69.30294524),
(-50.1474643, 69.30310355), (-50.14808323, 69.30319009),
(-50.14837586, 69.30311835), (-50.1489949, 69.30320486),
(-50.14932109, 69.30336317), (-50.14993974, 69.30344978),
(-50.14990648, 69.30321964), (-50.14987337, 69.30298947),
(-50.15016696, 69.30291749), (-50.15046021, 69.30284559),
(-50.15075352, 69.30277367), (-50.15137324, 69.30286),
(-50.15166675, 69.30278803), (-50.15228657, 69.30287434),
(-50.15261279, 69.30303264), (-50.15293903, 69.30319094),
(-50.15326526, 69.30334923), (-50.15359148, 69.30350753),
(-50.15362358, 69.30373794), (-50.15365621, 69.30396824),
(-50.15398295, 69.30412641), (-50.15460271, 69.30421273),
(-50.15492895, 69.30437102), (-50.15496171, 69.30460129),
(-50.15558151, 69.3046876), (-50.15590777, 69.3048459),
(-50.15623406, 69.30500419), (-50.156559, 69.30516281),
(-50.15626538, 69.30523481), (-50.15597192, 69.30530677),
(-50.15567856, 69.30537871), (-50.15538527, 69.30545062),
(-50.15541698, 69.30568115), (-50.15512387, 69.30575302),
(-50.15483084, 69.30582487), (-50.15421294, 69.30573808),
(-50.15392004, 69.3058099), (-50.15359524, 69.30565124),
(-50.15297631, 69.3055647), (-50.15268369, 69.30563645),
(-50.15235735, 69.30547818), (-50.15203056, 69.30532002),
(-50.15141283, 69.30523318), (-50.15112013, 69.30530495),
(-50.1505015, 69.30521834), (-50.15017531, 69.30506003),
(-50.15046779, 69.30498832), (-50.15043315, 69.30475852),
(-50.1501077, 69.30460003), (-50.15040003, 69.30452836),
(-50.15036646, 69.3042983), (-50.15065919, 69.30422653),
(-50.15095197, 69.30415475), (-50.15091834, 69.3039247),
(-50.15059214, 69.3037664), (-50.14997352, 69.30367978),
(-50.14968083, 69.30375154), (-50.14906209, 69.30366496),
(-50.14844328, 69.30357838), (-50.14815009, 69.30365026),
(-50.14785768, 69.30372195), (-50.14753153, 69.30356363),
(-50.14691338, 69.30347689), (-50.14629497, 69.30339021),
(-50.14567671, 69.3033035), (-50.14535057, 69.30314519),
(-50.14564277, 69.30307356), (-50.14626111, 69.30316024),
(-50.14655355, 69.30308855), (-50.14651952, 69.30285862),
(-50.14648605, 69.30262853), (-50.14615845, 69.30247059),
(-50.14553817, 69.30238438), (-50.14521191, 69.30222612))
}


# NOTE(review): presumably identifies a prior-lake test feature that has no
# geometry in the test shapefile; it is not referenced by the tests visible
# alongside this change -- confirm its intended use.
TEST_PLAKE_ITEM_NO_GEO_DICT = {
"lake_id": "9120145452"
}

DB_TEST_PLAKE_TABLE_NAME = "hydrocron-swot-testlake-table"
API_TEST_PLAKE_TABLE_NAME = "hydrocron-swot-prior-lake-table"
TEST_PLAKE_PARTITION_KEY_NAME = 'lake_id'
Expand All @@ -84,6 +135,28 @@
SWOT_REACH_COLLECTION_VERSION = SWOT_REACH_COLLECTION_NAME[19:]
SWOT_NODE_COLLECTION_VERSION = SWOT_NODE_COLLECTION_NAME[19:]
SWOT_PRIOR_LAKE_COLLECTION_VERSION = SWOT_PRIOR_LAKE_COLLECTION_NAME[18:]
# Vertex ring of the placeholder polygon substituted for prior-lake features
# whose geometry is null (hydrocron/db/io/swot_shp.py builds a shapely Polygon
# from it in handle_null_geometries). The ring is closed: the first and last
# vertices are identical. Pairs are presumably (longitude, latitude) -- confirm.
SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS = (
(-31.286028054129474, -27.207309600925463),
(-22.19117572552625, -28.812946226841383),
(-15.725605024311761, -29.21206933352415),
(-9.73430598260046, -29.228374663756604),
(-9.643271006951636, -27.233170541912884),
(-13.841716582541977, -27.37318973052451),
(-13.640561876091681, -21.64742387547294),
(-15.517427505373604, -21.61501976602659),
(-15.687806151090996, -28.090307824912784),
(-20.53678800850099, -28.156869804349213),
(-20.271711250148456, -24.421282696689033),
(-16.826147231682597, -24.69813060607345),
(-16.457685427420472, -21.588744491452957),
(-21.46664265437724, -21.33573507315593),
(-21.962226106320827, -27.948720914494196),
(-23.98629064034978, -27.80816909915125),
(-22.949633572250406, -20.8450893435173),
(-25.16962667009571, -20.772294910422403),
(-25.61120124377038, -25.40631583584434),
(-31.032731158967948, -24.810351227750644),
(-31.286028054129474, -27.207309600925463))

SWOT_REACH_PARTITION_KEY = "reach_id"
SWOT_NODE_PARTITION_KEY = "node_id"
Expand Down
Binary file not shown.
43 changes: 43 additions & 0 deletions tests/test_io_swot_reach_node_shp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
"""
from datetime import datetime, timedelta, timezone
import pytz
import numpy as np
from shapely import Polygon, Point, geometry, wkt, centroid
from hydrocron.utils import constants

from hydrocron.db.io import swot_shp
Expand Down Expand Up @@ -79,6 +81,47 @@ def test_read_lake_shapefile():
assert val == items[4596][key]


def test_lake_null_geometry():
    """
    Tests replacing null geometry with fillvalue for lake polygons

    The test assumes the first feature of the prior-lake test shapefile has a
    null geometry, so after loading it should carry the centerpoint of the
    fill polygon.
    """
    items = swot_shp.read_shapefile(
        constants.TEST_PLAKE_SHAPEFILE_PATH,
        obscure_data=False,
        columns=constants.PRIOR_LAKE_DATA_COLUMNS)

    geojson = geometry.mapping(wkt.loads(items[0]['geometry']))
    coords_0 = np.round(np.array(geojson['coordinates']), 3)

    # Round the expected centerpoint the same way as the loaded one so the
    # string comparison is not defeated by floating point noise.
    geojson_fill = geometry.mapping(centroid(Polygon(
        constants.SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS)))
    coords_fill = np.round(np.array(geojson_fill['coordinates']), 3)

    # Compare the two strings; wrapping the whole comparison in str() would
    # yield the non-empty string "False"/"True" and always pass.
    assert str(Point(coords_0)) == str(Point(coords_fill))


def test_lake_centerpoints():
    """
    Tests replacing polygons with centerpoints

    Checks two features of the prior-lake test shapefile: item 0, which is
    expected to carry the centerpoint of the fill polygon, and item 4596,
    which is expected to carry the centerpoint of the reference polygon in
    constants.TEST_PLAKE_GEOM_DICT.
    """
    items = swot_shp.read_shapefile(
        constants.TEST_PLAKE_SHAPEFILE_PATH,
        obscure_data=False,
        columns=constants.PRIOR_LAKE_DATA_COLUMNS)

    geojson = geometry.mapping(wkt.loads(items[0]['geometry']))
    coords_0 = np.round(np.array(geojson['coordinates']), 3)

    # Round the expected fill centerpoint like the loaded one before comparing.
    geojson_fill = geometry.mapping(centroid(Polygon(
        constants.SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS)))
    coords_fill = np.round(np.array(geojson_fill['coordinates']), 3)

    # Compare the two strings; wrapping the whole comparison in str() would
    # yield a non-empty string and always pass.
    assert str(Point(coords_0)) == str(Point(coords_fill))

    geojson_4596 = geometry.mapping(wkt.loads(items[4596]['geometry']))
    coords_4596 = np.round(np.array(geojson_4596['coordinates']), 3)

    geojson_test_4596 = geometry.mapping(centroid(Polygon(
        constants.TEST_PLAKE_GEOM_DICT['geometry'])))
    test_4596 = np.round(np.array(geojson_test_4596['coordinates']), 3)

    assert str(Point(coords_4596)) == str(Point(test_4596))


def test_read_shapefile_obscured():
"""
Tests reading attributes from the shapefile with real values obscured
Expand Down

0 comments on commit 279f7bb

Please sign in to comment.