diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a3f657..bd850fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - - Issue 201- Create table for tracking granule ingest status + - Issue 201 - Create table for tracking granule ingest status - Issue 198 - Implement track ingest lambda function CMR and Hydrocron queries - Issue 193 - Add new Dynamo table for prior lake data ### Changed ### Deprecated ### Removed ### Fixed + - Issue 210 - Features with large geometries cannot be loaded ### Security ## [1.3.0] diff --git a/docs/intro.md b/docs/intro.md index 7a29bc7..1ff8330 100644 --- a/docs/intro.md +++ b/docs/intro.md @@ -1,6 +1,6 @@ # Hydrocron Documentation -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11193245.svg)](https://doi.org/10.5281/zenodo.11193245) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11176233.svg)](https://doi.org/10.5281/zenodo.11176233) Hydrocron is an API that repackages hydrology datasets from the Surface Water and Ocean Topography (SWOT) satellite into formats that make time-series analysis easier. diff --git a/docs/overview.md b/docs/overview.md index 2329bf3..eba2ffb 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -20,4 +20,4 @@ Data return size is limited to 6 MB. If your query response is larger than this ## Citation -Cite Hydrocron using the following DOI: [10.5281/zenodo.11193245](https://doi.org/10.5281/zenodo.11193245). +Cite Hydrocron using the following DOI: [10.5281/zenodo.11176233](https://doi.org/10.5281/zenodo.11176233). 
diff --git a/hydrocron/db/io/swot_shp.py b/hydrocron/db/io/swot_shp.py
index 6d11234..6c5466f 100644
--- a/hydrocron/db/io/swot_shp.py
+++ b/hydrocron/db/io/swot_shp.py
@@ -13,6 +13,8 @@
 import geopandas as gpd
 import numpy as np
 import pandas as pd
+from shapely import Polygon
+
 from hydrocron.utils import constants
 
 
@@ -69,6 +71,10 @@ def read_shapefile(filepath, obscure_data, columns, s3_resource=None):
         with zipfile.ZipFile(filepath) as archive:
             shp_xml_tree = ET.fromstring(archive.read(filename[:-4] + ".shp.xml"))
 
+    if 'LakeSP_Prior' in filename:
+        shp_file = handle_null_geometries(shp_file)
+        shp_file = convert_polygon_to_centerpoint(shp_file)
+
     if obscure_data:
         numeric_columns = shp_file[columns].select_dtypes(include=[np.number]).columns
 
@@ -89,6 +95,48 @@ def read_shapefile(filepath, obscure_data, columns, s3_resource=None):
     return items
 
 
+def handle_null_geometries(geodf):
+    """
+    Assign fill value polygon to any features that contain null geometries
+
+    Parameters
+    ----------
+    geodf : geopandas.GeoDataFrame
+        the geodataframe containing the unpacked shapefile features
+
+    Returns
+    -------
+    geodf : geopandas.GeoDataFrame
+        the same geodataframe, with null geometries replaced by the fill polygon
+    """
+
+    fill_polygon = Polygon(constants.SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS)
+    # Assign back: an inplace fillna on a column selection can act on a copy
+    geodf['geometry'] = geodf['geometry'].fillna(fill_polygon)
+
+    return geodf
+
+
+def convert_polygon_to_centerpoint(geodf_polygon):
+    """
+    Converts polygon geometries to centerpoints. Used to reduce the size of lake features
+
+    Parameters
+    ----------
+    geodf_polygon : geopandas.GeoDataFrame
+        the geodataframe containing the unpacked shapefile features with polygon feature types
+
+    Returns
+    -------
+    geodf_centerpoint : geopandas.GeoDataFrame
+        the geodataframe with point feature types and calculated centerpoint geometries
+    """
+    geodf_centerpoint = geodf_polygon  # same object: the input frame is updated in place
+    geodf_centerpoint['geometry'] = geodf_polygon['geometry'].centroid
+
+    return geodf_centerpoint
+
+
 def parse_metadata_from_shpxml(xml_elem):
     """
     Read the prior database (SWORD or PLD) version number from the shp.xml file
diff --git a/hydrocron/utils/constants.py b/hydrocron/utils/constants.py
index e8f2058..93cc789 100644
--- a/hydrocron/utils/constants.py
+++ b/hydrocron/utils/constants.py
@@ -60,6 +60,57 @@
     "area_total_units": "km^2"
 }
 
+TEST_PLAKE_GEOM_DICT = {
+    "lake_id": "9130047472",
+    "geometry": (
+        (-50.14521191, 69.30222612), (-50.14550301, 69.30215475),
+        (-50.14612341, 69.30224091), (-50.14641609, 69.30216916),
+        (-50.14674362, 69.30232713), (-50.14677875, 69.30255678),
+        (-50.1471051, 69.30271506), (-50.14713811, 69.30294524),
+        (-50.1474643, 69.30310355), (-50.14808323, 69.30319009),
+        (-50.14837586, 69.30311835), (-50.1489949, 69.30320486),
+        (-50.14932109, 69.30336317), (-50.14993974, 69.30344978),
+        (-50.14990648, 69.30321964), (-50.14987337, 69.30298947),
+        (-50.15016696, 69.30291749), (-50.15046021, 69.30284559),
+        (-50.15075352, 69.30277367), (-50.15137324, 69.30286),
+        (-50.15166675, 69.30278803), (-50.15228657, 69.30287434),
+        (-50.15261279, 69.30303264), (-50.15293903, 69.30319094),
+        (-50.15326526, 69.30334923), (-50.15359148, 69.30350753),
+        (-50.15362358, 69.30373794), (-50.15365621, 69.30396824),
+        (-50.15398295, 69.30412641), (-50.15460271, 69.30421273),
+        (-50.15492895, 69.30437102), (-50.15496171, 69.30460129),
+        (-50.15558151, 69.3046876), (-50.15590777, 69.3048459),
+        (-50.15623406, 69.30500419), (-50.156559, 69.30516281),
+        (-50.15626538, 69.30523481), (-50.15597192, 69.30530677),
+        (-50.15567856, 69.30537871), (-50.15538527, 69.30545062),
+        (-50.15541698, 69.30568115), (-50.15512387, 69.30575302),
+        (-50.15483084, 69.30582487), (-50.15421294, 69.30573808),
+        (-50.15392004, 69.3058099), (-50.15359524, 69.30565124),
+        (-50.15297631, 69.3055647), (-50.15268369, 69.30563645),
+        (-50.15235735, 69.30547818), (-50.15203056, 69.30532002),
+        (-50.15141283, 69.30523318), (-50.15112013, 69.30530495),
+        (-50.1505015, 69.30521834), (-50.15017531, 69.30506003),
+        (-50.15046779, 69.30498832), (-50.15043315, 69.30475852),
+        (-50.1501077, 69.30460003), (-50.15040003, 69.30452836),
+        (-50.15036646, 69.3042983), (-50.15065919, 69.30422653),
+        (-50.15095197, 69.30415475), (-50.15091834, 69.3039247),
+        (-50.15059214, 69.3037664), (-50.14997352, 69.30367978),
+        (-50.14968083, 69.30375154), (-50.14906209, 69.30366496),
+        (-50.14844328, 69.30357838), (-50.14815009, 69.30365026),
+        (-50.14785768, 69.30372195), (-50.14753153, 69.30356363),
+        (-50.14691338, 69.30347689), (-50.14629497, 69.30339021),
+        (-50.14567671, 69.3033035), (-50.14535057, 69.30314519),
+        (-50.14564277, 69.30307356), (-50.14626111, 69.30316024),
+        (-50.14655355, 69.30308855), (-50.14651952, 69.30285862),
+        (-50.14648605, 69.30262853), (-50.14615845, 69.30247059),
+        (-50.14553817, 69.30238438), (-50.14521191, 69.30222612))
+}
+
+
+TEST_PLAKE_ITEM_NO_GEO_DICT = {
+    "lake_id": "9120145452"
+}
+
 DB_TEST_PLAKE_TABLE_NAME = "hydrocron-swot-testlake-table"
 API_TEST_PLAKE_TABLE_NAME = "hydrocron-swot-prior-lake-table"
 TEST_PLAKE_PARTITION_KEY_NAME = 'lake_id'
@@ -84,6 +135,28 @@
 SWOT_REACH_COLLECTION_VERSION = SWOT_REACH_COLLECTION_NAME[19:]
 SWOT_NODE_COLLECTION_VERSION = SWOT_NODE_COLLECTION_NAME[19:]
 SWOT_PRIOR_LAKE_COLLECTION_VERSION = SWOT_PRIOR_LAKE_COLLECTION_NAME[18:]
+SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS = (
+    (-31.286028054129474, -27.207309600925463),
+    (-22.19117572552625, -28.812946226841383),
+    (-15.725605024311761, -29.21206933352415),
+    (-9.73430598260046, -29.228374663756604),
+    (-9.643271006951636, -27.233170541912884),
+    (-13.841716582541977, -27.37318973052451),
+    (-13.640561876091681, -21.64742387547294),
+    (-15.517427505373604, -21.61501976602659),
+    (-15.687806151090996, -28.090307824912784),
+    (-20.53678800850099, -28.156869804349213),
+    (-20.271711250148456, -24.421282696689033),
+    (-16.826147231682597, -24.69813060607345),
+    (-16.457685427420472, -21.588744491452957),
+    (-21.46664265437724, -21.33573507315593),
+    (-21.962226106320827, -27.948720914494196),
+    (-23.98629064034978, -27.80816909915125),
+    (-22.949633572250406, -20.8450893435173),
+    (-25.16962667009571, -20.772294910422403),
+    (-25.61120124377038, -25.40631583584434),
+    (-31.032731158967948, -24.810351227750644),
+    (-31.286028054129474, -27.207309600925463))
 
 SWOT_REACH_PARTITION_KEY = "reach_id"
 SWOT_NODE_PARTITION_KEY = "node_id"
diff --git a/tests/data/SWOT_L2_HR_LakeSP_Prior_018_100_GR_20240713T111741_20240713T112027_PIC0_01/SWOT_L2_HR_LakeSP_Prior_018_100_GR_20240713T111741_20240713T112027_PIC0_01.dbf b/tests/data/SWOT_L2_HR_LakeSP_Prior_018_100_GR_20240713T111741_20240713T112027_PIC0_01/SWOT_L2_HR_LakeSP_Prior_018_100_GR_20240713T111741_20240713T112027_PIC0_01.dbf
index fda9690..7aae378 100644
Binary files a/tests/data/SWOT_L2_HR_LakeSP_Prior_018_100_GR_20240713T111741_20240713T112027_PIC0_01/SWOT_L2_HR_LakeSP_Prior_018_100_GR_20240713T111741_20240713T112027_PIC0_01.dbf and b/tests/data/SWOT_L2_HR_LakeSP_Prior_018_100_GR_20240713T111741_20240713T112027_PIC0_01/SWOT_L2_HR_LakeSP_Prior_018_100_GR_20240713T111741_20240713T112027_PIC0_01.dbf differ
diff --git a/tests/test_io_swot_reach_node_shp.py b/tests/test_io_swot_reach_node_shp.py
index c9c817b..c7db389 100644
--- a/tests/test_io_swot_reach_node_shp.py
+++ b/tests/test_io_swot_reach_node_shp.py
@@ -8,6 +8,8 @@
 """
 from datetime import datetime, timedelta, timezone
 import pytz
+import numpy as np
+from shapely import Polygon, Point, geometry, wkt, centroid
 
 from hydrocron.utils import constants
 from hydrocron.db.io import swot_shp
@@ -79,6 +81,47 @@ def test_read_lake_shapefile():
         assert val == items[4596][key]
 
 
+def test_lake_null_geometry():
+    """
+    Tests replacing null geometry with fillvalue for lake polygons
+    """
+    items = swot_shp.read_shapefile(
+        constants.TEST_PLAKE_SHAPEFILE_PATH,
+        obscure_data=False,
+        columns=constants.PRIOR_LAKE_DATA_COLUMNS)
+
+    geojson = geometry.mapping(wkt.loads(items[0]['geometry']))
+    coords_0 = np.round(np.array(geojson['coordinates']), 3)
+
+    assert Point(coords_0).equals_exact(centroid(Polygon(
+        constants.SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS)), tolerance=1e-3)
+
+
+def test_lake_centerpoints():
+    """
+    Tests replacing polygons with centerpoints
+    """
+    items = swot_shp.read_shapefile(
+        constants.TEST_PLAKE_SHAPEFILE_PATH,
+        obscure_data=False,
+        columns=constants.PRIOR_LAKE_DATA_COLUMNS)
+
+    geojson = geometry.mapping(wkt.loads(items[0]['geometry']))
+    coords_0 = np.round(np.array(geojson['coordinates']), 3)
+
+    assert Point(coords_0).equals_exact(centroid(Polygon(
+        constants.SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS)), tolerance=1e-3)
+
+    geojson_4596 = geometry.mapping(wkt.loads(items[4596]['geometry']))
+    coords_4596 = np.round(np.array(geojson_4596['coordinates']), 3)
+
+    geojson_test_4596 = geometry.mapping(centroid(Polygon(
+        constants.TEST_PLAKE_GEOM_DICT['geometry'])))
+    test_4596 = np.round(np.array(geojson_test_4596['coordinates']), 3)
+
+    assert str(Point(coords_4596)) == str(Point(test_4596))
+
+
 def test_read_shapefile_obscured():
     """
     Tests reading attributes from the shapefile with real values obscured