Skip to content

Commit

Permalink
Merge pull request #5 from TomAugspurger/tom/fix/to_item_collection-t…
Browse files Browse the repository at this point in the history
…ypes

Improved to_item_collection
  • Loading branch information
Tom Augspurger authored Apr 24, 2023
2 parents 54e02fd + 50f2886 commit 85efc86
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ repos:
- id: flake8
language_version: python3
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.942
rev: v1.2.0
hooks:
- id: mypy
# Override default --ignore-missing-imports
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dependencies = [
"geopandas",
"pandas",
"pyarrow",
"shapely"
"shapely",
"packaging",
]

[project.optional-dependencies]
Expand Down Expand Up @@ -54,6 +55,8 @@ filterwarnings = [

[tool.mypy]

python_version = "3.10"

[[tool.mypy.overrides]]
module = [
"shapely.*",
Expand Down
4 changes: 2 additions & 2 deletions stac_geoparquet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""stac-geoparquet"""
__version__ = "0.2.0"

from .stac_geoparquet import to_geodataframe
from .stac_geoparquet import to_geodataframe, to_dict, to_item_collection


__all__ = ["__version__", "to_geodataframe"]
__all__ = ["__version__", "to_geodataframe", "to_dict", "to_item_collection"]
6 changes: 6 additions & 0 deletions stac_geoparquet/_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import pystac
import packaging.version

# True when the installed pystac release is version 1.7.0 or newer.
PYSTAC_1_7_0 = packaging.version.Version("1.7.0") <= packaging.version.parse(
    pystac.__version__
)
31 changes: 28 additions & 3 deletions stac_geoparquet/stac_geoparquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,22 @@
import pystac
import geopandas
import pandas as pd
import numpy as np
import shapely.geometry

from stac_geoparquet.utils import fix_empty_multipolygon


def _fix_array(v):
if isinstance(v, np.ndarray):
v = v.tolist()

elif isinstance(v, dict):
v = {k: _fix_array(v2) for k, v2 in v.items()}

return v


def to_geodataframe(items: Sequence[dict[str, Any]]) -> geopandas.GeoDataFrame:
"""
Convert a sequence of STAC items to a :class:`geopandas.GeoDataFrame`.
Expand Down Expand Up @@ -94,6 +105,7 @@ def to_dict(record: dict) -> dict:
}
item = {}
for k, v in record.items():
v = _fix_array(v)

if k in top_level_keys:
item[k] = v
Expand All @@ -107,6 +119,20 @@ def to_dict(record: dict) -> dict:


def to_item_collection(df: geopandas.GeoDataFrame) -> pystac.ItemCollection:
"""
Convert a GeoDataFrame of STAC items to an :class:`pystac.ItemCollection`.
Parameters
----------
df : geopandas.GeoDataFrame
A GeoDataFrame with a schema similar to that exported by stac-geoparquet.
Returns
-------
item_collection : pystac.ItemCollection
The converted ItemCollection. There will be one record / feature per
row in the GeoDataFrame.
"""
df2 = df.copy()
datelike = df2.select_dtypes(
include=["datetime64[ns, UTC]", "datetime64[ns]"]
Expand All @@ -116,6 +142,5 @@ def to_item_collection(df: geopandas.GeoDataFrame) -> pystac.ItemCollection:
df2[k].dt.strftime("%Y-%m-%dT%H:%M:%S.%fZ").fillna("").replace({"": None})
)

return pystac.ItemCollection(
[to_dict(record) for record in df2.to_dict(orient="records")]
)
records = [to_dict(record) for record in df2.to_dict(orient="records")]
return pystac.ItemCollection(records)
Binary file added tests/data/naip.parquet
Binary file not shown.
16 changes: 14 additions & 2 deletions tests/test_pgstac_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import stac_geoparquet.pgstac_reader
from stac_geoparquet.utils import assert_equal
from stac_geoparquet._compat import PYSTAC_1_7_0


HERE = pathlib.Path(__file__).parent
Expand Down Expand Up @@ -96,7 +97,8 @@ def test_naip_item():
]

cfg = stac_geoparquet.pgstac_reader.CollectionConfig(
collection_id="naip", render_config="assets=image&asset_bidx=image%7C1%2C2%2C3"
collection_id="naip",
render_config="assets=image&asset_bidx=image%7C1%2C2%2C3&format=png",
)
result = cfg.make_pgstac_items(records, base_item)[0]
# shapely uses tuples instead of lists
Expand All @@ -106,6 +108,11 @@ def test_naip_item():
"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/pa_m_4108053_se_17_1_20150725_20151201" # noqa: E501
)

if PYSTAC_1_7_0:
# https://github.com/stac-utils/pystac/issues/1102
expected.remove_links(rel=pystac.RelType.SELF)
result.remove_links(rel=pystac.RelType.SELF)

assert_equal(result, expected)


Expand All @@ -120,12 +127,17 @@ def test_sentinel2_l2a():
partition_frequency=None,
stac_api="https://planetarycomputer.microsoft.com/api/stac/v1",
should_inject_dynamic_properties=True,
render_config="assets=visual&asset_bidx=visual%7C1%2C2%2C3&nodata=0",
render_config="assets=visual&asset_bidx=visual%7C1%2C2%2C3&nodata=0&format=png",
)
result = pystac.read_dict(config.make_pgstac_items([record], base_item)[0])
expected = pystac.read_file(
"https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20150704T101006_R022_T35XQA_20210411T133707" # noqa: E501
)
if PYSTAC_1_7_0:
# https://github.com/stac-utils/pystac/issues/1102
expected.remove_links(rel=pystac.RelType.SELF)
result.remove_links(rel=pystac.RelType.SELF)

expected.remove_links(rel=pystac.RelType.LICENSE)
assert_equal(result, expected)

Expand Down
6 changes: 4 additions & 2 deletions tests/test_stac_geoparquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,11 @@ def test_s1_grd():
],
)
def test_smoke(collection_id):
items = requests.get(
r = requests.get(
f"https://planetarycomputer.microsoft.com/api/stac/v1/collections/{collection_id}/items?limit=1"
).json()["features"]
)
r.raise_for_status()
items = r.json()["features"]
df = stac_geoparquet.to_geodataframe(items)

result = to_item_collection(df)
Expand Down
118 changes: 118 additions & 0 deletions tests/test_to_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import pathlib

import pytest
import geopandas

import stac_geoparquet


# Directory containing this test module.
HERE = pathlib.Path(__file__).parent


@pytest.fixture
def naip():
    """Read ``data/naip.parquet`` (located next to this module) with geopandas."""
    parquet_path = HERE.joinpath("data", "naip.parquet")
    return geopandas.read_parquet(parquet_path)


def test_to_dict(naip):
    """
    Convert the ``naip`` fixture with ``to_item_collection`` and check that the
    first item's dictionary form equals the expected STAC item below.
    """
    result = stac_geoparquet.to_item_collection(naip)
    # Expected dictionary form of the first item in the collection.
    expected = {
        "assets": {
            "image": {
                "eo:bands": [
                    {"common_name": "red", "description": None, "name": "Red"},
                    {"common_name": "green", "description": None, "name": "Green"},
                    {"common_name": "blue", "description": None, "name": "Blue"},
                    {
                        "common_name": "nir",
                        "description": "near-infrared",
                        "name": "NIR",
                    },
                ],
                "href": "https://naipeuwest.blob.core.windows.net/naip/v002/ok/2010/ok_100cm_2010/34099/m_3409901_nw_14_1_20100425.tif",  # noqa: E501
                "roles": ["data"],
                "title": "RGBIR COG tile",
                "type": "image/tiff; application=geotiff; " "profile=cloud-optimized",
            },
            "rendered_preview": {
                "href": "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=naip&item=ok_m_3409901_nw_14_1_20100425&assets=image&asset_bidx=image%7C1%2C2%2C3",  # noqa: E501
                "rel": "preview",
                "roles": ["overview"],
                "title": "Rendered preview",
                "type": "image/png",
            },
            "thumbnail": {
                "href": "https://naipeuwest.blob.core.windows.net/naip/v002/ok/2010/ok_100cm_2010/34099/m_3409901_nw_14_1_20100425.200.jpg",  # noqa: E501
                "roles": ["thumbnail"],
                "title": "Thumbnail",
                "type": "image/jpeg",
            },
            "tilejson": {
                "href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=naip&item=ok_m_3409901_nw_14_1_20100425&assets=image&asset_bidx=image%7C1%2C2%2C3",  # noqa: E501
                "roles": ["tiles"],
                "title": "TileJSON with default rendering",
                "type": "application/json",
            },
        },
        "bbox": [-100.004084, 34.934259, -99.933454, 35.00323],
        "collection": "naip",
        "geometry": {
            # The coordinates are expected as nested tuples rather than lists.
            "coordinates": (
                (
                    (-99.933454, 34.934815),
                    (-99.93423, 35.00323),
                    (-100.004084, 35.002673),
                    (-100.00325, 34.934259),
                    (-99.933454, 34.934815),
                ),
            ),
            "type": "Polygon",
        },
        "id": "ok_m_3409901_nw_14_1_20100425",
        "links": [
            {
                "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip",
                "rel": "collection",
                "type": "application/json",
            },
            {
                "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip",
                "rel": "parent",
                "type": "application/json",
            },
            {
                "href": "https://planetarycomputer.microsoft.com/api/stac/v1/",
                "rel": "root",
                "title": "Microsoft Planetary Computer STAC API",
                "type": "application/json",
            },
            {
                "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/ok_m_3409901_nw_14_1_20100425",  # noqa: E501
                "rel": "self",
                "type": "application/geo+json",
            },
            {
                "href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=naip&item=ok_m_3409901_nw_14_1_20100425",  # noqa: E501
                "rel": "preview",
                "title": "Map of item",
                "type": "text/html",
            },
        ],
        "properties": {
            "datetime": "2010-04-25T00:00:00Z",
            "gsd": 1.0,
            "naip:state": "ok",
            "naip:year": "2010",
            "proj:bbox": [408377.0, 3866212.0, 414752.0, 3873800.0],
            "proj:epsg": 26914,
            "proj:shape": [7588, 6375],
            "proj:transform": [1.0, 0.0, 408377.0, 0.0, -1.0, 3873800.0, 0.0, 0.0, 1.0],
        },
        "stac_extensions": [
            "https://stac-extensions.github.io/eo/v1.0.0/schema.json",
            "https://stac-extensions.github.io/projection/v1.0.0/schema.json",
        ],
        "stac_version": "1.0.0",
        "type": "Feature",
    }
    # Only the first item of the resulting collection is compared.
    assert result[0].to_dict() == expected

0 comments on commit 85efc86

Please sign in to comment.