Skip to content

Commit

Permalink
Convert simplification to use Healpix and fix rotation (#133)
Browse files Browse the repository at this point in the history
* Upate simplification script to use healpix

* Fix group calculation

* Improve top value extraction speed

* Fix lat/lon handling on healpy

* Fully working simplification with healpix rotation

* Groups algorithm ready for scale
  • Loading branch information
yellowcap authored Feb 6, 2025
1 parent 5f9bcf0 commit 7a10c7d
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 138 deletions.
62 changes: 39 additions & 23 deletions scripts/groups.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,67 @@
import xdggs
import xarray as xr
import numpy as np

from simplify import has_states, list_tags, logger, open_dataset
from simplify import (
has_states,
list_tags,
logger,
open_dataset,
rotate_data,
get_filesystem,
)

NSIDE = 4096


def main():
logger.debug("Listing tags")
def rotate_group():
data = xr.open_zarr("data/sea_bass_average_with_shift.zarr")
rotated = rotate_data(data.rename_dims({"quarter": "time"}))
rotated = rotated.rename_dims({"time": "quarter"})
rotated.to_zarr("data/sea_bass_average.zarr", mode="w")

store = get_filesystem().get_mapper(
"s3://destine-gfts-visualisation-data/groups/sea_bass_average.zarr"
)
rotated.to_zarr(store=store, mode="w", consolidated=True, compute=True)


def create_groups():
tags = list_tags()

result = None
timpesteps = 0
for tag in tags:
for idx, tag in enumerate(tags):
if not has_states(tag):
logger.debug(f"No states.zarr file found for {tag}")
continue
logger.debug(f"Processing tag {tag} ({idx + 1}/{len(tags)})")

data = open_dataset(tag)
timpesteps += data.time.shape[0]
data = data.fillna(0)

data.cell_ids.attrs = {
"grid_name": "healpix",
"nside": NSIDE,
"nest": True,
}

data = data.drop_vars(["latitude", "longitude"]).stack(
cell=["x", "y"], create_index=False
)
data = data.set_xindex("cell_ids", xdggs.DGGSIndex)

avg_by_quarter = data.groupby("time.quarter").sum("time", skipna=True)
avg_by_quarter = data.groupby("time.quarter").sum("time").compute()

if result is None:
result = avg_by_quarter
else:
result += avg_by_quarter

result = result.fillna(0)
result = result.compute()

result = result / timpesteps

result.to_zarr("data/pollock_average.zarr")
result = result.where(result != 0, other=np.nan)

# Also export normalized and sretched as uint16
max_value = result.states.max()
result.states = (result.states / max_value * 65535).astype("uint16")
result.to_zarr("data/sea_bass_average_with_shift.zarr", mode="w")

result.to_zarr("data/pollock_average_uint16.zarr")
store = get_filesystem().get_mapper(
"s3://destine-gfts-visualisation-data/groups/pollock_average_with_shift.zarr"
)
result.to_zarr(store=store, mode="w", consolidated=True, compute=True)


if __name__ == "__main__":
main()
create_groups()
rotate_group()
Loading

0 comments on commit 7a10c7d

Please sign in to comment.