Skip to content

Commit

Permalink
Add snippet to convert zarr to parquet (#135)
Browse files Browse the repository at this point in the history
  • Loading branch information
yellowcap authored Feb 7, 2025
1 parent b0ea4de commit 527bb79
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions scripts/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,30 @@
NSIDE = 4096


def convert_to_parquet():
    """Export the averaged sea bass zarr store as one parquet file per quarter.

    Reads ``data/sea_bass_average.zarr``, pivots it into a table with one row
    per ``cell_ids`` value and one ``quarter_<i>`` column per quarter, and then,
    for each quarter, keeps the rows whose value is greater than 1e-7 and
    writes them twice: to ``data/sea_bass_average_q<i>.parquet`` locally and to
    the ``destine-gfts-visualisation-data`` S3 bucket under ``groups/``.
    """
    data = xr.open_zarr("data/sea_bass_average.zarr")
    data = (
        data.to_dataframe()
        .reset_index()
        .set_index(["cell_ids", "quarter"])
        .unstack("quarter")
    )
    # The "cell" column group is not part of the exported table.
    del data["cell"]
    data = data.reset_index()
    # Flatten the (variable, quarter) column levels into plain names.
    # NOTE(review): assumes a single remaining data variable and exactly four
    # quarters; pandas raises on a length mismatch otherwise - confirm.
    data.columns = ["cell_ids"] + [f"quarter_{i}" for i in range(1, 5)]

    for i in range(1, 5):
        column = f"quarter_{i}"
        quarter = data[["cell_ids", column]]
        # Apply the threshold to the value column only. Masking the whole
        # frame would also compare ``cell_ids`` against 1e-7 and silently
        # drop the row for cell id 0. Missing values compare as False, so
        # they are filtered out here as well; ``dropna`` additionally guards
        # against rows with a missing cell id.
        quarter = quarter[quarter[column] > 1e-7].dropna()
        logger.info(f"Writing parquet file for quarter {i}")
        quarter.to_parquet(f"data/sea_bass_average_q{i}.parquet", index=False)
        with get_filesystem().open(
            f"s3://destine-gfts-visualisation-data/groups/sea_bass_average_q{i}.parquet",
            "wb",
        ) as fl:
            # Keep the uploaded file identical to the local one: without
            # ``index=False`` the non-contiguous row index left over from
            # filtering would be stored as an extra column.
            quarter.to_parquet(fl, index=False)


def rotate_group():
data = xr.open_zarr("data/sea_bass_average_with_shift.zarr")
rotated = rotate_data(data.rename_dims({"quarter": "time"}))
Expand Down Expand Up @@ -70,3 +94,4 @@ def create_groups():
if __name__ == "__main__":
    # Run every processing step sequentially, in the same order as before.
    # NOTE(review): later steps read zarr stores from data/ that are
    # presumably produced by earlier ones - keep the order unless verified.
    for step in (create_groups, rotate_group, convert_to_parquet):
        step()

0 comments on commit 527bb79

Please sign in to comment.