From d96afa10af4bcc74d67196ff5d16649cd67da74d Mon Sep 17 00:00:00 2001 From: scharlottej13 Date: Mon, 4 Dec 2023 17:09:09 -0800 Subject: [PATCH 1/3] Update kwarg for spot --- national-water-model/xarray-water-model.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/national-water-model/xarray-water-model.py b/national-water-model/xarray-water-model.py index 2190a2e..ab188ed 100644 --- a/national-water-model/xarray-water-model.py +++ b/national-water-model/xarray-water-model.py @@ -16,35 +16,29 @@ cluster = coiled.Cluster( name="nwm-1979-2020", - region="us-east-1", # close to data + region="us-east-1", # close to data n_workers=10, - scheduler_vm_types="r7g.xlarge", # ARM instance + scheduler_vm_types="r7g.xlarge", # ARM instance worker_vm_types="r7g.2xlarge", - compute_purchase_option="spot_with_fallback" # use spot, replace with on-demand + spot_policy="spot_with_fallback", # use spot, replace with on-demand ) client = cluster.get_client() cluster.adapt(minimum=10, maximum=200) ds = xr.open_zarr( - fsspec.get_mapper( - "s3://noaa-nwm-retrospective-2-1-zarr-pds/rtout.zarr", - anon=True - ), + fsspec.get_mapper("s3://noaa-nwm-retrospective-2-1-zarr-pds/rtout.zarr", anon=True), consolidated=True, - chunks={"time": 896, "x": 350, "y": 350} + chunks={"time": 896, "x": 350, "y": 350}, ) -subset = ds.zwattablrt.sel( - time=slice("1979-02-01", "2020-12-31") -) +subset = ds.zwattablrt.sel(time=slice("1979-02-01", "2020-12-31")) fs = fsspec.filesystem("s3", requester_pays=True) with dask.annotate(retries=3): counties = rioxarray.open_rasterio( - fs.open("s3://nwm-250m-us-counties/Counties_on_250m_grid.tif"), - chunks="auto" + fs.open("s3://nwm-250m-us-counties/Counties_on_250m_grid.tif"), chunks="auto" ).squeeze() # remove any small floating point error in coordinate locations From 0cd92101fc1033db086c7332bf3d0679cf0cdd0a Mon Sep 17 00:00:00 2001 From: scharlottej13 Date: Mon, 4 Dec 2023 17:09:44 -0800 Subject: [PATCH 2/3] Add code for making NWM video --- .../make-timelapse-video.ipynb | 145 ++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 national-water-model/make-timelapse-video.ipynb diff --git a/national-water-model/make-timelapse-video.ipynb b/national-water-model/make-timelapse-video.ipynb new file mode 100644 index 0000000..0ebe389 --- /dev/null +++ b/national-water-model/make-timelapse-video.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data Prep" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import xarray as xr\n", + "import numpy as np\n", + "\n", + "# Read county shapefile, combo of state FIPS code and county FIPS code as multi-index\n", + "counties = gpd.read_file(\n", + " \"https://www2.census.gov/geo/tiger/GENZ2020/shp/cb_2020_us_county_20m.zip\"\n", + ").to_crs(\"EPSG:3395\")\n", + "counties[\"STATEFP\"] = counties.STATEFP.astype(int)\n", + "counties[\"COUNTYFP\"] = counties.COUNTYFP.astype(int)\n", + "continental = counties[\n", + " ~counties[\"STATEFP\"].isin([2, 15, 72])\n", + "] # drop Alaska, Hawaii, Puerto Rico\n", + "\n", + "# Read in saved data from xarray-water-model.py\n", + "ds = xr.open_dataset(\"mean_zwattablrt_nwm_1979_2020.nc\")\n", + "ds[\"week\"] = ds.time.dt.strftime(\"%Y-%U\")\n", + "ds = ds.groupby(\"week\").mean()\n", + "# Interpret county as combo of state FIPS code and county FIPS code\n", + "ds.coords[\"STATEFP\"] = (ds.county // 1000).astype(int)\n", + "ds.coords[\"COUNTYFP\"] = np.mod(ds.county, 1000).astype(int)\n", + "df = ds.to_dataframe().reset_index()\n", + "\n", + "# Join\n", + "merge_df = continental.merge(df, on=[\"STATEFP\", \"COUNTYFP\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make all the plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from mpl_toolkits.axes_grid1 import make_axes_locatable\n", + "from datetime import datetime\n", + "\n", + "day_0 = datetime.strptime(weeks[0] + \"-0\", \"%Y-%U-%w\")\n", + "weeks = merge_df.week.unique()\n", + "\n", + "for week in weeks:\n", + " fig, ax = plt.subplots(1, 1, figsize=(7.68, 4.32)) # for 3840x2160 resolution\n", + "\n", + " ax.set_axis_off()\n", + "\n", + " divider = make_axes_locatable(ax)\n", + " cax = divider.append_axes(\"bottom\", size=\"5%\", pad=0.1)\n", + "\n", + " cax.tick_params(labelsize=8)\n", + " cax.set_title(\"Depth (in meters) of the water table\", fontsize=8)\n", + "\n", + " merge_df[merge_df[\"week\"] == f\"{week}\"].plot(\n", + " column=\"zwattablrt\",\n", + " cmap=\"BrBG_r\",\n", + " vmin=0,\n", + " vmax=2,\n", + " legend=True,\n", + " ax=ax,\n", + " cax=cax,\n", + " legend_kwds={\n", + " \"orientation\": \"horizontal\",\n", + " \"ticks\": [0, 0.5, 1, 1.5, 2],\n", + " },\n", + " )\n", + "\n", + " # Add legends for memory, time, and cost\n", + " current_day = datetime.strptime(week + \"-0\", \"%Y-%U-%w\")\n", + " n_days = (current_day - day_0).days\n", + " memory = n_days * (16.88 * 1.07374) # daily memory converted to GB\n", + " cost = (n_days / 7) * 0.01124606 # weekly cost\n", + "\n", + " if memory >= 1000:\n", + " memory_string = f\"{memory/1000:.1f} TB processed, ~${cost:.2f} in cloud costs\"\n", + " else:\n", + " memory_string = f\"{memory:.0f} GB processed, ~${cost:.2f} in cloud costs\"\n", + "\n", + " plt.text(0, 1, memory_string, transform=ax.transAxes, size=9)\n", + " # convert Year - Week Number to Month - Year\n", + " date = datetime.strptime(week + \"-0\", \"%Y-%U-%w\").strftime(\"%Y %b\")\n", + " plt.text(0.85, 1, f\"{date}\", transform=ax.transAxes, size=10)\n", + " plt.savefig(f\"../../nwm-animation/3840x2160/{week}.png\", transparent=True, dpi=500)\n", + " plt.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use [ffmpeg](https://ffmpeg.org/) to stitch the images together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ffmpeg -pattern_type glob -i '3840x2160/*.png' -r 60 -crf 18 -pix_fmt yuv420p nwm-video.mp4" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nwm", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 1e17e79b2e606036c8299da2fd7195bf83b68bbe Mon Sep 17 00:00:00 2001 From: Steve <3835734+stevebanik@users.noreply.github.com> Date: Wed, 6 Dec 2023 14:31:06 -0600 Subject: [PATCH 3/3] Update arxiv-matplotlib.ipynb s/prepreint/preprint/g --- arxiv-matplotlib.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arxiv-matplotlib.ipynb b/arxiv-matplotlib.ipynb index 452f6bb..3f78673 100644 --- a/arxiv-matplotlib.ipynb +++ b/arxiv-matplotlib.ipynb @@ -15,7 +15,7 @@ "\n", "*\"About 15% of arXiv papers use Matplotlib\"*\n", "\n", - "arXiv is the preeminent repository for scholarly prepreint articles. It stores millions of journal articles used across science. It's also public access, and so we can just scrape the entire thing given enough compute power.\n", + "arXiv is the preeminent repository for scholarly preprint articles. It stores millions of journal articles used across science. It's also public access, and so we can just scrape the entire thing given enough compute power.\n", "\n", "## Watermark\n", "\n",