From 94fee528da37b25e0601a021a66eb317ce38443b Mon Sep 17 00:00:00 2001 From: akrherz Date: Mon, 19 Aug 2024 07:09:26 -0500 Subject: [PATCH 1/7] chore: bump ruff --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8d8c8e3b..27a5e602 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.5.7" + rev: "v0.6.1" hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] From 4974f439d2f8434579f1154de2d7505b0f52a334 Mon Sep 17 00:00:00 2001 From: akrherz Date: Mon, 19 Aug 2024 11:51:23 -0500 Subject: [PATCH 2/7] ci: setup more data for testing --- .github/setupdata.sh | 1 + .github/setuppaths.sh | 8 ++++++++ .github/workflows/pydep.yml | 15 +++++++-------- 3 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 .github/setupdata.sh create mode 100644 .github/setuppaths.sh diff --git a/.github/setupdata.sh b/.github/setupdata.sh new file mode 100644 index 00000000..fd347dc7 --- /dev/null +++ b/.github/setupdata.sh @@ -0,0 +1 @@ +touch /i/0/cli/100x042/100.30x042.26.cli diff --git a/.github/setuppaths.sh b/.github/setuppaths.sh new file mode 100644 index 00000000..4cec34a6 --- /dev/null +++ b/.github/setuppaths.sh @@ -0,0 +1,8 @@ +# Create necessary paths +mkdir -p _data/2/0 +mkdir _data/data +sudo ln -s `pwd`/_data /mnt/idep2 +sudo ln -s `pwd`/_data/2 /i +sudo ln -s `pwd` /opt/dep + +mkdir -p /i/0/cli/100x042 diff --git a/.github/workflows/pydep.yml b/.github/workflows/pydep.yml index 9b577a8d..077d8a08 100644 --- a/.github/workflows/pydep.yml +++ b/.github/workflows/pydep.yml @@ -11,7 +11,7 @@ jobs: defaults: run: # Ensures environment gets sourced right - shell: bash -l {0} + shell: bash -e -l {0} name: Build and Test runs-on: ubuntu-latest strategy: @@ -26,12 +26,6 @@ jobs: run: | cat .github/workflows/etchosts.txt | sudo tee -a /etc/hosts - # daryl is a hack - - name: Create symlink - run: | - set -e - sudo ln -s `pwd` /opt/dep - # setup conda-forge with micromamba - name: Setup Python uses: mamba-org/setup-micromamba@v1 @@ -46,6 +40,12 @@ jobs: environment-name: prod cache-environment: true + - name: Setup paths + run: sh .github/setuppaths.sh + + - name: Setup data + run: sh .github/setupdata.sh + - name: Setup Postgres run: | git clone --depth 1 https://github.com/akrherz/iem-database.git database @@ -60,7 +60,6 @@ jobs: - name: Build and test run: | - set -e python -m pip install . --upgrade --no-deps python -m pytest --cov=pydep tests python -m coverage xml From 684597e24c63a8f401d79ae0a042c5148126fdca Mon Sep 17 00:00:00 2001 From: akrherz Date: Mon, 19 Aug 2024 11:51:45 -0500 Subject: [PATCH 3/7] refactor CLI file editing to support expansion --- scripts/cligen/daily_clifile_editor.py | 488 ++++++++++++------------- scripts/cligen/proctor_tile_edit.py | 52 ++- src/pydep/io/cli.py | 34 ++ tests/io/test_cli.py | 21 +- 4 files changed, 327 insertions(+), 268 deletions(-) diff --git a/scripts/cligen/daily_clifile_editor.py b/scripts/cligen/daily_clifile_editor.py index 7de0b710..f8852ada 100644 --- a/scripts/cligen/daily_clifile_editor.py +++ b/scripts/cligen/daily_clifile_editor.py @@ -1,77 +1,63 @@ -"""DEP WEPP cli editor. One "tile" at a time. - -Usage: - python daily_climate_editor.py -
- -Where tiles start in the lower left corner and are 5x5 deg in size - -development laptop has data for 3 March 2019 - -""" +"""DEP WEPP cli editor. One "tile" at a time.""" try: from zoneinfo import ZoneInfo # type: ignore except ImportError: from backports.zoneinfo import ZoneInfo # type: ignore -import datetime import os import sys import traceback -from collections import namedtuple +import warnings +from datetime import date, datetime, timedelta, timezone from io import StringIO from multiprocessing import cpu_count from multiprocessing.pool import ThreadPool +import click import numpy as np +from affine import Affine from osgeo import gdal -from pydep.io.cli import daily_formatter -from pydep.util import get_cli_fname from pyiem import iemre -from pyiem.iemre import EAST, NORTH, SOUTH, WEST -from pyiem.util import convert_value, logger, ncopen -from scipy.interpolate import NearestNDInterpolator +from pyiem.util import archive_fetch, convert_value, logger, ncopen +from rasterio.errors import NotGeoreferencedWarning +from rasterio.warp import reproject from tqdm import tqdm +from pydep.io.cli import ( + check_has_clifiles, + compute_tile_bounds, + daily_formatter, +) +from pydep.util import get_cli_fname + +# NB: This seems to be some thread safety issue with GDAL +warnings.simplefilter("ignore", NotGeoreferencedWarning) gdal.UseExceptions() LOG = logger() CENTRAL = ZoneInfo("America/Chicago") -UTC = datetime.timezone.utc +UTC = timezone.utc ST4PATH = "/mesonet/data/stage4" # used for breakpoint logic -ZEROHOUR = datetime.datetime(2000, 1, 1, 0, 0) +ZEROHOUR = datetime(2000, 1, 1, 0, 0) # How many CPUs are we going to burn CPUCOUNT = min([4, int(cpu_count() / 4)]) -MEMORY = {"stamp": datetime.datetime.now()} -BOUNDS = namedtuple("Bounds", ["south", "north", "east", "west"]) +MEMORY = {"stamp": datetime.now()} # An estimate of the number of years of data we have in our climate files -ESTIMATED_YEARS = datetime.date.today().year - 2007 + 1 - +ESTIMATED_YEARS = date.today().year - 2007 + 1 -def check_has_clifiles(bounds: BOUNDS): - """Check that a directory exists, which likely indicates clifiles exist.""" - for lon in np.arange(bounds.west, bounds.east, 1): - for lat in np.arange(bounds.south, bounds.north, 1): - ckdir = f"/i/0/cli/{(0 - lon):03.0f}x{(lat):03.0f}" - if os.path.isdir(ckdir): - return True - return False - -def get_sts_ets_at_localhour(date, local_hour): +def get_sts_ets_at_localhour(dt: date, local_hour): """Return a Day Interval in UTC for the given date at CST/CDT hour.""" # ZoneInfo is supposed to get this right at instanciation - sts = datetime.datetime( - date.year, - date.month, - date.day, + sts = datetime( + dt.year, + dt.month, + dt.day, local_hour, tzinfo=CENTRAL, ) - date2 = datetime.date( - date.year, date.month, date.day - ) + datetime.timedelta(days=1) - ets = datetime.datetime( + date2 = date(dt.year, dt.month, dt.day) + timedelta(days=1) + ets = datetime( date2.year, date2.month, date2.day, @@ -84,7 +70,7 @@ def get_sts_ets_at_localhour(date, local_hour): ) -def iemre_bounds_check(name, val, lower, upper): +def iemre_bounds_check(name, val, lower, upper) -> None: """Make sure our data is within bounds, if not, exit!""" if np.isnan(val).all(): LOG.warning("FATAL: iemre %s all NaN", name) @@ -101,77 +87,82 @@ def iemre_bounds_check(name, val, lower, upper): upper, ) sys.exit(3) - return val -def load_iemre(nc, data, valid): +def load_iemre(data, valid, tile_affine: Affine): """Use IEM Reanalysis for non-precip data - 24km product is smoothed down to the 0.01 degree grid + 1/8 degree product is nearest neighbor resampled to the 0.01 degree grid """ - offset = iemre.daily_offset(valid) - lats = nc.variables["lat"][:] - lons = nc.variables["lon"][:] - lons, lats = np.meshgrid(lons, lats) - - # Storage is W m-2, we want langleys per day - ncdata = ( - nc.variables["rsds"][offset, :, :].filled(np.nan) - * 86400.0 - / 1_000_000.0 - * 23.9 - ) - # Default to a value of 300 when this data is missing, for some reason - nn = NearestNDInterpolator( - (np.ravel(lons), np.ravel(lats)), np.ravel(ncdata) - ) - # What's a reasonable max bounts? Well, 50 MJ/d should be an extreme high - data["solar"][:] = iemre_bounds_check( - "rsds", nn(data["lon"], data["lat"]), 0, 50.0 * 23.9 - ) - - ncdata = convert_value( - nc.variables["high_tmpk"][offset, :, :].filled(np.nan), "degK", "degC" - ) - nn = NearestNDInterpolator( - (np.ravel(lons), np.ravel(lats)), np.ravel(ncdata) - ) - data["high"][:] = iemre_bounds_check( - "high_tmpk", nn(data["lon"], data["lat"]), -60, 60 + ds = iemre.get_grids( + valid, + varnames=["high_tmpk", "low_tmpk", "avg_dwpk", "wind_speed", "rsds"], ) - ncdata = convert_value( - nc.variables["low_tmpk"][offset, :, :].filled(np.nan), "degK", "degC" + dom = iemre.DOMAINS[""] + iemre_affine = Affine( + iemre.DX, 0.0, dom["west"], 0.0, iemre.DY, dom["south"] ) - nn = NearestNDInterpolator( - (np.ravel(lons), np.ravel(lats)), np.ravel(ncdata) + reproject( + # Convert W m-2 to langleys per day + ds["rsds"] * 86400.0 / 1_000_000.0 * 23.9, + data["solar"], + src_transform=iemre_affine, + src_crs="EPSG:4326", + dst_transform=tile_affine, + dst_crs="EPSG:4326", + dst_nodata=np.nan, ) - data["low"][:] = iemre_bounds_check( - "low_tmpk", nn(data["lon"], data["lat"]), -60, 60 - ) - - ncdata = convert_value( - nc.variables["avg_dwpk"][offset, :, :].filled(np.nan), "degK", "degC" + # What's a reasonable max bounts? Well, 50 MJ/d should be an extreme high + iemre_bounds_check("rsds", data["solar"], 0, 50.0 * 23.9) + + reproject( + convert_value(ds["high_tmpk"], "degK", "degC"), + data["high"], + src_transform=iemre_affine, + src_crs="EPSG:4326", + dst_transform=tile_affine, + dst_crs="EPSG:4326", + dst_nodata=np.nan, ) - nn = NearestNDInterpolator( - (np.ravel(lons), np.ravel(lats)), np.ravel(ncdata) + iemre_bounds_check("high", data["high"], -60, 60) + + reproject( + convert_value(ds["low_tmpk"], "degK", "degC"), + data["low"], + src_transform=iemre_affine, + src_crs="EPSG:4326", + dst_transform=tile_affine, + dst_crs="EPSG:4326", + dst_nodata=np.nan, ) - data["dwpt"][:] = iemre_bounds_check( - "avg_dwpk", nn(data["lon"], data["lat"]), -60, 60 + iemre_bounds_check("low", data["low"], -60, 60) + + reproject( + convert_value(ds["avg_dwpk"], "degK", "degC"), + data["dwpt"], + src_transform=iemre_affine, + src_crs="EPSG:4326", + dst_transform=tile_affine, + dst_crs="EPSG:4326", + dst_nodata=np.nan, ) + iemre_bounds_check("dwpt", data["dwpt"], -60, 60) # Wind is already in m/s, but could be masked - ncdata = nc.variables["wind_speed"][offset, :, :].filled(np.nan) - nn = NearestNDInterpolator( - (np.ravel(lons), np.ravel(lats)), np.ravel(ncdata) - ) - # 30 found to be too high (13 Sep 2008) - data["wind"][:] = iemre_bounds_check( - "wind_speed", nn(data["lon"], data["lat"]), 0, 40 + reproject( + ds["wind_speed"], + data["wind"], + src_transform=iemre_affine, + src_crs="EPSG:4326", + dst_transform=tile_affine, + dst_crs="EPSG:4326", + dst_nodata=np.nan, ) + iemre_bounds_check("wind", data["wind"], 0, 40) -def load_stage4(data, valid, xtile, ytile): +def load_stage4(data, dt: date, tile_affine): """It sucks, but we need to load the stage IV data to give us something to benchmark the MRMS data against, to account for two things: 1) Wind Farms @@ -179,7 +170,7 @@ def load_stage4(data, valid, xtile, ytile): """ LOG.debug("called") # The stage4 files store precip in the rears, so compute 1 AM - one_am, tomorrow = get_sts_ets_at_localhour(valid, 1) + one_am, tomorrow = get_sts_ets_at_localhour(dt, 1) sts_tidx = iemre.hourly_offset(one_am) ets_tidx = iemre.hourly_offset(tomorrow) @@ -190,20 +181,28 @@ def load_stage4(data, valid, xtile, ytile): ets_tidx, tomorrow, ) - with ncopen(f"{ST4PATH}/{valid.year}_stage4_hourly.nc", "r") as nc: + # ll, so no flipping + projparams = { + "a": 6371200.0, + "b": 6371200.0, + "proj": "stere", + "lat_ts": 60.0, + "lat_0": 90.0, + "lon_0": 255.0 - 360.0, + } + stage4_affine = Affine(4762.5, 0.0, -1902531, 0.0, 4762.5, -7617604) + with ncopen(f"{ST4PATH}/{dt:%Y}_stage4_hourly.nc", "r") as nc: p01m = nc.variables["p01m"] - lats = nc.variables["lat"][:] - lons = nc.variables["lon"][:] # crossing jan 1 if ets_tidx < sts_tidx: LOG.debug("Exercise special stageIV logic for jan1!") - totals = np.sum(p01m[sts_tidx:, :, :], axis=0) + totals = np.sum(p01m[sts_tidx:], axis=0) with ncopen(f"{ST4PATH}/{tomorrow.year}_stage4_hourly.nc") as nc2: p01m = nc2.variables["p01m"] - totals += np.sum(p01m[:ets_tidx, :, :], axis=0) + totals += np.sum(p01m[:ets_tidx], axis=0) else: - totals = np.sum(p01m[sts_tidx:ets_tidx, :, :], axis=0) + totals = np.sum(p01m[sts_tidx:ets_tidx], axis=0) if np.ma.max(totals) > 0: pass @@ -213,15 +212,18 @@ def load_stage4(data, valid, xtile, ytile): # set a small non-zero number to keep things non-zero totals[totals < 0.001] = 0.001 - nn = NearestNDInterpolator( - (lons.flatten(), lats.flatten()), totals.flatten() + reproject( + totals, + data["stage4"], + src_transform=stage4_affine, + src_crs=projparams, + dst_transform=tile_affine, + dst_crs="EPSG:4326", ) - data["stage4"][:] = nn(data["lon"], data["lat"]) - write_grid(data["stage4"], valid, xtile, ytile, "stage4") LOG.debug("finished") -def qc_precip(data, valid, xtile, ytile, tile_bounds): +def qc_precip(data, dt: date, tile_affine: Affine): """Make some adjustments to the `precip` grid Not very sophisticated here, if the hires precip grid is within 33% of @@ -230,7 +232,7 @@ def qc_precip(data, valid, xtile, ytile, tile_bounds): """ hires_total = np.sum(data["precip"], 2) # Only do this check for tiles west of -101, likely too crude for east - if valid.year >= 2015 and xtile < 5: + if dt.year >= 2015 and tile_affine.c < -101: # Do an assessment of the hires_total (A2M MRMS), it may have too many # zeros where stage IV has actual data. a2m_zeros = hires_total < 0.01 @@ -238,38 +240,45 @@ def qc_precip(data, valid, xtile, ytile, tile_bounds): score = np.sum(np.logical_and(a2m_zeros, s4)) / np.multiply(*s4.shape) if score > 0.005: # 0.5% is arb, but good enough? LOG.warning("MRMS had %.2f%% bad zeros, using legacy", score * 100) - load_precip_legacy(data, valid, tile_bounds) + load_precip_legacy(data, dt, tile_affine) hires_total = np.sum(data["precip"], 2) # prevent zeros hires_total[hires_total < 0.01] = 0.01 - write_grid(hires_total, valid, xtile, ytile, "inqcprecip") multiplier = data["stage4"] / hires_total # Anything close to 1, set it to 1 multiplier = np.where( np.logical_and(multiplier > 0.67, multiplier < 1.33), 1.0, multiplier ) - write_grid(multiplier, valid, xtile, ytile, "multiplier") data["precip"][:] *= multiplier[:, :, None] data["precip"][np.isnan(data["precip"])] = 0.0 - write_grid(np.sum(data["precip"], 2), valid, xtile, ytile, "outqcprecip") - - -def _reader(filename, tile_bounds): - top = int((50.0 - tile_bounds.north) * 100.0) - bottom = int((50.0 - tile_bounds.south) * 100.0) - right = int((tile_bounds.east - -126.0) * 100.0) - left = int((tile_bounds.west - -126.0) * 100.0) - imgdata = gdal.Open(filename, 0).ReadAsArray() - # Convert the image data to dbz - dbz = (np.flipud(imgdata[top:bottom, left:right]) - 7.0) * 5.0 - return np.where(dbz < 255, ((10.0 ** (dbz / 10.0)) / 200.0) ** 0.625, 0) +def _reader(utcnow: datetime, tidx, shp, tile_affine): + """Read the legacy N0R data""" + ppath = utcnow.strftime("%Y/%m/%d/GIS/uscomp/n0r_%Y%m%d%H%M.png") + with archive_fetch(ppath) as fn: + if fn is None: + return None, None + # upper right corner is 50N, 126W + imgdata = gdal.Open(fn, 0).ReadAsArray() + # Convert the image data to dbz + dbz = (imgdata - 7.0) * 5.0 + dbz = np.where(dbz < 255, ((10.0 ** (dbz / 10.0)) / 200.0) ** 0.625, 0) + grid = np.zeros(shp, np.float32) + reproject( + dbz, + grid, + src_transform=Affine(0.01, 0.0, -126.0, 0.0, -0.01, 50.0), + src_crs="EPSG:4326", + dst_transform=tile_affine, + dst_crs="EPSG:4326", + ) + return tidx, grid -def load_precip_legacy(data, valid, tile_bounds): +def load_precip_legacy(data, valid, tile_affine): """Compute a Legacy Precip product for dates prior to 1 Jan 2014""" LOG.debug("called") ts = 12 * 24 # 5 minute @@ -278,36 +287,36 @@ def load_precip_legacy(data, valid, tile_bounds): now = midnight # To avoid division by zero below when we have the strip of no-data 23-24N - m5 = np.ones((ts, *data["solar"].shape), np.float16) * 0.01 + m5 = np.ones((ts, *data["solar"].shape), np.float32) * 0.01 + + def _cb(args): + """callback""" + tidx, precip = args + if precip is not None: + m5[tidx] = precip + tidx = 0 - filenames = [] - indices = [] - # Load up the n0r data, every 5 minutes - while now < tomorrow: - utcvalid = now.astimezone(UTC) - fn = utcvalid.strftime( - "/mesonet/ARCHIVE/data/%Y/%m/%d/GIS/uscomp/n0r_%Y%m%d%H%M.png" - ) - if os.path.isfile(fn): + shp = data["solar"].shape + with ThreadPool(4) as pool: + # Load up the n0r data, every 5 minutes + while now < tomorrow: if tidx >= ts: # Abort as we are in CST->CDT break - filenames.append(fn) - indices.append(tidx) - else: - LOG.warning("missing: %s", fn) - - now += datetime.timedelta(minutes=5) - tidx += 1 + pool.apply_async( + _reader, + (now.astimezone(UTC), tidx, shp, tile_affine), + callback=_cb, + ) + now += timedelta(minutes=5) + tidx += 1 + pool.close() + pool.join() - for tidx, filename in zip(indices, filenames): - # Legacy product may not go far enough south :/ - yslice = slice(100, 500) if tile_bounds.south == 23 else slice(0, 500) - m5[tidx, yslice, :] = _reader(filename, tile_bounds) LOG.debug("finished loading N0R Composites") m5 = np.transpose(m5, (1, 2, 0)).copy() LOG.debug("transposed the data!") - # Prevent overflow with likely bad data leading to value > np.float16 + # Prevent overflow with likely bad data leading to value > np.float32 m5total = np.sum(m5, 2, dtype=np.float32) LOG.debug("computed sum(m5)") wm5 = m5 / m5total[:, :, None] @@ -335,75 +344,67 @@ def _compute(yidx, xidx): LOG.debug("finished precip calculation") -def load_precip(data, valid, tile_bounds): - """Load the 5 minute precipitation data into our ginormus grid""" - LOG.debug("called") - ts = 30 * 24 # 2 minute +def _mrms_reader(utcnow: datetime, tidx, shp, tile_affine, a2m_divisor): + """Read the MRMS data.""" + ppath = utcnow.strftime("%Y/%m/%d/GIS/mrms/a2m_%Y%m%d%H%M.png") + with archive_fetch(ppath) as fn: + if fn is None: + return None, None + imgdata = gdal.Open(fn, 0).ReadAsArray() + imgdata = np.where(imgdata < 255, imgdata / a2m_divisor, 0) + grid = np.zeros(shp, np.float32) + reproject( + imgdata, + grid, + src_transform=Affine(0.01, 0.0, -130.0, 0.0, -0.01, 55.0), + src_crs="EPSG:4326", + dst_transform=tile_affine, + dst_crs="EPSG:4326", + ) + return tidx, grid - midnight, tomorrow = get_sts_ets_at_localhour(valid, 0) - top = int((55.0 - tile_bounds.north) * 100.0) - bottom = int((55.0 - tile_bounds.south) * 100.0) +def load_precip(data, dt: date, tile_affine: Affine): + """Load the 2 minute precipitation data into our ginormus grid""" + LOG.debug("called") + ts = 30 * 24 # 2 minute - right = int((tile_bounds.east - -130.0) * 100.0) - left = int((tile_bounds.west - -130.0) * 100.0) + midnight, tomorrow = get_sts_ets_at_localhour(dt, 0) # Oopsy we discovered a problem - a2m_divisor = 10.0 if (valid < datetime.date(2015, 1, 1)) else 50.0 + a2m_divisor = 10.0 if (dt < date(2015, 1, 1)) else 50.0 now = midnight # Require at least 75% data coverage, if not, we will abort back to legacy - quorum = ts * 0.75 - fns = [] - while now < tomorrow: - fn = now.astimezone(UTC).strftime( - "/mesonet/ARCHIVE/data/%Y/%m/%d/GIS/mrms/a2m_%Y%m%d%H%M.png" - ) - if os.path.isfile(fn): - quorum -= 1 - fns.append(fn) - else: - fns.append(None) - # TODO log this from proctor to cut down on noise - LOG.info("missing: %s", fn) - - now += datetime.timedelta(minutes=2) - if quorum > 0: - LOG.warning( - "Failed 75%% quorum with MRMS a2m %.1f, loading legacy", quorum - ) - load_precip_legacy(data, valid, tile_bounds) - return - LOG.debug("fns[0]: %s fns[-1]: %s", fns[0], fns[-1]) - - def _reader(tidx, fn): - """Reader.""" - if fn is None: - return tidx, 0 - # PIL is not thread safe, so we need to use GDAL - imgdata = gdal.Open(fn, 0).ReadAsArray() - # sample out and then flip top to bottom! - imgdata = np.flipud(np.array(imgdata)[top:bottom, left:right]) - return tidx, imgdata + data["quorum"] = ts * 0.75 def _cb(args): - """write data.""" - tidx, pdata = args - data["precip"][:, :, tidx] = np.where( - pdata < 255, - pdata / a2m_divisor, - 0, - ) + """callback""" + tidx, grid = args + if grid is not None: + data["quorum"] -= 1 + data["precip"][:, :, tidx] = grid - LOG.debug("starting %s threads to read a2m", CPUCOUNT) - with ThreadPool(CPUCOUNT) as pool: - for tidx, fn in enumerate(fns): - # we ignore an hour for CDT->CST, meh - if tidx >= ts: - continue - pool.apply_async(_reader, (tidx, fn), callback=_cb) + tidx = 0 + shp = data["solar"].shape + with ThreadPool(4) as pool: + while now < tomorrow: + pool.apply_async( + _mrms_reader, + (now.astimezone(UTC), tidx, shp, tile_affine, a2m_divisor), + callback=_cb, + error_callback=LOG.error, + ) + now += timedelta(minutes=2) + tidx += 1 pool.close() pool.join() + if data["quorum"] > 0: + LOG.warning( + "Failed 75%% quorum with MRMS a2m %.1f, loading legacy", + data["quorum"], + ) + load_precip_legacy(data, dt, tile_affine) def bpstr(ts, accum): @@ -440,7 +441,7 @@ def compute_breakpoint(ar, accumThreshold=2.0, intensityThreshold=1.0): continue # Need to initialize the breakpoint data if bp is None: - ts = ZEROHOUR + datetime.timedelta(minutes=(i * 2)) + ts = ZEROHOUR + timedelta(minutes=(i * 2)) bp = [bpstr(ts, 0)] accum += intensity lasti = i @@ -451,7 +452,7 @@ def compute_breakpoint(ar, accumThreshold=2.0, intensityThreshold=1.0): if (i + 1) == len(ar): ts = ZEROHOUR.replace(hour=23, minute=59) else: - ts = ZEROHOUR + datetime.timedelta(minutes=((i + 1) * 2)) + ts = ZEROHOUR + timedelta(minutes=((i + 1) * 2)) bp.append(bpstr(ts, accum)) # To append a last accumulation, we need to have something meaningful # so to not have redundant values, which bombs WEPP @@ -459,7 +460,7 @@ def compute_breakpoint(ar, accumThreshold=2.0, intensityThreshold=1.0): if (lasti + 1) == len(ar): ts = ZEROHOUR.replace(hour=23, minute=59) else: - ts = ZEROHOUR + datetime.timedelta(minutes=((lasti + 1) * 2)) + ts = ZEROHOUR + timedelta(minutes=((lasti + 1) * 2)) bp.append(bpstr(ts, accum)) return bp @@ -475,19 +476,18 @@ def write_grid(grid, valid, xtile, ytile, fnadd=""): np.save(f"{basedir}/{valid:%Y%m%d}{fnadd}.tile_{xtile}_{ytile}", grid) -def precip_workflow(data, valid, xtile, ytile, tile_bounds): +def precip_workflow(data, valid, tile_affine: Affine): """Drive the precipitation workflow""" - load_stage4(data, valid, xtile, ytile) + load_stage4(data, valid, tile_affine) # We have MRMS a2m RASTER files prior to 1 Jan 2015, but these files used # a very poor choice of data interval of 0.1mm, which is not large enough # to capture low intensity events. Files after 1 Jan 2015 used a better # 0.02mm resolution if valid.year < 2015: - load_precip_legacy(data, valid, tile_bounds) + load_precip_legacy(data, valid, tile_affine) else: - load_precip(data, valid, tile_bounds) - qc_precip(data, valid, xtile, ytile, tile_bounds) - write_grid(np.sum(data["precip"], 2), valid, xtile, ytile) + load_precip(data, valid, tile_affine) + qc_precip(data, valid, tile_affine) def edit_clifile(xidx, yidx, clifn, data, valid) -> bool: @@ -501,7 +501,7 @@ def edit_clifile(xidx, yidx, clifn, data, valid) -> bool: return False pos2 = clidata[pos:].find( - (valid + datetime.timedelta(days=1)).strftime("%-d\t%-m\t%Y") + (valid + timedelta(days=1)).strftime("%-d\t%-m\t%Y") ) if pos2 == -1: LOG.warning("Date2 find failure for %s", clifn) @@ -552,33 +552,21 @@ def edit_clifile(xidx, yidx, clifn, data, valid) -> bool: return True -def compute_tile_bounds(xtile, ytile, tilesize) -> BOUNDS: - """Return a BOUNDS namedtuple.""" - south = SOUTH + ytile * tilesize - west = WEST + xtile * tilesize - return BOUNDS( - south=south, - north=min([south + tilesize, NORTH]), - west=west, - east=min([west + tilesize, EAST]), - ) - - -def main(argv): +@click.command() +@click.option("--scenario", "-s", type=int, default=0, help="Scenario") +@click.option("--xtile", type=int, required=True, help="X Tile") +@click.option("--ytile", type=int, required=True, help="Y Tile") +@click.option( + "--date", "dt", type=click.DateTime(), required=True, help="Date" +) +def main(scenario, xtile, ytile, dt: datetime): """The workflow to get the weather data variables we want!""" - if len(argv) != 8: - print( - "Usage: python daily_climate_editor.py " - "
" - ) - return - xtile = int(argv[1]) - ytile = int(argv[2]) - tilesize = int(argv[3]) - scenario = int(argv[4]) - valid = datetime.date(int(argv[5]), int(argv[6]), int(argv[7])) + valid = dt.date() - tile_bounds = compute_tile_bounds(xtile, ytile, tilesize) + tile_bounds = compute_tile_bounds(xtile, ytile) + tile_affine = Affine( + 0.01, 0.0, tile_bounds.west, 0.0, 0.01, tile_bounds.south + ) LOG.info("bounds %s", tile_bounds) shp = ( int((tile_bounds.north - tile_bounds.south) * 100), @@ -586,31 +574,28 @@ def main(argv): ) data = {} for vname in "high low dwpt wind solar stage4".split(): - data[vname] = np.zeros(shp, np.float16) + data[vname] = np.zeros(shp, np.float32) # Optimize for continuous memory - data["precip"] = np.zeros((*shp, 30 * 24), np.float16) + data["precip"] = np.zeros((*shp, 30 * 24), np.float32) - # We could be outside conus, if so, just write out zeros + # We could be outside contiguous US, if so, just write out zeros if not check_has_clifiles(tile_bounds): LOG.info("Exiting as tile %s %s is outside CONUS", xtile, ytile) write_grid(np.sum(data["precip"], 2), valid, xtile, ytile) return - xaxis = np.arange(tile_bounds.west, tile_bounds.east, 0.01) - yaxis = np.arange(tile_bounds.south, tile_bounds.north, 0.01) - data["lon"], data["lat"] = np.meshgrid(xaxis, yaxis) - # 1. Max Temp C # 2. Min Temp C # 3. Radiation l/d # 4. wind mps # 6. Mean dewpoint C - with ncopen(iemre.get_daily_ncname(valid.year)) as nc: - load_iemre(nc, data, valid) + load_iemre(data, valid, tile_affine) # 5. wind direction (always zero) # 7. breakpoint precip mm - precip_workflow(data, valid, xtile, ytile, tile_bounds) + precip_workflow(data, valid, tile_affine) + write_grid(data["stage4"], valid, xtile, ytile, "stage4") + write_grid(np.sum(data["precip"], 2), valid, xtile, ytile) queue = [] for yidx in range(shp[0]): @@ -656,11 +641,14 @@ def _proxy(xidx, yidx, clifn, data, valid): _proxy, (xidx, yidx, clifn, data, valid), callback=_callback, + error_callback=LOG.error, ) pool.close() pool.join() + LOG.info("Finished with %s errors", errors["cnt"]) + if __name__ == "__main__": # Go Main Go - main(sys.argv) + main() diff --git a/scripts/cligen/proctor_tile_edit.py b/scripts/cligen/proctor_tile_edit.py index 2b0bc3b8..c7f26e10 100644 --- a/scripts/cligen/proctor_tile_edit.py +++ b/scripts/cligen/proctor_tile_edit.py @@ -7,12 +7,12 @@ import sys import time from concurrent.futures import ProcessPoolExecutor -from datetime import date +from datetime import date, datetime from multiprocessing import cpu_count import click import numpy as np -from pyiem.iemre import EAST, NORTH, SOUTH, WEST +from pyiem import iemre from pyiem.util import logger LOG = logger() @@ -26,12 +26,13 @@ def get_fn(dt: date): def assemble_grids(tilesz, dt: date): """Build back the grid from the tiles.""" - YS = int((NORTH - SOUTH) * 100.0) - XS = int((EAST - WEST) * 100.0) + dom = iemre.DOMAINS[""] + YS = int((dom["north"] - dom["south"]) * 100.0) + XS = int((dom["east"] - dom["west"]) * 100.0) res = np.zeros((YS, XS)) basedir = f"{DATADIR}/{dt.year}" - for i, _lon in enumerate(np.arange(WEST, EAST, tilesz)): - for j, _lat in enumerate(np.arange(SOUTH, NORTH, tilesz)): + for i, _lo in enumerate(np.arange(dom["west"], dom["east"], tilesz)): + for j, _la in enumerate(np.arange(dom["south"], dom["north"], tilesz)): fn = f"{basedir}/{dt:%Y%m%d}.tile_{i}_{j}.npy" if not os.path.isfile(fn): continue @@ -44,16 +45,16 @@ def assemble_grids(tilesz, dt: date): np.save(file=fh, arr=res) -def myjob(cmd): +def myjob(cmd: list): """Run this command and return any result.""" proc = subprocess.Popen( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) stdout, stderr = proc.communicate() if stdout != b"" or stderr != b"": LOG.warning( "CMD: %s\nSTDOUT: %s\nSTDERR: %s", - cmd, + " ".join(cmd), stdout.decode("ascii", "ignore").strip(), stderr.decode("ascii", "ignore").strip(), ) @@ -62,8 +63,20 @@ def myjob(cmd): @click.command() @click.option("--scenario", "-s", type=int, default=0, help="Scenario") -@click.option("--date", "dt", type=click.DateTime(), help="Date to process") -def main(scenario, dt): +@click.option( + "--date", + "dt", + type=click.DateTime(), + help="Date to process", + required=True, +) +@click.option( + "--domain", + type=str, + default="", + help="Domain to process", +) +def main(scenario, dt: datetime, domain: str): """Go Main Go.""" tilesz = 5 dt = dt.date() @@ -72,12 +85,17 @@ def main(scenario, dt): filets = os.stat(fn)[stat.ST_MTIME] LOG.warning("%s was last processed on %s", dt, time.ctime(filets)) jobs = [] - for i, _lon in enumerate(np.arange(WEST, EAST, tilesz)): - for j, _lat in enumerate(np.arange(SOUTH, NORTH, tilesz)): - cmd = ( - f"python daily_clifile_editor.py {i} {j} {tilesz} " - f"{scenario} {dt:%Y %m %d}" - ) + dom = iemre.DOMAINS[domain] + for i, _lo in enumerate(np.arange(dom["west"], dom["east"], tilesz)): + for j, _la in enumerate(np.arange(dom["south"], dom["north"], tilesz)): + cmd = [ + "python", + "daily_clifile_editor.py", + f"--xtile={i}", + f"--ytile={j}", + f"--scenario={scenario}", + f"--date={dt:%Y-%m-%d}", + ] jobs.append(cmd) failed = False # 12 Nov 2021 audit shows per process usage in the 2-3 GB range diff --git a/src/pydep/io/cli.py b/src/pydep/io/cli.py index 008383f5..de8b5405 100644 --- a/src/pydep/io/cli.py +++ b/src/pydep/io/cli.py @@ -1,6 +1,40 @@ """DEP/WEPP Climate File Helpers.""" # pylint: disable=too-many-arguments +import os +from collections import namedtuple + +import numpy as np +from pyiem import iemre + +# Definition of some bounds for processing +BOUNDS = namedtuple("Bounds", ["south", "north", "east", "west"]) +# Climate File Tile Size (in degrees) +CLIMATE_TILE_SIZE = 5 + + +def check_has_clifiles(bounds: BOUNDS): + """Check that a directory exists, which likely indicates clifiles exist.""" + for lon in np.arange(bounds.west, bounds.east, 1): + for lat in np.arange(bounds.south, bounds.north, 1): + ckdir = f"/i/0/cli/{(0 - lon):03.0f}x{(lat):03.0f}" + if os.path.isdir(ckdir): + return True + return False + + +def compute_tile_bounds(xtile, ytile, domain="") -> BOUNDS: + """Return a BOUNDS namedtuple.""" + dom = iemre.DOMAINS[domain] + south = dom["south"] + ytile * CLIMATE_TILE_SIZE + west = dom["west"] + xtile * CLIMATE_TILE_SIZE + return BOUNDS( + south=south, + north=min([south + CLIMATE_TILE_SIZE, dom["north"]]), + west=west, + east=min([west + CLIMATE_TILE_SIZE, dom["east"]]), + ) + def daily_formatter(valid, bpdata, high, low, solar, wind, dwpt) -> str: """Generate string formatting the given data. diff --git a/tests/io/test_cli.py b/tests/io/test_cli.py index 67d850e8..00419a0a 100644 --- a/tests/io/test_cli.py +++ b/tests/io/test_cli.py @@ -2,7 +2,26 @@ from datetime import date -from pydep.io.cli import daily_formatter +from pydep.io.cli import ( + BOUNDS, + check_has_clifiles, + compute_tile_bounds, + daily_formatter, +) + + +def test_compute_bounds(): + """Test that we compute bounds properly.""" + res = compute_tile_bounds(1, 1, domain="") + assert res == BOUNDS(south=28, north=33, east=-116, west=-121) + + +def test_has_clifiles(): + """Test our check for clifiles.""" + bnds = BOUNDS(south=41, north=43, east=-99, west=-101) + assert check_has_clifiles(bnds) + bnds = BOUNDS(south=11, north=13, east=-97, west=-98) + assert not check_has_clifiles(bnds) def test_daily_formatter(): From e7a132e8a3b5c0a882d75762016452860da62b57 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:52:27 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/RT/env2csv.py | 5 +++-- scripts/RT/env2database.py | 5 +++-- scripts/RT/harvest2database.py | 3 ++- scripts/RT/harvest_wb.py | 3 ++- scripts/RT/proctor_sweep.py | 3 ++- scripts/RT/scenario_compare_event.py | 3 ++- scripts/cligen/add_clifile.py | 3 ++- scripts/cligen/assign_climate_file.py | 3 ++- scripts/cligen/edit_cli_header.py | 3 ++- scripts/cligen/init_clifiles.py | 3 ++- scripts/cligen/locate_clifile.py | 3 ++- scripts/cligen/qc_summarize.py | 3 ++- scripts/cligen/r_factor.py | 3 ++- scripts/climatechange/addstorms_cli.py | 3 ++- scripts/climatechange/dump_results.py | 3 ++- scripts/dynamic_tillage/bootstrap_year.py | 3 ++- scripts/dynamic_tillage/compute_smstate.py | 3 ++- scripts/gridorder/flowpathlength_totals.py | 3 ++- scripts/gridorder/list_flowpath_delivery.py | 1 + scripts/gridorder2/dumper.py | 3 ++- scripts/gridorder2/strips_dumper.py | 3 ++- scripts/import/flowpath2prj.py | 5 +++-- scripts/import/flowpath_importer.py | 3 ++- scripts/plots/plot_wb.py | 3 ++- scripts/plots/plot_wb_snow.py | 3 ++- scripts/plots/scenario_slope_q_q.py | 3 ++- scripts/plots/slope_histogram.py | 1 + scripts/switchgrass/modify_for_switchgrass.py | 3 ++- scripts/tillage/flowpath2prj.py | 3 ++- scripts/tillage_timing/dump_dataset1.py | 3 ++- scripts/tillage_timing/dump_mar10_tab1.py | 3 ++- scripts/tillage_timing/dump_mar10_tab2.py | 3 ++- scripts/tillage_timing/dump_mar10_tab3.py | 3 ++- scripts/tillage_timing/dump_mar10_tab4.py | 3 ++- scripts/tillage_timing/dynamic_tillage_mod_rot.py | 3 ++- scripts/tillage_timing/plot_crop.py | 3 ++- scripts/tillage_timing/plot_dailydeltas.py | 3 ++- scripts/tillage_timing/plot_sm_freq.py | 3 ++- scripts/util/compare_scenarios_1huc12.py | 3 ++- scripts/util/compute_wilting_capacity.py | 3 ++- scripts/util/custom_datum_summary.py | 3 ++- scripts/util/diagnose_huc12.py | 3 ++- scripts/util/dump_ofe_results.py | 3 ++- scripts/util/dump_wb.py | 1 + scripts/util/dump_wb_one.py | 1 + scripts/util/huc12_summary.py | 3 ++- scripts/util/make_dirs.py | 3 ++- tests/test_util.py | 3 ++- 48 files changed, 95 insertions(+), 47 deletions(-) diff --git a/scripts/RT/env2csv.py b/scripts/RT/env2csv.py index 2aa51588..069743b4 100644 --- a/scripts/RT/env2csv.py +++ b/scripts/RT/env2csv.py @@ -12,13 +12,14 @@ import numpy as np import pandas as pd from affine import Affine -from pydep.io.wepp import read_env -from pydep.util import load_scenarios from pyiem.iemre import NORTH, WEST from pyiem.util import get_dbconn, logger from rasterstats import zonal_stats from tqdm import tqdm +from pydep.io.wepp import read_env +from pydep.util import load_scenarios + LOG = logger() PRECIP_AFF = Affine(0.01, 0.0, WEST, 0.0, -0.01, NORTH) CONFIG = {"subset": False} diff --git a/scripts/RT/env2database.py b/scripts/RT/env2database.py index 91d4c712..7bd6b0f6 100644 --- a/scripts/RT/env2database.py +++ b/scripts/RT/env2database.py @@ -19,13 +19,14 @@ import numpy as np import pandas as pd from affine import Affine -from pydep.io.dep import read_env -from pydep.util import load_scenarios from pyiem.grid.zs import CachingZonalStats from pyiem.iemre import NORTH, WEST from pyiem.util import get_dbconn, get_dbconnstr, logger from tqdm import tqdm +from pydep.io.dep import read_env +from pydep.util import load_scenarios + LOG = logger() PRECIP_AFF = Affine(0.01, 0.0, WEST, 0.0, -0.01, NORTH) CONFIG = {"subset": False} diff --git a/scripts/RT/harvest2database.py b/scripts/RT/harvest2database.py index 0c20bf2d..0167f97b 100644 --- a/scripts/RT/harvest2database.py +++ b/scripts/RT/harvest2database.py @@ -9,10 +9,11 @@ # third party import pandas as pd -from pydep.io.wepp import read_yld from pyiem.util import get_dbconn from tqdm import tqdm +from pydep.io.wepp import read_yld + def readfile(huc12, filename): """Make me a df please""" diff --git a/scripts/RT/harvest_wb.py b/scripts/RT/harvest_wb.py index 2e57b7e3..ead40ecc 100644 --- a/scripts/RT/harvest_wb.py +++ b/scripts/RT/harvest_wb.py @@ -9,12 +9,13 @@ import numpy as np import pandas as pd from affine import Affine -from pydep.io.dep import read_wb from pyiem.iemre import NORTH, WEST from pyiem.util import get_dbconn from rasterstats import zonal_stats from tqdm import tqdm +from pydep.io.dep import read_wb + PRECIP_AFF = Affine(0.01, 0.0, WEST, 0.0, -0.01, NORTH) diff --git a/scripts/RT/proctor_sweep.py b/scripts/RT/proctor_sweep.py index 6dedd804..613f7f53 100644 --- a/scripts/RT/proctor_sweep.py +++ b/scripts/RT/proctor_sweep.py @@ -29,12 +29,13 @@ import numpy as np import pandas as pd import requests -from pydep.io.man import man2df, read_man from pyiem.database import get_sqlalchemy_conn from pyiem.util import logger from sqlalchemy import text from tqdm import tqdm +from pydep.io.man import man2df, read_man + HUC12S = ["090201081101", "090201081102", "090201060605"] LOG = logger() IEMRE = "http://mesonet.agron.iastate.edu/iemre/hourly" diff --git a/scripts/RT/scenario_compare_event.py b/scripts/RT/scenario_compare_event.py index e5f77806..70b0c449 100644 --- a/scripts/RT/scenario_compare_event.py +++ b/scripts/RT/scenario_compare_event.py @@ -4,9 +4,10 @@ import os import pandas as pd -from pydep.io.wepp import read_env from pyiem.plot.use_agg import plt +from pydep.io.wepp import read_env + def main(): """Go Main Go.""" diff --git a/scripts/cligen/add_clifile.py b/scripts/cligen/add_clifile.py index 39e086f8..941b2d9c 100644 --- a/scripts/cligen/add_clifile.py +++ b/scripts/cligen/add_clifile.py @@ -11,9 +11,10 @@ import pandas as pd import requests +from pyiem.util import convert_value, logger + from pydep.io.wepp import read_cli from pydep.util import get_cli_fname -from pyiem.util import convert_value, logger LOG = logger() diff --git a/scripts/cligen/assign_climate_file.py b/scripts/cligen/assign_climate_file.py index e1a57f76..54ef2eed 100644 --- a/scripts/cligen/assign_climate_file.py +++ b/scripts/cligen/assign_climate_file.py @@ -2,9 +2,10 @@ import sys -from pydep.util import get_cli_fname from pyiem.util import get_dbconn, logger +from pydep.util import get_cli_fname + LOG = logger() diff --git a/scripts/cligen/edit_cli_header.py b/scripts/cligen/edit_cli_header.py index ffef13a4..dbf806a6 100644 --- a/scripts/cligen/edit_cli_header.py +++ b/scripts/cligen/edit_cli_header.py @@ -7,11 +7,12 @@ # Third Party import numpy as np import pandas as pd -from pydep.util import get_cli_fname from pyiem.iemre import EAST, NORTH, SOUTH, WEST, find_ij, get_dailyc_ncname from pyiem.util import convert_value, logger, ncopen from tqdm import tqdm +from pydep.util import get_cli_fname + LOG = logger() # Untracked here, but this version was used to fix the header column labels REV = "HeaderRev: v20230605.1" diff --git a/scripts/cligen/init_clifiles.py b/scripts/cligen/init_clifiles.py index 66abe528..e670fe7d 100644 --- a/scripts/cligen/init_clifiles.py +++ b/scripts/cligen/init_clifiles.py @@ -9,10 +9,11 @@ import subprocess import numpy as np -from pydep.util import get_cli_fname from pyiem.iemre import EAST, NORTH, SOUTH, WEST from pyiem.util import get_dbconn +from pydep.util import get_cli_fname + def finder(lon, lat, clscenario): """The logic of finding a file.""" diff --git a/scripts/cligen/locate_clifile.py b/scripts/cligen/locate_clifile.py index 3a04d806..2d9349b7 100644 --- a/scripts/cligen/locate_clifile.py +++ b/scripts/cligen/locate_clifile.py @@ -4,9 +4,10 @@ import subprocess import sys -from pydep.util import get_cli_fname from pyiem.util import get_dbconn, logger +from pydep.util import get_cli_fname + LOG = logger() diff --git a/scripts/cligen/qc_summarize.py b/scripts/cligen/qc_summarize.py index 59602cea..016fdc03 100644 --- a/scripts/cligen/qc_summarize.py +++ b/scripts/cligen/qc_summarize.py @@ -8,10 +8,11 @@ import pandas as pd import pytz import requests -from pydep.io.wepp import read_cli from pyiem.iemre import hourly_offset from pyiem.util import c2f, mm2inch +from pydep.io.wepp import read_cli + LDATE = (datetime.date.today() - datetime.timedelta(days=1)).strftime( "%Y-%m-%d" ) diff --git a/scripts/cligen/r_factor.py b/scripts/cligen/r_factor.py index 959825a8..86a12f97 100644 --- a/scripts/cligen/r_factor.py +++ b/scripts/cligen/r_factor.py @@ -6,12 +6,13 @@ import numpy as np import pandas as pd from matplotlib.patches import Polygon -from pydep.io.wepp import read_cli from pyiem.plot import MapPlot from pyiem.plot.use_agg import plt from pyiem.util import get_sqlalchemy_conn from tqdm import tqdm +from pydep.io.wepp import read_cli + def plot(): """Plot.""" diff --git a/scripts/climatechange/addstorms_cli.py b/scripts/climatechange/addstorms_cli.py index ecebcb1f..0ea79ae7 100644 --- a/scripts/climatechange/addstorms_cli.py +++ b/scripts/climatechange/addstorms_cli.py @@ -5,10 +5,11 @@ from datetime import date from pandas.io.sql import read_sql -from pydep.io.wepp import read_cli from pyiem.util import get_dbconn from tqdm import tqdm +from pydep.io.wepp import read_cli + MYHUCS = [x.strip() for x in open("myhucs.txt")] diff --git a/scripts/climatechange/dump_results.py b/scripts/climatechange/dump_results.py index aab7f043..c8acd9b5 100644 --- a/scripts/climatechange/dump_results.py +++ b/scripts/climatechange/dump_results.py @@ -4,10 +4,11 @@ import sys import numpy as np -from pydep.io.wepp import read_env from pyiem.util import get_dbconn, logger from tqdm import tqdm +from pydep.io.wepp import read_env + LOG = logger() diff --git a/scripts/dynamic_tillage/bootstrap_year.py b/scripts/dynamic_tillage/bootstrap_year.py index ebff82e7..77dc8ee1 100644 --- a/scripts/dynamic_tillage/bootstrap_year.py +++ b/scripts/dynamic_tillage/bootstrap_year.py @@ -4,11 +4,12 @@ import click from pandas.io.sql import read_sql -from pydep.tillage import make_tillage from pyiem.database import get_dbconnc, get_sqlalchemy_conn from pyiem.util import logger from sqlalchemy import text +from pydep.tillage import make_tillage + LOG = logger() diff --git a/scripts/dynamic_tillage/compute_smstate.py b/scripts/dynamic_tillage/compute_smstate.py index 93cb735a..fa2dc1bd 100644 --- a/scripts/dynamic_tillage/compute_smstate.py +++ b/scripts/dynamic_tillage/compute_smstate.py @@ -16,12 +16,13 @@ import click import geopandas as gpd import pandas as pd -from pydep.io.dep import read_wb from pyiem.database import get_sqlalchemy_conn from pyiem.util import logger from sqlalchemy import text from tqdm import tqdm +from pydep.io.dep import read_wb + LOG = logger() diff --git a/scripts/gridorder/flowpathlength_totals.py b/scripts/gridorder/flowpathlength_totals.py index c0417e1b..a1869753 100644 --- a/scripts/gridorder/flowpathlength_totals.py +++ b/scripts/gridorder/flowpathlength_totals.py @@ -6,10 +6,11 @@ import pandas as pd import psycopg -from pydep.io.wepp import read_env from pyiem.util import get_dbconn from tqdm import tqdm +from pydep.io.wepp import read_env + def find_huc12s(): """yield a listing of huc12s with output!""" diff --git a/scripts/gridorder/list_flowpath_delivery.py b/scripts/gridorder/list_flowpath_delivery.py index 23fc1c24..8bee0e33 100644 --- a/scripts/gridorder/list_flowpath_delivery.py +++ b/scripts/gridorder/list_flowpath_delivery.py @@ -6,6 +6,7 @@ import pandas as pd import psycopg from pandas.io.sql import read_sql + from pydep.io.wepp import read_env SCEN2CODE = [None, 12, 13, 14, 0, 15, 16] diff --git a/scripts/gridorder2/dumper.py b/scripts/gridorder2/dumper.py index 36dee556..e2ecefb3 100644 --- a/scripts/gridorder2/dumper.py +++ b/scripts/gridorder2/dumper.py @@ -1,10 +1,11 @@ """Generate the requested output.""" import pandas as pd -from pydep.io.wepp import read_env from pyiem.util import get_dbconn from tqdm import tqdm +from pydep.io.wepp import read_env + HUCS = ( "102400090102 102300020307 102400020604 102801020801 071000040902 " "070600040608 070802040604 070802090402" diff --git a/scripts/gridorder2/strips_dumper.py b/scripts/gridorder2/strips_dumper.py index e090ea6c..b5b831c8 100644 --- a/scripts/gridorder2/strips_dumper.py +++ b/scripts/gridorder2/strips_dumper.py @@ -2,10 +2,11 @@ import pandas as pd from pandas.io.sql import read_sql -from pydep.io.wepp import read_env from pyiem.util import get_dbconn from tqdm import tqdm +from pydep.io.wepp import read_env + HUCS = ( "070801050307 070801050702 071000070104 102300030203 102400030406 " "102400030602 102802010203" diff --git a/scripts/import/flowpath2prj.py b/scripts/import/flowpath2prj.py index d30865a0..561ea016 100644 --- a/scripts/import/flowpath2prj.py +++ b/scripts/import/flowpath2prj.py @@ -38,13 +38,14 @@ from math import atan2, degrees, pi import pandas as pd -from pydep.tillage import make_tillage -from pydep.util import load_scenarios from pyiem.database import get_dbconn, get_sqlalchemy_conn from pyiem.util import logger from sqlalchemy import text from tqdm import tqdm +from pydep.tillage import make_tillage +from pydep.util import load_scenarios + LOG = logger() MISSED_SOILS = {} YEARS = 2024 - 2006 diff --git a/scripts/import/flowpath_importer.py b/scripts/import/flowpath_importer.py index ea383622..66f4ae0b 100644 --- a/scripts/import/flowpath_importer.py +++ b/scripts/import/flowpath_importer.py @@ -17,12 +17,13 @@ import numpy as np import pandas as pd import pyproj -from pydep.util import clear_huc12data, get_cli_fname from pyiem.database import get_dbconn from pyiem.util import logger from shapely.geometry import LineString from tqdm import tqdm +from pydep.util import clear_huc12data, get_cli_fname + LOG = logger() print(" * BE CAREFUL! The GeoJSON files may not be 5070, but 26915") print(" * VERIFY that the GeoJSON is the 5070 grid value") diff --git a/scripts/plots/plot_wb.py b/scripts/plots/plot_wb.py index ca5858af..cc1d6d1f 100644 --- a/scripts/plots/plot_wb.py +++ b/scripts/plots/plot_wb.py @@ -6,10 +6,11 @@ import pandas as pd import seaborn as sns -from pydep.io.dep import read_wb from pyiem.plot.use_agg import plt from tqdm import tqdm +from pydep.io.dep import read_wb + def main(argv): """Do things""" diff --git a/scripts/plots/plot_wb_snow.py b/scripts/plots/plot_wb_snow.py index bfb103ed..d7a30488 100644 --- a/scripts/plots/plot_wb_snow.py +++ b/scripts/plots/plot_wb_snow.py @@ -4,9 +4,10 @@ import pandas as pd import seaborn as sns -from pydep.io.dep import read_wb from pyiem.util import get_dbconnstr +from pydep.io.dep import read_wb + def main(argv): """Do things""" diff --git a/scripts/plots/scenario_slope_q_q.py b/scripts/plots/scenario_slope_q_q.py index 495da5de..ec50aba7 100644 --- a/scripts/plots/scenario_slope_q_q.py +++ b/scripts/plots/scenario_slope_q_q.py @@ -7,9 +7,10 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from pydep.io.wepp import read_slp from pyiem.util import utc +from pydep.io.wepp import read_slp + def read_data(): """Do intensive stuff""" diff --git a/scripts/plots/slope_histogram.py b/scripts/plots/slope_histogram.py index 079b2f2f..b3bd9e43 100644 --- a/scripts/plots/slope_histogram.py +++ b/scripts/plots/slope_histogram.py @@ -6,6 +6,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd + from pydep.io.wepp import read_slp diff --git a/scripts/switchgrass/modify_for_switchgrass.py b/scripts/switchgrass/modify_for_switchgrass.py index c7dbd8fd..997507a4 100644 --- a/scripts/switchgrass/modify_for_switchgrass.py +++ b/scripts/switchgrass/modify_for_switchgrass.py @@ -7,9 +7,10 @@ import os import sys -from pydep.io.wepp import read_slp from tqdm import tqdm +from pydep.io.wepp import read_slp + def main(argv): """Go Main Go""" diff --git a/scripts/tillage/flowpath2prj.py b/scripts/tillage/flowpath2prj.py index 56b5ef1b..f374dff4 100644 --- a/scripts/tillage/flowpath2prj.py +++ b/scripts/tillage/flowpath2prj.py @@ -7,10 +7,11 @@ from math import atan2, degrees, pi from psycopg.rows import dict_row -from pydep.util import get_cli_fname from pyiem.database import get_dbconn from tqdm import tqdm +from pydep.util import get_cli_fname + SCENARIO = int(sys.argv[1]) TILLAGE_CLASS = int(sys.argv[2]) MISSED_SOILS = {} diff --git a/scripts/tillage_timing/dump_dataset1.py b/scripts/tillage_timing/dump_dataset1.py index 4416fa58..7b4890fd 100644 --- a/scripts/tillage_timing/dump_dataset1.py +++ b/scripts/tillage_timing/dump_dataset1.py @@ -20,9 +20,10 @@ import glob import pandas as pd +from pyiem.util import logger + from pydep.io.dep import read_wb from pydep.io.wepp import read_env -from pyiem.util import logger LOG = logger() diff --git a/scripts/tillage_timing/dump_mar10_tab1.py b/scripts/tillage_timing/dump_mar10_tab1.py index 78e0e1af..89e23f87 100644 --- a/scripts/tillage_timing/dump_mar10_tab1.py +++ b/scripts/tillage_timing/dump_mar10_tab1.py @@ -4,9 +4,10 @@ import glob import pandas as pd -from pydep.io.wepp import read_env from pyiem.util import logger +from pydep.io.wepp import read_env + LOG = logger() diff --git a/scripts/tillage_timing/dump_mar10_tab2.py b/scripts/tillage_timing/dump_mar10_tab2.py index 5968939c..8352013a 100644 --- a/scripts/tillage_timing/dump_mar10_tab2.py +++ b/scripts/tillage_timing/dump_mar10_tab2.py @@ -4,9 +4,10 @@ import glob import pandas as pd +from pyiem.util import logger + from pydep.io.dep import read_wb from pydep.io.wepp import read_env -from pyiem.util import logger LOG = logger() diff --git a/scripts/tillage_timing/dump_mar10_tab3.py b/scripts/tillage_timing/dump_mar10_tab3.py index 17d1c5c8..22abf674 100644 --- a/scripts/tillage_timing/dump_mar10_tab3.py +++ b/scripts/tillage_timing/dump_mar10_tab3.py @@ -3,9 +3,10 @@ import glob import pandas as pd -from pydep.io.wepp import read_env from pyiem.util import logger +from pydep.io.wepp import read_env + LOG = logger() diff --git a/scripts/tillage_timing/dump_mar10_tab4.py b/scripts/tillage_timing/dump_mar10_tab4.py index 90d653d8..62c536ce 100644 --- a/scripts/tillage_timing/dump_mar10_tab4.py +++ b/scripts/tillage_timing/dump_mar10_tab4.py @@ -3,9 +3,10 @@ import glob import pandas as pd +from pyiem.util import logger + from pydep.io.dep import read_wb from pydep.io.wepp import read_env -from pyiem.util import logger LOG = logger() diff --git a/scripts/tillage_timing/dynamic_tillage_mod_rot.py b/scripts/tillage_timing/dynamic_tillage_mod_rot.py index 0945359e..944e9e15 100644 --- a/scripts/tillage_timing/dynamic_tillage_mod_rot.py +++ b/scripts/tillage_timing/dynamic_tillage_mod_rot.py @@ -5,10 +5,11 @@ import pandas as pd from pandas.io.sql import read_sql -from pydep.io.dep import read_wb from pyiem.util import get_dbconn from tqdm import tqdm +from pydep.io.dep import read_wb + APR15 = pd.Timestamp(year=2018, month=4, day=15) MAY30 = pd.Timestamp(year=2018, month=5, day=30) THRESHOLDS = {81: 45, 82: 40, 83: 35, 84: 30, 85: 25, 86: 20, 87: 15} diff --git a/scripts/tillage_timing/plot_crop.py b/scripts/tillage_timing/plot_crop.py index 774432e0..923cbc2b 100644 --- a/scripts/tillage_timing/plot_crop.py +++ b/scripts/tillage_timing/plot_crop.py @@ -4,9 +4,10 @@ import sys import matplotlib.dates as mdates -from pydep.io.wepp import read_crop from pyiem.plot.use_agg import plt +from pydep.io.wepp import read_crop + def main(argv): """Go Main Go.""" diff --git a/scripts/tillage_timing/plot_dailydeltas.py b/scripts/tillage_timing/plot_dailydeltas.py index 72cc7e3b..f61cbb13 100644 --- a/scripts/tillage_timing/plot_dailydeltas.py +++ b/scripts/tillage_timing/plot_dailydeltas.py @@ -4,9 +4,10 @@ import sys import matplotlib.dates as mdates -from pydep.io.wepp import read_env from pyiem.plot.use_agg import plt +from pydep.io.wepp import read_env + def main(argv): """Go Main Go.""" diff --git a/scripts/tillage_timing/plot_sm_freq.py b/scripts/tillage_timing/plot_sm_freq.py index a7719000..e7ef52e7 100644 --- a/scripts/tillage_timing/plot_sm_freq.py +++ b/scripts/tillage_timing/plot_sm_freq.py @@ -3,9 +3,10 @@ import calendar import sys -from pydep.io.dep import read_wb from pyiem.plot.use_agg import plt +from pydep.io.dep import read_wb + def main(argv): """Get the tillage date.""" diff --git a/scripts/util/compare_scenarios_1huc12.py b/scripts/util/compare_scenarios_1huc12.py index 76e1a545..bb2dbbd5 100644 --- a/scripts/util/compare_scenarios_1huc12.py +++ b/scripts/util/compare_scenarios_1huc12.py @@ -4,10 +4,11 @@ import numpy as np import pandas as pd -from pydep.io.wepp import read_env from pyiem.plot.use_agg import plt from tqdm import tqdm +from pydep.io.wepp import read_env + MYHUCS = [ "070600060701", "070801020905", diff --git a/scripts/util/compute_wilting_capacity.py b/scripts/util/compute_wilting_capacity.py index fdb20016..d880580c 100644 --- a/scripts/util/compute_wilting_capacity.py +++ b/scripts/util/compute_wilting_capacity.py @@ -7,9 +7,10 @@ import sys import pandas as pd -from pydep.io.dep import read_wb from pyiem.util import get_dbconn, get_sqlalchemy_conn, logger +from pydep.io.dep import read_wb + LOG = logger() diff --git a/scripts/util/custom_datum_summary.py b/scripts/util/custom_datum_summary.py index d0eba1d7..018d7e9d 100644 --- a/scripts/util/custom_datum_summary.py +++ b/scripts/util/custom_datum_summary.py @@ -5,10 +5,11 @@ import pandas as pd from geopandas import read_file from pandas.io.sql import read_sql -from pydep.io.wepp import read_env from pyiem.util import get_dbconn from tqdm import tqdm +from pydep.io.wepp import read_env + def main(): """Go Main Go.""" diff --git a/scripts/util/diagnose_huc12.py b/scripts/util/diagnose_huc12.py index 47317be6..88770dba 100644 --- a/scripts/util/diagnose_huc12.py +++ b/scripts/util/diagnose_huc12.py @@ -6,9 +6,10 @@ import pandas as pd from pandas.io.sql import read_sql -from pydep.io.wepp import read_env from pyiem.util import get_dbconn +from pydep.io.wepp import read_env + YEARS = datetime.date.today().year - 2007 + 1 CONV = 4.463 diff --git a/scripts/util/dump_ofe_results.py b/scripts/util/dump_ofe_results.py index ca49c40b..70a7fe5c 100644 --- a/scripts/util/dump_ofe_results.py +++ b/scripts/util/dump_ofe_results.py @@ -7,11 +7,12 @@ import click import pandas as pd -from pydep.io.wepp import read_env, read_ofe from pyiem.util import get_dbconn, get_sqlalchemy_conn from sqlalchemy import text from tqdm import tqdm +from pydep.io.wepp import read_env, read_ofe + # 2007 is skipped YEARS = 6.0 diff --git a/scripts/util/dump_wb.py b/scripts/util/dump_wb.py index fb647fbf..d7b552e9 100644 --- a/scripts/util/dump_wb.py +++ b/scripts/util/dump_wb.py @@ -4,6 +4,7 @@ import os import pandas as pd + from pydep.io.dep import read_wb diff --git a/scripts/util/dump_wb_one.py b/scripts/util/dump_wb_one.py index 6e9ad10a..389da19a 100644 --- a/scripts/util/dump_wb_one.py +++ b/scripts/util/dump_wb_one.py @@ -6,6 +6,7 @@ import os import pandas as pd + from pydep.io.dep import read_wb diff --git a/scripts/util/huc12_summary.py b/scripts/util/huc12_summary.py index b1147571..6d9f721f 100644 --- a/scripts/util/huc12_summary.py +++ b/scripts/util/huc12_summary.py @@ -4,10 +4,11 @@ import pandas as pd from pandas.io.sql import read_sql -from pydep.io.wepp import read_env from pyiem.util import get_dbconn from tqdm import tqdm +from pydep.io.wepp import read_env + def main(): """Go Main Go.""" diff --git a/scripts/util/make_dirs.py b/scripts/util/make_dirs.py index 80c56b78..256c8b49 100644 --- a/scripts/util/make_dirs.py +++ b/scripts/util/make_dirs.py @@ -3,10 +3,11 @@ import os import click -from pydep.util import load_scenarios from pyiem.database import get_dbconn from pyiem.util import logger +from pydep.util import load_scenarios + LOG = logger() PREFIXES = ( "crop env man prj run slp sol wb error ofe yld rot grph out " diff --git a/tests/test_util.py b/tests/test_util.py index 3e016ed5..a837b5a2 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,9 +1,10 @@ """test pydep.util""" import pytest -from pydep import util from pyiem.database import get_dbconnc +from pydep import util + def test_clear_huc12data(): """Can we clear data?""" From f530a40350621ffe746a72fb0f92a2003f69c7e7 Mon Sep 17 00:00:00 2001 From: akrherz Date: Mon, 19 Aug 2024 11:57:34 -0500 Subject: [PATCH 5/7] mnt: address lint --- conda_requirements.txt | 2 -- pip_requirements.txt | 3 ++- scripts/cligen/daily_clifile_editor.py | 6 +++--- scripts/tillage/flowpath2prj.py | 4 ---- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/conda_requirements.txt b/conda_requirements.txt index 8c778a14..c48dfdb2 100644 --- a/conda_requirements.txt +++ b/conda_requirements.txt @@ -5,8 +5,6 @@ pandas # image work pillow psycopg -# my hacky -pyiem # Fast geojson reading pyogrio # For testing diff --git a/pip_requirements.txt b/pip_requirements.txt index 6305ed30..5c00ee9a 100644 --- a/pip_requirements.txt +++ b/pip_requirements.txt @@ -1 +1,2 @@ -# none +# bad daryl +git+https://github.com/akrherz/pyIEM.git diff --git a/scripts/cligen/daily_clifile_editor.py b/scripts/cligen/daily_clifile_editor.py index f8852ada..e9f51d58 100644 --- a/scripts/cligen/daily_clifile_editor.py +++ b/scripts/cligen/daily_clifile_editor.py @@ -441,7 +441,7 @@ def compute_breakpoint(ar, accumThreshold=2.0, intensityThreshold=1.0): continue # Need to initialize the breakpoint data if bp is None: - ts = ZEROHOUR + timedelta(minutes=(i * 2)) + ts = ZEROHOUR + timedelta(minutes=i * 2) bp = [bpstr(ts, 0)] accum += intensity lasti = i @@ -452,7 +452,7 @@ def compute_breakpoint(ar, accumThreshold=2.0, intensityThreshold=1.0): if (i + 1) == len(ar): ts = ZEROHOUR.replace(hour=23, minute=59) else: - ts = ZEROHOUR + timedelta(minutes=((i + 1) * 2)) + ts = ZEROHOUR + timedelta(minutes=(i + 1) * 2) bp.append(bpstr(ts, accum)) # To append a last accumulation, we need to have something meaningful # so to not have redundant values, which bombs WEPP @@ -460,7 +460,7 @@ def compute_breakpoint(ar, accumThreshold=2.0, intensityThreshold=1.0): if (lasti + 1) == len(ar): ts = ZEROHOUR.replace(hour=23, minute=59) else: - ts = ZEROHOUR + timedelta(minutes=((lasti + 1) * 2)) + ts = ZEROHOUR + timedelta(minutes=(lasti + 1) * 2) bp.append(bpstr(ts, accum)) return bp diff --git a/scripts/tillage/flowpath2prj.py b/scripts/tillage/flowpath2prj.py index f374dff4..9b2a5dd3 100644 --- a/scripts/tillage/flowpath2prj.py +++ b/scripts/tillage/flowpath2prj.py @@ -177,8 +177,6 @@ def do_flowpath(zone, huc_12, fid, fpath): maxmanagement = row["management"] if row["slope"] < 0.00001: row["slope"] = 0.00001 - # hard coded... - # row['slope'] = slope rows.append(row) if x is None: @@ -200,7 +198,6 @@ def do_flowpath(zone, huc_12, fid, fpath): "%s %3i %4.1f %5.1f %5.1f" % (huc_12, fpath, maxslope, rows[-1]["length"], s) ) - # SLP.write("%s,%.6f\n" % (fid, maxslope)) if rows[-1]["length"] < 1: print("%s,%s has zero length, deleting" % (huc_12, fpath)) @@ -216,7 +213,6 @@ def do_flowpath(zone, huc_12, fid, fpath): (res["clifile"], fpath, huc_12, SCENARIO), ) - # return res["huc8"] = huc_12[:8] res["huc12"] = huc_12 res["envfn"] = "/i/%s/env/%s/%s_%s.env" % ( From 6f4c2f256f3e4272627174fd0c80bd8c93cb5517 Mon Sep 17 00:00:00 2001 From: akrherz Date: Mon, 19 Aug 2024 12:00:51 -0500 Subject: [PATCH 6/7] ci: remove unused requirements --- conda_requirements.txt | 21 --------------------- environment.yml | 2 -- 2 files changed, 23 deletions(-) delete mode 100644 conda_requirements.txt diff --git a/conda_requirements.txt b/conda_requirements.txt deleted file mode 100644 index c48dfdb2..00000000 --- a/conda_requirements.txt +++ /dev/null @@ -1,21 +0,0 @@ -coverage -pyshp -tqdm -pandas -# image work -pillow -psycopg -# Fast geojson reading -pyogrio -# For testing -pytest -pytest-cov -pytest-runner -# interpolation -scipy -# GIS work -shapely -sqlalchemy -# daily auto-tweeter -tweepy -reportlab diff --git a/environment.yml b/environment.yml index 9d7762c0..5969b67f 100644 --- a/environment.yml +++ b/environment.yml @@ -8,8 +8,6 @@ dependencies: # image work - pillow - psycopg - # my hacky - - pyiem # Fast geojson reading - pyogrio # For testing From 88a38a40ac708f070f273edf1e0a6f11362b5d46 Mon Sep 17 00:00:00 2001 From: akrherz Date: Mon, 19 Aug 2024 12:06:31 -0500 Subject: [PATCH 7/7] ci: add pip install step --- .github/workflows/pydep.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/pydep.yml b/.github/workflows/pydep.yml index 077d8a08..9e134c20 100644 --- a/.github/workflows/pydep.yml +++ b/.github/workflows/pydep.yml @@ -40,6 +40,9 @@ jobs: environment-name: prod cache-environment: true + - name: Install Python Dependencies via pip + run: python -m pip install -r pip_requirements.txt + - name: Setup paths run: sh .github/setuppaths.sh