From 478d5ec8071546e2dbf7f06763e8b2deb1681236 Mon Sep 17 00:00:00 2001
From: akrherz <akrherz@iastate.edu>
Date: Fri, 5 Feb 2021 10:39:25 -0600
Subject: [PATCH] some gridorder2 work updates #80

---
 scripts/gridorder2/flowpath_importer.py | 54 ++++++++++++++++++++++---
 scripts/gridorder2/myhucs.txt           |  9 +++++
 2 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/scripts/gridorder2/flowpath_importer.py b/scripts/gridorder2/flowpath_importer.py
index d8795c32..9a1fe5c1 100644
--- a/scripts/gridorder2/flowpath_importer.py
+++ b/scripts/gridorder2/flowpath_importer.py
@@ -14,6 +14,7 @@
 
 import geopandas as gpd
 import pandas as pd
+import numpy as np
 from pyiem.util import get_dbconn, logger
 
 LOG = logger()
@@ -63,6 +64,46 @@ def get_flowpath(cursor, huc12, fpath):
     return cursor.fetchone()[0]
 
 
+def run_checks(df):
+    """Cull flowpaths that fail the grid order or length sanity checks.
+
+    A flowpath is dropped when its grid order does not start at 1 and reach
+    at least 5, or when its length values are not strictly increasing.
+
+    Args:
+      df: DataFrame sorted by ``fpLen*`` with ``fp*`` and ``gord*`` columns.
+
+    Returns:
+      The DataFrame without the rows of any culled flowpath.
+    """
+    fplencol = [x for x in df.columns if x.startswith("fpLen")][0]
+    gordcol = [x for x in df.columns if x.startswith("gord")][0]
+    # "fpLen..." also starts with "fp", so it must not win the id lookup
+    fpcol = [x for x in df.columns if x.startswith("fp") and x != fplencol][0]
+    # Aggregate by name: np.min is labelled "amin" or "min" by version
+    stats = df.groupby(fpcol)[gordcol].agg(["min", "max"])
+    cull = []
+    bad = stats[(stats["min"] > 1) | (stats["max"] < 5)]
+    for fpath, row in bad.iterrows():
+        print(
+            "GORDER_CHECK FAIL %s %s min:%s max:%s, culling"
+            % (gordcol, fpath, row["min"], row["max"])
+        )
+        cull.append(fpath)
+    for fpath, pts in df.groupby(fpcol):
+        res = np.diff(pts[fplencol].values)
+        if not (res > 0).all():
+            print(
+                "FPLEN %s for %s not monotonic, culling %s"
+                % (fplencol, fpath, min(res))
+            )
+            cull.append(fpath)
+    if cull:
+        print("culling %s" % (cull,))
+        df = df[~df[fpcol].isin(cull)]
+    return df
+
+
 def get_data(filename):
     """Converts a GeoJSON file into a pandas dataframe
 
@@ -73,6 +114,11 @@ def get_data(filename):
       gpd.DataFrame with the geojson data included.
     """
     df = gpd.read_file(filename, index="OBJECTID")
+    # Sort along the length column, which orders the points from top
+    # to bottom
+    fplencol = [x for x in df.columns if x.startswith("fpLen")][0]
+    df = df.sort_values(fplencol, ascending=True)
+    df = run_checks(df)
     snapdf = gpd.read_file(
         filename.replace("smpldef3m", "snaps3m"), index="OBJECTID"
     )
@@ -111,8 +157,7 @@ def get_data(filename):
 
 
 def delete_previous(cursor, huc12):
-    """This file is the authority for the HUC12, so we cull previous content.
-    """
+    """This file is the authority for the HUC12, so we cull previous content."""
     cursor.execute(
         """
         DELETE from flowpath_points p USING flowpaths f WHERE
@@ -196,9 +241,6 @@ def process_flowpath(cursor, huc12, db_fid, df, snappt):
     lencolname = "%sLen%s" % (PREFIX, huc12)
     elevcolname = "ep3m%s" % (huc12,)
     gordcolname = "gord_%s" % (huc12,)
-    # Sort along the length column, which orders the points from top
-    # to bottom
-    df = df.sort_values(lencolname, ascending=True)
     # Remove any previous data for this flowpath
     cursor.execute(
         "DELETE from flowpath_points WHERE flowpath = %s", (db_fid,)
@@ -322,7 +364,7 @@ def process(cursor, filename, huc12df, snapdf):
             delete_flowpath(cursor, db_fid)
             print(df)
             print(df[[f"gord_{huc12}", f"fpLen{huc12}"]])
-            sys.exit()
+            # sys.exit()
     return huc12
 
 
diff --git a/scripts/gridorder2/myhucs.txt b/scripts/gridorder2/myhucs.txt
index 672758ab..2758ccbe 100644
--- a/scripts/gridorder2/myhucs.txt
+++ b/scripts/gridorder2/myhucs.txt
@@ -4,3 +4,12 @@
 070802040604
 070802090402
 071000040902
+071000070104
+102300020307
+102300030203
+102400020604
+102400030406
+102400030602
+102400090102
+102801020801
+102802010203