Commit

Merge pull request #203 from Riverscapes/dev
February Release

MattReimer authored Feb 19, 2021
2 parents 199378c + 0b15c74 commit 780b2e5
Showing 56 changed files with 2,602 additions and 452 deletions.
2 changes: 2 additions & 0 deletions lib/commons/rscommons/__init__.py
@@ -13,6 +13,8 @@
from rscommons.classes.vector_classes import GeopackageLayer, GeodatabaseLayer, ShapefileLayer, get_shp_or_gpkg
from rscommons.classes.vector_base import VectorBase

from rscommons.classes.tempfiles import TempRaster, TempGeopackage

from rscommons.report.rs_report import RSReport
from rscommons.classes.rs_project import RSLayer, RSProject
# We don't make XMLBuilder convenient because people should be using RSProject
2 changes: 1 addition & 1 deletion lib/commons/rscommons/__version__.py
@@ -1 +1 @@
__version__ = "1.1.1"
__version__ = "1.1.2"
61 changes: 60 additions & 1 deletion lib/commons/rscommons/classes/rs_project.py
@@ -8,12 +8,14 @@
# Created: 09/25/2015
# -------------------------------------------------------------------------------
from __future__ import annotations
from typing import List, Dict
import os
import datetime
import uuid

import rasterio.shutil
from osgeo import ogr
from copy import copy

from rscommons import Logger
from rscommons.classes.xml_builder import XMLBuilder
@@ -356,7 +358,7 @@ def add_project_raster(self, parent_node, rs_lyr, copy_path=None, replace=False)
log.error('Could not find mandatory input "{}" raster at path "{}"'.format(rs_lyr.name, copy_path))

# Rasterio copies datasets efficiently
rasterio.shutil.copy(copy_path, file_path)
rasterio.shutil.copy(copy_path, file_path, compress='LZW', predictor=2)
log.info('Raster Copied {} to {}'.format(copy_path, file_path))

nod_dataset = self.add_dataset(parent_node, file_path, rs_lyr, 'Raster', replace)
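For reference, a minimal sketch of the compressed copy that add_project_raster now performs (the paths below are made up; rasterio.shutil.copy passes extra keyword arguments through as GDAL creation options, which is exactly how the changed line uses it):

import rasterio.shutil

# Hypothetical paths for illustration only
src_raster = '/tmp/input_dem.tif'
dst_raster = '/tmp/project/outputs/dem.tif'

# Extra keyword arguments become GDAL creation options (COMPRESS=LZW, PREDICTOR=2),
# so the copied GeoTIFF lands on disk already compressed.
rasterio.shutil.copy(src_raster, dst_raster, compress='LZW', predictor=2)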
@@ -425,3 +427,60 @@ def unique_type_id(parent, xml_tag, root_id):
i += 1

return '{}{}'.format(root_id, i if i > 1 else '')

@staticmethod
def prefix_keys(dict_in: Dict[str, str], prefix: str) -> Dict[str, str]:
"""Helper method. Prefix a dictionary's keys
Args:
dict_in (Dict[str, str]): [description]
prefix (str): [description]
Returns:
Dict[str, str]: [description]
"""
if dict_in is None:
return {}
return {'{}{}'.format(prefix, key): val for key, val in dict_in.items()}

def rs_meta_augment(self, in_proj_files: List[str], rs_id_map: Dict[str, str]) -> None:
"""Augment the metadata of specific layers with the input's layers
Args:
out_proj_file (str): [description]
in_proj_files (List[str]): [description]
"""
wh_prefix = '_rs_wh_'
proj_prefix = '_rs_prj_'
lyr_prefix = '_rs_lyr_'

working_id_list = copy(rs_id_map)

# Loop over input project.rs.xml files
found_keys = []
for in_prj_path in in_proj_files:
in_prj = RSProject(None, in_prj_path)

# Define our default, generic warehouse and project meta
whmeta = self.prefix_keys(in_prj.get_metadata_dict(tag='Warehouse'), wh_prefix)
projmeta = self.prefix_keys(in_prj.get_metadata_dict(), proj_prefix)

# look for any valid mappings and move metadata into them
for id_out, id_in in working_id_list.items():

lyrnod_in = in_prj.XMLBuilder.find('Realizations').find('.//*[@id="{}"]'.format(id_in))
lyrmeta = self.prefix_keys(in_prj.get_metadata_dict(lyrnod_in), lyr_prefix)
lyrnod_out = self.XMLBuilder.find('Realizations').find('.//*[@id="{}"]'.format(id_out))

if id_out not in found_keys and lyrnod_in is not None and lyrnod_out is not None:
print('Found mapping for {}=>{}. Moving metadata'.format(id_in, id_out))
found_keys.append(id_out)
self.add_metadata({
**whmeta,
**projmeta,
**lyrmeta,
"{}projType".format(proj_prefix): in_prj.XMLBuilder.find('ProjectType').text,
"{}id".format(lyr_prefix): lyrnod_in.attrib['id'],
"{}guid".format(lyr_prefix): lyrnod_in.attrib['guid'],
"{}path".format(lyr_prefix): lyrnod_in.find('Path').text,
}, lyrnod_out)
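A quick, standalone illustration of what prefix_keys does before rs_meta_augment merges warehouse, project and layer metadata onto the output layer node (the metadata values below are made up):

# Same logic as the static method above, shown with example data
def prefix_keys(dict_in, prefix):
    if dict_in is None:
        return {}
    return {'{}{}'.format(prefix, key): val for key, val in dict_in.items()}

wh_meta = prefix_keys({'HUC': '17060304'}, '_rs_wh_')
lyr_meta = prefix_keys({'Source': 'NED 10m DEM'}, '_rs_lyr_')

# Merged the same way rs_meta_augment builds its add_metadata() call
print({**wh_meta, **lyr_meta})
# {'_rs_wh_HUC': '17060304', '_rs_lyr_Source': 'NED 10m DEM'}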
54 changes: 54 additions & 0 deletions lib/commons/rscommons/classes/tempfiles.py
@@ -0,0 +1,54 @@
from __future__ import annotations
import os
from tempfile import mkstemp
from rscommons import Logger


class TempGISFileException(Exception):
"""Special exceptions
Args:
Exception ([type]): [description]
"""
pass


class TempGISFile():
"""This is just a loose mapping class to allow us to use Python's 'with' keyword.
Raises:
VectorBaseException: Various
"""
log = Logger('TempGISFile')

def __init__(self, suffix: str, prefix: str = None):
self.suffix = suffix
self.prefix = 'rstools_{}'.format(prefix)
self.filepath = None
self.file = None

def __enter__(self) -> TempGISFile:
"""Behaviour on open when using the "with VectorBase():" Syntax
"""
self.file, self.filepath = mkstemp(suffix=self.suffix, text=True)
# Immediately close it. This is so windows doesn't hold onto the handle
os.close(self.file)
return self

def __exit__(self, _type, _value, _traceback):
"""Behaviour on close when using the "with VectorBase():" Syntax
"""
try:
os.remove(self.filepath)
except Exception as e:
self.log.warning('Error cleaning up file {}: {}'.format(self.filepath, e))


class TempRaster(TempGISFile):
def __init__(self, prefix: str):
super(TempRaster, self).__init__(suffix='.tiff', prefix=prefix)


class TempGeopackage(TempGISFile):
def __init__(self, prefix: str):
super(TempGeopackage, self).__init__(suffix='.gpkg', prefix=prefix)
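For context, a minimal usage sketch of the new temp file classes (the prefixes and the work done inside the block are hypothetical); the files are created by mkstemp on enter and removed on exit:

from rscommons import TempRaster, TempGeopackage

with TempRaster('slope') as tmp_ras, TempGeopackage('scratch') as tmp_gpkg:
    # tmp_ras.filepath and tmp_gpkg.filepath are temp paths ending in .tiff / .gpkg
    # Write intermediate outputs here; both files are deleted when the block exits.
    print(tmp_ras.filepath, tmp_gpkg.filepath)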
10 changes: 8 additions & 2 deletions lib/commons/rscommons/classes/vector_datasource.py
@@ -3,7 +3,7 @@
from __future__ import annotations
from osgeo import ogr
from rscommons.classes.logger import Logger

from rscommons.util import safe_remove_file

class DatasetRegistryException(Exception):
"""Special exceptions
@@ -151,4 +151,10 @@ def delete_dataset(self, filepath: str, driver: ogr.Driver):
del self._registry[filepath]

# Delete the Dataset
driver.DeleteDataSource(filepath)
err = driver.DeleteDataSource(filepath)

# If this is a tempfile there's a possibility of failure.
# In that case just remove the file normally (or try anyway)
if err == ogr.OGRERR_FAILURE:
safe_remove_file(filepath)
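
The same delete-with-fallback pattern in isolation (the driver name and file path are assumptions; safe_remove_file is the rscommons helper imported at the top of this file):

from osgeo import ogr
from rscommons.util import safe_remove_file

driver = ogr.GetDriverByName('GPKG')   # assumed driver
filepath = '/tmp/scratch.gpkg'         # assumed path

# DeleteDataSource returns an OGRERR code; fall back to a plain file removal on failure
err = driver.DeleteDataSource(filepath)
if err == ogr.OGRERR_FAILURE:
    safe_remove_file(filepath)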

166 changes: 166 additions & 0 deletions lib/commons/rscommons/debug.py
@@ -0,0 +1,166 @@
from __future__ import annotations
import math
import csv
import os
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from time import sleep
import psutil
import matplotlib.pyplot as plt
from rscommons import Logger

# https://medium.com/survata-engineering-blog/monitoring-memory-usage-of-a-running-python-program-49f027e3d1ba


class ProcStats():
headers = ['datetime', 'cpu_percent', 'resident_memory', 'virtual_memory', 'children', 'children_resident', 'children_virtual']

def __init__(self, cpu_percent, rss_raw: int, vms_raw: int, children: int, children_rss_raw: int, children_vms_raw: int):
self.datetime = datetime.now()
self.cpu_percent = cpu_percent
self.children = children
self.rss = rss_raw / float(2 ** 20)
self.vms = vms_raw / float(2 ** 20)
self.children_rss = children_rss_raw / float(2 ** 20)
self.children_vms = children_vms_raw / float(2 ** 20)

def row(self):
return [
self.datetime.strftime('%Y-%m-%d %H:%M:%S'),
math.ceil(self.cpu_percent),
"{:.2f}".format(self.rss),
"{:.2f}".format(self.vms),
"{}".format(self.children),
"{:.2f}".format(self.rss),
"{:.2f}".format(self.vms)
]

def toString(self):
return "datetime: {}, cpu_percent: {}, mem_resident: {}Mb, mem_virtual: {}Mb, num_children: {}, mem_children_resident: {}Mb, mem_children_virtual: {}Mb".format(
self.datetime.strftime('%Y-%m-%d %H:%M:%S'),
math.ceil(self.cpu_percent),
"{:.2f}".format(self.rss),
"{:.2f}".format(self.vms),
"{}".format(self.children),
"{:.2f}".format(self.rss),
"{:.2f}".format(self.vms)
)

def max(self, tick: ProcStats):
self.cpu_percent = max(self.cpu_percent, tick.cpu_percent)
self.children = max(self.children, tick.children)
self.rss = max(self.rss, tick.rss)
self.vms = max(self.vms, tick.vms)
self.children_rss = max(self.children_rss, tick.children_rss)
self.children_vms = max(self.children_vms, tick.children_vms)


class MemoryMonitor:
def __init__(self, logfile: str, loop_delay=1):
self.keep_measuring = True
self.filepath = logfile
self.loop_delay = loop_delay
self.process = psutil.Process(os.getpid())
self.headers_written = False
self.max_stats = ProcStats(0, 0, 0, 0, 0, 0)

def write_line(self, arr, mode='a'):
with open(self.filepath, mode, newline='', encoding='utf-8') as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(arr)

def getstats(self) -> ProcStats:
cpu_percent = self.process.cpu_percent()
mem_info = self.process.memory_info()
children = 0
children_rss = 0
children_vms = 0

for child in self.process.children():
child_mem = child.memory_info()
children_rss += child_mem.rss
children_vms += child_mem.vms
children += 1

stats = ProcStats(cpu_percent, mem_info.rss, mem_info.vms, children, children_rss, children_vms)
self.max_stats.max(stats)
return stats

def measure_usage(self):
self.write_line(ProcStats.headers, 'w')
while self.keep_measuring:
self.write_line(self.getstats().row())
sleep(self.loop_delay)
return self.max_stats

def write_plot(self, imgpath: str):
x = []
data = {}
with open(self.filepath) as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
for key in row.keys():
if key == 'datetime':
x.append(row[key])
else:
if key in ['children', 'cpu_percent']:
val = int(row[key])
else:
val = float(row[key])
if key not in data:
data[key] = [val]
else:
data[key].append(val)

chart_title = 'Process stats'
xlabel = 'time'
ylabel = 'Mb'

plt.clf()

fig, ax = plt.subplots()
ax.title.set_text(chart_title)
ax2 = ax.twinx()
# ['datetime', 'cpu_percent', 'resident_memory', 'virtual_memory', 'children', 'children_resident', 'children_virtual']
for key in ['children_resident', 'children_virtual', 'resident_memory', 'virtual_memory']:
if key in data:
ax.plot(x, data[key], label=key)
ax2._get_lines.get_next_color()

ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)

for key in ['cpu_percent', 'children']:
if key in data:
ax2.plot(x, data[key], label=key)

ax.legend(loc='lower left')
ax2.legend(loc='lower right')

freq = math.floor(len(x) / 10)
if freq == 0:
freq = 1
ax.set_xticks(x[::freq])
ax.set_xticklabels(x[::freq], rotation=45)
ax.grid(True)

# plt.tight_layout()
fig.set_size_inches(8, 6)
fig.savefig(imgpath, format='png', dpi=300)


def ThreadRun(callback, memlogfile: str, *args, **kwargs):
log = Logger('Debug')
with ThreadPoolExecutor() as executor:
memmon = MemoryMonitor(memlogfile, 1)
mem_thread = executor.submit(memmon.measure_usage)
try:
fn_thread = executor.submit(callback, *args, **kwargs)
result = fn_thread.result()
finally:
memmon.keep_measuring = False
max_obj = mem_thread.result()
log.debug('MaxStats: {}'.format(max_obj))
memmon.write_plot(os.path.splitext(memlogfile)[0] + '.png')

return result, max_obj.toString()
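A usage sketch of the new ThreadRun wrapper (the wrapped function and log path are hypothetical): the callback runs on a worker thread while MemoryMonitor samples the process once per second, and a CSV plus a PNG plot are written next to the log path.

from rscommons.debug import ThreadRun

def build_project(huc, output_dir):
    # hypothetical long-running task being profiled
    return output_dir

result, max_stats = ThreadRun(build_project, '/tmp/memlog.csv', '17060304', '/tmp/out')
# /tmp/memlog.csv holds the per-second samples, /tmp/memlog.png the plot
print(max_stats)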
38 changes: 25 additions & 13 deletions lib/commons/rscommons/dotenv.py
@@ -1,4 +1,5 @@
import codecs
from typing import List, Dict
import re
import os
import argparse
@@ -50,18 +51,29 @@ def parse_args_env(parser: argparse.ArgumentParser, env_path=None):

# Try and make substitutions for all the {env:ENVNAME} parameters we find
for k, v in vars(args).items():
if type(v) is str:
m = re.match(pattern, v)
if m:
envname = m.group(1)
# There is a precedence here:
if envname in env:
sub = env[envname]
elif envname in os.environ:
sub = os.environ[envname]
else:
raise Exception('COULD NOT FIND ENVIRONMENT VARIABLE: {}'.format(envname))
# Finally, make the substitution
setattr(args, k, str(Path(re.sub(pattern, sub.replace("\\", "/"), v))))
new_val = replace_env_varts(pattern, v, os.environ)
setattr(args, k, new_val)

return args


def replace_env_varts(pattern: str, value_str: str, env: Dict[str, str]):
if type(value_str) is str:
new_str = value_str

def replace(m):
envname = m.group(1)
if envname in env:
sub = env[envname]
elif envname in os.environ:
sub = os.environ[envname]
else:
raise Exception('COULD NOT FIND ENVIRONMENT VARIABLE: {}'.format(envname))
# Finally, make the substitution
return sub.replace("\\", "/")

new_str = str(Path(re.sub(pattern, replace, new_str)))

return new_str
else:
return value_str
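
Finally, a small sketch of the refactored substitution (the regular expression is an assumption; the real pattern is defined earlier in parse_args_env and is not visible in this hunk):

from rscommons.dotenv import replace_env_varts

# Assumed placeholder pattern for {env:NAME} tokens
pattern = r'{env:([^}]+)}'
env = {'DATA_ROOT': '/home/user/data'}

new_val = replace_env_varts(pattern, '{env:DATA_ROOT}/projects/test.gpkg', env)
print(new_val)   # /home/user/data/projects/test.gpkg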