Commit

Merge pull request #203 from Riverscapes/dev
February Release

MattReimer authored Feb 19, 2021
2 parents 199378c + 0b15c74 commit 780b2e5
Showing 56 changed files with 2,602 additions and 452 deletions.
2 changes: 2 additions & 0 deletions lib/commons/rscommons/__init__.py
@@ -13,6 +13,8 @@
from rscommons.classes.vector_classes import GeopackageLayer, GeodatabaseLayer, ShapefileLayer, get_shp_or_gpkg
from rscommons.classes.vector_base import VectorBase

from rscommons.classes.tempfiles import TempRaster, TempGeopackage

from rscommons.report.rs_report import RSReport
from rscommons.classes.rs_project import RSLayer, RSProject
# We don't make XMLBuilder convenient because people should be using RSProject
2 changes: 1 addition & 1 deletion lib/commons/rscommons/__version__.py
@@ -1 +1 @@
__version__ = "1.1.1"
__version__ = "1.1.2"
61 changes: 60 additions & 1 deletion lib/commons/rscommons/classes/rs_project.py
@@ -8,12 +8,14 @@
# Created: 09/25/2015
# -------------------------------------------------------------------------------
from __future__ import annotations
from typing import List, Dict
import os
import datetime
import uuid

import rasterio.shutil
from osgeo import ogr
from copy import copy

from rscommons import Logger
from rscommons.classes.xml_builder import XMLBuilder
@@ -356,7 +358,7 @@ def add_project_raster(self, parent_node, rs_lyr, copy_path=None, replace=False)
log.error('Could not find mandatory input "{}" raster at path "{}"'.format(rs_lyr.name, copy_path))

# Rasterio copies datasets efficiently
rasterio.shutil.copy(copy_path, file_path)
rasterio.shutil.copy(copy_path, file_path, compress='LZW', predictor=2)
log.info('Raster Copied {} to {}'.format(copy_path, file_path))

nod_dataset = self.add_dataset(parent_node, file_path, rs_lyr, 'Raster', replace)
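For reference, a minimal sketch of the compressed copy that add_project_raster now performs (the paths below are made up; rasterio.shutil.copy passes extra keyword arguments through as GDAL creation options, which is exactly how the changed line uses it):

import rasterio.shutil

# Hypothetical paths for illustration only
src_raster = '/tmp/input_dem.tif'
dst_raster = '/tmp/project/outputs/dem.tif'

# Extra keyword arguments become GDAL creation options (COMPRESS=LZW, PREDICTOR=2),
# so the copied GeoTIFF lands on disk already compressed.
rasterio.shutil.copy(src_raster, dst_raster, compress='LZW', predictor=2)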
@@ -425,3 +427,60 @@ def unique_type_id(parent, xml_tag, root_id):
i += 1

return '{}{}'.format(root_id, i if i > 1 else '')

@staticmethod
def prefix_keys(dict_in: Dict[str, str], prefix: str) -> Dict[str, str]:
"""Helper method. Prefix a dictionary's keys
Args:
dict_in (Dict[str, str]): [description]
prefix (str): [description]
Returns:
Dict[str, str]: [description]
"""
if dict_in is None:
return {}
return {'{}{}'.format(prefix, key): val for key, val in dict_in.items()}

def rs_meta_augment(self, in_proj_files: List[str], rs_id_map: Dict[str, str]) -> None:
"""Augment the metadata of specific layers with the input's layers
Args:
out_proj_file (str): [description]
in_proj_files (List[str]): [description]
"""
wh_prefix = '_rs_wh_'
proj_prefix = '_rs_prj_'
lyr_prefix = '_rs_lyr_'

working_id_list = copy(rs_id_map)

# Loop over input project.rs.xml files
found_keys = []
for in_prj_path in in_proj_files:
in_prj = RSProject(None, in_prj_path)

# Define our default, generic warehouse and project meta
whmeta = self.prefix_keys(in_prj.get_metadata_dict(tag='Warehouse'), wh_prefix)
projmeta = self.prefix_keys(in_prj.get_metadata_dict(), proj_prefix)

# look for any valid mappings and move metadata into them
for id_out, id_in in working_id_list.items():

lyrnod_in = in_prj.XMLBuilder.find('Realizations').find('.//*[@id="{}"]'.format(id_in))
lyrmeta = self.prefix_keys(in_prj.get_metadata_dict(lyrnod_in), lyr_prefix)
lyrnod_out = self.XMLBuilder.find('Realizations').find('.//*[@id="{}"]'.format(id_out))

if id_out not in found_keys and lyrnod_in is not None and lyrnod_out is not None:
print('Found mapping for {}=>{}. Moving metadata'.format(id_in, id_out))
found_keys.append(id_out)
self.add_metadata({
**whmeta,
**projmeta,
**lyrmeta,
"{}projType".format(proj_prefix): in_prj.XMLBuilder.find('ProjectType').text,
"{}id".format(lyr_prefix): lyrnod_in.attrib['id'],
"{}guid".format(lyr_prefix): lyrnod_in.attrib['guid'],
"{}path".format(lyr_prefix): lyrnod_in.find('Path').text,
}, lyrnod_out)
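A quick, standalone illustration of what prefix_keys does before rs_meta_augment merges warehouse, project and layer metadata onto the output layer node (the metadata values below are made up):

# Same logic as the static method above, shown with example data
def prefix_keys(dict_in, prefix):
    if dict_in is None:
        return {}
    return {'{}{}'.format(prefix, key): val for key, val in dict_in.items()}

wh_meta = prefix_keys({'HUC': '17060304'}, '_rs_wh_')
lyr_meta = prefix_keys({'Source': 'NED 10m DEM'}, '_rs_lyr_')

# Merged the same way rs_meta_augment builds its add_metadata() call
print({**wh_meta, **lyr_meta})
# {'_rs_wh_HUC': '17060304', '_rs_lyr_Source': 'NED 10m DEM'}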
54 changes: 54 additions & 0 deletions lib/commons/rscommons/classes/tempfiles.py
@@ -0,0 +1,54 @@
from __future__ import annotations
import os
from tempfile import mkstemp
from rscommons import Logger


class TempGISFileException(Exception):
"""Special exceptions
Args:
Exception ([type]): [description]
"""
pass


class TempGISFile():
"""This is just a loose mapping class to allow us to use Python's 'with' keyword.
Raises:
VectorBaseException: Various
"""
log = Logger('TempGISFile')

def __init__(self, suffix: str, prefix: str = None):
self.suffix = suffix
self.prefix = 'rstools_{}'.format(prefix)
self.filepath = None
self.file = None

def __enter__(self) -> TempGISFile:
"""Behaviour on open when using the "with VectorBase():" Syntax
"""
self.file, self.filepath = mkstemp(suffix=self.suffix, text=True)
# Immediately close it. This is so windows doesn't hold onto the handle
os.close(self.file)
return self

def __exit__(self, _type, _value, _traceback):
"""Behaviour on close when using the "with VectorBase():" Syntax
"""
try:
os.remove(self.filepath)
except Exception as e:
self.log.warning('Error cleaning up file {}: {}'.format(self.filepath, e))


class TempRaster(TempGISFile):
def __init__(self, prefix: str):
super(TempRaster, self).__init__(suffix='.tiff', prefix=prefix)


class TempGeopackage(TempGISFile):
def __init__(self, prefix: str):
super(TempGeopackage, self).__init__(suffix='.gpkg', prefix=prefix)
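For context, a minimal usage sketch of the new temp file classes (the prefixes and the work done inside the block are hypothetical); the files are created by mkstemp on enter and removed on exit:

from rscommons import TempRaster, TempGeopackage

with TempRaster('slope') as tmp_ras, TempGeopackage('scratch') as tmp_gpkg:
    # tmp_ras.filepath and tmp_gpkg.filepath are temp paths ending in .tiff / .gpkg
    # Write intermediate outputs here; both files are deleted when the block exits.
    print(tmp_ras.filepath, tmp_gpkg.filepath)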
10 changes: 8 additions & 2 deletions lib/commons/rscommons/classes/vector_datasource.py
@@ -3,7 +3,7 @@
from __future__ import annotations
from osgeo import ogr
from rscommons.classes.logger import Logger

from rscommons.util import safe_remove_file

class DatasetRegistryException(Exception):
"""Special exceptions
@@ -151,4 +151,10 @@ def delete_dataset(self, filepath: str, driver: ogr.Driver):
del self._registry[filepath]

# Delete the Dataset
driver.DeleteDataSource(filepath)
err = driver.DeleteDataSource(filepath)

# If this is a tempfile there's a possibility of failure.
# In that case just remove the file normally (or try anyway)
if err == ogr.OGRERR_FAILURE:
safe_remove_file(filepath)
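
The same delete-with-fallback pattern in isolation (the driver name and file path are assumptions; safe_remove_file is the rscommons helper imported at the top of this file):

from osgeo import ogr
from rscommons.util import safe_remove_file

driver = ogr.GetDriverByName('GPKG')   # assumed driver
filepath = '/tmp/scratch.gpkg'         # assumed path

# DeleteDataSource returns an OGRERR code; fall back to a plain file removal on failure
err = driver.DeleteDataSource(filepath)
if err == ogr.OGRERR_FAILURE:
    safe_remove_file(filepath)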

166 changes: 166 additions & 0 deletions lib/commons/rscommons/debug.py
@@ -0,0 +1,166 @@
from __future__ import annotations
import math
import csv
import os
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from time import sleep
import psutil
import matplotlib.pyplot as plt
from rscommons import Logger

# https://medium.com/survata-engineering-blog/monitoring-memory-usage-of-a-running-python-program-49f027e3d1ba


class ProcStats():
headers = ['datetime', 'cpu_percent', 'resident_memory', 'virtual_memory', 'children', 'children_resident', 'children_virtual']

def __init__(self, cpu_percent, rss_raw: int, vms_raw: int, children: int, children_rss_raw: int, children_vms_raw: int):
self.datetime = datetime.now()
self.cpu_percent = cpu_percent
self.children = children
self.rss = rss_raw / float(2 ** 20)
self.vms = vms_raw / float(2 ** 20)
self.children_rss = children_rss_raw / float(2 ** 20)
self.children_vms = children_vms_raw / float(2 ** 20)

def row(self):
return [
self.datetime.strftime('%Y-%m-%d %H:%M:%S'),
math.ceil(self.cpu_percent),
"{:.2f}".format(self.rss),
"{:.2f}".format(self.vms),
"{}".format(self.children),
"{:.2f}".format(self.rss),
"{:.2f}".format(self.vms)
]

def toString(self):
return "datetime: {}, cpu_percent: {}, mem_resident: {}Mb, mem_virtual: {}Mb, num_children: {}, mem_children_resident: {}Mb, mem_children_virtual: {}Mb".format(
self.datetime.strftime('%Y-%m-%d %H:%M:%S'),
math.ceil(self.cpu_percent),
"{:.2f}".format(self.rss),
"{:.2f}".format(self.vms),
"{}".format(self.children),
"{:.2f}".format(self.rss),
"{:.2f}".format(self.vms)
)

def max(self, tick: ProcStats):
self.cpu_percent = max(self.cpu_percent, tick.cpu_percent)
self.children = max(self.children, tick.children)
self.rss = max(self.rss, tick.rss)
self.vms = max(self.vms, tick.vms)
self.children_rss = max(self.children_rss, tick.children_rss)
self.children_vms = max(self.children_vms, tick.children_vms)


class MemoryMonitor:
def __init__(self, logfile: str, loop_delay=1):
self.keep_measuring = True
self.filepath = logfile
self.loop_delay = loop_delay
self.process = psutil.Process(os.getpid())
self.headers_written = False
self.max_stats = ProcStats(0, 0, 0, 0, 0, 0)

def write_line(self, arr, mode='a'):
with open(self.filepath, mode, newline='', encoding='utf-8') as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(arr)

def getstats(self) -> ProcStats:
cpu_percent = self.process.cpu_percent()
mem_info = self.process.memory_info()
children = 0
children_rss = 0
children_vms = 0

for child in self.process.children():
child_mem = child.memory_info()
children_rss += child_mem.rss
children_vms += child_mem.vms
children += 1

stats = ProcStats(cpu_percent, mem_info.rss, mem_info.vms, children, children_rss, children_vms)
self.max_stats.max(stats)
return stats

def measure_usage(self):
self.write_line(ProcStats.headers, 'w')
while self.keep_measuring:
self.write_line(self.getstats().row())
sleep(self.loop_delay)
return self.max_stats

def write_plot(self, imgpath: str):
x = []
data = {}
with open(self.filepath) as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
for key in row.keys():
if key == 'datetime':
x.append(row[key])
else:
if key in ['children', 'cpu_percent']:
val = int(row[key])
else:
val = float(row[key])
if key not in data:
data[key] = [val]
else:
data[key].append(val)

chart_title = 'Process stats'
xlabel = 'time'
ylabel = 'Mb'

plt.clf()

fig, ax = plt.subplots()
ax.title.set_text(chart_title)
ax2 = ax.twinx()
# ['datetime', 'cpu_percent', 'resident_memory', 'virtual_memory', 'children', 'children_resident', 'children_virtual']
for key in ['children_resident', 'children_virtual', 'resident_memory', 'virtual_memory']:
if key in data:
ax.plot(x, data[key], label=key)
ax2._get_lines.get_next_color()

ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)

for key in ['cpu_percent', 'children']:
if key in data:
ax2.plot(x, data[key], label=key)

ax.legend(loc='lower left')
ax2.legend(loc='lower right')

freq = math.floor(len(x) / 10)
if freq == 0:
freq = 1
ax.set_xticks(x[::freq])
ax.set_xticklabels(x[::freq], rotation=45)
ax.grid(True)

# plt.tight_layout()
fig.set_size_inches(8, 6)
fig.savefig(imgpath, format='png', dpi=300)


def ThreadRun(callback, memlogfile: str, *args, **kwargs):
log = Logger('Debug')
with ThreadPoolExecutor() as executor:
memmon = MemoryMonitor(memlogfile, 1)
mem_thread = executor.submit(memmon.measure_usage)
try:
fn_thread = executor.submit(callback, *args, **kwargs)
result = fn_thread.result()
finally:
memmon.keep_measuring = False
max_obj = mem_thread.result()
log.debug('MaxStats: {}'.format(max_obj))
memmon.write_plot(os.path.splitext(memlogfile)[0] + '.png')

return result, max_obj.toString()
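A usage sketch of the new ThreadRun wrapper (the wrapped function and log path are hypothetical): the callback runs on a worker thread while MemoryMonitor samples the process once per second, and a CSV plus a PNG plot are written next to the log path.

from rscommons.debug import ThreadRun

def build_project(huc, output_dir):
    # hypothetical long-running task being profiled
    return output_dir

result, max_stats = ThreadRun(build_project, '/tmp/memlog.csv', '17060304', '/tmp/out')
# /tmp/memlog.csv holds the per-second samples, /tmp/memlog.png the plot
print(max_stats)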
38 changes: 25 additions & 13 deletions lib/commons/rscommons/dotenv.py
@@ -1,4 +1,5 @@
import codecs
from typing import List, Dict
import re
import os
import argparse
@@ -50,18 +51,29 @@ def parse_args_env(parser: argparse.ArgumentParser, env_path=None):

# Try and make substitutions for all the {env:ENVNAME} parameters we find
for k, v in vars(args).items():
if type(v) is str:
m = re.match(pattern, v)
if m:
envname = m.group(1)
# There is a precedence here:
if envname in env:
sub = env[envname]
elif envname in os.environ:
sub = os.environ[envname]
else:
raise Exception('COULD NOT FIND ENVIRONMENT VARIABLE: {}'.format(envname))
# Finally, make the substitution
setattr(args, k, str(Path(re.sub(pattern, sub.replace("\\", "/"), v))))
new_val = replace_env_varts(pattern, v, os.environ)
setattr(args, k, new_val)

return args


def replace_env_varts(pattern: str, value_str: str, env: Dict[str, str]):
if type(value_str) is str:
new_str = value_str

def replace(m):
envname = m.group(1)
if envname in env:
sub = env[envname]
elif envname in os.environ:
sub = os.environ[envname]
else:
raise Exception('COULD NOT FIND ENVIRONMENT VARIABLE: {}'.format(envname))
# Finally, make the substitution
return sub.replace("\\", "/")

new_str = str(Path(re.sub(pattern, replace, new_str)))

return new_str
else:
return value_str
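
Finally, a small sketch of the refactored substitution (the regular expression is an assumption; the real pattern is defined earlier in parse_args_env and is not visible in this hunk):

from rscommons.dotenv import replace_env_varts

# Assumed placeholder pattern for {env:NAME} tokens
pattern = r'{env:([^}]+)}'
env = {'DATA_ROOT': '/home/user/data'}

new_val = replace_env_varts(pattern, '{env:DATA_ROOT}/projects/test.gpkg', env)
print(new_val)   # /home/user/data/projects/test.gpkg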