From ec683b1d616b687edd58065d366fc3faa9a064ff Mon Sep 17 00:00:00 2001 From: shankari Date: Sun, 24 Jan 2021 16:34:53 -0800 Subject: [PATCH] Get the tour model generation code to work again (#788) * Get the tour model generation code to work again The big changes were to move the visualization imports, notably matplotlib, into the individual visualization functions. It is terrible practice to mix computation and viz, not the least because the dependencies for the computation are unnecessarily bloated by viz modules other changes: - remove invalid and unused reference to edb directly - change print -> print() to support python3 - remove erroneous close parenthesis - add pykov to the computation environment Testing done: Without the fixes: ``` (emission) kshankar-35069s:e-mission-server kshankar$ ./e-mission-py.bash emission/pipeline/model_stage.py storage not configured, falling back to sample, default configuration Connecting to database URL localhost Traceback (most recent call last): File "emission/pipeline/model_stage.py", line 13, in import emission.storage.decorations.tour_model_queries as esdtmq File "/Users/kshankar/e-mission/e-mission-server/emission/storage/decorations/tour_model_queries.py", line 12, in import emission.storage.decorations.common_place_queries as esdcpq File "/Users/kshankar/e-mission/e-mission-server/emission/storage/decorations/common_place_queries.py", line 16, in import pykov as pk ModuleNotFoundError: No module named 'pykov' ``` and multiple other errors With the fixes: ``` Cleanedplace({'source': 'DwellSegmentationTimeFilter', 'enter_ts': 1610812590, 'enter_local_dt': {'year': 2021, 'month': 1, 'day': 16, 'hour': 8, 'minute': 56, 'second': 30, 'weekday': 5, 'timezone': 'America/Denver'}, 'enter_fmt_time': '2021-01-16T08:56:30-07:00', 'location': {'type': 'Point', 'coordinates': [-104.891193, 39.7660244]}, 'raw_places': [ObjectId('60031cc37dc9bcbfa487c31e'), ObjectId('60031cc37dc9bcbfa487c31e')], 'ending_trip': 
ObjectId('60031cc47dc9bcbfa487c327'), 'starting_trip': ObjectId('60039b632fb2ebf61146462a'), 'exit_ts': 1610843452.733473, 'exit_fmt_time': '2021-01-16T17:30:52.733473-07:00', 'exit_local_dt': {'year': 2021, 'month': 1, 'day': 16, 'hour': 17, 'minute': 30, 'second': 52, 'weekday': 5, 'timezone': 'America/Denver'}, 'duration': 30862.733473062515})], 'end': 0, 'start': 1, 'start_coords': [-104.90605494323428, 39.764846852552445], 'end_coords': [-104.89127443959791, 39.765978419090914]} 2021-01-24 15:07:42,885:DEBUG:After creating map, number of places is 2 2021-01-24 15:07:42,885:DEBUG:Adding 55 places for this place 2021-01-24 15:07:42,889:DEBUG:Adding 55 places for this place ``` * Revert changes to the webserver conf and the notebook additions Which should really go into a different PR --- emission/analysis/modelling/tour_model/featurization.py | 2 +- emission/analysis/modelling/tour_model/similarity.py | 6 +++--- emission/pipeline/model_stage.py | 5 ++--- setup/environment36.yml | 1 + 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/emission/analysis/modelling/tour_model/featurization.py b/emission/analysis/modelling/tour_model/featurization.py index f015c077c..e20f55baf 100644 --- a/emission/analysis/modelling/tour_model/featurization.py +++ b/emission/analysis/modelling/tour_model/featurization.py @@ -11,7 +11,6 @@ from builtins import object from past.utils import old_div import logging -import matplotlib.pyplot as plt import numpy from sklearn.cluster import KMeans from sklearn import metrics @@ -142,6 +141,7 @@ def check_clusters(self): #map the clusters #TODO - move this to a file in emission.analysis.plotting to map clusters from the database def map_clusters(self): + import matplotlib.pyplot as plt from matplotlib import colors as matcol colormap = plt.cm.get_cmap() diff --git a/emission/analysis/modelling/tour_model/similarity.py b/emission/analysis/modelling/tour_model/similarity.py index aa088ed5c..044b2ef19 100644 --- 
a/emission/analysis/modelling/tour_model/similarity.py +++ b/emission/analysis/modelling/tour_model/similarity.py @@ -11,9 +11,6 @@ from past.utils import old_div import logging import math -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt import numpy from sklearn import metrics from numpy.linalg import norm @@ -147,6 +144,9 @@ def match(self,a,bin): #create the histogram def graph(self): + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt bars = [0] * len(self.bins) for i in range(len(self.bins)): bars[i] = len(self.bins[i]) diff --git a/emission/pipeline/model_stage.py b/emission/pipeline/model_stage.py index ade19b1ee..8bfd74b34 100644 --- a/emission/pipeline/model_stage.py +++ b/emission/pipeline/model_stage.py @@ -11,7 +11,6 @@ import emission.net.usercache.abstract_usercache as enua import emission.storage.timeseries.abstract_timeseries as esta import emission.storage.decorations.tour_model_queries as esdtmq -from emission.core.get_database import get_db, get_mode_db, get_section_db import emission.analysis.intake.cleaning.filter_accuracy as eaicf import emission.analysis.intake.segmentation.trip_segmentation as eaist @@ -59,7 +58,7 @@ def run_model_pipeline(process_number, uuid_list): try: run_model_pipeline_for_user(uuid) except Exception as e: - print "dang flabbit failed on error %s" % e + print("dang flabbit failed on error %s" % e) def run_mode_inference_pipeline_for_user(uuid): @@ -107,7 +106,7 @@ def run_mode_pipeline_for_user(MIP, uuid): timerange = get_time_range_for_mode_inference(uuid) MIP.runPipelineModelStage(uuid, timerange) - mark_mode_inference_done_for_user(uuid, MIP.getLastTimestamp}) + mark_mode_inference_done_for_user(uuid, MIP.getLastTimestamp) if __name__ == '__main__': diff --git a/setup/environment36.yml b/setup/environment36.yml index 136117c18..b65e2daf1 100644 --- a/setup/environment36.yml +++ b/setup/environment36.yml @@ -24,4 +24,5 @@ dependencies: - pyfcm==1.4.7 - 
pygeocoder==1.2.5 - pymongo==3.11.0 + - pykov==0.1 prefix: /Users/shankari/OSS/anaconda/envs/emission