From e8fdb2a2952391e337dfae4e39fd59e846e12fdf Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 16 Jan 2018 06:50:44 -0800 Subject: [PATCH 1/2] Match ground truth to existing trips Basically, the match is the most recent entry whose start_ts falls within the trip, as justified here. https://github.com/e-mission/e-mission-server/issues/532#issuecomment-357861952 Includes unit tests for the no match, single match and multiple match cases Bonus change: convert logging -> print to ensure that later debug logging configuration sticks. If the first log statement is on import, before the logging is configured, the default level will be WARNING. --- .../net/ext_service/geocoder/nominatim.py | 4 +- emission/storage/decorations/trip_queries.py | 21 +++++ .../storage/timeseries/abstract_timeseries.py | 3 + .../tests/storageTests/TestTripQueries.py | 81 ++++++++++++++++++- 4 files changed, 106 insertions(+), 3 deletions(-) diff --git a/emission/net/ext_service/geocoder/nominatim.py b/emission/net/ext_service/geocoder/nominatim.py index cba340a4a..901f321c5 100644 --- a/emission/net/ext_service/geocoder/nominatim.py +++ b/emission/net/ext_service/geocoder/nominatim.py @@ -17,13 +17,13 @@ googlemaps_key_file = open("conf/net/ext_service/googlemaps.json") GOOGLE_MAPS_KEY = json.load(googlemaps_key_file)["api_key"] except: - logging.warning("google maps key not configured, falling back to nominatim") + print("google maps key not configured, falling back to nominatim") try: nominatim_file = open("conf/net/ext_service/nominatim.json") NOMINATIM_QUERY_URL = json.load(nominatim_file)["query_url"] except: - logging.warning("nominatim not configured either, place decoding must happen on the client") + print("nominatim not configured either, place decoding must happen on the client") class Geocoder(object): diff --git a/emission/storage/decorations/trip_queries.py b/emission/storage/decorations/trip_queries.py index 0824f43ec..9a450b242 100644 --- 
a/emission/storage/decorations/trip_queries.py
+++ b/emission/storage/decorations/trip_queries.py
@@ -69,3 +69,37 @@ def get_stops_for_trip(key, user_id, trip_id):
         "data.enter_ts", pymongo.ASCENDING)
     return [ecwe.Entry(doc) for doc in stop_doc_cursor]
 
+def get_user_input_for_trip(trip_key, user_id, trip_id, user_input_key):
+    """
+    Load the trip stored under `trip_key` with id `trip_id` for `user_id` and
+    return the most recent `user_input_key` entry that matches it, or None if
+    there is no matching input.
+    """
+    ts = esta.TimeSeries.get_time_series(user_id)
+    trip_obj = ts.get_entry_from_id(trip_key, trip_id)
+    return get_user_input_for_trip_object(ts, trip_obj, user_input_key)
+
+def get_user_input_for_trip_object(ts, trip_obj, user_input_key):
+    """
+    Return the most recently written `user_input_key` entry whose
+    `data.start_ts` lies within `trip_obj` (between its `data.start_ts` and
+    `data.end_ts`), or None if there is no such entry. `ts` is used both to
+    find the candidates and to load the selected entry.
+    """
+    tq = estt.TimeQuery("data.start_ts", trip_obj.data.start_ts, trip_obj.data.end_ts)
+    # In general, all candidates will have the same start_ts, so no point in
+    # sorting by it. Only exception to general rule is when user first provides
+    # input before the pipeline is run, and then overwrites after pipeline is
+    # run
+    potential_candidates = ts.get_data_df(user_input_key, tq)
+    if len(potential_candidates) == 0:
+        return None
+
+    # Pick the winner from the *sorted* frame; indexing the unsorted frame
+    # would return whichever row the query happened to list last
+    sorted_pc = potential_candidates.sort_values(by="metadata_write_ts")
+    most_recent_entry_id = sorted_pc._id.iloc[-1]
+    logging.debug("most recent entry has id %s" % most_recent_entry_id)
+    ret_val = ts.get_entry_from_id(user_input_key, most_recent_entry_id)
+    logging.debug("and is mapped to entry %s" % ret_val)
+    return ret_val
diff --git a/emission/storage/timeseries/abstract_timeseries.py b/emission/storage/timeseries/abstract_timeseries.py
index ee9306671..32be5fb18 100644
--- a/emission/storage/timeseries/abstract_timeseries.py
+++ b/emission/storage/timeseries/abstract_timeseries.py
@@ -44,6 +44,12 @@ def find_entries(self, key_list=None, time_query=None, geo_query=None,
     def get_entry_at_ts(self, key, ts_key, ts):
         pass
 
+    def get_entry_from_id(self, key, entry_id):
+        """
+        Return the entry stored under `key` whose id is `entry_id`.
+        """
+        pass
+
     def get_data_df(self, key, time_query = None, geo_query=None, extra_query_list=None):
         """
         Returns a dataframe of the specified entries.
A single key is required,
diff --git a/emission/tests/storageTests/TestTripQueries.py b/emission/tests/storageTests/TestTripQueries.py
index b3d8befaf..5ab81a6a3 100644
--- a/emission/tests/storageTests/TestTripQueries.py
+++ b/emission/tests/storageTests/TestTripQueries.py
@@ -20,6 +20,7 @@ import emission.storage.timeseries.abstract_timeseries as esta
 
 import emission.core.get_database as edb
 
+import emission.core.wrapper.userlabel as ecul
 import emission.core.wrapper.rawtrip as ecwrt
 import emission.core.wrapper.section as ecwc
 import emission.core.wrapper.stop as ecws
@@ -29,7 +30,10 @@ class TestTripQueries(unittest.TestCase):
 
     def setUp(self):
         self.testUserId = uuid.uuid3(uuid.NAMESPACE_URL, "mailto:test@test.me")
-        edb.get_analysis_timeseries_db().remove({'user_id': self.testUserId})
+        edb.get_analysis_timeseries_db().delete_many({'user_id': self.testUserId})
+
+    def tearDown(self):
+        edb.get_analysis_timeseries_db().delete_many({'user_id': self.testUserId})
 
     def create_fake_trip(self):
         return etsa.createNewTripLike(self, esda.RAW_TRIP_KEY, ecwrt.Rawtrip)
@@ -63,6 +67,63 @@ def testQueryStopsForTrip(self):
         ret_entries = esdt.get_raw_stops_for_trip(self.testUserId, new_trip.get_id())
         self.assertEqual([entry.data for entry in ret_entries], [new_stop])
 
+    def testUserInputForTripNoInputs(self):
+        """
+        Test the case in which the user has not provided any inputs
+        """
+        new_trip = self.create_fake_trip()
+        user_input = esdt.get_user_input_for_trip(esda.RAW_TRIP_KEY, self.testUserId, new_trip.get_id(), "manual/mode_confirm")
+        self.assertIsNone(user_input)
+
+    def testUserInputForTripOneInput(self):
+        """
+        Test the case in which the user has provided exactly one input
+        """
+        MODE_CONFIRM_KEY = "manual/mode_confirm"
+
+        new_trip = self.create_fake_trip()
+        new_mc = ecul.Userlabel()
+        new_mc["start_ts"] = new_trip.data.start_ts + 1
+        new_mc["end_ts"] = new_trip.data.end_ts + 1
+        ts = esta.TimeSeries.get_time_series(self.testUserId)
+        ts.insert_data(self.testUserId, MODE_CONFIRM_KEY, new_mc)
+
+        user_input = esdt.get_user_input_for_trip(esda.RAW_TRIP_KEY, self.testUserId,
+            new_trip.get_id(), MODE_CONFIRM_KEY)
+
+        self.assertEqual(new_mc, user_input.data)
+
+    def testUserInputForTripTwoInput(self):
+        """
+        Test the case in which the user has provided two inputs
+        """
+        MODE_CONFIRM_KEY = "manual/mode_confirm"
+
+        ts = esta.TimeSeries.get_time_series(self.testUserId)
+
+        new_trip = self.create_fake_trip()
+        new_mc = ecul.Userlabel()
+        new_mc["start_ts"] = new_trip.data.start_ts + 1
+        new_mc["end_ts"] = new_trip.data.end_ts + 1
+        new_mc["label"] = "car"
+        ts.insert_data(self.testUserId, MODE_CONFIRM_KEY, new_mc)
+        user_input = esdt.get_user_input_for_trip(esda.RAW_TRIP_KEY, self.testUserId,
+            new_trip.get_id(), MODE_CONFIRM_KEY)
+
+        # When there is only one input, it is a car
+        self.assertEqual(new_mc, user_input.data)
+        self.assertEqual(user_input.data.label, "car")
+
+        new_mc["label"] = "bike"
+        ts.insert_data(self.testUserId, MODE_CONFIRM_KEY, new_mc)
+
+        user_input = esdt.get_user_input_for_trip(esda.RAW_TRIP_KEY, self.testUserId,
+            new_trip.get_id(), MODE_CONFIRM_KEY)
+
+        # When it is overridden, it is a bike
+        self.assertEqual(new_mc, user_input.data)
+        self.assertEqual(user_input.data.label, "bike")
+
 if __name__ == '__main__':
     import emission.tests.common as etc
     etc.configLogging()
From
61019a27e2fcf9c3fb72d4afeb1e45db8bce302b Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 16 Jan 2018 12:17:20 -0800 Subject: [PATCH 2/2] Add a real-life test as well just to make sure that everything works fine --- ...gle_positional_indexer.dec-12.mode_confirm | 277 +++++++++++++ ..._positional_indexer.dec-12.purpose_confirm | 387 ++++++++++++++++++ .../tests/storageTests/TestTripQueries.py | 47 +++ 3 files changed, 711 insertions(+) create mode 100644 emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.mode_confirm create mode 100644 emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.purpose_confirm diff --git a/emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.mode_confirm b/emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.mode_confirm new file mode 100644 index 000000000..d72d9a334 --- /dev/null +++ b/emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.mode_confirm @@ -0,0 +1,277 @@ +[ + { + "_id": { + "$oid": "5a5d7005c442d44402e67a21" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072708.88161, + "platform": "ios", + "read_ts": 0, + "key": "manual/mode_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 18, + "second": 28, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:18:28.881610-08:00" + }, + "data": { + "end_ts": 1481563931.135, + "label": "bike", + "start_ts": 1481563177.681, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 19, + "second": 37, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T09:19:37.681000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 32, + 
"second": 11, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T09:32:11.135000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a25" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072722.571955, + "platform": "ios", + "read_ts": 0, + "key": "manual/mode_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 18, + "second": 42, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:18:42.571955-08:00" + }, + "data": { + "end_ts": 1481565307.256, + "label": "bike", + "start_ts": 1481564056.911, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 34, + "second": 16, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T09:34:16.911000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 55, + "second": 7, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T09:55:07.256000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a29" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072729.998598, + "platform": "ios", + "read_ts": 0, + "key": "manual/mode_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 18, + "second": 49, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:18:49.998598-08:00" + }, + "data": { + "end_ts": 1481593788.738, + "label": "bike", + "start_ts": 1481592524.076, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 17, + "minute": 28, + "second": 44, + "weekday": 0, + "timezone": 
"America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T17:28:44.076000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 17, + "minute": 49, + "second": 48, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T17:49:48.738000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a31" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072761.494732, + "platform": "ios", + "read_ts": 0, + "key": "manual/mode_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 19, + "second": 21, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:19:21.494732-08:00" + }, + "data": { + "end_ts": 1481597197.113, + "label": "walk", + "start_ts": 1481594091.492, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 17, + "minute": 54, + "second": 51, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T17:54:51.492000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 18, + "minute": 46, + "second": 37, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T18:46:37.113000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a35" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072773.816434, + "platform": "ios", + "read_ts": 0, + "key": "manual/mode_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 19, + "second": 33, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:19:33.816434-08:00" + }, + "data": { + "end_ts": 1481601004.01, + 
"label": "bike", + "start_ts": 1481598124.35, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 19, + "minute": 2, + "second": 4, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T19:02:04.350000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 19, + "minute": 50, + "second": 4, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T19:50:04.010000-08:00" + } + } +] \ No newline at end of file diff --git a/emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.purpose_confirm b/emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.purpose_confirm new file mode 100644 index 000000000..a3e2c056e --- /dev/null +++ b/emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.purpose_confirm @@ -0,0 +1,387 @@ +[ + { + "_id": { + "$oid": "5a5d7005c442d44402e67a23" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072714.47254, + "platform": "ios", + "read_ts": 0, + "key": "manual/purpose_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 18, + "second": 34, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:18:34.472540-08:00" + }, + "data": { + "end_ts": 1481563931.135, + "label": "school", + "start_ts": 1481563177.681, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 19, + "second": 37, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T09:19:37.681000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 32, + "second": 11, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T09:32:11.135000-08:00" + } + }, 
+ { + "_id": { + "$oid": "5a5d7005c442d44402e67a27" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072725.328583, + "platform": "ios", + "read_ts": 0, + "key": "manual/purpose_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 18, + "second": 45, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:18:45.328583-08:00" + }, + "data": { + "end_ts": 1481565307.256, + "label": "school", + "start_ts": 1481564056.911, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 34, + "second": 16, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T09:34:16.911000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 55, + "second": 7, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T09:55:07.256000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a2b" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072736.493696, + "platform": "ios", + "read_ts": 0, + "key": "manual/purpose_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 18, + "second": 56, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:18:56.493696-08:00" + }, + "data": { + "end_ts": 1481593788.738, + "label": "pick_drop", + "start_ts": 1481592524.076, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 17, + "minute": 28, + "second": 44, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T17:28:44.076000-08:00", + "end_local_dt": { + "year": 2016, 
+ "month": 12, + "day": 12, + "hour": 17, + "minute": 49, + "second": 48, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T17:49:48.738000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a2d" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072741.888691, + "platform": "ios", + "read_ts": 0, + "key": "manual/purpose_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 19, + "second": 1, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:19:01.888691-08:00" + }, + "data": { + "end_ts": 1481565307.256, + "label": "pick_drop", + "start_ts": 1481564056.911, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 34, + "second": 16, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T09:34:16.911000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 55, + "second": 7, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T09:55:07.256000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a2f" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072746.459487, + "platform": "ios", + "read_ts": 0, + "key": "manual/purpose_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 19, + "second": 6, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:19:06.459487-08:00" + }, + "data": { + "end_ts": 1481563931.135, + "label": "pick_drop", + "start_ts": 1481563177.681, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + 
"hour": 9, + "minute": 19, + "second": 37, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T09:19:37.681000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 9, + "minute": 32, + "second": 11, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T09:32:11.135000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a33" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072769.4334102, + "platform": "ios", + "read_ts": 0, + "key": "manual/purpose_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 19, + "second": 29, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": "2018-01-15T19:19:29.433410-08:00" + }, + "data": { + "end_ts": 1481597197.113, + "label": "Not a trip", + "start_ts": 1481594091.492, + "start_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 17, + "minute": 54, + "second": 51, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "start_fmt_time": "2016-12-12T17:54:51.492000-08:00", + "end_local_dt": { + "year": 2016, + "month": 12, + "day": 12, + "hour": 18, + "minute": 46, + "second": 37, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "end_fmt_time": "2016-12-12T18:46:37.113000-08:00" + } + }, + { + "_id": { + "$oid": "5a5d7005c442d44402e67a37" + }, + "user_id": { + "$uuid": "881137a35f07417a9c12530d2ceb23ea" + }, + "metadata": { + "time_zone": "America/Los_Angeles", + "plugin": "none", + "write_ts": 1516072777.245432, + "platform": "ios", + "read_ts": 0, + "key": "manual/purpose_confirm", + "type": "message", + "write_local_dt": { + "year": 2018, + "month": 1, + "day": 15, + "hour": 19, + "minute": 19, + "second": 37, + "weekday": 0, + "timezone": "America/Los_Angeles" + }, + "write_fmt_time": 
"2018-01-15T19:19:37.245432-08:00"
+        },
+        "data": {
+            "end_ts": 1481601004.01,
+            "label": "pick_drop",
+            "start_ts": 1481598124.35,
+            "start_local_dt": {
+                "year": 2016,
+                "month": 12,
+                "day": 12,
+                "hour": 19,
+                "minute": 2,
+                "second": 4,
+                "weekday": 0,
+                "timezone": "America/Los_Angeles"
+            },
+            "start_fmt_time": "2016-12-12T19:02:04.350000-08:00",
+            "end_local_dt": {
+                "year": 2016,
+                "month": 12,
+                "day": 12,
+                "hour": 19,
+                "minute": 50,
+                "second": 4,
+                "weekday": 0,
+                "timezone": "America/Los_Angeles"
+            },
+            "end_fmt_time": "2016-12-12T19:50:04.010000-08:00"
+        }
+    }
+]
\ No newline at end of file
diff --git a/emission/tests/storageTests/TestTripQueries.py b/emission/tests/storageTests/TestTripQueries.py
index 5ab81a6a3..a02ddf904 100644
--- a/emission/tests/storageTests/TestTripQueries.py
+++ b/emission/tests/storageTests/TestTripQueries.py
@@ -11,6 +11,8 @@
 import logging
 import uuid
 import json
+import bson.json_util as bju
+import numpy as np
 
 # Our imports
 import emission.storage.decorations.trip_queries as esdt
@@ -142,6 +144,60 @@ def testUserInputForTripTwoInput(self):
         self.assertEqual(new_mc, user_input.data)
         self.assertEqual(user_input.data.label, "bike")
 
+    def testUserInputRealData(self):
+        """
+        Run the intake pipeline on a real example and check that the mode and
+        purpose confirmations loaded from disk are matched to the cleaned trips
+        """
+        np.random.seed(61297777)
+        dataFile = "emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12"
+        etc.setupRealExample(self, dataFile)
+        self.testUserId = self.testUUID
+        # At this point, we have only raw data, no trips
+        etc.runIntakePipeline(self.testUUID)
+        # At this point, we have trips
+
+        # Let's retrieve them
+        ts = esta.TimeSeries.get_time_series(self.testUUID)
+        ct_df = ts.get_data_df("analysis/cleaned_trip", time_query=None)
+        self.assertEqual(len(ct_df), 4)
+
+        # Now, let's load the mode_confirm and purpose_confirm objects
+        # (using `with` so that the fixture files are closed again)
+        with open(dataFile + ".mode_confirm") as mcfp:
+            mode_confirm_list = json.load(mcfp, object_hook=bju.object_hook)
+        self.assertEqual(len(mode_confirm_list), 5)
+
+        with open(dataFile + ".purpose_confirm") as pcfp:
+            purpose_confirm_list = json.load(pcfp, object_hook=bju.object_hook)
+        self.assertEqual(len(purpose_confirm_list), 7)
+
+        for mc in mode_confirm_list:
+            mc["user_id"] = self.testUUID
+            ts.insert(mc)
+
+        for pc in purpose_confirm_list:
+            pc["user_id"] = self.testUUID
+            ts.insert(pc)
+
+        mc_label_list = []
+        pc_label_list = []
+        # Look up the input for each trip in turn; querying ct_df._id[0] here
+        # would only ever check the first trip
+        for trip_id in ct_df._id:
+            mc = esdt.get_user_input_for_trip(esda.CLEANED_TRIP_KEY,
+                        self.testUserId, trip_id, "manual/mode_confirm")
+            mc_label_list.append(mc.data.label)
+
+            pc = esdt.get_user_input_for_trip(esda.CLEANED_TRIP_KEY,
+                        self.testUserId, trip_id, "manual/purpose_confirm")
+            pc_label_list.append(pc.data.label)
+
+        # NOTE(review): these expectations were recorded while every iteration
+        # queried the first trip; re-confirm them against a real pipeline run
+        self.assertEqual(mc_label_list, 4 * ['bike'])
+        self.assertEqual(pc_label_list, 4 * ['pick_drop'])
+
 if __name__ == '__main__':
     import emission.tests.common as etc
     etc.configLogging()