From 16e79e217d37ee10f2bab1ebf044ed13f55f16d3 Mon Sep 17 00:00:00 2001
From: TeachMeTW
Date: Mon, 23 Dec 2024 18:50:21 -0800
Subject: [PATCH] Added unit tests to verify that the stats are generated in both cases: (i) when there is new data and (ii) when there is no new data.

---
Notes:
    Review commentary only. Everything from the three-dash marker above
    down to the first file header below is outside the commit.

    What the patch does
    * emission/analysis/result/user_stat.py: get_and_store_user_stats now
      passes the caller-supplied trip_key as the key_list for both the
      total_trips and the labeled_trips count, replacing the hard-coded
      "analysis/confirmed_trip" key.
    * TestUserStat.py: testGetAndStoreUserStats is renamed to
      testGetAndStoreUserStatsDefault and its expected_total_trips changes
      from 5 to 8.
    * New testGetAndStoreUserStatsSecondRunNoNewData: reads the stored
      profile, calls etc.runIntakePipeline again without inserting data,
      and asserts total_trips and labeled_trips are unchanged.
    * New testGetAndStoreUserStatsNewData: asserts total_trips is 8, loads
      emission/tests/data/real_examples/shankari_2015-aug-27, rewrites
      each entry's user_id to self.testUUID, drops '_id', inserts the
      entries into the timeseries collection, reruns the pipeline, then
      asserts total_trips is 18 and labeled_trips has not decreased.

    Points to confirm before merging
    * The new test uses json and esj, and no hunk here adds imports;
      presumably TestUserStat.py already imports both -- TODO confirm.
    * In testGetAndStoreUserStatsNewData, initial_total_trips is assigned
      but never read afterwards; only initial_labeled_trips is used.
    * The data file path is relative, so the test presumably has to be
      run from the repository root -- TODO confirm.
    * Both new tests read the profile before re-running the pipeline, so
      they rely on setUp() having already run it, as their comments say;
      setUp() is not part of this patch -- TODO confirm.

    Formatting caveat
    * This copy was rebuilt from a whitespace-collapsed rendering of the
      patch. Added lines match the hunk counts (104 in the last hunk, 108
      insertions in total), but indentation and the position of blank
      context lines are inferred, and the author line carries no e-mail
      address here. Check against the target files before applying.

 emission/analysis/result/user_stat.py         |   4 +-
 .../analysisTests/intakeTests/TestUserStat.py | 108 +++++++++++++++++-
 2 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/emission/analysis/result/user_stat.py b/emission/analysis/result/user_stat.py
index fa1d7ac95..27d633cec 100644
--- a/emission/analysis/result/user_stat.py
+++ b/emission/analysis/result/user_stat.py
@@ -61,9 +61,9 @@ def get_and_store_user_stats(user_id: str, trip_key: str) -> None:
         end_ts_result = ts.get_first_value_for_field(trip_key, "data.end_ts", pymongo.DESCENDING)
         end_ts = None if end_ts_result == -1 else end_ts_result
 
-        total_trips = ts.find_entries_count(key_list=["analysis/confirmed_trip"])
+        total_trips = ts.find_entries_count(key_list=[trip_key])
         labeled_trips = ts.find_entries_count(
-            key_list=["analysis/confirmed_trip"],
+            key_list=[trip_key],
             extra_query_list=[{'data.user_input': {'$ne': {}}}]
         )
 
diff --git a/emission/tests/analysisTests/intakeTests/TestUserStat.py b/emission/tests/analysisTests/intakeTests/TestUserStat.py
index 207aa0a98..8cae238b1 100644
--- a/emission/tests/analysisTests/intakeTests/TestUserStat.py
+++ b/emission/tests/analysisTests/intakeTests/TestUserStat.py
@@ -57,7 +57,7 @@ def tearDown(self):
         edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID})
         edb.get_profile_db().delete_one({"user_id": self.testUUID})
 
-    def testGetAndStoreUserStats(self):
+    def testGetAndStoreUserStatsDefault(self):
         """
         Test get_and_store_user_stats for the user to ensure that user statistics
         are correctly aggregated and stored in the user profile.
@@ -75,7 +75,7 @@ def testGetAndStoreUserStats(self):
         self.assertIn("pipeline_range", profile, "User profile should contain 'pipeline_range'.")
         self.assertIn("last_call_ts", profile, "User profile should contain 'last_call_ts'.")
 
-        expected_total_trips = 5
+        expected_total_trips = 8
         expected_labeled_trips = 0
 
         self.assertEqual(profile["total_trips"], expected_total_trips,
@@ -115,6 +115,110 @@ def testLastCall(self):
             f"Expected last_call_ts to be {expected_last_call_ts}, got {actual_last_call_ts}"
         )
 
+    def testGetAndStoreUserStatsSecondRunNoNewData(self):
+        """
+        Case (ii): Verify stats remain unchanged if we run the pipeline again
+        without adding new data.
+        """
+        # Check stats after the initial run (from setUp()).
+        initial_profile = edb.get_profile_db().find_one({"user_id": self.testUUID})
+        self.assertIsNotNone(initial_profile, "User profile should exist after first run.")
+        initial_total_trips = initial_profile["total_trips"]
+        initial_labeled_trips = initial_profile["labeled_trips"]
+
+        # Run the pipeline again, but don't add any new data
+        etc.runIntakePipeline(self.testUUID)
+
+        # Stats should remain the same
+        updated_profile = edb.get_profile_db().find_one({"user_id": self.testUUID})
+        self.assertIsNotNone(updated_profile, "Profile should still exist.")
+        self.assertEqual(
+            updated_profile["total_trips"],
+            initial_total_trips,
+            f"Expected total_trips to remain {initial_total_trips}, got {updated_profile['total_trips']}"
+        )
+        self.assertEqual(
+            updated_profile["labeled_trips"],
+            initial_labeled_trips,
+            f"Expected labeled_trips to remain {initial_labeled_trips}, got {updated_profile['labeled_trips']}"
+        )
+
+
+    def testGetAndStoreUserStatsNewData(self):
+        """
+        Case (i): Verify stats are updated properly when new data is inserted
+        from shankari_2015-aug-27 without modifying the original data and the pipeline is rerun.
+        We then assert the actual number of total trips (e.g., from 8 to 18).
+        """
+        # 1. Retrieve the initial user profile after setUp()
+        initial_profile = edb.get_profile_db().find_one({"user_id": self.testUUID})
+        self.assertIsNotNone(initial_profile, "User profile should exist after the first run.")
+
+        # 2. Assert that the initial total trips are as expected (8 trips)
+        expected_initial_trips = 8
+        self.assertEqual(
+            initial_profile["total_trips"],
+            expected_initial_trips,
+            f"Expected initial total_trips to be {expected_initial_trips}, got {initial_profile['total_trips']}"
+        )
+
+        # Store initial trips count and labeled trips for later comparison
+        initial_total_trips = initial_profile["total_trips"]
+        initial_labeled_trips = initial_profile["labeled_trips"]
+
+        # 3. Load and prepare new data from shankari_2015-aug-27
+        new_entries = []
+        aug27_file_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
+
+        try:
+            with open(aug27_file_path) as fp:
+                # Load entries using the existing JSON wrapper
+                aug27_entries = json.load(fp, object_hook=esj.wrapped_object_hook)
+                for entry in aug27_entries:
+                    # Replace the user_id UUID with self.testUUID
+                    entry['user_id'] = self.testUUID
+
+                    # Remove the '_id' field to let MongoDB assign a new one
+                    if '_id' in entry:
+                        del entry['_id']
+
+                    # Append the modified entry to the new_entries list
+                    new_entries.append(entry)
+
+        except FileNotFoundError:
+            self.fail(f"New data file not found at path: {aug27_file_path}")
+        except json.JSONDecodeError as e:
+            self.fail(f"JSON decoding failed for file {aug27_file_path}: {e}")
+
+        # 4. Insert the new entries into the timeseries collection
+        if new_entries:
+            edb.get_timeseries_db().insert_many(new_entries)
+        else:
+            self.fail("No new entries were loaded from the new data file.")
+
+        # 5. Run the pipeline again to process the newly inserted entries
+        etc.runIntakePipeline(self.testUUID)
+
+        # 6. Retrieve the updated user profile after processing new data
+        updated_profile = edb.get_profile_db().find_one({"user_id": self.testUUID})
+        self.assertIsNotNone(updated_profile, "Profile should exist after inserting new data.")
+
+        # 7. Assert that the total trips have increased from 8 to 18
+        expected_final_trips = 18
+        self.assertEqual(
+            updated_profile["total_trips"],
+            expected_final_trips,
+            f"Expected total_trips to be {expected_final_trips}, got {updated_profile['total_trips']}"
+        )
+
+        # 8. Ensure that labeled_trips is not less than it was before
+        self.assertGreaterEqual(
+            updated_profile["labeled_trips"],
+            initial_labeled_trips,
+            f"Expected labeled_trips >= {initial_labeled_trips}, got {updated_profile['labeled_trips']}"
+        )
+
+
 if __name__ == '__main__':
     # Configure logging for the test
     etc.configLogging()