From 94639d40bbf793240fa245f7f17526f7ed6560a8 Mon Sep 17 00:00:00 2001 From: Shankari Date: Fri, 3 Jan 2025 18:46:11 -0800 Subject: [PATCH] Catch pymongo errors, log details and affected document We recently encountered an issue in which `usercache/put` calls were failing because of an error with the message format. It was not easy to determine the object that was generating the error, since we only log successful updates. This change catches all mongo errors and logs the full backtrace, including the document that generated the error. Testing done: - Added additional debugging statements to `sync_phone_to_server` in `emission/net/api/usercache.py` ``` update_query = {'user_id': uuid, 'metadata.type': data["metadata"]["type"], 'metadata.write_ts': data["metadata"]["write_ts"], 'metadata.key': data["metadata"]["key"]} + time.sleep(2) + logging.debug("After sleep, continuing to processing") ``` - Started up a local server, and logged in from the emulator - Started a trip, started location tracking and ended the trip - While the `usercache/put` was processing the entries (slowed down because of the sleep), killed the local DB - put failed with an error message highlighting the document that was being saved (although it did not matter in for this error since it was a connection error and not a document format error) ``` 2025-01-03 12:11:54,717:DEBUG:12994228224:After sleep, continuing to processing 2025-01-03 12:11:54,720:DEBUG:12994228224:Updated result for user = 34da08c9-e7a7-4f91-bf65-bf6b6d970c32, key = stats/client_time, write_ts = 1735933996.229703 = {'n': 1, 'nModified': 0, 'upserted': ObjectId('6778448afacd6df071652448'), 'ok': 1.0, 'updatedExisting': False} 2025-01-03 12:11:56,726:DEBUG:12994228224:After sleep, continuing to processing 2025-01-03 12:11:56,728:DEBUG:12994228224:Updated result for user = 34da08c9-e7a7-4f91-bf65-bf6b6d970c32, key = stats/client_time, write_ts = 1735933996.2422519 = {'n': 1, 'nModified': 0, 'upserted': ObjectId('6778448cfacd6df07165244a'), 'ok': 1.0, 'updatedExisting': False} 2025-01-03 12:11:58,732:DEBUG:12994228224:After sleep, continuing to processing 2025-01-03 12:12:29,131:ERROR:12994228224:In sync_phone_to_server, while executing update_query={'user_id': UUID('34da08c9-e7a7-4f91-bf65-bf6b6d970c32'), 'metadata.type': 'message', 'metadata.write_ts': 1735933996.3793979, 'metadata.key': 'stats/client_time'} on document={'$set': {'data': {'ts': 1735933996.369, 'client_app_version': '1.9.6', 'name': 'onboarding_state', 'client_os_version': '18.1', 'reading': {'route': 1, 'opcode': 'nrelop_dev-emulator-study_default_testdbfail'}}, 'metadata': {'time_zone': 'America/Los_Angeles', 'plugin': 'none', 'write_ts': 1735933996.3793979, 'platform': 'ios', 'read_ts': 0, 'key': 'stats/client_time', 'type': 'message'}, 'user_id': UUID('34da08c9-e7a7-4f91-bf65-bf6b6d970c32')}} 2025-01-03 12:12:29,133:ERROR:12994228224:localhost:27017: [Errno 61] Connection refused, Timeout: 30s, Topology Description: ]> ``` --- emission/net/api/usercache.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/emission/net/api/usercache.py b/emission/net/api/usercache.py index d80cd03df..b2066b243 100644 --- a/emission/net/api/usercache.py +++ b/emission/net/api/usercache.py @@ -51,14 +51,19 @@ def sync_phone_to_server(uuid, data_from_phone): 'metadata.type': data["metadata"]["type"], 'metadata.write_ts': data["metadata"]["write_ts"], 'metadata.key': data["metadata"]["key"]} - result = usercache_db.update_one(update_query, - document, - upsert=True) - logging.debug("Updated result for user = %s, key = %s, write_ts = %s = %s" % - (uuid, data["metadata"]["key"], data["metadata"]["write_ts"], result.raw_result)) + try: + result = usercache_db.update_one(update_query, + document, + upsert=True) + logging.debug("Updated result for user = %s, key = %s, write_ts = %s = %s" % + (uuid, data["metadata"]["key"], data["metadata"]["write_ts"], result.raw_result)) - # I am not sure how to trigger a writer error to test this - # and whether this is the format expected from the server in the rawResult - if 'ok' in result.raw_result and result.raw_result['ok'] != 1.0: - logging.error("In sync_phone_to_server, err = %s" % result.raw_result['writeError']) - raise Exception() + # I am not sure how to trigger a writer error to test this + # and whether this is the format expected from the server in the rawResult + if 'ok' in result.raw_result and result.raw_result['ok'] != 1.0: + logging.error("In sync_phone_to_server, err = %s" % result.raw_result['writeError']) + raise Exception() + except pymongo.errors.PyMongoError as e: + logging.error(f"In sync_phone_to_server, while executing {update_query=} on {document=}") + logging.exception(e) + raise