From 1c2ee3ea3860f6c9b6545606f2a4e6933489fa3b Mon Sep 17 00:00:00 2001 From: jrconlin Date: Thu, 25 Jun 2020 19:42:21 +0000 Subject: [PATCH] bug: Add additional logging around APNS HTTP2 connectivity Issue #1393 notes that sending push messages across APNS seems to work great after a deploy, but then degrades after a week or so. #1394 is a possible work-around (by double-pooling connections and using a dedicated connection terminator), but it's messy and hacky. What's really needed is a bit more visibility into what may be happening, and that will involve logging all APNS communication exceptions reliably. Issue #1393 --- autopush/router/apnsrouter.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/autopush/router/apnsrouter.py b/autopush/router/apnsrouter.py index eb04575c..b72a33c7 100644 --- a/autopush/router/apnsrouter.py +++ b/autopush/router/apnsrouter.py @@ -147,29 +147,32 @@ def _route(self, notification, router_data): apns_id = str(uuid.uuid4()).lower() # APNs may force close a connection on us without warning. # if that happens, retry the message. - success = False try: apns_client.send(router_token=router_token, payload=payload, apns_id=apns_id) - success = True - except ConnectionError: - self.metrics.increment("notification.bridge.connection.error", - tags=make_tags( - self._base_tags, - application=rel_channel, - reason="connection_error")) - except (HTTP20Error, socket.error): + except Exception as e: + # We sometimes see strange errors around sending push notifications + # to APNS. We get reports that after a new deployment things work, + # but then after a week or so, messages across the APNS bridge + # start to fail. The connections appear to be working correctly, + # so we don't think that this is a problem related to how we're + # connecting. 
+ if isinstance(e, ConnectionError): + reason = "connection_error" + elif isinstance(e, (HTTP20Error, socket.error)): + reason = "http2_error" + else: + reason = "unknown" self.metrics.increment("notification.bridge.connection.error", tags=make_tags(self._base_tags, application=rel_channel, - reason="http2_error")) - if not success: + reason=reason)) raise RouterException( - "Server error", + "APNS Router Error: {}".format(str(e)), status_code=502, response_body="APNS returned an error processing request", - log_exception=False, ) + location = "%s/m/%s" % (self.conf.endpoint_url, notification.version) self.metrics.increment("notification.bridge.sent", tags=make_tags(self._base_tags,