Skip to content

Commit

Permalink
Update docstrings and comments
Browse files Browse the repository at this point in the history
Signed-off-by: Jesse Whitehouse <[email protected]>
  • Loading branch information
Jesse Whitehouse committed Aug 8, 2023
1 parent 10180a2 commit 9e92ecb
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 36 deletions.
130 changes: 94 additions & 36 deletions src/databricks/sql/auth/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,50 @@ def get(cls, value: str):

class DatabricksRetryPolicy(Retry):
"""
Implements the entirety of our retry policy across connectors.
Same as urllib3.Retry but implements retry_wait_min and retry_wait_max as part of its exponential backoff calculation
Retry Policy arguments are defined in the docstring for our ThriftBackend class
delay_default is the time in seconds the connector will wait between requests polling for a GetOperationStatus request.
Retry.allowed_methods is set to POST
Retry.status_forcelist is set to codes 429 and 503
kwargs is used by urllib3 to pass state between successive Retry iterations.
Some of these parameters are counters that change from one retry to the next
Others are like local constants that do not change between retries
Implements our v3 retry policy by extending urllib3's robust default retry behaviour.
See `should_retry()` for details about we do and do not retry.
:param delay_min:
Float of seconds for the minimum delay between retries. Passed to urllib3 as its
backoff_factor.
:param delay_max:
Float of seconds for the maximum delay between retries. Passed to urllib3 as its
backoff_max
:param stop_after_attempts_count:
Integer maximum number of attempts that will be retried. Passed to urllib3 as its
total.
:param stop_after_attempts_duration:
Float of maximum number of seconds from the beginning of the first request that a
request may be retried. This behaviour is not implemented in urllib3.
:param delay_default:
Float of seconds the connector will wait between sucessive GetOperationStatus
requests. This parameter is not used to retry failed network requests. We include
it in this class to keep all retry behaviour encapsulated in this file.
:param force_dangerous_codes:
List of integer HTTP status codes that the connector will retry, even for dangerous
commands like ExecuteStatement. This is passed to urllib3 by extending its status_forcelist
:param _retry_start_time:
Float unix timestamp. Used to monitor the overall request duration across successive
retries. Never set this value directly. Use self.start_retry_time() instead. Users
never set this value. It is set by ThriftBackend immediately before issuing a network
request.
:param _command_type:
CommandType of the current request being retried. Used to modify retry behaviour based
on the type of Thrift command being issued. See self.should_retry() for details. Users
never set this value directly. It is set by ThriftBackend immediately before issuing
a network request.
:param urllib3_kwargs:
Dictionary of arguments that are passed to Retry.__init__. Any setting of Retry() that
Databricks does not override or extend may be modified here.
"""

def __init__(
Expand Down Expand Up @@ -130,7 +159,7 @@ def new(self, **urllib3_incremented_counters: typing.Any) -> Retry:
The arguments it passes to `.new()` (total, connect, read, etc.) are those which modified by `.increment()`.
Since our subclass has a new __init__ signature and requires custom state variables, we override the method
to pipe our Databricks-specific state while preserving the super-class's behaviour behaviour.
to pipe our Databricks-specific state while preserving the super-class's behaviour.
"""

# These arguments will match the function signature for self.__init__
Expand Down Expand Up @@ -196,7 +225,9 @@ def get_operation_status_delay(self) -> float:
return self.delay_default

def start_retry_timer(self):
"""Timer is used to monitor the overall time across successive requests"""
"""Timer is used to monitor the overall time across successive requests
Should only be called by ThriftBackend before sending a Thrift command"""
self._retry_start_time = time.time()

def check_timer_duration(self):
Expand All @@ -212,17 +243,13 @@ def check_proposed_wait(self, proposed_wait: Union[int, float]) -> None:
f"Retry request would exceed Retry policy max retry duration of {self.stop_after_attempts_duration} seconds"
)

def get_backoff_time(self) -> float:
"""Calls urllib3's built-in get_backoff_time but raises an exception if stop_after_attempts_duration would be exceeded.
If proposed backoff exceeds the configured delay_max, the delay_max is used instead"""

proposed_backoff = super().get_backoff_time()
proposed_backoff = min(proposed_backoff, self.delay_max)
self.check_proposed_wait(proposed_backoff)
def sleep_for_retry(self, response: BaseHTTPResponse) -> bool: # type: ignore
"""Sleeps for the duration specified in the response Retry-After header, if present
return proposed_backoff
A MaxRetryDurationError will be raised if doing so would exceed self.max_attempts_duration
def sleep_for_retry(self, response: BaseHTTPResponse) -> bool: # type: ignore
This method is only called by urllib3 internals.
"""
retry_after = self.get_retry_after(response)
if retry_after:
self.check_proposed_wait(retry_after)
Expand All @@ -231,18 +258,47 @@ def sleep_for_retry(self, response: BaseHTTPResponse) -> bool: # type: ignore

return False

def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]:
"""We retry by default with the following exceptions:
def get_backoff_time(self) -> float:
"""Calls urllib3's built-in get_backoff_time.
Any 200 status code -> Because the request succeeded
Any 501 status code -> Because it's not recoverable ever. Raise a NonRecoverableNetworkError.
Any 404 if the command was CancelOperation or CloseSession and this is not the first request
Any ExecuteStatement command unless it has code 429 or 504
Never returns a value larger than self.delay_max
A MaxRetryDurationError will be raised if the calculated backoff would exceed self.max_attempts_duration
Note: within urllib3, a backoff is only calculated in cases where a Retry-After header is not present
in the previous unsuccessful request.
"""

# TODO: Port in the NoRetryReason enum for logging?
# TODO: something needs to go here about max wall-clock duration.
# It doesn't belong in the backoff calculation method
proposed_backoff = super().get_backoff_time()
proposed_backoff = min(proposed_backoff, self.delay_max)
self.check_proposed_wait(proposed_backoff)

return proposed_backoff

def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]:
"""This method encapsulates the connector's approach to retries.
We always retry a request unless one of these conditions is met:
1. The request received a 200 (Success) status code
Because the request succeeded. No retry is required.
2. The request received a 501 (Not Implemented) status code
Because this request can never succeed.
3. The request received a 404 (Not Found) code and the request CommandType
was CloseSession or CloseOperation. This code indicates that the session
or cursor was already closed. Further retries will always return the same
code.
4. The request CommandType was ExecuteStatement and the HTTP code does not
appear in the default status_forcelist or force_dangerous_codes list. By
default, thisd means ExecuteStatement is only retried for codes 429 and 503.
This limit prevents automatically retrying non-idempotent commands that could
be destructive.
5. OSErrors (handled automatically by urllib3 outside this method)
6. Redirects (handled automatically by urllib3 outside this method)
Returns True if the request should be retried. Returns False or raises an exception
if a retry would violate the configured policy.
"""

# Request succeeded. Don't retry.
if status_code == 200:
Expand Down Expand Up @@ -309,7 +365,9 @@ def is_retry(
self, method: str, status_code: int, has_retry_after: bool = False
) -> bool:
"""
Overriden to support varied behaviour based on the current CommandType
Called by urllib3 when determining whether or not to retry
Logs a debug message if the request will be retried
"""

should_retry, msg = self.should_retry(method, status_code)
Expand Down
1 change: 1 addition & 0 deletions src/databricks/sql/thrift_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ def __init__(

self._request_lock = threading.RLock()

# TODO: Move this bounding logic into DatabricksRetryPolicy for v3 (PECO-918)
def _initialize_retry_args(self, kwargs):
# Configure retries & timing: use user-settings or defaults, and bound
# by policy. Log.warn when given param gets restricted.
Expand Down

0 comments on commit 9e92ecb

Please sign in to comment.