From b0618ea31ecfd91e2d4fe4002de7229743c0f218 Mon Sep 17 00:00:00 2001 From: Delan Azabani Date: Mon, 30 Dec 2024 13:17:17 +0800 Subject: [PATCH] cohost2json: implement retries with exponential backoff (#33) --- Cargo.lock | 1 + Cargo.toml | 1 + nix/package.nix | 2 +- src/command/cohost2json.rs | 76 +++++++++++++++++++++++++++++++++++--- 4 files changed, 74 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ca881f..e7df5c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -171,6 +171,7 @@ version = "1.3.1" dependencies = [ "ammonia", "askama", + "bytes", "chrono", "clap", "comrak", diff --git a/Cargo.toml b/Cargo.toml index 4bf8aae..4aa41d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ eula = false [dependencies] ammonia = "4.0.0" askama = "0.12.1" +bytes = "1.7.1" chrono = "0.4.38" clap = { version = "4.5.23", features = ["derive"] } comrak = "0.28.0" diff --git a/nix/package.nix b/nix/package.nix index e1ce457..b96db8b 100644 --- a/nix/package.nix +++ b/nix/package.nix @@ -24,7 +24,7 @@ rustPlatform.buildRustPackage { }; # don't forget to update this hash when Cargo.lock or ${version} changes! 
- cargoHash = "sha256-kkzroGu5+h5g3qcjwsXsgBSF3uFaECfYmfm1hHHb1uE="; + cargoHash = "sha256-r4ZZ3IdM4DGeBtfx9DsBiy4vijM2UmT19G3YpnTHmzA="; meta = { description = "cohost-compatible blog engine and feed reader"; diff --git a/src/command/cohost2json.rs b/src/command/cohost2json.rs index c799798..16f23e4 100644 --- a/src/command/cohost2json.rs +++ b/src/command/cohost2json.rs @@ -2,15 +2,19 @@ use std::{ env::{self}, fs::{create_dir_all, File}, path::Path, + str, + time::Duration, }; +use bytes::Bytes; use jane_eyre::eyre::{self, bail, OptionExt}; use reqwest::{ header::{self, HeaderMap, HeaderValue}, - Client, + Client, Response, }; use scraper::{selector::Selector, Html}; use serde::de::DeserializeOwned; +use tokio::time::sleep; use tracing::{error, info, warn}; use crate::cohost::{ @@ -170,12 +174,74 @@ pub async fn main(args: Cohost2json) -> eyre::Result<()> { Ok(()) } +async fn get_text(client: &Client, url: &str) -> eyre::Result<String> { + get_with_retries(client, url, text).await +} + async fn get_json<T: DeserializeOwned>(client: &Client, url: &str) -> eyre::Result<T> { - info!("GET {url}"); - Ok(client.get(url).send().await?.json().await?) + get_with_retries(client, url, json).await } -async fn get_text(client: &Client, url: &str) -> eyre::Result<String> { +async fn get_with_retries<T>( + client: &Client, + url: &str, + mut and_then: impl FnMut(Bytes) -> eyre::Result<T>, +) -> eyre::Result<T> { + let mut retries = 4; + let mut wait = Duration::from_secs(4); + loop { + let result = get_response_once(client, url).await; + let status = result + .as_ref() + .map_or(None, |response| Some(response.status())); + let result = match match result { + Ok(response) => Ok(response.bytes().await), + Err(error) => Err(error), + } { + Ok(Ok(bytes)) => Ok(bytes), + Ok(Err(error)) | Err(error) => Err::<Bytes, eyre::Report>(error.into()), + }; + // retry requests if they are neither client errors (http 4xx), nor if they are successful + // (http 2xx) and the given fallible transformation fails. 
this includes server errors + // (http 5xx), and requests that failed in a way that yields no response. + let error = if status.is_some_and(|s| s.is_client_error()) { + // client errors (http 4xx) should not be retried. + bail!("GET request failed (no retries): http {:?}: {url}", status); + } else if status.is_some_and(|s| s.is_success()) { + // apply the given fallible transformation to the response body. + // if that succeeds, we succeed, otherwise we retry. + let result = result.and_then(&mut and_then); + if result.is_ok() { + return result; + } + result.err() + } else { + // when retrying server errors (http 5xx), error is None. + // when retrying failures with no response, error is Some. + result.err() + }; + if retries == 0 { + bail!( + "GET request failed (after retries): http {:?}: {url}", + status, + ); + } + warn!(?wait, ?status, url, ?error, "retrying failed GET request"); + sleep(wait).await; + wait *= 2; + retries -= 1; + } +} + +async fn get_response_once(client: &Client, url: &str) -> reqwest::Result<Response> { info!("GET {url}"); - Ok(client.get(url).send().await?.text().await?) + client.get(url).send().await +} + +fn text(body: Bytes) -> eyre::Result<String> { + Ok(str::from_utf8(&body)?.to_owned()) +} + +fn json<T: DeserializeOwned>(body: Bytes) -> eyre::Result<T> { + Ok(serde_json::from_slice(&body)?) }