From 5b05398cdbf8e8f6af4aeeb1f8fa247f6d54bba1 Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Wed, 18 Dec 2024 23:38:45 +0200 Subject: [PATCH 1/2] Capture supervisor logs in the log file --- Cargo.lock | 16 ----- Cargo.toml | 1 - src/main.rs | 183 +++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 137 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2b0d9ad..2e2ceb0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5063,12 +5063,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "is_ci" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" - [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -12145,7 +12139,6 @@ dependencies = [ "subspace-rpc-primitives", "subspace-runtime-primitives", "subspace-service", - "supports-color", "sys-locale", "tempfile", "thiserror 2.0.1", @@ -12797,15 +12790,6 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" -[[package]] -name = "supports-color" -version = "3.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8775305acf21c96926c900ad056abeef436701108518cf890020387236ac5a77" -dependencies = [ - "is_ci", -] - [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index 4c31f6b..82e1384 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -105,7 +105,6 @@ subspace-proof-of-space-gpu = { git = "https://github.com/subspace/subspace", re subspace-rpc-primitives = { git = "https://github.com/subspace/subspace", rev = "ac3d5e5fad8007a15e7aff3790d4e7ceb3ad7a0f" } subspace-runtime-primitives = { git = "https://github.com/subspace/subspace", rev = "ac3d5e5fad8007a15e7aff3790d4e7ceb3ad7a0f" } subspace-service = { git = "https://github.com/subspace/subspace", rev = "ac3d5e5fad8007a15e7aff3790d4e7ceb3ad7a0f" } -supports-color = "3.0.1" sys-locale = "0.3.1" tempfile = "3.13.0" thiserror = "2.0.1" diff --git a/src/main.rs b/src/main.rs index f251d67..97987ac 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,6 +31,7 @@ use std::io::{Read, Write}; use std::path::{Path, PathBuf}; use std::process::{ExitCode, Termination}; use std::rc::Rc; +use std::sync::{Arc, Mutex}; use std::thread::available_parallelism; use std::time::{Duration, Instant}; use std::{env, fs, io, process}; @@ -38,8 +39,9 @@ use subspace_farmer::utils::run_future_in_dedicated_thread; use subspace_proof_of_space::chia::ChiaTable; use tracing::{debug, error, info, warn}; use tracing_subscriber::filter::LevelFilter; +use tracing_subscriber::fmt::Layer; use tracing_subscriber::prelude::*; -use tracing_subscriber::EnvFilter; +use tracing_subscriber::{EnvFilter, Registry}; #[global_allocator] static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -70,13 +72,48 @@ fn raise_fd_limit() { } Err(error) => { warn!( - "Failed to increase file descriptor limit for the process due to an error: {}.", + "Failed to increase file descriptor limit for the process due to an error: {}", error ); } } } +struct MultiWriter { + first: Arc>, + second: W2, +} + +impl Write for MultiWriter +where + W1: Write, + W2: Write, +{ + fn write(&mut self, buf: &[u8]) -> io::Result { + // Write both first + let result1 = self + .first + .lock() + .expect("Must not panic during write, crash otherwise; qed") + .write_all(buf); + let result2 = self.second.write_all(buf); + // Check errors afterwards + result1.and(result2).map(|()| buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + // flush both first + let result1 = self + .first + .lock() + .expect("Must not panic during write, crash otherwise; qed") + .flush(); + let result2 = self.second.flush(); + // Check errors afterwards + result1.and(result2) + } +} + #[derive(Debug, Copy, Clone)] enum AppStatusCode { Exit, @@ -176,38 +213,25 @@ impl Cli { fn app(self) -> AppStatusCode { let maybe_app_data_dir = Self::app_data_dir(); - { - let layer = tracing_subscriber::fmt::layer() - // TODO: Workaround for https://github.com/tokio-rs/tracing/issues/2214, also on - // Windows terminal doesn't support the same colors as bash does - .with_ansi(if cfg!(windows) { - false - } else { - supports_color::on(supports_color::Stream::Stderr).is_some() - }); - let filter = EnvFilter::builder() - .with_default_directive(LevelFilter::INFO.into()) - .from_env_lossy(); - if WINDOWS_SUBSYSTEM_WINDOWS { - if let Some(app_data_dir) = &maybe_app_data_dir { - let logger = std::sync::Mutex::new(Self::new_logger(app_data_dir)); - let layer = layer.with_writer(logger); - - tracing_subscriber::registry() - .with(layer.with_filter(filter)) - .init(); - } else { - tracing_subscriber::registry() - .with(layer.with_filter(filter)) - .init(); - } - #[cfg(windows)] - std::panic::set_hook(Box::new(tracing_panic::panic_hook)); + if WINDOWS_SUBSYSTEM_WINDOWS { + let (layer, filter) = Self::tracing_logger_init_common(); + + if let Some(app_data_dir) = &maybe_app_data_dir { + let logger = Mutex::new(Self::new_logger(app_data_dir)); + let layer = layer.with_writer(logger); + + tracing_subscriber::registry() + .with(layer.with_filter(filter)) + .init(); } else { tracing_subscriber::registry() .with(layer.with_filter(filter)) .init(); } + #[cfg(windows)] + std::panic::set_hook(Box::new(tracing_panic::panic_hook)); + } else { + Self::tracing_logger_init_simple(); } info!( @@ -336,6 +360,8 @@ impl Cli { let mut last_start; let program = Self::child_program()?; + let mut logger_initialized = false; + let mut maybe_logger = None; loop { let mut args = vec!["--child-process".to_string()]; @@ -363,11 +389,38 @@ impl Cli { .unchecked() .reader()?; - let mut logger = Self::new_logger(app_data_dir); + let logger = match maybe_logger.clone() { + Some(logger) => logger, + None => { + let logger = Arc::new(Mutex::new(Self::new_logger(app_data_dir))); + maybe_logger.replace(Arc::clone(&logger)); + + if !logger_initialized { + logger_initialized = true; + + let (layer, filter) = Self::tracing_logger_init_common(); + + let layer = layer.with_writer({ + let logger = Arc::clone(&logger); + + move || MultiWriter { + first: Arc::clone(&logger), + second: io::stderr(), + } + }); + + tracing_subscriber::registry() + .with(layer.with_filter(filter)) + .init(); + } + + logger + } + }; let mut log_read_buffer = vec![0u8; LOG_READ_BUFFER]; - let mut stdout = io::stdout(); + let mut stderr = io::stderr(); loop { match expression.read(&mut log_read_buffer) { Ok(bytes_count) => { @@ -376,12 +429,15 @@ impl Cli { } let write_result: io::Result<()> = try { - stdout.write_all(&log_read_buffer[..bytes_count])?; - logger.write_all(&log_read_buffer[..bytes_count])?; + stderr.write_all(&log_read_buffer[..bytes_count])?; + logger + .lock() + .expect("Must not panic, crash if it does; qed") + .write_all(&log_read_buffer[..bytes_count])?; }; if let Err(error) = write_result { - eprintln!("Error while writing output of child process: {error}"); + error!(%error, "Error while writing output of child process"); break; } } @@ -390,15 +446,19 @@ impl Cli { // Try again continue; } - eprintln!("Error while reading output of child process: {error}"); + error!(%error, "Error while reading output of child process"); break; } } } - stdout.flush()?; - if let Err(error) = logger.flush() { - eprintln!("Error while flushing logs: {error}"); + stderr.flush()?; + let flush_result = logger + .lock() + .expect("Must not panic, crash if it does; qed") + .flush(); + if let Err(error) = flush_result { + error!(%error, "Error while flushing logs"); } match expression.try_wait()? { @@ -411,6 +471,12 @@ impl Cli { } } } else if WINDOWS_SUBSYSTEM_WINDOWS { + if !logger_initialized { + logger_initialized = true; + + Self::tracing_logger_init_simple(); + } + Self::maybe_force_renderer(cmd(&program, args)) .stdin_null() .stdout_null() @@ -420,9 +486,15 @@ impl Cli { .run()? .status } else { - eprintln!("App data directory doesn't exist, not creating log file"); + if !logger_initialized { + logger_initialized = true; + + Self::tracing_logger_init_simple(); + } + + error!("App data directory doesn't exist, not creating log file"); Self::maybe_force_renderer(cmd(&program, args)) - // We use non-zero status codes and they don't mean error necessarily + // We use non-zero status codes, and they don't mean error necessarily .unchecked() .run()? .status @@ -431,15 +503,15 @@ impl Cli { match exit_status.code() { Some(status_code) => match AppStatusCode::from_status_code(status_code) { AppStatusCode::Exit => { - eprintln!("Application exited gracefully"); + error!("Application exited gracefully"); break; } AppStatusCode::Restart => { - eprintln!("Restarting application"); + error!("Restarting application"); continue; } AppStatusCode::Unknown(status_code) => { - eprintln!("Application exited with unexpected status code {status_code}"); + error!(%status_code, "Application exited with unexpected status code"); process::exit(status_code); } }, @@ -448,14 +520,14 @@ impl Cli { { use std::os::unix::process::ExitStatusExt; - eprintln!( + error!( "Application terminated by signal {:?}", exit_status.signal() ); } #[cfg(not(unix))] { - eprintln!("Application terminated by signal"); + error!("Application terminated by signal"); } if last_start.elapsed() >= MIN_RUNTIME_DURATION_FOR_AUTORESTART { self.after_crash = true; @@ -475,7 +547,7 @@ impl Cli { .and_then(|app_data_dir| { if !app_data_dir.exists() { if let Err(error) = fs::create_dir_all(&app_data_dir) { - eprintln!( + error!( "App data directory \"{}\" doesn't exist and can't be created: {}", app_data_dir.display(), error @@ -488,6 +560,25 @@ impl Cli { }) } + fn tracing_logger_init_simple() { + let (layer, filter) = Self::tracing_logger_init_common(); + + tracing_subscriber::registry() + .with(layer.with_filter(filter)) + .init(); + } + + fn tracing_logger_init_common() -> (Layer, EnvFilter) { + let layer = tracing_subscriber::fmt::layer() + // No escape sequences in logs since we write them to files + .with_ansi(false); + let filter = EnvFilter::builder() + .with_default_directive(LevelFilter::INFO.into()) + .from_env_lossy(); + + (layer, filter) + } + fn new_logger(app_data_dir: &Path) -> FileRotate { FileRotate::new( app_data_dir.join("space-acres.log"), From 89505bd11ba4be6f7738fb24d76169a1379842e5 Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Wed, 18 Dec 2024 23:40:39 +0200 Subject: [PATCH 2/2] Handle unknown status code exit as a reason for restart --- src/main.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main.rs b/src/main.rs index 97987ac..3c7d373 100644 --- a/src/main.rs +++ b/src/main.rs @@ -512,6 +512,12 @@ impl Cli { } AppStatusCode::Unknown(status_code) => { error!(%status_code, "Application exited with unexpected status code"); + + if last_start.elapsed() >= MIN_RUNTIME_DURATION_FOR_AUTORESTART { + self.after_crash = true; + continue; + } + process::exit(status_code); } },