diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index b98cf706d343..47fc9cb7fe9d 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -34,6 +34,7 @@ //! -r http://pg-ext-s3-gateway \ //! ``` use std::collections::HashMap; +use std::ffi::OsString; use std::fs::File; use std::path::Path; use std::process::exit; @@ -44,7 +45,7 @@ use std::{thread, time::Duration}; use anyhow::{Context, Result}; use chrono::Utc; -use clap::Arg; +use clap::Parser; use compute_tools::disk_quota::set_disk_quota; use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static; use signal_hook::consts::{SIGQUIT, SIGTERM}; @@ -73,10 +74,75 @@ use utils::failpoint_support; // in-case of not-set environment var const BUILD_TAG_DEFAULT: &str = "latest"; +// Compatibility hack: if the control plane specified any remote-ext-config +// use the default value for extension storage proxy gateway. +// Remove this once the control plane is updated to pass the gateway URL +fn parse_remote_ext_config(arg: &str) -> Result { + if arg.starts_with("http") { + Ok(arg.trim_end_matches('/').to_string()) + } else { + Ok("http://pg-ext-s3-gateway".to_string()) + } +} + +#[derive(Parser)] +#[command(rename_all = "kebab-case")] +struct Cli { + #[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")] + pub pgbin: String, + + #[arg(short = 'r', long, value_parser = parse_remote_ext_config)] + pub remote_ext_config: Option, + + #[arg(long, default_value_t = 3080)] + pub http_port: u16, + + #[arg(short = 'D', long, value_name = "DATADIR")] + pub pgdata: String, + + #[arg(short = 'C', long, value_name = "DATABASE_URL")] + pub connstr: String, + + #[cfg(target_os = "linux")] + #[arg(long, default_value = "neon-postgres")] + pub cgroup: String, + + #[cfg(target_os = "linux")] + #[arg( + long, + default_value = "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor" + )] + pub filecache_connstr: String, + + #[cfg(target_os = "linux")] + #[arg(long, default_value = "0.0.0.0:10301")] + pub vm_monitor_addr: String, + + #[arg(long, action = clap::ArgAction::SetTrue)] + pub resize_swap_on_bind: bool, + + #[arg(long)] + pub set_disk_quota_for_fs: Option, + + #[arg(short = 's', long = "spec", group = "spec")] + pub spec_json: Option, + + #[arg(short = 'S', long, group = "spec-path")] + pub spec_path: Option, + + #[arg(short = 'i', long, group = "compute-id", conflicts_with_all = ["spec", "spec-path"])] + pub compute_id: Option, + + #[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], requires = "compute-id", value_name = "CONTROL_PLANE_API_BASE_URL")] + pub control_plane_uri: Option, +} + fn main() -> Result<()> { - let scenario = failpoint_support::init(); + let cli = Cli::parse(); - let (build_tag, clap_args) = init()?; + let build_tag = init()?; + + let scenario = failpoint_support::init(); // enable core dumping for all child processes setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?; @@ -85,13 +151,11 @@ fn main() -> Result<()> { // Enter startup tracing context let _startup_context_guard = startup_context_from_env(); - let cli_args = process_cli(&clap_args)?; - - let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?; + let cli_spec = try_spec_from_cli(&cli)?; - let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?; + let compute = wait_spec(build_tag, &cli, cli_spec)?; - start_postgres(&clap_args, wait_spec_result)? + start_postgres(&cli, compute)? // Startup is finished, exit the startup tracing span }; @@ -108,7 +172,7 @@ fn main() -> Result<()> { deinit_and_exit(wait_pg_result); } -fn init() -> Result<(String, clap::ArgMatches)> { +fn init() -> Result { init_tracing_and_logging(DEFAULT_LOG_LEVEL)?; let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?; @@ -123,66 +187,7 @@ fn init() -> Result<(String, clap::ArgMatches)> { .to_string(); info!("build_tag: {build_tag}"); - Ok((build_tag, cli().get_matches())) -} - -fn process_cli(matches: &clap::ArgMatches) -> Result { - let pgbin_default = "postgres"; - let pgbin = matches - .get_one::("pgbin") - .map(|s| s.as_str()) - .unwrap_or(pgbin_default); - - let ext_remote_storage = matches - .get_one::("remote-ext-config") - // Compatibility hack: if the control plane specified any remote-ext-config - // use the default value for extension storage proxy gateway. - // Remove this once the control plane is updated to pass the gateway URL - .map(|conf| { - if conf.starts_with("http") { - conf.trim_end_matches('/') - } else { - "http://pg-ext-s3-gateway" - } - }); - - let http_port = *matches - .get_one::("http-port") - .expect("http-port is required"); - let pgdata = matches - .get_one::("pgdata") - .expect("PGDATA path is required"); - let connstr = matches - .get_one::("connstr") - .expect("Postgres connection string is required"); - let spec_json = matches.get_one::("spec"); - let spec_path = matches.get_one::("spec-path"); - let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind"); - let set_disk_quota_for_fs = matches.get_one::("set-disk-quota-for-fs"); - - Ok(ProcessCliResult { - connstr, - pgdata, - pgbin, - ext_remote_storage, - http_port, - spec_json, - spec_path, - resize_swap_on_bind, - set_disk_quota_for_fs, - }) -} - -struct ProcessCliResult<'clap> { - connstr: &'clap str, - pgdata: &'clap str, - pgbin: &'clap str, - ext_remote_storage: Option<&'clap str>, - http_port: u16, - spec_json: Option<&'clap String>, - spec_path: Option<&'clap String>, - resize_swap_on_bind: bool, - set_disk_quota_for_fs: Option<&'clap String>, + Ok(build_tag) } fn startup_context_from_env() -> Option { @@ -235,19 +240,9 @@ fn startup_context_from_env() -> Option { } } -fn try_spec_from_cli( - matches: &clap::ArgMatches, - ProcessCliResult { - spec_json, - spec_path, - .. - }: &ProcessCliResult, -) -> Result { - let compute_id = matches.get_one::("compute-id"); - let control_plane_uri = matches.get_one::("control-plane-uri"); - +fn try_spec_from_cli(cli: &Cli) -> Result { // First, try to get cluster spec from the cli argument - if let Some(spec_json) = spec_json { + if let Some(ref spec_json) = cli.spec_json { info!("got spec from cli argument {}", spec_json); return Ok(CliSpecParams { spec: Some(serde_json::from_str(spec_json)?), @@ -256,7 +251,7 @@ fn try_spec_from_cli( } // Second, try to read it from the file if path is provided - if let Some(spec_path) = spec_path { + if let Some(ref spec_path) = cli.spec_path { let file = File::open(Path::new(spec_path))?; return Ok(CliSpecParams { spec: Some(serde_json::from_reader(file)?), @@ -264,17 +259,20 @@ fn try_spec_from_cli( }); } - let Some(compute_id) = compute_id else { + if cli.compute_id.is_none() { panic!( "compute spec should be provided by one of the following ways: \ --spec OR --spec-path OR --control-plane-uri and --compute-id" ); }; - let Some(control_plane_uri) = control_plane_uri else { + if cli.control_plane_uri.is_none() { panic!("must specify both --control-plane-uri and --compute-id or none"); }; - match get_spec_from_control_plane(control_plane_uri, compute_id) { + match get_spec_from_control_plane( + cli.control_plane_uri.as_ref().unwrap(), + cli.compute_id.as_ref().unwrap(), + ) { Ok(spec) => Ok(CliSpecParams { spec, live_config_allowed: true, @@ -298,21 +296,12 @@ struct CliSpecParams { fn wait_spec( build_tag: String, - ProcessCliResult { - connstr, - pgdata, - pgbin, - ext_remote_storage, - resize_swap_on_bind, - set_disk_quota_for_fs, - http_port, - .. - }: ProcessCliResult, + cli: &Cli, CliSpecParams { spec, live_config_allowed, }: CliSpecParams, -) -> Result { +) -> Result> { let mut new_state = ComputeState::new(); let spec_set; @@ -324,7 +313,7 @@ fn wait_spec( } else { spec_set = false; } - let connstr = Url::parse(connstr).context("cannot parse connstr as a URL")?; + let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?; let conn_conf = postgres::config::Config::from_str(connstr.as_str()) .context("cannot build postgres config from connstr")?; let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str()) @@ -333,14 +322,14 @@ fn wait_spec( connstr, conn_conf, tokio_conn_conf, - pgdata: pgdata.to_string(), - pgbin: pgbin.to_string(), - pgversion: get_pg_version_string(pgbin), - http_port, + pgdata: cli.pgdata.clone(), + pgbin: cli.pgbin.clone(), + pgversion: get_pg_version_string(&cli.pgbin), + http_port: cli.http_port, live_config_allowed, state: Mutex::new(new_state), state_changed: Condvar::new(), - ext_remote_storage: ext_remote_storage.map(|s| s.to_string()), + ext_remote_storage: cli.remote_ext_config.clone(), ext_download_progress: RwLock::new(HashMap::new()), build_tag, }; @@ -357,7 +346,7 @@ fn wait_spec( // Launch http service first, so that we can serve control-plane requests // while configuration is still in progress. let _http_handle = - launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread"); + launch_http_server(cli.http_port, &compute).expect("cannot launch http endpoint thread"); if !spec_set { // No spec provided, hang waiting for it. @@ -389,27 +378,12 @@ fn wait_spec( launch_lsn_lease_bg_task_for_static(&compute); - Ok(WaitSpecResult { - compute, - resize_swap_on_bind, - set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(), - }) -} - -struct WaitSpecResult { - compute: Arc, - resize_swap_on_bind: bool, - set_disk_quota_for_fs: Option, + Ok(compute) } fn start_postgres( - // need to allow unused because `matches` is only used if target_os = "linux" - #[allow(unused_variables)] matches: &clap::ArgMatches, - WaitSpecResult { - compute, - resize_swap_on_bind, - set_disk_quota_for_fs, - }: WaitSpecResult, + cli: &Cli, + compute: Arc, ) -> Result<(Option, StartPostgresResult)> { // We got all we need, update the state. let mut state = compute.state.lock().unwrap(); @@ -437,7 +411,7 @@ fn start_postgres( let mut delay_exit = false; // Resize swap to the desired size if the compute spec says so - if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) { + if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) { // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion // *before* starting postgres. // @@ -464,9 +438,9 @@ fn start_postgres( // Set disk quota if the compute spec says so if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) = - (disk_quota_bytes, set_disk_quota_for_fs) + (disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref()) { - match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) { + match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) { Ok(()) => { let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display. info!(%disk_quota_bytes, %size_mib, "set disk quota"); @@ -509,13 +483,7 @@ fn start_postgres( if #[cfg(target_os = "linux")] { use std::env; use tokio_util::sync::CancellationToken; - let vm_monitor_addr = matches - .get_one::("vm-monitor-addr") - .expect("--vm-monitor-addr should always be set because it has a default arg"); - let file_cache_connstr = matches.get_one::("filecache-connstr"); - let cgroup = matches.get_one::("cgroup"); - // Only make a runtime if we need to. // Note: it seems like you can make a runtime in an inner scope and // if you start a task in it it won't be dropped. However, make it // in the outermost scope just to be safe. @@ -538,15 +506,15 @@ fn start_postgres( let pgconnstr = if disable_lfc_resizing.unwrap_or(false) { None } else { - file_cache_connstr.cloned() + Some(cli.filecache_connstr.clone()) }; let vm_monitor = rt.as_ref().map(|rt| { rt.spawn(vm_monitor::start( Box::leak(Box::new(vm_monitor::Args { - cgroup: cgroup.cloned(), + cgroup: Some(cli.cgroup.clone()), pgconnstr, - addr: vm_monitor_addr.clone(), + addr: cli.vm_monitor_addr.clone(), })), token.clone(), )) @@ -702,105 +670,6 @@ fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! { exit(exit_code.unwrap_or(1)) } -fn cli() -> clap::Command { - // Env variable is set by `cargo` - let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown"); - clap::Command::new("compute_ctl") - .version(version) - .arg( - Arg::new("http-port") - .long("http-port") - .value_name("HTTP_PORT") - .default_value("3080") - .value_parser(clap::value_parser!(u16)) - .required(false), - ) - .arg( - Arg::new("connstr") - .short('C') - .long("connstr") - .value_name("DATABASE_URL") - .required(true), - ) - .arg( - Arg::new("pgdata") - .short('D') - .long("pgdata") - .value_name("DATADIR") - .required(true), - ) - .arg( - Arg::new("pgbin") - .short('b') - .long("pgbin") - .default_value("postgres") - .value_name("POSTGRES_PATH"), - ) - .arg( - Arg::new("spec") - .short('s') - .long("spec") - .value_name("SPEC_JSON"), - ) - .arg( - Arg::new("spec-path") - .short('S') - .long("spec-path") - .value_name("SPEC_PATH"), - ) - .arg( - Arg::new("compute-id") - .short('i') - .long("compute-id") - .value_name("COMPUTE_ID"), - ) - .arg( - Arg::new("control-plane-uri") - .short('p') - .long("control-plane-uri") - .value_name("CONTROL_PLANE_API_BASE_URI"), - ) - .arg( - Arg::new("remote-ext-config") - .short('r') - .long("remote-ext-config") - .value_name("REMOTE_EXT_CONFIG"), - ) - // TODO(fprasx): we currently have default arguments because the cloud PR - // to pass them in hasn't been merged yet. We should get rid of them once - // the PR is merged. - .arg( - Arg::new("vm-monitor-addr") - .long("vm-monitor-addr") - .default_value("0.0.0.0:10301") - .value_name("VM_MONITOR_ADDR"), - ) - .arg( - Arg::new("cgroup") - .long("cgroup") - .default_value("neon-postgres") - .value_name("CGROUP"), - ) - .arg( - Arg::new("filecache-connstr") - .long("filecache-connstr") - .default_value( - "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor", - ) - .value_name("FILECACHE_CONNSTR"), - ) - .arg( - Arg::new("resize-swap-on-bind") - .long("resize-swap-on-bind") - .action(clap::ArgAction::SetTrue), - ) - .arg( - Arg::new("set-disk-quota-for-fs") - .long("set-disk-quota-for-fs") - .value_name("SET_DISK_QUOTA_FOR_FS") - ) -} - /// When compute_ctl is killed, send also termination signal to sync-safekeepers /// to prevent leakage. TODO: it is better to convert compute_ctl to async and /// wait for termination which would be easy then. @@ -810,7 +679,14 @@ fn handle_exit_signal(sig: i32) { exit(1); } -#[test] -fn verify_cli() { - cli().debug_assert() +#[cfg(test)] +mod test { + use clap::CommandFactory; + + use super::Cli; + + #[test] + fn verify_cli() { + Cli::command().debug_assert() + } }