From f493e6d37c03eed13aec76012bb2ef191deff27b Mon Sep 17 00:00:00 2001 From: Jorge Prendes Date: Wed, 29 Jan 2025 17:53:36 +0000 Subject: [PATCH] use a dedicated zygote process per container Signed-off-by: Jorge Prendes --- .../src/sys/unix/container/container.rs | 114 ++++++++++++++++++ .../src/sys/unix/container/instance.rs | 101 ++++++---------- .../src/sys/unix/container/mod.rs | 1 + 3 files changed, 155 insertions(+), 61 deletions(-) create mode 100644 crates/containerd-shim-wasm/src/sys/unix/container/container.rs diff --git a/crates/containerd-shim-wasm/src/sys/unix/container/container.rs b/crates/containerd-shim-wasm/src/sys/unix/container/container.rs new file mode 100644 index 000000000..ccd6564c8 --- /dev/null +++ b/crates/containerd-shim-wasm/src/sys/unix/container/container.rs @@ -0,0 +1,114 @@ +use std::cell::RefCell; +use std::mem::transmute; + +use anyhow::{anyhow, Context}; +use libcontainer::container::Container as YoukiContainer; +use libcontainer::signal::Signal; +use serde::de::DeserializeOwned; +use serde::Serialize; +use zygote::{WireError, Zygote}; + +thread_local! { + // The youki's Container will live in a static inside the zygote process. + // Reserve some space for it here. + static CONTAINER: RefCell> = RefCell::default(); +} + +// The exposed container is just a wrapper around the zygote process +pub struct Container(Zygote); + +// Constructor methods +impl Container { + pub fn build( + f: fn(Arg) -> anyhow::Result, + arg: Arg, + ) -> anyhow::Result { + let zygote = Zygote::global().spawn(); + let container = Container(zygote); + container.run_init(f, arg)?; + + Ok(container) + } +} + +// Wrap the youki's Container methods that we use +impl Container { + pub fn pid(&self) -> anyhow::Result { + self.run(|c, _| Ok(c.pid().map(|pid| pid.as_raw())), ())? 
+ .context("Faield to obtain PID") + } + + pub fn start(&self) -> anyhow::Result<()> { + self.run(|c, _| Ok(c.start()?), ()) + } + pub fn kill(&self, signal: u32) -> anyhow::Result<()> { + self.run( + |c, signal| { + let signal = Signal::try_from(signal as i32).context("invalid signal number")?; + Ok(c.kill(signal, true)?) + }, + signal, + ) + } + pub fn delete(&self) -> anyhow::Result<()> { + self.run(|c, _| Ok(c.delete(true)?), ()) + } +} + +impl Container { + fn run_impl< + Arg: Serialize + DeserializeOwned + 'static, + T: Serialize + DeserializeOwned + 'static, + >( + &self, + f: fn(&mut Option, Arg) -> anyhow::Result, + arg: Arg, + ) -> anyhow::Result { + self.0 + .run( + |(f, arg)| { + let f: fn(&mut Option, Arg) -> anyhow::Result = + unsafe { transmute(f) }; + let x = CONTAINER.with_borrow_mut(|c| -> Result { + Ok(f(c, arg).map_err(|e| std::io::Error::other(e))?) + }); + x + }, + (f as usize, arg), + ) + .map_err(|e| anyhow!(e)) + } + + fn run_init( + &self, + f: fn(Arg) -> anyhow::Result, + arg: Arg, + ) -> anyhow::Result<()> { + self.run_impl( + |c: &mut Option, (f, arg): (usize, Arg)| -> anyhow::Result<()> { + let f: fn(Arg) -> anyhow::Result = unsafe { transmute(f) }; + *c = Some(f(arg)?); + Ok(()) + }, + (f as usize, arg), + ) + } + + fn run< + Arg: Serialize + DeserializeOwned + 'static, + T: Serialize + DeserializeOwned + 'static, + >( + &self, + f: fn(&mut YoukiContainer, Arg) -> anyhow::Result, + arg: Arg, + ) -> anyhow::Result { + self.run_impl( + |c: &mut Option, (f, arg): (usize, Arg)| -> anyhow::Result { + let f: fn(&mut YoukiContainer, Arg) -> anyhow::Result = unsafe { transmute(f) }; + let c = c.as_mut().expect("Container not initialized"); + f(c, arg) + }, + (f as usize, arg), + ) + } +} diff --git a/crates/containerd-shim-wasm/src/sys/unix/container/instance.rs b/crates/containerd-shim-wasm/src/sys/unix/container/instance.rs index 8694f1444..a08e47f51 100644 --- a/crates/containerd-shim-wasm/src/sys/unix/container/instance.rs +++ 
b/crates/containerd-shim-wasm/src/sys/unix/container/instance.rs @@ -1,21 +1,17 @@ use std::marker::PhantomData; use std::path::Path; -use std::sync::Mutex; use std::thread; use std::time::Duration; -use anyhow::Context; use chrono::{DateTime, Utc}; use libcontainer::container::builder::ContainerBuilder; -use libcontainer::container::Container; -use libcontainer::signal::Signal; use libcontainer::syscall::syscall::SyscallType; use nix::errno::Errno; use nix::sys::wait::{waitid, Id as WaitID, WaitPidFlag, WaitStatus}; use nix::unistd::Pid; use oci_spec::image::Platform; -use zygote::{WireError, Zygote}; +use super::container::Container; use crate::container::Engine; use crate::sandbox::async_utils::AmbientRuntime as _; use crate::sandbox::instance_utils::determine_rootdir; @@ -30,7 +26,7 @@ const DEFAULT_CONTAINER_ROOT_DIR: &str = "/run/containerd"; pub struct Instance { exit_code: WaitableCell<(u32, DateTime)>, - container: Mutex, + container: Container, id: String, _phantom: PhantomData, } @@ -49,48 +45,44 @@ impl SandboxInstance for Instance { (vec![], Platform::default()) }); - let (root, state) = Zygote::global() - .run( - |(id, cfg, modules, platform)| -> Result<_, WireError> { - let namespace = cfg.get_namespace(); - - let bundle = cfg.get_bundle().to_path_buf(); - let rootdir = Path::new(DEFAULT_CONTAINER_ROOT_DIR).join(E::name()); - let rootdir = determine_rootdir(&bundle, &namespace, rootdir)?; - let engine = E::default(); - - let mut builder = ContainerBuilder::new(id.clone(), SyscallType::Linux) - .with_executor(Executor::new(engine, modules, platform)) - .with_root_path(rootdir.clone())?; - - if let Ok(f) = open(cfg.get_stdin()) { - builder = builder.with_stdin(f); - } - if let Ok(f) = open(cfg.get_stdout()) { - builder = builder.with_stdout(f); - } - if let Ok(f) = open(cfg.get_stderr()) { - builder = builder.with_stderr(f); - } - - let Container { root, state } = builder - .as_init(&bundle) - .as_sibling(true) - .with_systemd(false) - .build()?; - - 
// Container is not serializable, but its parts are - Ok((root, state)) - }, - (id.clone(), cfg.clone(), modules, platform), - ) - .map_err(|e| SandboxError::Others(e.to_string()))?; - let container = Container { root, state }; + let container = Container::build( + |(id, cfg, modules, platform)| { + let namespace = cfg.get_namespace(); + + let bundle = cfg.get_bundle().to_path_buf(); + let rootdir = Path::new(DEFAULT_CONTAINER_ROOT_DIR).join(E::name()); + let rootdir = determine_rootdir(&bundle, &namespace, rootdir)?; + let engine = E::default(); + + let mut builder = ContainerBuilder::new(id.clone(), SyscallType::Linux) + .with_executor(Executor::new(engine, modules, platform)) + .with_root_path(rootdir.clone())?; + + if let Ok(f) = open(cfg.get_stdin()) { + builder = builder.with_stdin(f); + } + if let Ok(f) = open(cfg.get_stdout()) { + builder = builder.with_stdout(f); + } + if let Ok(f) = open(cfg.get_stderr()) { + builder = builder.with_stderr(f); + } + + let container = builder + .as_init(&bundle) + .as_sibling(true) + .with_systemd(false) + .build()?; + + Ok(container) + }, + (id.clone(), cfg.clone(), modules, platform), + )?; Ok(Self { id, exit_code: WaitableCell::new(), - container: Mutex::new(container), + container, _phantom: Default::default(), }) } @@ -104,10 +96,8 @@ impl SandboxInstance for Instance { // make sure we have an exit code by the time we finish (even if there's a panic) let guard = self.exit_code.set_guard_with(|| (137, Utc::now())); - let mut container = self.container.lock().expect("Poisoned mutex"); - let pid = container.pid().context("failed to get pid")?.as_raw(); - - container.start()?; + let pid = self.container.pid()?; + self.container.start()?; let exit_code = self.exit_code.clone(); thread::spawn(move || { @@ -137,15 +127,7 @@ impl SandboxInstance for Instance { #[cfg_attr(feature = "tracing", tracing::instrument(parent = tracing::Span::current(), skip_all, level = "Info"))] fn kill(&self, signal: u32) -> Result<(), 
SandboxError> { log::info!("sending signal {signal} to instance: {}", self.id); - let signal = Signal::try_from(signal as i32).map_err(|err| { - SandboxError::InvalidArgument(format!("invalid signal number: {}", err)) - })?; - - self.container - .lock() - .expect("Poisoned mutex") - .kill(signal, true)?; - + self.container.kill(signal)?; Ok(()) } @@ -154,10 +136,7 @@ impl SandboxInstance for Instance { #[cfg_attr(feature = "tracing", tracing::instrument(parent = tracing::Span::current(), skip_all, level = "Info"))] fn delete(&self) -> Result<(), SandboxError> { log::info!("deleting instance: {}", self.id); - self.container - .lock() - .expect("Poisoned mutex") - .delete(true)?; + self.container.delete()?; Ok(()) } diff --git a/crates/containerd-shim-wasm/src/sys/unix/container/mod.rs b/crates/containerd-shim-wasm/src/sys/unix/container/mod.rs index 4bbabc2e5..f8d448e16 100644 --- a/crates/containerd-shim-wasm/src/sys/unix/container/mod.rs +++ b/crates/containerd-shim-wasm/src/sys/unix/container/mod.rs @@ -1,2 +1,3 @@ +mod container; mod executor; pub mod instance;