From 0690bc7c00622b91f06e0fdb2da1f63a9ac2516d Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 8 Dec 2023 17:45:14 +1100 Subject: [PATCH] dmz: don't use runc-dmz in complicated capability setups Due to the fact that runc-dmz is an intermediate binary without any special set-capability file attributes, using runc-dmz for containers with a non-root user can result in different capability sets being applied after the second execve(). Linux capabilities are quite complicated, and there are loads of different interactions between file and process capability sets, so we should just go with the most conservative rule to determine if we can't use runc-dmz -- if the inheritable, permitted, and bounding sets are not equal to the ambient set then we don't use runc-dmz. Fixes: dac417174654 ("runc-dmz: reduce memfd binary cloning cost with small C binary") Signed-off-by: Aleksa Sarai --- libcontainer/container_linux.go | 36 +++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index f36abaf1032..4bc17308be5 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -454,11 +454,43 @@ func slicesContains[S ~[]E, E comparable](slice S, needle E) bool { return false } -func isDmzBinarySafe(c *configs.Config) bool { +// No longer needed in Go 1.21. +func slicesEqual[S ~[]E, E comparable](s1, s2 S) bool { + if len(s1) != len(s2) { + return false + } + for idx := range s1 { + if s1[idx] != s2[idx] { + return false + } + } + return true +} + +func shouldUseDmzBinary(p *Process, c *configs.Config) bool { + // Older versions container-selinux (< 2.224.0) had permission issues when + // using runc-dmz, so don't use runc-dmz for those systems. if !dmz.WorksWithSELinux(c) { return false } + // runc-dmz changes how capabilities are inherited by the process "runc + // init" execs if the container process is not root and the capabilities + // are not in the ambient set. There is no nice solution for this problem, + // so we can't use runc-dmz unless the four non-effective capability sets + // are the same. + // + // TODO: Unfortunately, our configuration format is such that we don't know + // the uid/gid of the container process at the moment. See newProcess() in + // utils_linux.go. So we have to assume that any User other than "0:0" is + // not root. + if p.User != "0:0" && c.Capabilities != nil && + (!slicesEqual(c.Capabilities.Inheritable, c.Capabilities.Ambient) || + !slicesEqual(c.Capabilities.Bounding, c.Capabilities.Ambient) || + !slicesEqual(c.Capabilities.Permitted, c.Capabilities.Ambient)) { + return false + } + // Because we set the dumpable flag in nsexec, the only time when it is // unsafe to use runc-dmz is when the container process would be able to // race against "runc init" and bypass the ptrace_may_access() checks. @@ -513,7 +545,7 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) { exePath = "/proc/self/exe" } else { var err error - if isDmzBinarySafe(c.config) { + if shouldUseDmzBinary(p, c.config) { dmzExe, err = dmz.Binary(c.stateDir) if err == nil { // We can use our own executable without cloning if we are using