From 03faf1031fb6ccc7977a7f05b2a45ba7ef5016ab Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 23 Jun 2024 16:31:57 -0700 Subject: [PATCH] libct: speedup process.Env handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation sets all the environment variables passed in Process.Env in the current process, one by one, then uses os.Environ to read those back. As pointed out in [1], this is slow, as runc calls os.Setenv for every variable, and there may be a few thousands of those. Looking into how os.Setenv is implemented, it is indeed slow, especially when cgo is enabled. Looking into why it was implemented the way it is, I found commit 9744d72c and traced it to [2], which discusses the actual reasons. It boils down to these two: - HOME is not passed into container as it is set in setupUser by os.Setenv and has no effect on config.Env; - there is a need to deduplication of environment variables. Yet it was decided in [2] to not go ahead with this patch, but later [3] was opened with the carry of this patch, and merged. Now, from what I see: 1. Passing environment to exec is way faster than using os.Setenv and os.Environ (tests show ~20x speed improvement in a simple Go test, and ~3x improvement in real-world test, see below). 2. Setting environment variables in the runc context may result is some ugly side effects (think GODEBUG, LD_PRELOAD, or _LIBCONTAINER_*). 3. Nothing in runtime spec says that the environment needs to be deduplicated, or the order of preference (whether the first or the last value of a variable with the same name is to be used). We should stick to what we have in order to maintain backward compatibility. So, this patch: - switches to passing env directly to exec; - adds deduplication mechanism to retain backward compatibility; - takes care to set PATH from process.Env in the current process (so that supplied PATH is used to find the binary to execute), also to retain backward compatibility; - adds HOME to process.Env if not set; - ensures any StartContainer CommandHook entries with no environment set explicitly are run with the same environment as before. Thanks to @lifubang who noticed that peculiarity. The benchmark added by the previous commit shows ~3x improvement: │ before │ after │ │ sec/op │ sec/op vs base │ ExecInBigEnv-20 61.53m ± 1% 21.87m ± 16% -64.46% (p=0.000 n=10) [1]: https://github.com/opencontainers/runc/pull/1983 [2]: https://github.com/docker-archive/libcontainer/pull/418 [3]: https://github.com/docker-archive/libcontainer/pull/432 Signed-off-by: Kir Kolyshkin --- libcontainer/env.go | 59 +++++++++++++++++++++++++++++ libcontainer/env_test.go | 40 +++++++++++++++++++ libcontainer/init_linux.go | 54 ++++++++------------------ libcontainer/setns_init_linux.go | 5 ++- libcontainer/standard_init_linux.go | 13 ++++++- 5 files changed, 129 insertions(+), 42 deletions(-) create mode 100644 libcontainer/env.go create mode 100644 libcontainer/env_test.go diff --git a/libcontainer/env.go b/libcontainer/env.go new file mode 100644 index 00000000000..d205cb65014 --- /dev/null +++ b/libcontainer/env.go @@ -0,0 +1,59 @@ +package libcontainer + +import ( + "errors" + "fmt" + "os" + "slices" + "strings" +) + +// prepareEnv processes a list of environment variables, preparing it +// for direct consumption by unix.Exec. In particular, it: +// - validates each variable is in the NAME=VALUE format and +// contains no \0 (nil) bytes; +// - removes any duplicates (keeping only the last value for each key) +// - sets PATH for the current process, if found in the list. +// +// It returns the deduplicated environment, a flag telling whether HOME +// is present in the input, and an error. +func prepareEnv(env []string) ([]string, bool, error) { + if env == nil { + return nil, false, nil + } + // Deduplication code based on dedupEnv from Go 1.22 os/exec. + + // Construct the output in reverse order, to preserve the + // last occurrence of each key. + out := make([]string, 0, len(env)) + saw := make(map[string]bool, len(env)) + for n := len(env); n > 0; n-- { + kv := env[n-1] + i := strings.IndexByte(kv, '=') + if i == -1 { + return nil, false, errors.New("invalid environment variable: missing '='") + } + if i == 0 { + return nil, false, errors.New("invalid environment variable: name cannot be empty") + } + key := kv[:i] + if saw[key] { // Duplicate. + continue + } + saw[key] = true + if strings.IndexByte(kv, 0) >= 0 { + return nil, false, fmt.Errorf("invalid environment variable %q: contains nul byte (\\x00)", key) + } + if key == "PATH" { + // Needs to be set as it is used for binary lookup. + if err := os.Setenv("PATH", kv[i+1:]); err != nil { + return nil, false, err + } + } + out = append(out, kv) + } + // Restore the original order. + slices.Reverse(out) + + return out, saw["HOME"], nil +} diff --git a/libcontainer/env_test.go b/libcontainer/env_test.go new file mode 100644 index 00000000000..72d63b4af23 --- /dev/null +++ b/libcontainer/env_test.go @@ -0,0 +1,40 @@ +package libcontainer + +import ( + "slices" + "testing" +) + +func TestPrepareEnvDedup(t *testing.T) { + tests := []struct { + env, wantEnv []string + }{ + { + env: []string{}, + wantEnv: []string{}, + }, + { + env: []string{"HOME=/root", "FOO=bar"}, + wantEnv: []string{"HOME=/root", "FOO=bar"}, + }, + { + env: []string{"A=a", "A=b", "A=c"}, + wantEnv: []string{"A=c"}, + }, + { + env: []string{"TERM=vt100", "HOME=/home/one", "HOME=/home/two", "TERM=xterm", "HOME=/home/three", "FOO=bar"}, + wantEnv: []string{"TERM=xterm", "HOME=/home/three", "FOO=bar"}, + }, + } + + for _, tc := range tests { + env, _, err := prepareEnv(tc.env) + if err != nil { + t.Error(err) + continue + } + if !slices.Equal(env, tc.wantEnv) { + t.Errorf("want %v, got %v", tc.wantEnv, env) + } + } +} diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index d23153e9b3d..418332bae5a 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -11,7 +11,6 @@ import ( "runtime" "runtime/debug" "strconv" - "strings" "syscall" "github.com/containerd/console" @@ -185,8 +184,8 @@ func startInitialization() (retErr error) { defer pidfdSocket.Close() } - // clear the current process's environment to clean any libcontainer - // specific env vars. + // From here on, we don't need current process environment. It is not + // used directly anywhere below this point, but let's clear it anyway. os.Clearenv() defer func() { @@ -209,9 +208,11 @@ func startInitialization() (retErr error) { } func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket, fifoFile, logPipe *os.File) error { - if err := populateProcessEnvironment(config.Env); err != nil { + env, homeSet, err := prepareEnv(config.Env) + if err != nil { return err } + config.Env = env // Clean the RLIMIT_NOFILE cache in go runtime. // Issue: https://github.com/opencontainers/runc/issues/4195 @@ -225,6 +226,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock pidfdSocket: pidfdSocket, config: config, logPipe: logPipe, + addHome: !homeSet, } return i.Init() case initStandard: @@ -236,36 +238,13 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock config: config, fifoFile: fifoFile, logPipe: logPipe, + addHome: !homeSet, } return i.Init() } return fmt.Errorf("unknown init type %q", t) } -// populateProcessEnvironment loads the provided environment variables into the -// current processes's environment. -func populateProcessEnvironment(env []string) error { - for _, pair := range env { - name, val, ok := strings.Cut(pair, "=") - if !ok { - return errors.New("invalid environment variable: missing '='") - } - if name == "" { - return errors.New("invalid environment variable: name cannot be empty") - } - if strings.IndexByte(name, 0) >= 0 { - return fmt.Errorf("invalid environment variable %q: name contains nul byte (\\x00)", name) - } - if strings.IndexByte(val, 0) >= 0 { - return fmt.Errorf("invalid environment variable %q: value contains nul byte (\\x00)", name) - } - if err := os.Setenv(name, val); err != nil { - return err - } - } - return nil -} - // verifyCwd ensures that the current directory is actually inside the mount // namespace root of the current process. func verifyCwd() error { @@ -294,8 +273,8 @@ func verifyCwd() error { // finalizeNamespace drops the caps, sets the correct user // and working dir, and closes any leaked file descriptors -// before executing the command inside the namespace -func finalizeNamespace(config *initConfig) error { +// before executing the command inside the namespace. +func finalizeNamespace(config *initConfig, addHome bool) error { // Ensure that all unwanted fds we may have accidentally // inherited are marked close-on-exec so they stay out of the // container @@ -341,7 +320,7 @@ func finalizeNamespace(config *initConfig) error { if err := system.SetKeepCaps(); err != nil { return fmt.Errorf("unable to set keep caps: %w", err) } - if err := setupUser(config); err != nil { + if err := setupUser(config, addHome); err != nil { return fmt.Errorf("unable to setup user: %w", err) } // Change working directory AFTER the user has been set up, if we haven't done it yet. @@ -459,8 +438,9 @@ func syncParentSeccomp(pipe *syncSocket, seccompFd int) error { return readSync(pipe, procSeccompDone) } -// setupUser changes the groups, gid, and uid for the user inside the container -func setupUser(config *initConfig) error { +// setupUser changes the groups, gid, and uid for the user inside the container, +// and appends user's HOME to config.Env if addHome is true. +func setupUser(config *initConfig, addHome bool) error { // Set up defaults. defaultExecUser := user.ExecUser{ Uid: 0, @@ -541,11 +521,9 @@ func setupUser(config *initConfig) error { return err } - // if we didn't get HOME already, set it based on the user's HOME - if envHome := os.Getenv("HOME"); envHome == "" { - if err := os.Setenv("HOME", execUser.Home); err != nil { - return err - } + // If we didn't get HOME already, set it based on the user's HOME. + if addHome { + config.Env = append(config.Env, "HOME="+execUser.Home) } return nil } diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 92c6ef77030..8cc65b7e8ec 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -25,6 +25,7 @@ type linuxSetnsInit struct { pidfdSocket *os.File config *initConfig logPipe *os.File + addHome bool } func (l *linuxSetnsInit) getSessionRingName() string { @@ -100,7 +101,7 @@ func (l *linuxSetnsInit) Init() error { return err } } - if err := finalizeNamespace(l.config); err != nil { + if err := finalizeNamespace(l.config, l.addHome); err != nil { return err } if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { @@ -153,5 +154,5 @@ func (l *linuxSetnsInit) Init() error { if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { return err } - return system.Exec(name, l.config.Args, os.Environ()) + return system.Exec(name, l.config.Args, l.config.Env) } diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 9f7fa45d533..21f3c5d5fd9 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -27,6 +27,7 @@ type linuxStandardInit struct { fifoFile *os.File logPipe *os.File config *initConfig + addHome bool } func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { @@ -189,7 +190,7 @@ func (l *linuxStandardInit) Init() error { return err } } - if err := finalizeNamespace(l.config); err != nil { + if err := finalizeNamespace(l.config, l.addHome); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death @@ -197,6 +198,14 @@ func (l *linuxStandardInit) Init() error { if err := pdeath.Restore(); err != nil { return fmt.Errorf("can't restore pdeath signal: %w", err) } + + // In case we have any StartContainer hooks to run, and they don't + // have environment configured explicitly, make sure they will be run + // with the same environment as container's init. + if h := l.config.Config.Hooks[configs.StartContainer]; len(h) > 0 { + h.SetDefaultEnv(l.config.Env) + } + // Compare the parent from the initial start of the init process and make // sure that it did not change. if the parent changes that means it died // and we were reparented to something else so we should just kill ourself @@ -287,5 +296,5 @@ func (l *linuxStandardInit) Init() error { if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { return err } - return system.Exec(name, l.config.Args, os.Environ()) + return system.Exec(name, l.config.Args, l.config.Env) }