From 255880cade422ff25abf47fc940066952bd3d578 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin
Date: Fri, 11 Oct 2024 19:07:11 -0700
Subject: [PATCH] libct/cg: retry opening a cgroup file

From time to time we see unexpected errors (ENODEV or ENOENT) trying to
write to a cgroup file. This is probably due to some races in the kernel.

Let's implement a retry when we see ENOENT or ENODEV, but only if a
directory exists.

Note this can have a negative effect on performance when some cgroup
features are disabled.

Signed-off-by: Kir Kolyshkin
---
 libcontainer/cgroups/file.go | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/libcontainer/cgroups/file.go b/libcontainer/cgroups/file.go
index 78c5bcf0d37..4e12ec892ae 100644
--- a/libcontainer/cgroups/file.go
+++ b/libcontainer/cgroups/file.go
@@ -5,7 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
-	"path"
+	"path/filepath"
 	"strconv"
 	"strings"
 	"sync"
@@ -25,7 +25,24 @@ func OpenFile(dir, file string, flags int) (*os.File, error) {
 	if dir == "" {
 		return nil, fmt.Errorf("no directory specified for %s", file)
 	}
-	return openFile(dir, file, flags)
+	path := filepath.Join(dir, utils.CleanPath(file))
+	dir = filepath.Dir(path) // Directory part of the joined path; checked before each retry.
+	try := 0 // Number of retries performed so far.
+	const maxRetries = 3 // Upper bound on retries after the initial attempt.
+	for {
+		fd, err := openFile(path, flags)
+		switch {
+		case err == nil:
+			return fd, nil
+		case errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENODEV):
+			if try >= maxRetries || unix.Access(dir, unix.F_OK) != nil { // Out of retries, or the directory is not accessible.
+				return nil, err
+			}
+			try++
+		default:
+			return nil, err
+		}
+	}
 }
 
 // ReadFile reads data from a cgroup file in dir.
@@ -140,14 +157,13 @@ func prepareOpenat2() error { return prepErr } -func openFile(dir, file string, flags int) (*os.File, error) { +func openFile(path string, flags int) (*os.File, error) { mode := os.FileMode(0) if TestMode && flags&os.O_WRONLY != 0 { // "emulate" cgroup fs for unit tests flags |= os.O_TRUNC | os.O_CREATE mode = 0o600 } - path := path.Join(dir, utils.CleanPath(file)) if prepareOpenat2() != nil { return openFallback(path, flags, mode) }