Skip to content

Commit

Permalink
CNI: migrate from persistent state to ephemeral state during restart (#…
Browse files Browse the repository at this point in the history
…25093)

In #24650 we switched to using ephemeral state for CNI plugins, so that when a
host reboots and we lose all the allocations we don't end up trying to use IPs
we created in network namespaces we just destroyed. Unfortunately upgrade
testing missed that in a non-reboot scenario, the existing CNI state was being
used by plugins like the ipam plugin to hand out the "next available" IP
address. So with no state carried over, we might allocate new addresses that
conflict with existing allocations. (This can be avoided by draining the node
first.)

As a compatibility shim, copy the old CNI state directory to the new CNI state
directory during agent startup, if the new CNI state directory doesn't already
exist.

Ref: #24650
  • Loading branch information
tgross authored Feb 12, 2025
1 parent f0d3c28 commit 716df52
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .changelog/25093.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
cni: Fixed a bug where CNI state was not migrated after upgrade, resulting in IP collisions
```
21 changes: 21 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import (
"github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/envoy"
"github.com/hashicorp/nomad/helper/escapingfs"
"github.com/hashicorp/nomad/helper/goruntime"
"github.com/hashicorp/nomad/helper/group"
"github.com/hashicorp/nomad/helper/pointer"
Expand Down Expand Up @@ -762,6 +763,26 @@ func (c *Client) init() error {
// setup the nsd check store
c.checkStore = checkstore.NewStore(c.logger, c.stateDB)

// COMPAT(1.12.0): remove in Nomad 1.12.0
oldCNIDir := "/var/lib/cni/networks/nomad"
newCNIDir := "/var/run/cni/nomad"
if _, err := os.Stat(newCNIDir); os.IsNotExist(err) {
if _, err := os.Stat(oldCNIDir); err == nil {
err := escapingfs.CopyDir(oldCNIDir, newCNIDir)
if err != nil {
c.logger.Error("failed to migrate existing CNI state",
"error", err, "src", oldCNIDir, "dest", newCNIDir)
} else {
err := os.RemoveAll(oldCNIDir)
if err != nil {
c.logger.Error("migrated CNI state but could not remove old state",
"error", err, "src", oldCNIDir, "dest", newCNIDir)
}
c.logger.Info("migrated CNI state", "src", oldCNIDir, "dest", newCNIDir)
}
}
}

return nil
}

Expand Down
58 changes: 58 additions & 0 deletions helper/escapingfs/copydir.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package escapingfs

import (
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
)

// CopyDir copies a directory's contents to a new location, returning an error
// on symlinks. This implementation is roughly the same as the stdlib os.CopyDir
// but with th e important difference that we preserve file modes.
func CopyDir(src, dst string) error {
srcFs := os.DirFS(src)

return fs.WalkDir(srcFs, ".", func(oldPath string, d fs.DirEntry, err error) error {
if err != nil {
return err
}

newPath := filepath.Join(dst, oldPath)
if d.IsDir() {
info, err := d.Info()
if err != nil {
return fmt.Errorf("could not stat directory: %v", err)
}
return os.MkdirAll(newPath, info.Mode())
}
if !d.Type().IsRegular() {
return fmt.Errorf("copying cannot traverse symlinks")
}

r, err := srcFs.Open(oldPath)
if err != nil {
return fmt.Errorf("could not open existing file: %v", err)
}
defer r.Close()
info, err := r.Stat()
if err != nil {
return fmt.Errorf("could not stat file: %v", err)
}

w, err := os.OpenFile(newPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, info.Mode())
if err != nil {
return err
}

if _, err := io.Copy(w, r); err != nil {
w.Close()
return fmt.Errorf("could not copy file: %v", err)
}
return w.Close()
})
}
41 changes: 41 additions & 0 deletions helper/escapingfs/copydir_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package escapingfs

import (
"io/fs"
"os"
"path/filepath"
"testing"

"github.com/shoenig/test/must"
"golang.org/x/sys/unix"
)

func TestCopyDir(t *testing.T) {

testDir := t.TempDir()
src := filepath.Join(testDir, "src")
dst := filepath.Join(testDir, "dst")

must.NoError(t, os.Mkdir(src, 0700))
must.NoError(t, os.WriteFile(filepath.Join(src, "foo"), []byte("foo"), 0770))
must.NoError(t, os.WriteFile(filepath.Join(src, "bar"), []byte("bar"), 0555))
must.NoError(t, os.Mkdir(filepath.Join(src, "bazDir"), 0700))
must.NoError(t, os.WriteFile(filepath.Join(src, "bazDir", "baz"), []byte("baz"), 0555))

err := CopyDir(src, dst)
must.NoError(t, err)

// This is really how you have to retrieve umask. See `man 2 umask`
umask := unix.Umask(0)
unix.Umask(umask)

must.FileContains(t, filepath.Join(dst, "foo"), "foo")
must.FileMode(t, filepath.Join(dst, "foo"), fs.FileMode(0o770&(^umask)))
must.FileContains(t, filepath.Join(dst, "bar"), "bar")
must.FileMode(t, filepath.Join(dst, "bar"), fs.FileMode(0o555&(^umask)))
must.FileContains(t, filepath.Join(dst, "bazDir", "baz"), "baz")
must.FileMode(t, filepath.Join(dst, "bazDir", "baz"), fs.FileMode(0o555&(^umask)))
}

0 comments on commit 716df52

Please sign in to comment.