Skip to content

Commit

Permalink
Merge pull request #1399 from tkatila/prepare-0.26.1
Browse files Browse the repository at this point in the history
Prepare 0.26.1
  • Loading branch information
mythi authored May 3, 2023
2 parents d7a14ab + 4e3ef52 commit c17149f
Show file tree
Hide file tree
Showing 60 changed files with 514 additions and 87 deletions.
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ pipeline {
stage('make test-with-kind') {
steps {
dir(path: "$REPO_DIR") {
sh "make test-with-kind REG=intel/ TAG=0.26.0"
sh "make test-with-kind REG=intel/ TAG=0.26.1"
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ clean:

ORG?=intel
REG?=$(ORG)/
TAG?=0.26.0
TAG?=0.26.1
export TAG

e2e-fpga:
Expand Down
79 changes: 74 additions & 5 deletions cmd/gpu_plugin/gpu_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"sort"
"strings"
Expand All @@ -39,6 +40,7 @@ const (
devfsDriDirectory = "/dev/dri"
gpuDeviceRE = `^card[0-9]+$`
controlDeviceRE = `^controlD[0-9]+$`
pciAddressRE = "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]{1}$"
vendorString = "0x8086"

// Device plugin settings.
Expand Down Expand Up @@ -145,32 +147,86 @@ func packedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
return deviceIds
}

// bypathMountsForPci builds read-only mounts for the by-path entries that
// belong to a single drm card.
//
// cardPath is read as a symbolic link whose target is expected to end in
// ".../<pci address>/drm/<cardName>". Every entry in bypathDir whose name
// begins with "pci-<pci address>" is turned into a Mount that uses the same
// absolute path on the host and in the container.
//
// The result is nil when cardPath cannot be read as a link, when the
// extracted address does not match pciAddressReg, when bypathDir cannot be
// listed, or when no entry matches.
func (dp *devicePlugin) bypathMountsForPci(cardPath, cardName, bypathDir string) []pluginapi.Mount {
	target, err := os.Readlink(cardPath)
	if err != nil {
		return nil
	}

	// The link target looks like
	//   ../../devices/pci0000:00/0000:00:02.0/drm/card
	// so once the trailing "drm/<cardName>" is dropped, the pci address is
	// the last remaining path element.
	drmSuffix := filepath.Join("drm", cardName)
	pciAddress := filepath.Base(strings.TrimSuffix(target, drmSuffix))

	if !dp.pciAddressReg.MatchString(pciAddress) {
		klog.Warningf("Invalid pci address for %s: %s", cardPath, pciAddress)

		return nil
	}

	entries, err := os.ReadDir(bypathDir)
	if err != nil {
		klog.Warningf("Failed to read by-path directory: %+v", err)

		return nil
	}

	var mounts []pluginapi.Mount

	wantedPrefix := "pci-" + pciAddress

	for _, entry := range entries {
		name := entry.Name()
		if !strings.HasPrefix(name, wantedPrefix) {
			continue
		}

		// Host and container see the entry at the same location.
		sharedPath := path.Join(bypathDir, name)

		mounts = append(mounts, pluginapi.Mount{
			ContainerPath: sharedPath,
			HostPath:      sharedPath,
			ReadOnly:      true,
		})
	}

	return mounts
}

// devicePlugin holds the state of the GPU device plugin: the compiled
// name-matching expressions, scan timing, the directories it inspects and the
// allocation settings.
type devicePlugin struct {
// Compiled by newDevicePlugin from gpuDeviceRE, controlDeviceRE and pciAddressRE.
gpuDeviceReg *regexp.Regexp
controlDeviceReg *regexp.Regexp
pciAddressReg *regexp.Regexp

// scanTicker is created with scanPeriod; scanDone is a buffered channel of size 1.
scanTicker *time.Ticker
scanDone chan bool

resMan rm.ResourceManager

// NOTE(review): sysfsDir/devfsDir are listed twice, presumably because this view
// shows both the removed and the re-aligned lines of the diff — confirm against the
// real file. bypathDir is set by newDevicePlugin to path.Join(devfsDir, "/by-path").
sysfsDir string
devfsDir string
sysfsDir string
devfsDir string
bypathDir string

// Note: If restarting the plugin with a new policy, the allocations for existing pods remain with old policy.
policy preferredAllocationPolicyFunc
options cliOptions

// bypathFound starts as true and is cleared by newDevicePlugin when bypathDir
// cannot be read; scan only looks up by-path mounts while it is set.
bypathFound bool
}

func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugin {
dp := &devicePlugin{
sysfsDir: sysfsDir,
devfsDir: devfsDir,
bypathDir: path.Join(devfsDir, "/by-path"),
options: options,
gpuDeviceReg: regexp.MustCompile(gpuDeviceRE),
controlDeviceReg: regexp.MustCompile(controlDeviceRE),
pciAddressReg: regexp.MustCompile(pciAddressRE),
scanTicker: time.NewTicker(scanPeriod),
scanDone: make(chan bool, 1), // buffered as we may send to it before Scan starts receiving from it
bypathFound: true,
}

if options.resourceManagement {
Expand All @@ -192,6 +248,12 @@ func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugi
dp.policy = nonePolicy
}

// Probe the by-path directory once at construction time. When it cannot be
// read, remember that so scan() skips the by-path mount lookup.
if _, err := os.ReadDir(dp.bypathDir); err != nil {
	// The verb must be %+v; the previous "$+v" printed the literal text
	// followed by a %!(EXTRA ...) marker instead of the formatted error.
	klog.Warningf("failed to read by-path dir: %+v", err)

	dp.bypathFound = false
}

return dp
}

Expand Down Expand Up @@ -299,7 +361,9 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
continue
}

drmFiles, err := os.ReadDir(path.Join(dp.sysfsDir, f.Name(), "device/drm"))
cardPath := path.Join(dp.sysfsDir, f.Name())

drmFiles, err := os.ReadDir(path.Join(cardPath, "device/drm"))
if err != nil {
return nil, errors.Wrap(err, "Can't read device folder")
}
Expand Down Expand Up @@ -338,15 +402,20 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
}

if len(nodes) > 0 {
deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)
mounts := []pluginapi.Mount{}
if dp.bypathFound {
mounts = dp.bypathMountsForPci(cardPath, f.Name(), dp.bypathDir)
}

deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil, nil)

for i := 0; i < dp.options.sharedDevNum; i++ {
devID := fmt.Sprintf("%s-%d", f.Name(), i)
// Currently only one device type (i915) is supported.
// TODO: check model ID to differentiate device models.
devTree.AddDevice(deviceType, devID, deviceInfo)

rmDevInfos[devID] = rm.NewDeviceInfo(nodes, nil, nil)
rmDevInfos[devID] = rm.NewDeviceInfo(nodes, mounts, nil)
}
}
}
Expand Down
132 changes: 131 additions & 1 deletion cmd/gpu_plugin/gpu_plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ import (
"flag"
"os"
"path"
"path/filepath"
"reflect"
"testing"

"github.com/pkg/errors"
"k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
"k8s.io/utils/strings/slices"

"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
Expand All @@ -43,6 +45,7 @@ type mockNotifier struct {
// Notify records how many monitor-type and device-type entries the new tree
// holds and then signals on scanDone that a scan result has arrived.
func (n *mockNotifier) Notify(newDeviceTree dpapi.DeviceTree) {
	monitors, devices := newDeviceTree[monitorType], newDeviceTree[deviceType]
	n.monitorCount, n.devCount = len(monitors), len(devices)

	n.scanDone <- true
}

Expand Down Expand Up @@ -190,7 +193,11 @@ func TestScan(t *testing.T) {
sysfsfiles: map[string][]byte{
"card0/device/vendor": []byte("0x8086"),
},
devfsdirs: []string{"card0"},
devfsdirs: []string{
"card0",
"by-path/pci-0000:00:00.0-card",
"by-path/pci-0000:00:00.0-render",
},
expectedDevs: 1,
},
{
Expand Down Expand Up @@ -314,3 +321,126 @@ func TestScan(t *testing.T) {
})
}
}

// createBypathTestFiles builds the fixture tree used by the by-path tests
// underneath root.
//
// When linkFile is non-empty, a regular file is written at
// <root>/sys/<linkFile> and <root>/sys/class/drm/<card> is created as a
// symbolic link pointing at it. When bypathFiles is non-empty,
// <root>/by-path is created and one regular file is written per entry.
//
// It returns the drm card path and the by-path directory path. Both paths
// are returned even when nothing was created at them. Any filesystem
// failure aborts the calling test.
//
// Would be nice to combine these with the overall Scan unit tests.
func createBypathTestFiles(t *testing.T, card, root, linkFile string, bypathFiles []string) (string, string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	drmPath := path.Join(root, "sys/class/drm/", card)
	devPath := path.Join(root, "sys", linkFile)
	byPath := path.Join(root, "by-path")

	if linkFile != "" {
		if err := os.MkdirAll(filepath.Dir(devPath), os.ModePerm); err != nil {
			t.Fatal("Couldn't create test dev dir", err)
		}

		if err := os.MkdirAll(filepath.Dir(drmPath), os.ModePerm); err != nil {
			t.Fatal("Couldn't create test drm dir", err)
		}

		if err := os.WriteFile(devPath, []byte{0}, os.ModePerm); err != nil {
			t.Fatal("Couldn't create card file", err)
		}

		if err := os.Symlink(devPath, drmPath); err != nil {
			t.Fatal("Couldn't create symlink between pci path and sysfs drm path", err)
		}
	}

	if len(bypathFiles) > 0 {
		if err := os.MkdirAll(byPath, os.ModePerm); err != nil {
			t.Fatal("Mkdir failed:", byPath, err)
		}

		for _, f := range bypathFiles {
			filePath := path.Join(byPath, f)

			if err := os.WriteFile(filePath, []byte{1}, os.ModePerm); err != nil {
				t.Fatal("WriteFile failed:", filePath, err)
			}
		}
	}

	return drmPath, byPath
}

func TestBypath(t *testing.T) {
type testData struct {
desc string
linkpath string
bypathFiles []string
mountCount int
}

const cardName string = "card0"

tds := []testData{
{
"card with two by-path files",
"00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm/" + cardName,
[]string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"},
2,
},
{
"different by-path files",
"00.10.2/00.334.302/0.0.1.00/0000:ff:05.0/drm/" + cardName,
[]string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"},
0,
},
{
"invalid pci address",
"00.10.2/00.334.302/0.0.1.00/000:ff:05.1/drm/" + cardName,
[]string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"},
0,
},
{
"symlink without card",
"00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm",
[]string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"},
0,
},
{
"no symlink",
"",
[]string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"},
0,
},
{
"no by-path files",
"00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm/" + cardName,
[]string{},
0,
},
}

for _, td := range tds {
root, err := os.MkdirTemp("", "test_bypath_mounting")
if err != nil {
t.Fatalf("can't create temporary directory: %+v", err)
}
// dirs/files need to be removed for the next test
defer os.RemoveAll(root)

plugin := newDevicePlugin("/", "/", cliOptions{})

drmPath, byPath := createBypathTestFiles(t, cardName, root, td.linkpath, td.bypathFiles)

mounts := plugin.bypathMountsForPci(drmPath, cardName, byPath)

if len(mounts) != td.mountCount {
t.Errorf("%s: Wrong number of mounts %d vs. %d", td.desc, len(mounts), td.mountCount)
}

absPaths := []string{}
for _, link := range td.bypathFiles {
absPaths = append(absPaths, path.Join(byPath, link))
}

for _, mount := range mounts {
if !slices.Contains(absPaths, mount.ContainerPath) {
t.Errorf("%s: containerpath is incorrect: %s", td.desc, mount.ContainerPath)
}

if !slices.Contains(absPaths, mount.HostPath) {
t.Errorf("%s: hostpath is incorrect: %s", td.desc, mount.HostPath)
}
}
}
}
Loading

0 comments on commit c17149f

Please sign in to comment.