diff --git a/cloud/blockstore/tests/csi_driver/e2e_tests_part2/test.py b/cloud/blockstore/tests/csi_driver/e2e_tests_part2/test.py index 6e67be5d57b..2bd2d484673 100644 --- a/cloud/blockstore/tests/csi_driver/e2e_tests_part2/test.py +++ b/cloud/blockstore/tests/csi_driver/e2e_tests_part2/test.py @@ -1,5 +1,6 @@ import pytest import subprocess +import os from pathlib import Path @@ -63,14 +64,21 @@ def test_readonly_volume(mount_path, access_type, vm_mode, gid): def test_mount_volume_group(): # Scenario - # 1. create volume and publish volume without mount volume group - # 2. create directory and file - # 3. unpublish volume + # 1. create volume and stage it + # 2. create directory and file in the staging directory # 4. create new group with specified GID # 5. publish volume with mount volume group GID # 6. check that mounted dir and existing files have specified GID # 7. create new directory and file # 8. check that new directory and file have specified GID + # 9. unpublish volume + # 10. create new file in staging directory and change ownership + # 11. publish volume with mount volume group GID + # 12. Verify that the new file doesn't have the specified GID. + # The change won't take effect because the GID of the mount directory + # matches the GID of the volume group. + + stage_path = Path("/var/lib/kubelet/plugins/kubernetes.io/csi/nbs.csi.nebius.ai/a/globalmount") env, run = csi.init() try: volume_name = "example-disk" @@ -81,6 +89,11 @@ def test_mount_volume_group(): env.csi.create_volume(name=volume_name, size=volume_size) env.csi.stage_volume(volume_name, access_type) + stage_test_dir1 = stage_path / "testdir1" + stage_test_dir1.mkdir() + stage_test_file1 = stage_test_dir1 / "testfile1" + stage_test_file1.touch() + gid = 1013 result = subprocess.run( ["groupadd", "-g", str(gid), "test_group_" + str(gid)], @@ -92,23 +105,13 @@ def test_mount_volume_group(): pod_id, volume_name, pod_name, - access_type + access_type, + volume_mount_group=str(gid) ) mount_path = Path("/var/lib/kubelet/pods") / pod_id / "volumes/kubernetes.io~csi" / volume_name / "mount" test_dir1 = mount_path / "testdir1" - test_dir1.mkdir() test_file1 = test_dir1 / "testfile1" - test_file1.touch() - - env.csi.unpublish_volume(pod_id, volume_name, access_type) - env.csi.publish_volume( - pod_id, - volume_name, - pod_name, - access_type, - volume_mount_group=str(gid) - ) assert gid == mount_path.stat().st_gid assert gid == test_dir1.stat().st_gid @@ -122,6 +125,24 @@ def test_mount_volume_group(): test_dir2.mkdir() assert gid == test_dir2.stat().st_gid + env.csi.unpublish_volume(pod_id, volume_name, access_type) + + stage_test_file3 = stage_test_dir1 / "testfile3" + stage_test_file3.touch() + os.chown(stage_test_file3, os.getuid(), os.getgid()) + assert gid != stage_test_file3.stat().st_gid + + env.csi.publish_volume( + pod_id, + volume_name, + pod_name, + access_type, + volume_mount_group=str(gid) + ) + + test_file3 = test_dir1 / "testfile3" + assert gid != test_file3.stat().st_gid + except subprocess.CalledProcessError as e: csi.log_called_process_error(e) raise diff --git a/cloud/blockstore/tools/csi_driver/internal/driver/node.go b/cloud/blockstore/tools/csi_driver/internal/driver/node.go index 44c0572104b..8a72402a645 100644 --- a/cloud/blockstore/tools/csi_driver/internal/driver/node.go +++ b/cloud/blockstore/tools/csi_driver/internal/driver/node.go @@ -11,9 +11,9 @@ import ( "log" "math" "os" - "os/exec" "path/filepath" "regexp" + "strconv" "strings" "sync" @@ -21,6 +21,7 @@ import ( nbsapi "github.com/ydb-platform/nbs/cloud/blockstore/public/api/protos" nbsclient "github.com/ydb-platform/nbs/cloud/blockstore/public/sdk/go/client" "github.com/ydb-platform/nbs/cloud/blockstore/tools/csi_driver/internal/mounter" + "github.com/ydb-platform/nbs/cloud/blockstore/tools/csi_driver/internal/volume" nfsapi "github.com/ydb-platform/nbs/cloud/filestore/public/api/protos" nfsclient "github.com/ydb-platform/nbs/cloud/filestore/public/sdk/go/client" "golang.org/x/sys/unix" @@ -658,11 +659,15 @@ func (s *nodeService) nodePublishDiskAsFilesystem( return err } - if mnt != nil && mnt.VolumeMountGroup != "" && !req.Readonly { - cmd := exec.Command("chown", "-R", ":"+mnt.VolumeMountGroup, req.TargetPath) - if out, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("failed to chown %s to %q: %w, output %q", - mnt.VolumeMountGroup, req.TargetPath, err, out) + if mnt != nil && mnt.VolumeMountGroup != "" { + fsGroup, err := strconv.ParseInt(mnt.VolumeMountGroup, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse volume mount group: %w", err) + } + + err = volume.SetVolumeOwnership(req.TargetPath, &fsGroup, req.Readonly) + if err != nil { + return fmt.Errorf("failed to set volume ownership: %w", err) } } @@ -741,14 +746,6 @@ func (s *nodeService) nodeStageDiskAsFilesystem( return fmt.Errorf("failed to format or mount filesystem: %w", err) } - if mnt != nil && mnt.VolumeMountGroup != "" { - cmd := exec.Command("chown", "-R", ":"+mnt.VolumeMountGroup, req.StagingTargetPath) - if out, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("failed to chown %s to %q: %w, output %q", - mnt.VolumeMountGroup, req.StagingTargetPath, err, out) - } - } - if err := os.Chmod(req.StagingTargetPath, targetPerm); err != nil { return fmt.Errorf("failed to chmod target path: %w", err) } diff --git a/cloud/blockstore/tools/csi_driver/internal/volume/volume_linux.go b/cloud/blockstore/tools/csi_driver/internal/volume/volume_linux.go new file mode 100644 index 00000000000..ec7f6da4bfe --- /dev/null +++ b/cloud/blockstore/tools/csi_driver/internal/volume/volume_linux.go @@ -0,0 +1,210 @@ +//go:build linux +// +build linux + +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package volume + +import ( + "path/filepath" + "syscall" + + "os" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/volume/util/types" +) + +const ( + rwMask = os.FileMode(0660) + roMask = os.FileMode(0440) + execMask = os.FileMode(0110) +) + +// SetVolumeOwnership modifies the given volume to be owned by +// fsGroup, and sets SetGid so that newly created files are owned by +// fsGroup. If fsGroup is nil nothing is done. +func SetVolumeOwnership(mounter Mounter, dir string, fsGroup *int64, fsGroupChangePolicy *v1.PodFSGroupChangePolicy, completeFunc func(types.CompleteFuncParam)) error { + if fsGroup == nil { + return nil + } + + timer := time.AfterFunc(30*time.Second, func() { + klog.Warningf("Setting volume ownership for %s and fsGroup set. If the volume has a lot of files then setting volume ownership could be slow, see https://github.com/kubernetes/kubernetes/issues/69699", dir) + }) + defer timer.Stop() + + if skipPermissionChange(mounter, dir, fsGroup, fsGroupChangePolicy) { + klog.V(3).InfoS("Skipping permission and ownership change for volume", "path", dir) + return nil + } + + err := walkDeep(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + return changeFilePermission(path, fsGroup, mounter.GetAttributes().ReadOnly, info) + }) + if completeFunc != nil { + completeFunc(types.CompleteFuncParam{ + Err: &err, + }) + } + return err +} + +func changeFilePermission(filename string, fsGroup *int64, readonly bool, info os.FileInfo) error { + err := os.Lchown(filename, -1, int(*fsGroup)) + if err != nil { + klog.ErrorS(err, "Lchown failed", "path", filename) + } + + // chmod passes through to the underlying file for symlinks. + // Symlinks have a mode of 777 but this really doesn't mean anything. + // The permissions of the underlying file are what matter. + // However, if one reads the mode of a symlink then chmods the symlink + // with that mode, it changes the mode of the underlying file, overridden + // the defaultMode and permissions initialized by the volume plugin, which + // is not what we want; thus, we skip chmod for symlinks. + if info.Mode()&os.ModeSymlink != 0 { + return nil + } + + mask := rwMask + if readonly { + mask = roMask + } + + if info.IsDir() { + mask |= os.ModeSetgid + mask |= execMask + } + + err = os.Chmod(filename, info.Mode()|mask) + if err != nil { + klog.ErrorS(err, "chmod failed", "path", filename) + } + + return nil +} + +func skipPermissionChange(mounter Mounter, dir string, fsGroup *int64, fsGroupChangePolicy *v1.PodFSGroupChangePolicy) bool { + if fsGroupChangePolicy == nil || *fsGroupChangePolicy != v1.FSGroupChangeOnRootMismatch { + klog.V(4).InfoS("Perform recursive ownership change for directory", "path", dir) + return false + } + return !requiresPermissionChange(dir, fsGroup, mounter.GetAttributes().ReadOnly) +} + +func requiresPermissionChange(rootDir string, fsGroup *int64, readonly bool) bool { + fsInfo, err := os.Stat(rootDir) + if err != nil { + klog.ErrorS(err, "Performing recursive ownership change on rootDir because reading permissions of root volume failed", "path", rootDir) + return true + } + stat, ok := fsInfo.Sys().(*syscall.Stat_t) + if !ok || stat == nil { + klog.ErrorS(nil, "Performing recursive ownership change on rootDir because reading permissions of root volume failed", "path", rootDir) + return true + } + + if int(stat.Gid) != int(*fsGroup) { + klog.V(4).InfoS("Expected group ownership of volume did not match with Gid", "path", rootDir, "GID", stat.Gid) + return true + } + unixPerms := rwMask + + if readonly { + unixPerms = roMask + } + + // if rootDir is not a directory then we should apply permission change anyways + if !fsInfo.IsDir() { + return true + } + unixPerms |= execMask + filePerm := fsInfo.Mode().Perm() + + // We need to check if actual permissions of root directory is a superset of permissions required by unixPerms. + // This is done by checking if permission bits expected in unixPerms is set in actual permissions of the directory. + // We use bitwise AND operation to check set bits. For example: + // unixPerms: 770, filePerms: 775 : 770&775 = 770 (perms on directory is a superset) + // unixPerms: 770, filePerms: 770 : 770&770 = 770 (perms on directory is a superset) + // unixPerms: 770, filePerms: 750 : 770&750 = 750 (perms on directory is NOT a superset) + // We also need to check if setgid bits are set in permissions of the directory. + if (unixPerms&filePerm != unixPerms) || (fsInfo.Mode()&os.ModeSetgid == 0) { + klog.V(4).InfoS("Performing recursive ownership change on rootDir because of mismatching mode", "path", rootDir) + return true + } + return false +} + +// readDirNames reads the directory named by dirname and returns +// a list of directory entries. +// We are not using filepath.readDirNames because we do not want to sort files found in a directory before changing +// permissions for performance reasons. +func readDirNames(dirname string) ([]string, error) { + f, err := os.Open(dirname) + if err != nil { + return nil, err + } + names, err := f.Readdirnames(-1) + f.Close() + if err != nil { + return nil, err + } + return names, nil +} + +// walkDeep can be used to traverse directories and has two minor differences +// from filepath.Walk: +// - List of files/dirs is not sorted for performance reasons +// - callback walkFunc is invoked on root directory after visiting children dirs and files +func walkDeep(root string, walkFunc filepath.WalkFunc) error { + info, err := os.Lstat(root) + if err != nil { + return walkFunc(root, nil, err) + } + return walk(root, info, walkFunc) +} + +func walk(path string, info os.FileInfo, walkFunc filepath.WalkFunc) error { + if !info.IsDir() { + return walkFunc(path, info, nil) + } + names, err := readDirNames(path) + if err != nil { + return err + } + for _, name := range names { + filename := filepath.Join(path, name) + fileInfo, err := os.Lstat(filename) + if err != nil { + if err := walkFunc(filename, fileInfo, err); err != nil { + return err + } + } else { + err = walk(filename, fileInfo, walkFunc) + if err != nil { + return err + } + } + } + return walkFunc(path, info, nil) +} diff --git a/cloud/blockstore/tools/csi_driver/internal/volume/ya.make b/cloud/blockstore/tools/csi_driver/internal/volume/ya.make new file mode 100644 index 00000000000..f2df5539b7e --- /dev/null +++ b/cloud/blockstore/tools/csi_driver/internal/volume/ya.make @@ -0,0 +1,7 @@ +GO_LIBRARY() + +SRCS( + volume_linux.go +) + +END() diff --git a/cloud/blockstore/tools/csi_driver/internal/ya.make b/cloud/blockstore/tools/csi_driver/internal/ya.make index 5c9c1c7aaf1..6ba42a1a0f9 100644 --- a/cloud/blockstore/tools/csi_driver/internal/ya.make +++ b/cloud/blockstore/tools/csi_driver/internal/ya.make @@ -1,4 +1,5 @@ RECURSE( driver mounter + volume )