From 5a0fd5b8829657f487db594d90c5b31841c78a66 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Tue, 12 Nov 2024 19:48:40 +0400 Subject: [PATCH] refactor: move early initialization functions to pre-initialize phase Fixes #8900 Closes #9687 (contains splitting of late mounts) The benefits: * tasks run _before_ controllers are started * tasks can register `defer` to undo actions This decomposes sequencer tasks a bit. Signed-off-by: Andrey Smirnov --- internal/app/machined/main.go | 43 ++- .../app/machined/pkg/runtime/controller.go | 2 + .../runtime/v1alpha1/v1alpha1_sequencer.go | 23 -- .../v1alpha1/v1alpha1_sequencer_tasks.go | 349 ------------------ .../runtime/v1alpha2/v1alpha2_controller.go | 11 +- internal/app/machined/pkg/startup/cgroups.go | 205 ++++++++++ internal/app/machined/pkg/startup/ima.go | 85 +++++ .../app/machined/pkg/startup/os_release.go | 60 +++ internal/app/machined/pkg/startup/startup.go | 51 +++ internal/app/machined/pkg/startup/tasks.go | 133 +++++++ internal/pkg/mount/v2/pseudo.go | 6 + 11 files changed, 579 insertions(+), 389 deletions(-) create mode 100644 internal/app/machined/pkg/startup/cgroups.go create mode 100644 internal/app/machined/pkg/startup/ima.go create mode 100644 internal/app/machined/pkg/startup/os_release.go create mode 100644 internal/app/machined/pkg/startup/startup.go create mode 100644 internal/app/machined/pkg/startup/tasks.go diff --git a/internal/app/machined/main.go b/internal/app/machined/main.go index fb2d01aada..7fef2f8241 100644 --- a/internal/app/machined/main.go +++ b/internal/app/machined/main.go @@ -23,6 +23,7 @@ import ( "github.com/siderolabs/go-cmd/pkg/cmd/proc/reaper" debug "github.com/siderolabs/go-debug" "github.com/siderolabs/go-procfs/procfs" + "go.uber.org/zap" "golang.org/x/sys/unix" "github.com/siderolabs/talos/internal/app/apid" @@ -30,6 +31,7 @@ import ( "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" "github.com/siderolabs/talos/internal/app/machined/pkg/runtime/emergency" v1alpha1runtime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1" + startuptasks "github.com/siderolabs/talos/internal/app/machined/pkg/startup" "github.com/siderolabs/talos/internal/app/machined/pkg/system" "github.com/siderolabs/talos/internal/app/machined/pkg/system/services" "github.com/siderolabs/talos/internal/app/maintenance" @@ -161,18 +163,10 @@ func runDebugServer(ctx context.Context) { } } -//nolint:gocyclo func run() error { - errCh := make(chan error) - // Limit GOMAXPROCS. startup.LimitMaxProcs(constants.MachinedMaxProcs) - // Set the PATH env var. - if err := os.Setenv("PATH", constants.PATH); err != nil { - return errors.New("error setting PATH") - } - // Initialize the controller without a config. c, err := v1alpha1runtime.NewController() if err != nil { @@ -181,10 +175,35 @@ func run() error { revertSetState(c.Runtime().State().V1Alpha2().Resources()) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + logger, err := c.V1Alpha2().MakeLogger("early-startup") + if err != nil { + return err + } + + start := time.Now() + + // Run startup tasks, and then run the entrypoint. + return startuptasks.RunTasks(ctx, logger, c.Runtime(), append( + startuptasks.DefaultTasks(), + func(ctx context.Context, log *zap.Logger, _ runtime.Runtime, _ startuptasks.NextTaskFunc) error { + logger.Info("early startup done", zap.Duration("duration", time.Since(start))) + + return runEntrypoint(ctx, c) + }, + )...) +} + +//nolint:gocyclo +func runEntrypoint(ctx context.Context, c *v1alpha1runtime.Controller) error { + errCh := make(chan error) + var controllerWaitGroup sync.WaitGroup defer controllerWaitGroup.Wait() // wait for controller-runtime to finish before rebooting - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := context.WithCancel(ctx) defer cancel() drainer := runtime.NewDrainer() @@ -237,7 +256,7 @@ func run() error { initializeCanceled := false // Initialize the machine. - if err = c.Run(ctx, runtime.SequenceInitialize, nil); err != nil { + if err := c.Run(ctx, runtime.SequenceInitialize, nil); err != nil { if errors.Is(err, context.Canceled) { initializeCanceled = true } else { @@ -248,7 +267,7 @@ func run() error { // If Initialize sequence was canceled, don't run any other sequence. if !initializeCanceled { // Perform an installation if required. - if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil { + if err := c.Run(ctx, runtime.SequenceInstall, nil); err != nil { return err } @@ -258,7 +277,7 @@ func run() error { ) // Boot the machine. - if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) { + if err := c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) { return err } } diff --git a/internal/app/machined/pkg/runtime/controller.go b/internal/app/machined/pkg/runtime/controller.go index 4b527eadc6..cbbac96ac1 100644 --- a/internal/app/machined/pkg/runtime/controller.go +++ b/internal/app/machined/pkg/runtime/controller.go @@ -9,6 +9,7 @@ import ( "log" "github.com/cosi-project/runtime/pkg/controller" + "go.uber.org/zap" ) // TaskSetupFunc defines the function that a task will execute for a specific runtime @@ -61,4 +62,5 @@ type Controller interface { type V1Alpha2Controller interface { Run(context.Context, *Drainer) error DependencyGraph() (*controller.DependencyGraph, error) + MakeLogger(serviceName string) (*zap.Logger, error) } diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go index 5f72fe5840..7057c83432 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go @@ -68,20 +68,9 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase { mode := r.State().Platform().Mode() phases := PhaseList{} - phases = phases.Append("logMode", LogMode) - switch mode { //nolint:exhaustive case runtime.ModeContainer: phases = phases.Append( - "systemRequirements", - SetupSystemDirectory, - InitVolumeLifecycle, - ).Append( - "etc", - CreateSystemCgroups, - CreateOSReleaseFile, - SetUserEnvVars, - ).Append( "machined", StartMachined, StartContainerd, @@ -93,18 +82,6 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase { phases = phases.Append( "systemRequirements", EnforceKSPPRequirements, - SetupSystemDirectory, - MountCgroups, - SetRLimit, - InitVolumeLifecycle, - ).Append( - "integrity", - WriteIMAPolicy, - ).Append( - "etc", - CreateSystemCgroups, - CreateOSReleaseFile, - SetUserEnvVars, ).Append( "earlyServices", StartUdevd, diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go index e3e0d706a8..e4473889d6 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go @@ -22,15 +22,11 @@ import ( "syscall" "time" - "github.com/containerd/cgroups/v3" - "github.com/containerd/cgroups/v3/cgroup1" - "github.com/containerd/cgroups/v3/cgroup2" "github.com/cosi-project/runtime/pkg/resource" "github.com/cosi-project/runtime/pkg/safe" "github.com/cosi-project/runtime/pkg/state" "github.com/dustin/go-humanize" "github.com/hashicorp/go-multierror" - "github.com/opencontainers/runtime-spec/specs-go" pprocfs "github.com/prometheus/procfs" "github.com/siderolabs/gen/maps" "github.com/siderolabs/gen/xslices" @@ -52,7 +48,6 @@ import ( "github.com/siderolabs/talos/internal/app/machined/pkg/system" "github.com/siderolabs/talos/internal/app/machined/pkg/system/events" "github.com/siderolabs/talos/internal/app/machined/pkg/system/services" - "github.com/siderolabs/talos/internal/pkg/cgroup" "github.com/siderolabs/talos/internal/pkg/cri" "github.com/siderolabs/talos/internal/pkg/environment" "github.com/siderolabs/talos/internal/pkg/etcd" @@ -79,7 +74,6 @@ import ( "github.com/siderolabs/talos/pkg/machinery/resources/k8s" resourceruntime "github.com/siderolabs/talos/pkg/machinery/resources/runtime" resourcev1alpha1 "github.com/siderolabs/talos/pkg/machinery/resources/v1alpha1" - "github.com/siderolabs/talos/pkg/machinery/version" "github.com/siderolabs/talos/pkg/minimal" ) @@ -122,15 +116,6 @@ func WaitForUSB(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { }, "waitForUSB" } -// LogMode represents the LogMode task. -func LogMode(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { - return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error { - logger.Printf("running in mode: %s", r.State().Platform().Mode()) - - return nil - }, "logMode" -} - // EnforceKSPPRequirements represents the EnforceKSPPRequirements task. func EnforceKSPPRequirements(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { @@ -142,333 +127,6 @@ func EnforceKSPPRequirements(runtime.Sequence, any) (runtime.TaskExecutionFunc, }, "enforceKSPPRequirements" } -// SetupSystemDirectory represents the SetupSystemDirectory task. -func SetupSystemDirectory(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { - return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { - for _, p := range []string{constants.SystemEtcPath, constants.SystemVarPath, constants.StateMountPoint} { - if err = os.MkdirAll(p, 0o700); err != nil { - return err - } - } - - for _, p := range []string{constants.SystemRunPath} { - if err = os.MkdirAll(p, 0o751); err != nil { - return err - } - } - - return nil - }, "setupSystemDirectory" -} - -// CreateSystemCgroups represents the CreateSystemCgroups task. -// -//nolint:gocyclo -func CreateSystemCgroups(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { - return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { - // in container mode cgroups mode depends on cgroups provided by the container runtime - if r.State().Platform().Mode() != runtime.ModeContainer { - // assert that cgroupsv2 is being used when running not in container mode, - // as Talos sets up cgroupsv2 on its own - if cgroups.Mode() != cgroups.Unified && !mountv2.ForceGGroupsV1() { - return errors.New("cgroupsv2 should be used") - } - } - - // Initialize cgroups root path. - if err = cgroup.InitRoot(); err != nil { - return fmt.Errorf("error initializing cgroups root path: %w", err) - } - - logger.Printf("using cgroups root: %s", cgroup.Root()) - - groups := []struct { - name string - resources *cgroup2.Resources - }{ - { - name: constants.CgroupInit, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Min: pointer.To[int64](constants.CgroupInitReservedMemory), - Low: pointer.To[int64](constants.CgroupInitReservedMemory * 2), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupInitMillicores))), - }, - }, - }, - { - name: constants.CgroupSystem, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Min: pointer.To[int64](constants.CgroupSystemReservedMemory), - Low: pointer.To[int64](constants.CgroupSystemReservedMemory * 2), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemMillicores))), - }, - }, - }, - { - name: constants.CgroupSystemRuntime, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Min: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory), - Low: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory * 2), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemRuntimeMillicores))), - }, - }, - }, - { - name: constants.CgroupUdevd, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Min: pointer.To[int64](constants.CgroupUdevdReservedMemory), - Low: pointer.To[int64](constants.CgroupUdevdReservedMemory * 2), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupUdevdMillicores))), - }, - }, - }, - { - name: constants.CgroupPodRuntimeRoot, - resources: &cgroup2.Resources{ - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeRootMillicores))), - }, - }, - }, - { - name: constants.CgroupPodRuntime, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Min: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory), - Low: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory * 2), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeMillicores))), - }, - }, - }, - { - name: constants.CgroupKubelet, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Min: pointer.To[int64](constants.CgroupKubeletReservedMemory), - Low: pointer.To[int64](constants.CgroupKubeletReservedMemory * 2), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupKubeletMillicores))), - }, - }, - }, - { - name: constants.CgroupDashboard, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Max: pointer.To[int64](constants.CgroupDashboardMaxMemory), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupDashboardMillicores))), - }, - }, - }, - { - name: constants.CgroupApid, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Min: pointer.To[int64](constants.CgroupApidReservedMemory), - Low: pointer.To[int64](constants.CgroupApidReservedMemory * 2), - Max: pointer.To[int64](constants.CgroupApidMaxMemory), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupApidMillicores))), - }, - }, - }, - { - name: constants.CgroupTrustd, - resources: &cgroup2.Resources{ - Memory: &cgroup2.Memory{ - Min: pointer.To[int64](constants.CgroupTrustdReservedMemory), - Low: pointer.To[int64](constants.CgroupTrustdReservedMemory * 2), - Max: pointer.To[int64](constants.CgroupTrustdMaxMemory), - }, - CPU: &cgroup2.CPU{ - Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupTrustdMillicores))), - }, - }, - }, - } - - for _, c := range groups { - if cgroups.Mode() == cgroups.Unified { - resources := c.resources - - if r.State().Platform().Mode() == runtime.ModeContainer { - // don't attempt to set resources in container mode, as they might conflict with the parent cgroup tree - resources = &cgroup2.Resources{} - } - - cg, err := cgroup2.NewManager(constants.CgroupMountPath, cgroup.Path(c.name), resources) - if err != nil { - return fmt.Errorf("failed to create cgroup: %w", err) - } - - if c.name == constants.CgroupInit { - if err := cg.AddProc(uint64(os.Getpid())); err != nil { - return fmt.Errorf("failed to move init process to cgroup: %w", err) - } - } - } else { - cg, err := cgroup1.New(cgroup1.StaticPath(c.name), &specs.LinuxResources{}) - if err != nil { - return fmt.Errorf("failed to create cgroup: %w", err) - } - - if c.name == constants.CgroupInit { - if err := cg.Add(cgroup1.Process{ - Pid: os.Getpid(), - }); err != nil { - return fmt.Errorf("failed to move init process to cgroup: %w", err) - } - } - } - } - - return nil - }, "CreateSystemCgroups" -} - -// MountCgroups represents the MountCgroups task. -func MountCgroups(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { - return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error { - _, err := mountv2.CGroupMountPoints().Mount() - - return err - }, "mountCgroups" -} - -// SetRLimit represents the SetRLimit task. -func SetRLimit(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { - return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { - // TODO(andrewrynhard): Should we read limit from /proc/sys/fs/nr_open? - return unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{Cur: 1048576, Max: 1048576}) - }, "setRLimit" -} - -// See https://www.kernel.org/doc/Documentation/ABI/testing/ima_policy -var rules = []string{ - "dont_measure fsmagic=0x9fa0", // PROC_SUPER_MAGIC - "dont_measure fsmagic=0x62656572", // SYSFS_MAGIC - "dont_measure fsmagic=0x64626720", // DEBUGFS_MAGIC - "dont_measure fsmagic=0x1021994", // TMPFS_MAGIC - "dont_measure fsmagic=0x1cd1", // DEVPTS_SUPER_MAGIC - "dont_measure fsmagic=0x42494e4d", // BINFMTFS_MAGIC - "dont_measure fsmagic=0x73636673", // SECURITYFS_MAGIC - "dont_measure fsmagic=0xf97cff8c", // SELINUX_MAGIC - "dont_measure fsmagic=0x43415d53", // SMACK_MAGIC - "dont_measure fsmagic=0x27e0eb", // CGROUP_SUPER_MAGIC - "dont_measure fsmagic=0x63677270", // CGROUP2_SUPER_MAGIC - "dont_measure fsmagic=0x6e736673", // NSFS_MAGIC - "dont_measure fsmagic=0xde5e81e4", // EFIVARFS_MAGIC - "dont_measure fsmagic=0x58465342", // XFS_MAGIC - "dont_measure fsmagic=0x794c7630", // OVERLAYFS_SUPER_MAGIC - "dont_measure fsmagic=0x9123683e", // BTRFS_SUPER_MAGIC - "dont_measure fsmagic=0x72b6", // JFFS2_SUPER_MAGIC - "dont_measure fsmagic=0x4d44", // MSDOS_SUPER_MAGIC - "dont_measure fsmagic=0x2011bab0", // EXFAT_SUPER_MAGIC - "dont_measure fsmagic=0x6969", // NFS_SUPER_MAGIC - "dont_measure fsmagic=0x5346544e", // NTFS_SB_MAGIC - "dont_measure fsmagic=0x9660", // ISOFS_SUPER_MAGIC - "dont_measure fsmagic=0x15013346", // UDF_SUPER_MAGIC - "dont_measure fsmagic=0x52654973", // REISERFS_SUPER_MAGIC - "dont_measure fsmagic=0x137d", // EXT_SUPER_MAGIC - "dont_measure fsmagic=0xef51", // EXT2_OLD_SUPER_MAGIC - "dont_measure fsmagic=0xef53", // EXT2_SUPER_MAGIC / EXT3_SUPER_MAGIC / EXT4_SUPER_MAGIC - "dont_measure fsmagic=0x00c36400", // CEPH_SUPER_MAGIC - "dont_measure fsmagic=0x65735543", // FUSE_CTL_SUPER_MAGIC - "measure func=MMAP_CHECK mask=MAY_EXEC", - "measure func=BPRM_CHECK mask=MAY_EXEC", - "measure func=FILE_CHECK mask=^MAY_READ euid=0", - "measure func=FILE_CHECK mask=^MAY_READ uid=0", - "measure func=MODULE_CHECK", - "measure func=FIRMWARE_CHECK", - "measure func=POLICY_CHECK", -} - -// WriteIMAPolicy represents the WriteIMAPolicy task. -func WriteIMAPolicy(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { - return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { - if _, err = os.Stat("/sys/kernel/security/ima/policy"); os.IsNotExist(err) { - return fmt.Errorf("policy file does not exist: %w", err) - } - - f, err := os.OpenFile("/sys/kernel/security/ima/policy", os.O_APPEND|os.O_WRONLY, 0o644) - if err != nil { - return err - } - - defer f.Close() //nolint:errcheck - - for _, line := range rules { - if _, err = f.WriteString(line + "\n"); err != nil { - return fmt.Errorf("rule %q is invalid", err) - } - } - - return nil - }, "writeIMAPolicy" -} - -// OSRelease renders a valid /etc/os-release file and writes it to disk. The -// node's OS Image field is reported by the node from /etc/os-release. -func OSRelease() (err error) { - if err = createBindMount(filepath.Join(constants.SystemEtcPath, "os-release"), "/etc/os-release"); err != nil { - return err - } - - contents, err := version.OSRelease() - if err != nil { - return err - } - - return os.WriteFile(filepath.Join(constants.SystemEtcPath, "os-release"), contents, 0o644) -} - -// createBindMount creates a common way to create a writable source file with a -// bind mounted destination. This is most commonly used for well known files -// under /etc that need to be adjusted during startup. -func createBindMount(src, dst string) (err error) { - var f *os.File - - if f, err = os.OpenFile(src, os.O_WRONLY|os.O_CREATE, 0o644); err != nil { - return err - } - - if err = f.Close(); err != nil { - return err - } - - if err = unix.Mount(src, dst, "", unix.MS_BIND, ""); err != nil { - return fmt.Errorf("failed to create bind mount for %s: %w", dst, err) - } - - return nil -} - -// CreateOSReleaseFile represents the CreateOSReleaseFile task. -func CreateOSReleaseFile(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { - return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { - // Create /etc/os-release. - return OSRelease() - }, "createOSReleaseFile" -} - // LoadConfig represents the LoadConfig task. func LoadConfig(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error { @@ -2428,13 +2086,6 @@ func WaitForCARoots(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { }, "waitForCARoots" } -// InitVolumeLifecycle initializes volume lifecycle resource. -func InitVolumeLifecycle(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { - return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error { - return r.State().V1Alpha2().Resources().Create(ctx, blockres.NewVolumeLifecycle(blockres.NamespaceName, blockres.VolumeLifecycleID)) - }, "initVolumeLifecycle" -} - // TeardownVolumeLifecycle tears down volume lifecycle resource. func TeardownVolumeLifecycle(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error { diff --git a/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go b/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go index aacc2b9bef..918bf40d4a 100644 --- a/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go +++ b/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go @@ -67,7 +67,7 @@ func NewController(v1alpha1Runtime runtime.Runtime) (*Controller, error) { var err error - ctrl.logger, err = ctrl.makeLogger("controller-runtime") + ctrl.logger, err = ctrl.MakeLogger("controller-runtime") if err != nil { return nil, err } @@ -82,7 +82,7 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error // adjust the log level based on machine configuration go ctrl.watchMachineConfig(ctx) - dnsCacheLogger, err := ctrl.makeLogger("dns-resolve-cache") + dnsCacheLogger, err := ctrl.MakeLogger("dns-resolve-cache") if err != nil { return err } @@ -520,8 +520,9 @@ func (ctrl *Controller) updateLoggingConfig(ctx context.Context, dests []talosco wg.Wait() } -func (ctrl *Controller) makeLogger(s string) (*zap.Logger, error) { - logWriter, err := ctrl.loggingManager.ServiceLog(s).Writer() +// MakeLogger creates a logger for a service. +func (ctrl *Controller) MakeLogger(serviceName string) (*zap.Logger, error) { + logWriter, err := ctrl.loggingManager.ServiceLog(serviceName).Writer() if err != nil { return nil, err } @@ -535,5 +536,5 @@ func (ctrl *Controller) makeLogger(s string) (*zap.Logger, error) { logging.WithoutLogLevels(), logging.WithControllerErrorSuppressor(constants.ConsoleLogErrorSuppressThreshold), ), - ).With(logging.Component(s)), nil + ).With(logging.Component(serviceName)), nil } diff --git a/internal/app/machined/pkg/startup/cgroups.go b/internal/app/machined/pkg/startup/cgroups.go new file mode 100644 index 0000000000..abf6951b4a --- /dev/null +++ b/internal/app/machined/pkg/startup/cgroups.go @@ -0,0 +1,205 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package startup + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/containerd/cgroups/v3" + "github.com/containerd/cgroups/v3/cgroup1" + "github.com/containerd/cgroups/v3/cgroup2" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/siderolabs/go-pointer" + "go.uber.org/zap" + + "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" + "github.com/siderolabs/talos/internal/pkg/cgroup" + "github.com/siderolabs/talos/internal/pkg/mount/v2" + "github.com/siderolabs/talos/pkg/machinery/constants" +) + +// CreateSystemCgroups creates system cgroups. +// +//nolint:gocyclo +func CreateSystemCgroups(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + // in container mode cgroups mode depends on cgroups provided by the container runtime + if !rt.State().Platform().Mode().InContainer() { + // assert that cgroupsv2 is being used when running not in container mode, + // as Talos sets up cgroupsv2 on its own + if cgroups.Mode() != cgroups.Unified && !mount.ForceGGroupsV1() { + return errors.New("cgroupsv2 should be used") + } + } + + // Initialize cgroups root path. + if err := cgroup.InitRoot(); err != nil { + return fmt.Errorf("error initializing cgroups root path: %w", err) + } + + log.Info("initializing cgroups", zap.String("root", cgroup.Root())) + + groups := []struct { + name string + resources *cgroup2.Resources + }{ + { + name: constants.CgroupInit, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Min: pointer.To[int64](constants.CgroupInitReservedMemory), + Low: pointer.To[int64](constants.CgroupInitReservedMemory * 2), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupInitMillicores))), + }, + }, + }, + { + name: constants.CgroupSystem, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Min: pointer.To[int64](constants.CgroupSystemReservedMemory), + Low: pointer.To[int64](constants.CgroupSystemReservedMemory * 2), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemMillicores))), + }, + }, + }, + { + name: constants.CgroupSystemRuntime, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Min: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory), + Low: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory * 2), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemRuntimeMillicores))), + }, + }, + }, + { + name: constants.CgroupUdevd, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Min: pointer.To[int64](constants.CgroupUdevdReservedMemory), + Low: pointer.To[int64](constants.CgroupUdevdReservedMemory * 2), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupUdevdMillicores))), + }, + }, + }, + { + name: constants.CgroupPodRuntimeRoot, + resources: &cgroup2.Resources{ + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeRootMillicores))), + }, + }, + }, + { + name: constants.CgroupPodRuntime, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Min: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory), + Low: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory * 2), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeMillicores))), + }, + }, + }, + { + name: constants.CgroupKubelet, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Min: pointer.To[int64](constants.CgroupKubeletReservedMemory), + Low: pointer.To[int64](constants.CgroupKubeletReservedMemory * 2), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupKubeletMillicores))), + }, + }, + }, + { + name: constants.CgroupDashboard, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Max: pointer.To[int64](constants.CgroupDashboardMaxMemory), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupDashboardMillicores))), + }, + }, + }, + { + name: constants.CgroupApid, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Min: pointer.To[int64](constants.CgroupApidReservedMemory), + Low: pointer.To[int64](constants.CgroupApidReservedMemory * 2), + Max: pointer.To[int64](constants.CgroupApidMaxMemory), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupApidMillicores))), + }, + }, + }, + { + name: constants.CgroupTrustd, + resources: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Min: pointer.To[int64](constants.CgroupTrustdReservedMemory), + Low: pointer.To[int64](constants.CgroupTrustdReservedMemory * 2), + Max: pointer.To[int64](constants.CgroupTrustdMaxMemory), + }, + CPU: &cgroup2.CPU{ + Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupTrustdMillicores))), + }, + }, + }, + } + + for _, c := range groups { + if cgroups.Mode() == cgroups.Unified { + resources := c.resources + + if rt.State().Platform().Mode().InContainer() { + // don't attempt to set resources in container mode, as they might conflict with the parent cgroup tree + resources = &cgroup2.Resources{} + } + + cg, err := cgroup2.NewManager(constants.CgroupMountPath, cgroup.Path(c.name), resources) + if err != nil { + return fmt.Errorf("failed to create cgroup: %w", err) + } + + if c.name == constants.CgroupInit { + if err := cg.AddProc(uint64(os.Getpid())); err != nil { + return fmt.Errorf("failed to move init process to cgroup: %w", err) + } + } + } else { + cg, err := cgroup1.New(cgroup1.StaticPath(c.name), &specs.LinuxResources{}) + if err != nil { + return fmt.Errorf("failed to create cgroup: %w", err) + } + + if c.name == constants.CgroupInit { + if err := cg.Add(cgroup1.Process{ + Pid: os.Getpid(), + }); err != nil { + return fmt.Errorf("failed to move init process to cgroup: %w", err) + } + } + } + } + + return next()(ctx, log, rt, next) +} diff --git a/internal/app/machined/pkg/startup/ima.go b/internal/app/machined/pkg/startup/ima.go new file mode 100644 index 0000000000..83ee18eddb --- /dev/null +++ b/internal/app/machined/pkg/startup/ima.go @@ -0,0 +1,85 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package startup + +import ( + "context" + "fmt" + "os" + + "go.uber.org/zap" + + "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" +) + +// See https://www.kernel.org/doc/Documentation/ABI/testing/ima_policy +var rules = []string{ + "dont_measure fsmagic=0x9fa0", // PROC_SUPER_MAGIC + "dont_measure fsmagic=0x62656572", // SYSFS_MAGIC + "dont_measure fsmagic=0x64626720", // DEBUGFS_MAGIC + "dont_measure fsmagic=0x1021994", // TMPFS_MAGIC + "dont_measure fsmagic=0x1cd1", // DEVPTS_SUPER_MAGIC + "dont_measure fsmagic=0x42494e4d", // BINFMTFS_MAGIC + "dont_measure fsmagic=0x73636673", // SECURITYFS_MAGIC + "dont_measure fsmagic=0xf97cff8c", // SELINUX_MAGIC + "dont_measure fsmagic=0x43415d53", // SMACK_MAGIC + "dont_measure fsmagic=0x27e0eb", // CGROUP_SUPER_MAGIC + "dont_measure fsmagic=0x63677270", // CGROUP2_SUPER_MAGIC + "dont_measure fsmagic=0x6e736673", // NSFS_MAGIC + "dont_measure fsmagic=0xde5e81e4", // EFIVARFS_MAGIC + "dont_measure fsmagic=0x58465342", // XFS_MAGIC + "dont_measure fsmagic=0x794c7630", // OVERLAYFS_SUPER_MAGIC + "dont_measure fsmagic=0x9123683e", // BTRFS_SUPER_MAGIC + "dont_measure fsmagic=0x72b6", // JFFS2_SUPER_MAGIC + "dont_measure fsmagic=0x4d44", // MSDOS_SUPER_MAGIC + "dont_measure fsmagic=0x2011bab0", // EXFAT_SUPER_MAGIC + "dont_measure fsmagic=0x6969", // NFS_SUPER_MAGIC + "dont_measure fsmagic=0x5346544e", // NTFS_SB_MAGIC + "dont_measure fsmagic=0x9660", // ISOFS_SUPER_MAGIC + "dont_measure fsmagic=0x15013346", // UDF_SUPER_MAGIC + "dont_measure fsmagic=0x52654973", // REISERFS_SUPER_MAGIC + "dont_measure fsmagic=0x137d", // EXT_SUPER_MAGIC + "dont_measure fsmagic=0xef51", // EXT2_OLD_SUPER_MAGIC + "dont_measure fsmagic=0xef53", // EXT2_SUPER_MAGIC / EXT3_SUPER_MAGIC / EXT4_SUPER_MAGIC + "dont_measure fsmagic=0x00c36400", // CEPH_SUPER_MAGIC + "dont_measure fsmagic=0x65735543", // FUSE_CTL_SUPER_MAGIC + "measure func=MMAP_CHECK mask=MAY_EXEC", + "measure func=BPRM_CHECK mask=MAY_EXEC", + "measure func=FILE_CHECK mask=^MAY_READ euid=0", + "measure func=FILE_CHECK mask=^MAY_READ uid=0", + "measure func=MODULE_CHECK", + "measure func=FIRMWARE_CHECK", + "measure func=POLICY_CHECK", +} + +// WriteIMAPolicy represents the WriteIMAPolicy task. +func WriteIMAPolicy(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + if rt.State().Platform().Mode().InContainer() { + return next()(ctx, log, rt, next) + } + + if _, err := os.Stat("/sys/kernel/security/ima/policy"); os.IsNotExist(err) { + return fmt.Errorf("policy file does not exist: %w", err) + } + + f, err := os.OpenFile("/sys/kernel/security/ima/policy", os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + + defer f.Close() //nolint:errcheck + + for _, line := range rules { + if _, err = f.WriteString(line + "\n"); err != nil { + return fmt.Errorf("rule %q is invalid", err) + } + } + + if err = f.Close(); err != nil { + return fmt.Errorf("failed to close policy file: %w", err) + } + + return next()(ctx, log, rt, next) +} diff --git a/internal/app/machined/pkg/startup/os_release.go b/internal/app/machined/pkg/startup/os_release.go new file mode 100644 index 0000000000..2f44189a6a --- /dev/null +++ b/internal/app/machined/pkg/startup/os_release.go @@ -0,0 +1,60 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package startup + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "go.uber.org/zap" + "golang.org/x/sys/unix" + + "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" + "github.com/siderolabs/talos/pkg/machinery/constants" + "github.com/siderolabs/talos/pkg/machinery/version" +) + +// OSRelease renders a valid /etc/os-release file and writes it to disk. +// +// The node's OS Image field is reported by the node from /etc/os-release. +func OSRelease(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + if err := createBindMount(filepath.Join(constants.SystemEtcPath, "os-release"), "/etc/os-release"); err != nil { + return err + } + + contents, err := version.OSRelease() + if err != nil { + return err + } + + if err = os.WriteFile(filepath.Join(constants.SystemEtcPath, "os-release"), contents, 0o644); err != nil { + return fmt.Errorf("failed to write os-release: %w", err) + } + + return next()(ctx, log, rt, next) +} + +// createBindMount creates a common way to create a writable source file with a +// bind mounted destination. This is most commonly used for well known files +// under /etc that need to be adjusted during startup. +func createBindMount(src, dst string) (err error) { + var f *os.File + + if f, err = os.OpenFile(src, os.O_WRONLY|os.O_CREATE, 0o644); err != nil { + return err + } + + if err = f.Close(); err != nil { + return err + } + + if err = unix.Mount(src, dst, "", unix.MS_BIND, ""); err != nil { + return fmt.Errorf("failed to create bind mount for %s: %w", dst, err) + } + + return nil +} diff --git a/internal/app/machined/pkg/startup/startup.go b/internal/app/machined/pkg/startup/startup.go new file mode 100644 index 0000000000..2ebd45ad84 --- /dev/null +++ b/internal/app/machined/pkg/startup/startup.go @@ -0,0 +1,51 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// Package startup provides machined startup tasks. +package startup + +import ( + "context" + + "go.uber.org/zap" + + "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" +) + +// Task is a function that performs a startup task. +// +// It is supposed to call the next task in the chain. +type Task func(context.Context, *zap.Logger, runtime.Runtime, NextTaskFunc) error + +// NextTaskFunc is a function which returns the next task in the chain. +type NextTaskFunc func() Task + +// RunTasks runs the given tasks in order. +func RunTasks(ctx context.Context, log *zap.Logger, rt runtime.Runtime, tasks ...Task) error { + var idx int + + nextTaskFunc := func() Task { + idx++ + + return tasks[idx] + } + + return tasks[0](ctx, log, rt, nextTaskFunc) +} + +// DefaultTasks returns the default startup tasks. +func DefaultTasks() []Task { + return []Task{ + LogMode, + MountPseudoLate, + SetupSystemDirectories, + InitVolumeLifecycle, + MountCgroups, + SetRLimit, + SetEnvironmentVariables, + WriteIMAPolicy, + CreateSystemCgroups, + OSRelease, + } +} diff --git a/internal/app/machined/pkg/startup/tasks.go b/internal/app/machined/pkg/startup/tasks.go new file mode 100644 index 0000000000..2047344268 --- /dev/null +++ b/internal/app/machined/pkg/startup/tasks.go @@ -0,0 +1,133 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package startup + +import ( + "context" + "errors" + "fmt" + "os" + "strings" + + "go.uber.org/zap" + "golang.org/x/sys/unix" + + "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" + "github.com/siderolabs/talos/internal/pkg/environment" + "github.com/siderolabs/talos/internal/pkg/mount/v2" + "github.com/siderolabs/talos/pkg/machinery/constants" + "github.com/siderolabs/talos/pkg/machinery/resources/block" +) + +// LogMode prints the current mode. +func LogMode(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + log.Info("platform information", zap.Stringer("mode", rt.State().Platform().Mode())) + + return next()(ctx, log, rt, next) +} + +// SetupSystemDirectories creates system default directories. +func SetupSystemDirectories(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + for _, path := range []string{constants.SystemEtcPath, constants.SystemVarPath, constants.StateMountPoint} { + if err := os.MkdirAll(path, 0o700); err != nil { + return fmt.Errorf("setupSystemDirectories: %w", err) + } + } + + for _, path := range []string{constants.SystemRunPath} { + if err := os.MkdirAll(path, 0o751); err != nil { + return fmt.Errorf("setupSystemDirectories: %w", err) + } + } + + return next()(ctx, log, rt, next) +} + +// InitVolumeLifecycle initializes volume lifecycle resource. +func InitVolumeLifecycle(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + if err := rt.State().V1Alpha2().Resources().Create(ctx, block.NewVolumeLifecycle(block.NamespaceName, block.VolumeLifecycleID)); err != nil { + return fmt.Errorf("initVolumeLifecycle: %w", err) + } + + return next()(ctx, log, rt, next) +} + +// MountCgroups represents mounts the cgroupfs (only in !container). +// +//nolint:dupl +func MountCgroups(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + if rt.State().Platform().Mode().InContainer() { + return next()(ctx, log, rt, next) + } + + unmounter, err := mount.CGroupMountPoints().Mount() + if err != nil { + return fmt.Errorf("mountCgroups: %w", err) + } + + defer func() { + if err := unmounter(); err != nil { + log.Warn("failed to unmount cgroups", zap.Error(err)) + } + }() + + return next()(ctx, log, rt, next) +} + +// MountPseudoLate mounts the late pseudo filesystems (only in !container). +// +//nolint:dupl +func MountPseudoLate(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + if rt.State().Platform().Mode().InContainer() { + return next()(ctx, log, rt, next) + } + + unmounter, err := mount.PseudoLate().Mount() + if err != nil { + return fmt.Errorf("mountPseudoLate: %w", err) + } + + defer func() { + if err := unmounter(); err != nil { + log.Warn("failed to unmount pseudo late", zap.Error(err)) + } + }() + + return next()(ctx, log, rt, next) +} + +// SetRLimit sets the file descriptor limit. +func SetRLimit(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + if rt.State().Platform().Mode().InContainer() { + return next()(ctx, log, rt, next) + } + + if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{Cur: 1048576, Max: 1048576}); err != nil { + return fmt.Errorf("setRLimit: %w", err) + } + + return next()(ctx, log, rt, next) +} + +// SetEnvironmentVariables sets the environment variables. +func SetEnvironmentVariables(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error { + // Set the PATH env var. + if err := os.Setenv("PATH", constants.PATH); err != nil { + return errors.New("error setting PATH") + } + + if !rt.State().Platform().Mode().InContainer() { + // in container mode, ignore cmdline + for _, env := range environment.Get(nil) { + key, val, _ := strings.Cut(env, "=") + + if err := os.Setenv(key, val); err != nil { + return fmt.Errorf("error setting %s: %w", val, err) + } + } + } + + return next()(ctx, log, rt, next) +} diff --git a/internal/pkg/mount/v2/pseudo.go b/internal/pkg/mount/v2/pseudo.go index dcbf580fb1..e75ef888c1 100644 --- a/internal/pkg/mount/v2/pseudo.go +++ b/internal/pkg/mount/v2/pseudo.go @@ -18,6 +18,12 @@ func Pseudo() Points { NewPoint("devtmpfs", "/dev", "devtmpfs", WithFlags(unix.MS_NOSUID), WithData("mode=0755")), NewPoint("proc", "/proc", "proc", WithFlags(unix.MS_NOSUID|unix.MS_NOEXEC|unix.MS_NODEV)), NewPoint("sysfs", "/sys", "sysfs"), + } +} + +// PseudoLate returns the mountpoints mounted later in the boot cycle. +func PseudoLate() Points { + return Points{ NewPoint("tmpfs", "/run", "tmpfs", WithFlags(unix.MS_NOSUID|unix.MS_NOEXEC|unix.MS_RELATIME), WithData("mode=0755")), NewPoint("tmpfs", "/system", "tmpfs", WithData("mode=0755")), NewPoint("tmpfs", "/tmp", "tmpfs", WithFlags(unix.MS_NOSUID|unix.MS_NOEXEC|unix.MS_NODEV), WithData("size=64M"), WithData("mode=0755")),