Skip to content

Commit

Permalink
fix: multiple issues with opening encrypted volumes
Browse files Browse the repository at this point in the history
Fixes #9820

This only affects volumes with multiple key slots configured.

Make sync issues non-fatal, so that if some keys fail to sync, proceed
with normal boot, but record an error in the `VolumeStatus` resource.

When opening, correctly try all key slots.

Signed-off-by: Andrey Smirnov <[email protected]>
  • Loading branch information
smira committed Nov 28, 2024
1 parent 145b026 commit fc3b315
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 77 deletions.
1 change: 1 addition & 0 deletions api/resource/definitions/block/block.proto
Original file line number Diff line number Diff line change
Expand Up @@ -168,5 +168,6 @@ message VolumeStatusSpec {
string mount_location = 11;
talos.resource.definitions.enums.BlockEncryptionProviderType encryption_provider = 12;
string pretty_size = 13;
repeated string encryption_failed_syncs = 14;
}

Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,10 @@ import (

"github.com/siderolabs/talos/internal/pkg/encryption"
"github.com/siderolabs/talos/pkg/machinery/resources/block"
"github.com/siderolabs/talos/pkg/machinery/resources/hardware"
)

// Close the encrypted volumes.
func Close(ctx context.Context, logger *zap.Logger, volumeContext ManagerContext) error {
getSystemInformation := func(ctx context.Context) (*hardware.SystemInformation, error) {
if volumeContext.SystemInformation != nil {
return volumeContext.SystemInformation, nil
}

return nil, fmt.Errorf("system information not available")
}

switch volumeContext.Cfg.TypedSpec().Encryption.Provider {
case block.EncryptionProviderNone:
// nothing to do
Expand All @@ -36,7 +27,7 @@ func Close(ctx context.Context, logger *zap.Logger, volumeContext ManagerContext
case block.EncryptionProviderLUKS2:
encryptionConfig := volumeContext.Cfg.TypedSpec().Encryption

handler, err := encryption.NewHandler(encryptionConfig, volumeContext.Cfg.Metadata().ID(), getSystemInformation)
handler, err := encryption.NewHandler(encryptionConfig, volumeContext.Cfg.Metadata().ID(), volumeContext.GetSystemInformation)
if err != nil {
return fmt.Errorf("failed to create encryption handler: %w", err)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,10 @@ import (

"github.com/siderolabs/talos/internal/pkg/encryption"
"github.com/siderolabs/talos/pkg/machinery/resources/block"
"github.com/siderolabs/talos/pkg/machinery/resources/hardware"
)

// HandleEncryption makes sure the encryption for the volumes is handled appropriately.
func HandleEncryption(ctx context.Context, logger *zap.Logger, volumeContext ManagerContext) error {
getSystemInformation := func(ctx context.Context) (*hardware.SystemInformation, error) {
if volumeContext.SystemInformation != nil {
return volumeContext.SystemInformation, nil
}

return nil, fmt.Errorf("system information not available")
}

switch volumeContext.Cfg.TypedSpec().Encryption.Provider {
case block.EncryptionProviderNone:
// nothing to do
Expand All @@ -41,7 +32,7 @@ func HandleEncryption(ctx context.Context, logger *zap.Logger, volumeContext Man
case block.EncryptionProviderLUKS2:
encryptionConfig := volumeContext.Cfg.TypedSpec().Encryption

handler, err := encryption.NewHandler(encryptionConfig, volumeContext.Cfg.Metadata().ID(), getSystemInformation)
handler, err := encryption.NewHandler(encryptionConfig, volumeContext.Cfg.Metadata().ID(), volumeContext.GetSystemInformation)
if err != nil {
return fmt.Errorf("failed to create encryption handler: %w", err)
}
Expand Down Expand Up @@ -102,7 +93,7 @@ func HandleEncryptionWithHandler(ctx context.Context, logger *zap.Logger, volume

encryptedName := filepath.Base(volumeContext.Status.Location) + "-encrypted"

encryptedPath, err := handler.Open(ctx, logger, volumeContext.Status.Location, encryptedName)
encryptedPath, failedSyncs, err := handler.Open(ctx, logger, volumeContext.Status.Location, encryptedName)
if err != nil {
return xerrors.NewTaggedf[Retryable]("error opening encrypted volume: %w", err)
}
Expand All @@ -115,6 +106,7 @@ func HandleEncryptionWithHandler(ctx context.Context, logger *zap.Logger, volume
volumeContext.Status.Phase = block.VolumePhasePrepared
volumeContext.Status.MountLocation = encryptedPath
volumeContext.Status.EncryptionProvider = volumeContext.Cfg.TypedSpec().Encryption.Provider
volumeContext.Status.EncryptionFailedSyncs = failedSyncs

return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package volumes

import (
"cmp"
"context"

"github.com/siderolabs/gen/optional"

Expand Down Expand Up @@ -67,6 +68,6 @@ type ManagerContext struct {

DevicesReady bool
PreviousWaveProvisioned bool
SystemInformation *hardware.SystemInformation
GetSystemInformation func(context.Context) (*hardware.SystemInformation, error)
Lifecycle *block.VolumeLifecycle
}
21 changes: 14 additions & 7 deletions internal/app/machined/pkg/controllers/block/volume_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package block

import (
"context"
"errors"
"fmt"
"math"
"slices"
Expand Down Expand Up @@ -181,11 +182,6 @@ func (ctrl *VolumeManagerController) Run(ctx context.Context, r controller.Runti
return fmt.Errorf("error fetching system disk: %w", err)
}

systemInfo, err := safe.ReaderGetByID[*hardware.SystemInformation](ctx, r, hardware.SystemInformationID)
if err != nil && !state.IsNotFoundError(err) {
return fmt.Errorf("error fetching system information: %w", err)
}

volumeLifecycle, err := safe.ReaderGetByID[*block.VolumeLifecycle](ctx, r, block.VolumeLifecycleID)
if err != nil && !state.IsNotFoundError(err) {
return fmt.Errorf("error fetching volume lifecycle: %w", err)
Expand Down Expand Up @@ -318,8 +314,19 @@ func (ctrl *VolumeManagerController) Run(ctx context.Context, r controller.Runti
Disks: diskSpecs,
DevicesReady: devicesReady,
PreviousWaveProvisioned: vc.TypedSpec().Provisioning.Wave <= fullyProvisionedWave,
SystemInformation: systemInfo,
Lifecycle: volumeLifecycle,
GetSystemInformation: func(ctx context.Context) (*hardware.SystemInformation, error) {
systemInfo, err := safe.ReaderGetByID[*hardware.SystemInformation](ctx, r, hardware.SystemInformationID)
if err != nil && !state.IsNotFoundError(err) {
return nil, fmt.Errorf("error fetching system information: %w", err)
}

if systemInfo == nil {
return nil, errors.New("system information not available")
}

return systemInfo, nil
},
Lifecycle: volumeLifecycle,
},
); err != nil {
volumeStatus.PreFailPhase = volumeStatus.Phase
Expand Down
26 changes: 26 additions & 0 deletions internal/integration/api/apply-config.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,20 @@ package api

import (
"context"
"encoding/json"
"net/url"
"os"
"slices"
"testing"
"time"

"github.com/cosi-project/runtime/pkg/resource/rtestutils"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/siderolabs/gen/ensure"
"github.com/siderolabs/go-pointer"
"github.com/siderolabs/go-retry/retry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc/codes"
"google.golang.org/protobuf/types/known/durationpb"

Expand All @@ -30,6 +34,7 @@ import (
"github.com/siderolabs/talos/pkg/machinery/config/types/runtime"
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/block"
mc "github.com/siderolabs/talos/pkg/machinery/resources/config"
)

Expand Down Expand Up @@ -243,6 +248,8 @@ func (suite *ApplyConfigSuite) TestApplyConfigRotateEncryptionSecrets() {

suite.WaitForBootDone(suite.ctx)

suite.T().Logf("testing encryption key rotation on node %s", node)

existing := cfg.EncryptionKeys[0]
slot := existing.Slot() + 1

Expand Down Expand Up @@ -290,6 +297,10 @@ func (suite *ApplyConfigSuite) TestApplyConfigRotateEncryptionSecrets() {

for _, keys := range keySets {
data := suite.PatchV1Alpha1Config(provider, func(cfg *v1alpha1.Config) {
suite.T().Logf("changing encryption keys to %s",
toJSONString(suite.T(), keys),
)

cfg.MachineConfig.MachineSystemDiskEncryption.EphemeralPartition.EncryptionKeys = keys
})

Expand Down Expand Up @@ -348,9 +359,24 @@ func (suite *ApplyConfigSuite) TestApplyConfigRotateEncryptionSecrets() {
}

suite.WaitForBootDone(suite.ctx)

// verify that encryption key sync has no failures
rtestutils.AssertAll(nodeCtx, suite.T(), suite.Client.COSI, func(vs *block.VolumeStatus, asrt *assert.Assertions) {
suite.Assert().Contains([]block.VolumePhase{block.VolumePhaseReady, block.VolumePhaseMissing}, vs.TypedSpec().Phase)
suite.Assert().Empty(vs.TypedSpec().EncryptionFailedSyncs)
})
}
}

func toJSONString(t *testing.T, v any) string {
t.Helper()

out, err := json.Marshal(v)
require.NoError(t, err)

return string(out)
}

// TestApplyNoReboot verifies the apply config API fails if NoReboot mode is requested on a field that can not be applied immediately.
func (suite *ApplyConfigSuite) TestApplyNoReboot() {
nodes := suite.DiscoverNodeInternalIPsByType(suite.ctx, machine.TypeWorker)
Expand Down
58 changes: 34 additions & 24 deletions internal/pkg/encryption/encryption.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ type Handler struct {
}

// Open encrypted partition.
func (h *Handler) Open(ctx context.Context, logger *zap.Logger, devicePath, encryptedName string) (string, error) {
func (h *Handler) Open(ctx context.Context, logger *zap.Logger, devicePath, encryptedName string) (string, []string, error) {
isOpen, path, err := h.encryptionProvider.IsOpen(ctx, devicePath, encryptedName)
if err != nil {
return "", err
return "", nil, err
}

var usedKey *encryption.Key
Expand All @@ -108,27 +108,29 @@ func (h *Handler) Open(ctx context.Context, logger *zap.Logger, devicePath, encr
return nil, nil, err
}

// try to open with the key, if it fails, tryHandlers will try the next handler
path, err = h.encryptionProvider.Open(ctx, devicePath, encryptedName, slotKey)
if err != nil {
return nil, nil, err
}

return slotKey, slotToken, nil
})
if err != nil {
return "", err
}

path, err = h.encryptionProvider.Open(ctx, devicePath, encryptedName, key)
if err != nil {
return "", err
return "", nil, err
}

logger.Info("opened encrypted device", zap.Int("slot", handler.Slot()), zap.String("type", fmt.Sprintf("%T", handler)))

usedKey = key
}

if err := h.syncKeys(ctx, logger, devicePath, usedKey); err != nil {
return "", err
failedSyncs, err := h.syncKeys(ctx, logger, devicePath, usedKey)
if err != nil {
return "", nil, err
}

return path, nil
return path, failedSyncs, nil
}

// Close encrypted partition.
Expand Down Expand Up @@ -177,12 +179,14 @@ func (h *Handler) FormatAndEncrypt(ctx context.Context, logger *zap.Logger, path
}

//nolint:gocyclo
func (h *Handler) syncKeys(ctx context.Context, logger *zap.Logger, path string, k *encryption.Key) error {
func (h *Handler) syncKeys(ctx context.Context, logger *zap.Logger, path string, k *encryption.Key) ([]string, error) {
keyslots, err := h.encryptionProvider.ReadKeyslots(path)
if err != nil {
return err
return nil, err
}

var failedSyncs []string

visited := map[string]bool{}

for _, handler := range h.keyHandlers {
Expand All @@ -196,17 +200,21 @@ func (h *Handler) syncKeys(ctx context.Context, logger *zap.Logger, path string,
// keyslot exists
if _, ok := keyslots.Keyslots[slot]; ok {
if err = h.updateKey(ctx, path, k, handler); err != nil {
return fmt.Errorf("error updating key slot %s %T: %w", slot, handler, err)
}
logger.Error("failed to update key", zap.Int("slot", handler.Slot()), zap.String("handler", fmt.Sprintf("%T", handler)), zap.Error(err))

logger.Info("updated encryption key", zap.Int("slot", handler.Slot()))
failedSyncs = append(failedSyncs, fmt.Sprintf("error updating key slot %s %T: %s", slot, handler, err))
} else {
logger.Info("updated encryption key", zap.Int("slot", handler.Slot()))
}
} else {
// keyslot does not exist so just add the key
if err = h.addKey(ctx, path, k, handler); err != nil {
return fmt.Errorf("error adding key slot %s %T: %w", slot, handler, err)
}
logger.Error("failed to add key", zap.Int("slot", handler.Slot()), zap.String("handler", fmt.Sprintf("%T", handler)), zap.Error(err))

logger.Info("added encryption key", zap.Int("slot", handler.Slot()))
failedSyncs = append(failedSyncs, fmt.Sprintf("error adding key slot %s %T: %s", slot, handler, err))
} else {
logger.Info("added encryption key", zap.Int("slot", handler.Slot()))
}
}
}

Expand All @@ -215,18 +223,20 @@ func (h *Handler) syncKeys(ctx context.Context, logger *zap.Logger, path string,
if !visited[slot] {
s, err := strconv.ParseInt(slot, 10, 64)
if err != nil {
return err
return nil, err
}

if err = h.encryptionProvider.RemoveKey(ctx, path, int(s), k); err != nil {
return fmt.Errorf("error removing key slot %s: %w", slot, err)
}
logger.Error("failed to remove key", zap.Int("slot", int(s)), zap.Error(err))

logger.Info("removed encryption key", zap.Int("slot", k.Slot))
failedSyncs = append(failedSyncs, fmt.Sprintf("error removing key slot %s: %s", slot, err))
} else {
logger.Info("removed encryption key", zap.Int("slot", k.Slot))
}
}
}

return nil
return failedSyncs, nil
}

func (h *Handler) updateKey(ctx context.Context, path string, existingKey *encryption.Key, handler keys.Handler) error {
Expand Down
Loading

0 comments on commit fc3b315

Please sign in to comment.