Skip to content

Commit

Permalink
CE changes for vault-31750 (#29303)
Browse files Browse the repository at this point in the history
* ce changes for vault-31750

* add changelog

* make proto

* refactor naming

* clarify error message

* update changelog

* one more time

* make proto AGAIN
  • Loading branch information
raskchanky authored Jan 9, 2025
1 parent 36d7e0c commit f625f50
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 6 deletions.
3 changes: 3 additions & 0 deletions changelog/29303.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:change
core (enterprise): Add tracking of performance standbys by their HA node ID so that RPC connections can be more easily cleaned up when nodes are removed.
```
22 changes: 20 additions & 2 deletions physical/raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,9 @@ type RaftBackend struct {
// limits.
specialPathLimits map[string]uint64

removed *atomic.Bool
removedCallback func()
removed *atomic.Bool
removedCallback func()
removedServerCleanup func(context.Context, string) (bool, error)
}

func (b *RaftBackend) IsNodeRemoved(ctx context.Context, nodeID string) (bool, error) {
Expand All @@ -284,6 +285,23 @@ func (b *RaftBackend) RemoveSelf() error {
return b.stableStore.SetUint64(removedKey, 1)
}

// SetRemovedServerCleanupFunc installs f as the hook that
// RemovedServerCleanup invokes for a given node ID. The assignment is made
// while holding the backend's write lock. Passing nil clears the hook, after
// which RemovedServerCleanup reports (false, nil).
func (b *RaftBackend) SetRemovedServerCleanupFunc(f func(context.Context, string) (bool, error)) {
	b.l.Lock()
	defer b.l.Unlock()

	b.removedServerCleanup = f
}

// RemovedServerCleanup runs the hook registered through
// SetRemovedServerCleanupFunc for nodeID and returns the hook's results
// unchanged. When no hook is registered it returns (false, nil).
//
// The hook is read under the backend's read lock but invoked only after that
// lock has been released. Running caller-supplied code while holding b.l
// would deadlock any hook that needs the write lock (for example one that
// calls SetRemovedServerCleanupFunc), and a slow hook would hold off writers,
// which in turn stalls later readers queued behind them.
func (b *RaftBackend) RemovedServerCleanup(ctx context.Context, nodeID string) (bool, error) {
	// Snapshot the hook so it can be called without holding the lock.
	b.l.RLock()
	cleanup := b.removedServerCleanup
	b.l.RUnlock()

	if cleanup == nil {
		return false, nil
	}

	return cleanup(ctx, nodeID)
}

// LeaderJoinInfo contains information required by a node to join itself as a
// follower to an existing raft cluster
type LeaderJoinInfo struct {
Expand Down
4 changes: 4 additions & 0 deletions physical/raft/raft_autopilot.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,10 @@ func (d *Delegate) RemoveFailedServer(server *autopilot.Server) {
}

d.followerStates.Delete(string(server.ID))
_, err := d.RemovedServerCleanup(context.Background(), string(server.ID))
if err != nil {
d.logger.Error("failed to run cleanup", "error", err)
}
}()
}

Expand Down
8 changes: 4 additions & 4 deletions sdk/plugin/pb/backend.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions vault/logical_system_raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,13 @@ func (b *SystemBackend) handleRaftRemovePeerUpdate() framework.OperationFunc {
}

b.Core.raftFollowerStates.Delete(serverID)
_, err := raftBackend.RemovedServerCleanup(ctx, serverID)
if err != nil {
// log the error but don't return it - we might get an error if we can't find the node in the cache, which
// is not an error condition in this instance.
b.logger.Info("attempted to remove node from perf standby cache but it failed, which might be fine", "server ID", serverID, "error", err)
return nil, nil
}

return nil, nil
}
Expand Down

0 comments on commit f625f50

Please sign in to comment.