Skip to content

Commit

Permalink
daemon: push own CiliumNode later
Browse files Browse the repository at this point in the history
When WireGuard node-to-node encryption is enabled and traffic between
control-plane nodes is encrypted as well, the KubeAPI can become
unresponsive. This happens when a second control-plane node with a stacked
etcd architecture joins: the second etcd joins the first, and then the
first control-plane node consumes the published CiliumNode CR and adds
the new node to its WireGuard interface and IPCache, so that all traffic
to that node is now routed over WireGuard. This includes the etcd traffic.

The second node has not yet added the first control-plane node to
its WireGuard interface, hence the etcd traffic is dropped.
As a result, the KubeAPI is unresponsive when the second node
queries the CiliumNode CR it has created, so the daemon
setup never reaches the point where the first node is added to the
WireGuard interface.

Therefore, we re-order the setup logic to first enable the CiliumNode
watchers and push the node's own CiliumNode resource later.

Fixes: cilium#28965

Signed-off-by: Leonard Cohnen <[email protected]>
  • Loading branch information
3u13r authored and burgerdev committed May 29, 2024
1 parent c41f043 commit 64ff1c4
Showing 1 changed file with 35 additions and 35 deletions.
70 changes: 35 additions & 35 deletions daemon/cmd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,41 +676,6 @@ func newDaemon(ctx context.Context, cleaner *daemonCleanup, params *daemonParams

bootstrapStats.fqdn.End(true)

if params.Clientset.IsEnabled() {
bootstrapStats.k8sInit.Start()
// Errors are handled inside WaitForCRDsToRegister. It will fatal on a
// context deadline or if the context has been cancelled, the context's
// error will be returned. Otherwise, it succeeded.
if !option.Config.DryMode {
if err := d.k8sWatcher.WaitForCRDsToRegister(d.ctx); err != nil {
return nil, restoredEndpoints, err
}
}

if option.Config.IPAM == ipamOption.IPAMClusterPool ||
option.Config.IPAM == ipamOption.IPAMMultiPool {
// Create the CiliumNode custom resource. This call will block until
// the custom resource has been created
d.nodeDiscovery.UpdateCiliumNodeResource()
}

if err := agentK8s.WaitForNodeInformation(d.ctx, log, params.Resources.LocalNode, params.Resources.LocalCiliumNode); err != nil {
log.WithError(err).Error("unable to connect to get node spec from apiserver")
return nil, nil, fmt.Errorf("unable to connect to get node spec from apiserver: %w", err)
}

// Kubernetes demands that the localhost can always reach local
// pods. Therefore unless the AllowLocalhost policy is set to a
// specific mode, always allow localhost to reach local
// endpoints.
if option.Config.AllowLocalhost == option.AllowLocalhostAuto {
option.Config.AllowLocalhost = option.AllowLocalhostAlways
log.Info("k8s mode: Allowing localhost to reach local endpoints")
}

bootstrapStats.k8sInit.End(true)
}

if params.WGAgent != nil && option.Config.EnableWireguard {
if err := params.WGAgent.Init(d.ipcache, d.mtuConfig); err != nil {
log.WithError(err).Error("failed to initialize WireGuard agent")
Expand Down Expand Up @@ -824,6 +789,41 @@ func newDaemon(ctx context.Context, cleaner *daemonCleanup, params *daemonParams
close(params.CacheStatus)
}

if params.Clientset.IsEnabled() {
bootstrapStats.k8sInit.Start()
// Errors are handled inside WaitForCRDsToRegister. It will fatal on a
// context deadline or if the context has been cancelled, the context's
// error will be returned. Otherwise, it succeeded.
if !option.Config.DryMode {
if err := d.k8sWatcher.WaitForCRDsToRegister(d.ctx); err != nil {
return nil, restoredEndpoints, err
}
}

if option.Config.IPAM == ipamOption.IPAMClusterPool ||
option.Config.IPAM == ipamOption.IPAMMultiPool {
// Create the CiliumNode custom resource. This call will block until
// the custom resource has been created
d.nodeDiscovery.UpdateCiliumNodeResource()
}

if err := agentK8s.WaitForNodeInformation(d.ctx, log, params.Resources.LocalNode, params.Resources.LocalCiliumNode); err != nil {
log.WithError(err).Error("unable to connect to get node spec from apiserver")
return nil, nil, fmt.Errorf("unable to connect to get node spec from apiserver: %w", err)
}

// Kubernetes demands that the localhost can always reach local
// pods. Therefore unless the AllowLocalhost policy is set to a
// specific mode, always allow localhost to reach local
// endpoints.
if option.Config.AllowLocalhost == option.AllowLocalhostAuto {
option.Config.AllowLocalhost = option.AllowLocalhostAlways
log.Info("k8s mode: Allowing localhost to reach local endpoints")
}

bootstrapStats.k8sInit.End(true)
}

bootstrapStats.cleanup.Start()
err = clearCiliumVeths()
bootstrapStats.cleanup.EndError(err)
Expand Down

0 comments on commit 64ff1c4

Please sign in to comment.