From 64ff1c4b4912841ca1a7c67af2ca154df3cf2ca1 Mon Sep 17 00:00:00 2001 From: Leonard Cohnen Date: Fri, 3 Nov 2023 16:52:08 +0100 Subject: [PATCH] daemon: push own CiliumNode later When WireGuard node-to-node encryption is enabled and the control-planes are encrypted, this leads to the KubeAPI becoming unresponsive. This happens when the second control-plane with a stacked etcd architecture joins because the second etcd will join the first and then the first control-plane consumes the published CiliumNode CR and adds the node to its WireGuard interface and IPCache so that all traffic is now routed over it. This includes the etcd traffic. The second node does not yet have the first control-plane added to the WireGuard interface, hence the etcd traffic is dropped. This leads to an unresponsive KubeAPI when the second node now queries the CiliumNode CR it has created and the daemon setup never reaches the inclusion of the first node in the WireGuard interface. Therefore, we re-order the setup logic to first enable the CiliumNode watchers and push the node's own CiliumNode resource later. Fixes: #28965 Signed-off-by: Leonard Cohnen --- daemon/cmd/daemon.go | 70 ++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/daemon/cmd/daemon.go b/daemon/cmd/daemon.go index 137e1a74aa193..33b9b632982f0 100644 --- a/daemon/cmd/daemon.go +++ b/daemon/cmd/daemon.go @@ -676,41 +676,6 @@ func newDaemon(ctx context.Context, cleaner *daemonCleanup, params *daemonParams bootstrapStats.fqdn.End(true) - if params.Clientset.IsEnabled() { - bootstrapStats.k8sInit.Start() - // Errors are handled inside WaitForCRDsToRegister. It will fatal on a - // context deadline or if the context has been cancelled, the context's - // error will be returned. Otherwise, it succeeded. 
- if !option.Config.DryMode { - if err := d.k8sWatcher.WaitForCRDsToRegister(d.ctx); err != nil { - return nil, restoredEndpoints, err - } - } - - if option.Config.IPAM == ipamOption.IPAMClusterPool || - option.Config.IPAM == ipamOption.IPAMMultiPool { - // Create the CiliumNode custom resource. This call will block until - // the custom resource has been created - d.nodeDiscovery.UpdateCiliumNodeResource() - } - - if err := agentK8s.WaitForNodeInformation(d.ctx, log, params.Resources.LocalNode, params.Resources.LocalCiliumNode); err != nil { - log.WithError(err).Error("unable to connect to get node spec from apiserver") - return nil, nil, fmt.Errorf("unable to connect to get node spec from apiserver: %w", err) - } - - // Kubernetes demands that the localhost can always reach local - // pods. Therefore unless the AllowLocalhost policy is set to a - // specific mode, always allow localhost to reach local - // endpoints. - if option.Config.AllowLocalhost == option.AllowLocalhostAuto { - option.Config.AllowLocalhost = option.AllowLocalhostAlways - log.Info("k8s mode: Allowing localhost to reach local endpoints") - } - - bootstrapStats.k8sInit.End(true) - } - if params.WGAgent != nil && option.Config.EnableWireguard { if err := params.WGAgent.Init(d.ipcache, d.mtuConfig); err != nil { log.WithError(err).Error("failed to initialize WireGuard agent") @@ -824,6 +789,41 @@ func newDaemon(ctx context.Context, cleaner *daemonCleanup, params *daemonParams close(params.CacheStatus) } + if params.Clientset.IsEnabled() { + bootstrapStats.k8sInit.Start() + // Errors are handled inside WaitForCRDsToRegister. It will fatal on a + // context deadline or if the context has been cancelled, the context's + // error will be returned. Otherwise, it succeeded. 
+ if !option.Config.DryMode { + if err := d.k8sWatcher.WaitForCRDsToRegister(d.ctx); err != nil { + return nil, restoredEndpoints, err + } + } + + if option.Config.IPAM == ipamOption.IPAMClusterPool || + option.Config.IPAM == ipamOption.IPAMMultiPool { + // Create the CiliumNode custom resource. This call will block until + // the custom resource has been created + d.nodeDiscovery.UpdateCiliumNodeResource() + } + + if err := agentK8s.WaitForNodeInformation(d.ctx, log, params.Resources.LocalNode, params.Resources.LocalCiliumNode); err != nil { + log.WithError(err).Error("unable to connect to get node spec from apiserver") + return nil, nil, fmt.Errorf("unable to connect to get node spec from apiserver: %w", err) + } + + // Kubernetes demands that the localhost can always reach local + // pods. Therefore unless the AllowLocalhost policy is set to a + // specific mode, always allow localhost to reach local + // endpoints. + if option.Config.AllowLocalhost == option.AllowLocalhostAuto { + option.Config.AllowLocalhost = option.AllowLocalhostAlways + log.Info("k8s mode: Allowing localhost to reach local endpoints") + } + + bootstrapStats.k8sInit.End(true) + } + bootstrapStats.cleanup.Start() err = clearCiliumVeths() bootstrapStats.cleanup.EndError(err)