From 93951f10d3f80b9af9b23a23658b97e9765dcb5d Mon Sep 17 00:00:00 2001 From: Marco Hofstetter Date: Mon, 4 Dec 2023 10:56:23 +0100 Subject: [PATCH] connectivity test: introduce connectivity test suite timeout flag Currently, the connectivity test suite command doesn't have a timeout. Its context only gets cancelled due to an interrupt or SIGTERM signal. For most connectivity test scenarios and actions, this isn't a problem as they have individual fine-grained timeouts (e.g. request timeout). But there are test scenarios (e.g. `health` - `Cilium Health Probe`) that might run indefinitely on failure (e.g. if a Cilium Agent Pod is no longer available). Therefore, this commit introduces a new flag `--timeout` that provides the possibility to configure an overall timeout for the connectivity test suite. It defaults to `0` - meaning no timeout by default (current behaviour). Signed-off-by: Marco Hofstetter --- connectivity/check/check.go | 2 ++ defaults/defaults.go | 7 ++++++- internal/cli/cmd/connectivity.go | 13 +++++++++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/connectivity/check/check.go b/connectivity/check/check.go index e358d6a7ee..25bdefc66c 100644 --- a/connectivity/check/check.go +++ b/connectivity/check/check.go @@ -95,6 +95,8 @@ type Parameters struct { ExternalTargetCANamespace string ExternalTargetCAName string + + Timeout time.Duration } type podCIDRs struct { diff --git a/defaults/defaults.go b/defaults/defaults.go index af7650363b..10cde1bcb6 100644 --- a/defaults/defaults.go +++ b/defaults/defaults.go @@ -3,7 +3,9 @@ package defaults -import "time" +import ( + "time" +) const ( // renovate: datasource=github-releases depName=cilium/cilium @@ -135,6 +137,9 @@ const ( // ClustermeshMaxConnectedClusters is the default number of the maximum // number of clusters that should be allowed to connect to the Clustermesh. 
ClustermeshMaxConnectedClusters = 255 + + // Default timeout for Connectivity Test Suite (disabled by default) + ConnectivityTestSuiteTimeout = 0 * time.Minute ) var ( diff --git a/internal/cli/cmd/connectivity.go b/internal/cli/cmd/connectivity.go index 3578d23293..b0a78a8fa6 100644 --- a/internal/cli/cmd/connectivity.go +++ b/internal/cli/cmd/connectivity.go @@ -44,6 +44,7 @@ var params = check.Parameters{ Writer: os.Stdout, }, } + var tests []string func newCmdConnectivityTest(hooks Hooks) *cobra.Command { @@ -76,11 +77,17 @@ func newCmdConnectivityTest(hooks Hooks) *cobra.Command { return err } - ctx, _ := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + ctx, _ := signal.NotifyContext(cmd.Context(), os.Interrupt, syscall.SIGTERM) + + if params.Timeout > 0 { + timeoutCtx, cancelCtx := context.WithTimeoutCause(ctx, params.Timeout, fmt.Errorf("connectivity test suite timeout (%s) reached", params.Timeout)) + defer cancelCtx() + ctx = timeoutCtx + } go func() { <-ctx.Done() - cc.Log("Interrupt received, cancelling tests...") + cc.Logf("Cancellation request (%s) received, cancelling tests...", context.Cause(ctx)) }() done := make(chan struct{}) @@ -187,6 +194,8 @@ func newCmdConnectivityTest(hooks Hooks) *cobra.Command { cmd.Flags().MarkHidden("flush-ct") cmd.Flags().StringVar(&params.SecondaryNetworkIface, "secondary-network-iface", "", "Secondary network iface name (e.g., to test NodePort BPF on multiple networks)") + cmd.Flags().DurationVar(&params.Timeout, "timeout", defaults.ConnectivityTestSuiteTimeout, "Maximum time to allow the connectivity test suite to take") + hooks.AddConnectivityTestFlags(cmd.Flags()) return cmd