From 7782c29a8e827c25f4fe7c8c7b684ea68295d0e9 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 5 Dec 2023 13:06:11 +0100 Subject: [PATCH 1/3] connectivity: Check for unexpected packet drops Convert the check for missed tail call packet drops into a more general check for any unexpected packets drops. Current expected packet drop reasons are Policy denied and Policy denied by denylist only. Others will be added in subsequent commits. Contrary to the Missed tail call check who only ran in the context of the conn-disrupt test, this new check is always run by default. Signed-off-by: Paul Chaignon --- connectivity/suite.go | 4 ++-- connectivity/tests/errors.go | 34 ++++++++++++---------------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/connectivity/suite.go b/connectivity/suite.go index 045496d7e3..49234627e8 100644 --- a/connectivity/suite.go +++ b/connectivity/suite.go @@ -259,10 +259,10 @@ func Run(ctx context.Context, ct *check.ConnectivityTest, addExtraTests func(*ch // include --include-conn-disrupt-test" return ct.Run(ctx) } - - ct.NewTest("no-missed-tail-calls").WithScenarios(tests.NoMissedTailCalls()) } + ct.NewTest("no-unexpected-packet-drops").WithScenarios(tests.NoUnexpectedPacketDrops()) + // Run all tests without any policies in place. noPoliciesScenarios := []check.Scenario{ tests.PodToPod(), diff --git a/connectivity/tests/errors.go b/connectivity/tests/errors.go index ec1009ae7c..8d5146e6b0 100644 --- a/connectivity/tests/errors.go +++ b/connectivity/tests/errors.go @@ -5,7 +5,6 @@ package tests import ( "context" - "strconv" "strings" "time" @@ -75,43 +74,34 @@ func (n *noErrorsInLogs) Run(ctx context.Context, t *check.Test) { } -// NoMissedTailCalls checks whether there were no drops due to missed (BPF) -// tail calls. -func NoMissedTailCalls() check.Scenario { - return &noMissedTailCalls{} +// NoUnexpectedPacketDrops checks whether there were no drops due to expected +// packet drops. +func NoUnexpectedPacketDrops() check.Scenario { + return &noUnexpectedPacketDrops{} } -type noMissedTailCalls struct{} +type noUnexpectedPacketDrops struct{} -func (n *noMissedTailCalls) Name() string { - return "no-missed-tail-calls" +func (n *noUnexpectedPacketDrops) Name() string { + return "no-unexpected-packet-drops" } -func (n *noMissedTailCalls) Run(ctx context.Context, t *check.Test) { +func (n *noUnexpectedPacketDrops) Run(ctx context.Context, t *check.Test) { ct := t.Context() cmd := []string{ "/bin/sh", "-c", - "cilium metrics list -o json | jq '.[] | select( .name == \"cilium_drop_count_total\" and .labels.reason == \"Missed tail call\" ).value'", + "cilium metrics list -o json | jq '.[] | select((.name == \"cilium_drop_count_total\") and (.labels.reason | IN(\"Policy denied\", \"Policy denied by denylist\") | not))'", } for _, pod := range ct.CiliumPods() { pod := pod stdout, err := pod.K8sClient.ExecInPod(ctx, pod.Pod.Namespace, pod.Pod.Name, defaults.AgentContainerName, cmd) if err != nil { - t.Fatalf("Error fetching missed tail call drop counts: %s", err) + t.Fatalf("Error fetching packet drop counts: %s", err) } countStr := strings.TrimSpace(stdout.String()) - if countStr == "" { - return - } - - count, err := strconv.Atoi(countStr) - if err != nil { - t.Fatalf("Failed to convert missed tail call drops %q to int: %s", countStr, err) - } - - if count != 0 { - t.Fatalf("Detected drops due to missed tail calls: %d", count) + if countStr != "" { + t.Fatalf("Found unexpected packet drops:\n%s", countStr) } } From 36a9e3ac62c619e888f418fb78b5b6611dd59aff Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 6 Dec 2023 15:14:49 +0100 Subject: [PATCH 2/3] connectivity: Add flag --expected-drop-reasons This new flag can be used to customize the set of expected reasons for packet drops, for the new test that ensure we don't have any unexpected packet drops. Signed-off-by: Paul Chaignon --- connectivity/check/check.go | 7 +++- connectivity/suite.go | 2 +- connectivity/tests/errors.go | 55 ++++++++++++++++++++++-- connectivity/tests/errors_test.go | 69 +++++++++++++++++++++++++++++++ defaults/defaults.go | 5 +++ internal/cli/cmd/connectivity.go | 4 ++ 6 files changed, 135 insertions(+), 7 deletions(-) create mode 100644 connectivity/tests/errors_test.go diff --git a/connectivity/check/check.go b/connectivity/check/check.go index 25bdefc66c..a05347bd26 100644 --- a/connectivity/check/check.go +++ b/connectivity/check/check.go @@ -74,8 +74,11 @@ type Parameters struct { ConnDisruptTestRestartsPath string ConnDisruptTestXfrmErrorsPath string ConnDisruptDispatchInterval time.Duration - FlushCT bool - SecondaryNetworkIface string + + ExpectedDropReasons []string + + FlushCT bool + SecondaryNetworkIface string K8sVersion string HelmChartDirectory string diff --git a/connectivity/suite.go b/connectivity/suite.go index 49234627e8..e32d7bfa23 100644 --- a/connectivity/suite.go +++ b/connectivity/suite.go @@ -261,7 +261,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest, addExtraTests func(*ch } } - ct.NewTest("no-unexpected-packet-drops").WithScenarios(tests.NoUnexpectedPacketDrops()) + ct.NewTest("no-unexpected-packet-drops").WithScenarios(tests.NoUnexpectedPacketDrops(ct.Params().ExpectedDropReasons)) // Run all tests without any policies in place. noPoliciesScenarios := []check.Scenario{ diff --git a/connectivity/tests/errors.go b/connectivity/tests/errors.go index 8d5146e6b0..32d77168bd 100644 --- a/connectivity/tests/errors.go +++ b/connectivity/tests/errors.go @@ -5,6 +5,7 @@ package tests import ( "context" + "fmt" "strings" "time" @@ -76,11 +77,13 @@ func (n *noErrorsInLogs) Run(ctx context.Context, t *check.Test) { // NoUnexpectedPacketDrops checks whether there were no drops due to expected // packet drops. -func NoUnexpectedPacketDrops() check.Scenario { - return &noUnexpectedPacketDrops{} +func NoUnexpectedPacketDrops(expectedDrops []string) check.Scenario { + return &noUnexpectedPacketDrops{expectedDrops} } -type noUnexpectedPacketDrops struct{} +type noUnexpectedPacketDrops struct { + expectedDrops []string +} func (n *noUnexpectedPacketDrops) Name() string { return "no-unexpected-packet-drops" @@ -88,9 +91,11 @@ func (n *noUnexpectedPacketDrops) Name() string { func (n *noUnexpectedPacketDrops) Run(ctx context.Context, t *check.Test) { ct := t.Context() + + filter := computeExpectedDropReasons(defaults.ExpectedDropReasons, n.expectedDrops) cmd := []string{ "/bin/sh", "-c", - "cilium metrics list -o json | jq '.[] | select((.name == \"cilium_drop_count_total\") and (.labels.reason | IN(\"Policy denied\", \"Policy denied by denylist\") | not))'", + fmt.Sprintf("cilium metrics list -o json | jq '.[] | select((.name == \"cilium_drop_count_total\") and (.labels.reason | IN(%s) | not))'", filter), } for _, pod := range ct.CiliumPods() { @@ -104,7 +109,49 @@ func (n *noUnexpectedPacketDrops) Run(ctx context.Context, t *check.Test) { t.Fatalf("Found unexpected packet drops:\n%s", countStr) } } +} +func computeExpectedDropReasons(defaultReasons, inputReasons []string) string { + filter := "" + if len(inputReasons) > 0 { + // Build final list of drop reasons based on default reasons, added + // reasons (+ prefix), and removed reasons (- prefix). + addedAllDefaults := false + dropReasons := map[string]bool{} + for _, reason := range inputReasons { + if reason[0] == '+' || reason[0] == '-' { + if !addedAllDefaults { + for _, r := range defaultReasons { + dropReasons[r] = true + } + addedAllDefaults = true + } + } + switch reason[0] { + case '+': + dropReasons[reason[1:]] = true + case '-': + dropReasons[reason[1:]] = false + default: + dropReasons[reason] = true + } + } + + // Build output string in form of '"reason1", "reason2"'. + i := 0 + for reason, isIn := range dropReasons { + if !isIn { + continue + } + if i == 0 { + filter = fmt.Sprintf("%q", reason) + } else { + filter = fmt.Sprintf("%s, %q", filter, reason) + } + i++ + } + } + return filter } func (n *noErrorsInLogs) checkErrorsInLogs(logs string, t *check.Test) { diff --git a/connectivity/tests/errors_test.go b/connectivity/tests/errors_test.go new file mode 100644 index 0000000000..88f679ff4e --- /dev/null +++ b/connectivity/tests/errors_test.go @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Cilium + +package tests + +import ( + "fmt" + "strings" + "testing" +) + +func TestComputeExpectedDropReasons(t *testing.T) { + defaultReasons := []string{"reason0", "reason1"} + tests := []struct { + inputReasons []string + expectedReasons []string + }{ + // Empty list of reasons. + { + inputReasons: []string{}, + expectedReasons: []string{}, + }, + // Add a reason to default list. + { + inputReasons: []string{"+reason2"}, + expectedReasons: []string{"reason0", "reason1", "reason2"}, + }, + // Remove a reason from default list. + { + inputReasons: []string{"-reason1"}, + expectedReasons: []string{"reason0"}, + }, + // Add a reason then remove it. + { + inputReasons: []string{"+reason2", "-reason2"}, + expectedReasons: []string{"reason0", "reason1"}, + }, + // Remove a reason then add it back. + { + inputReasons: []string{"-reason1", "+reason1"}, + expectedReasons: []string{"reason0", "reason1"}, + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("InputReasons: %v", test.inputReasons), func(t *testing.T) { + result := computeExpectedDropReasons(defaultReasons, test.inputReasons) + + // computeExpectedDropReasons doesn't guarantee the order of the + // emitted string so instead we check the length and that all + // expected elements are listed. + + for _, expReason := range test.expectedReasons { + if !strings.Contains(result, fmt.Sprintf("%q", expReason)) { + t.Errorf("Expected filter to contain %q, but got: %v", expReason, result) + } + } + + // We know the expected length because all our test reasons are the same length. + expectedLength := len(test.expectedReasons) * 9 + if len(test.expectedReasons) > 1 { + expectedLength += (len(test.expectedReasons) - 1) * 2 + } + if len(result) != expectedLength { + t.Errorf("Expected filter of length %d, but got: %v", expectedLength, result) + } + }) + } +} diff --git a/defaults/defaults.go b/defaults/defaults.go index 10cde1bcb6..70626a1d22 100644 --- a/defaults/defaults.go +++ b/defaults/defaults.go @@ -190,4 +190,9 @@ var ( "authentication.mutual.spire.install.agent.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator=NotIn", "authentication.mutual.spire.install.agent.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0]=true", } + + ExpectedDropReasons = []string{ + "Policy denied", + "Policy denied by denylist", + } ) diff --git a/internal/cli/cmd/connectivity.go b/internal/cli/cmd/connectivity.go index 0e668cfe4a..e7811b98f2 100644 --- a/internal/cli/cmd/connectivity.go +++ b/internal/cli/cmd/connectivity.go @@ -190,6 +190,10 @@ func newCmdConnectivityTest(hooks Hooks) *cobra.Command { cmd.Flags().StringVar(¶ms.ConnDisruptTestRestartsPath, "conn-disrupt-test-restarts-path", "/tmp/cilium-conn-disrupt-restarts", "Conn disrupt test temporary result file (used internally)") cmd.Flags().StringVar(¶ms.ConnDisruptTestXfrmErrorsPath, "conn-disrupt-test-xfrm-errors-path", "/tmp/cilium-conn-disrupt-xfrm-errors", "Conn disrupt test temporary result file (used internally)") cmd.Flags().DurationVar(¶ms.ConnDisruptDispatchInterval, "conn-disrupt-dispatch-interval", 0, "TCP packet dispatch interval") + + cmd.Flags().StringSliceVar(¶ms.ExpectedDropReasons, "expected-drop-reasons", defaults.ExpectedDropReasons, "List of expected drop reasons") + cmd.Flags().MarkHidden("expected-drop-reasons") + cmd.Flags().BoolVar(¶ms.FlushCT, "flush-ct", false, "Flush conntrack of Cilium on each node") cmd.Flags().MarkHidden("flush-ct") cmd.Flags().StringVar(¶ms.SecondaryNetworkIface, "secondary-network-iface", "", "Secondary network iface name (e.g., to test NodePort BPF on multiple networks)") From 0bca92f1a0384a45447bc503ad2ea25b1d0cb7f0 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 6 Dec 2023 15:58:30 +0100 Subject: [PATCH 3/3] defaults: Add expected drop reasons We currently have many packet drops that are more or less expected. Some like "Authentication required" are expected for specific features and others like "Stale or unroutable IP" are known and understood even though we'd prefer to avoid them. Signed-off-by: Paul Chaignon --- defaults/defaults.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/defaults/defaults.go b/defaults/defaults.go index 70626a1d22..addad43d0f 100644 --- a/defaults/defaults.go +++ b/defaults/defaults.go @@ -194,5 +194,13 @@ var ( ExpectedDropReasons = []string{ "Policy denied", "Policy denied by denylist", + "Unsupported L3 protocol", + "Stale or unroutable IP", + "Authentication required", + "Service backend not found", + "Unsupported protocol for NAT masquerade", + "Invalid source ip", + "Unknown L3 target address", + "No tunnel/encapsulation endpoint (datapath BUG!)", } )