From 61ff3d1747358c77f4cc8d4dc9a96b54e699223a Mon Sep 17 00:00:00 2001 From: Dmitry Sharshakov Date: Tue, 28 Jan 2025 17:44:45 +0100 Subject: [PATCH] fix: fix reverse routing for KubeSpan This allows it to not come down when rp_filter is enabled. Fixes #9814 Co-authored-by: Andrey Smirnov Signed-off-by: Dmitry Sharshakov --- .github/workflows/ci.yaml | 3 +- .../workflows/integration-misc-1-cron.yaml | 3 +- .kres.yaml | 1 + hack/test/patches/kubespan-rp_filter.yaml | 3 ++ .../pkg/controllers/kubespan/manager.go | 52 +++++++++++++++++++ .../pkg/controllers/kubespan/manager_test.go | 36 ++++++++++++- 6 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 hack/test/patches/kubespan-rp_filter.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6c71f0e12b..26588c1fcc 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-01-24T14:30:35Z by kres 3075de9. +# Generated on 2025-01-28T16:44:01Z by kres 987bf4d. name: default concurrency: @@ -2070,6 +2070,7 @@ jobs: IMAGE_REGISTRY: registry.dev.siderolabs.io\ SHORT_INTEGRATION_TEST: "yes" WITH_CLUSTER_DISCOVERY: "true" + WITH_CONFIG_PATCH: '@hack/test/patches/kubespan-rp_filter.yaml' WITH_KUBESPAN: "true" run: | sudo -E make e2e-qemu diff --git a/.github/workflows/integration-misc-1-cron.yaml b/.github/workflows/integration-misc-1-cron.yaml index 96b578831f..18a63f507f 100644 --- a/.github/workflows/integration-misc-1-cron.yaml +++ b/.github/workflows/integration-misc-1-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2024-12-24T15:00:58Z by kres fcff05e. +# Generated on 2025-01-15T12:49:27Z by kres 3b3f992. 
name: integration-misc-1-cron concurrency: @@ -92,6 +92,7 @@ jobs: IMAGE_REGISTRY: registry.dev.siderolabs.io\ SHORT_INTEGRATION_TEST: "yes" WITH_CLUSTER_DISCOVERY: "true" + WITH_CONFIG_PATCH: '@hack/test/patches/kubespan-rp_filter.yaml' WITH_KUBESPAN: "true" run: | sudo -E make e2e-qemu diff --git a/.kres.yaml b/.kres.yaml index 4ccff8762e..a9698fc6c9 100644 --- a/.kres.yaml +++ b/.kres.yaml @@ -754,6 +754,7 @@ spec: SHORT_INTEGRATION_TEST: yes WITH_CLUSTER_DISCOVERY: true WITH_KUBESPAN: true + WITH_CONFIG_PATCH: "@hack/test/patches/kubespan-rp_filter.yaml" IMAGE_REGISTRY: registry.dev.siderolabs.io\ - name: e2e-default-hostname command: e2e-qemu diff --git a/hack/test/patches/kubespan-rp_filter.yaml b/hack/test/patches/kubespan-rp_filter.yaml new file mode 100644 index 0000000000..38bd3198ee --- /dev/null +++ b/hack/test/patches/kubespan-rp_filter.yaml @@ -0,0 +1,3 @@ +machine: + sysctls: + net.ipv4.conf.all.rp_filter: "1" diff --git a/internal/app/machined/pkg/controllers/kubespan/manager.go b/internal/app/machined/pkg/controllers/kubespan/manager.go index 1ca0e473d3..632bb096c4 100644 --- a/internal/app/machined/pkg/controllers/kubespan/manager.go +++ b/internal/app/machined/pkg/controllers/kubespan/manager.go @@ -27,10 +27,12 @@ import ( kubespanadapter "github.com/siderolabs/talos/internal/app/machined/pkg/adapters/kubespan" "github.com/siderolabs/talos/pkg/machinery/constants" + "github.com/siderolabs/talos/pkg/machinery/kernel" "github.com/siderolabs/talos/pkg/machinery/nethelpers" "github.com/siderolabs/talos/pkg/machinery/resources/config" "github.com/siderolabs/talos/pkg/machinery/resources/kubespan" "github.com/siderolabs/talos/pkg/machinery/resources/network" + "github.com/siderolabs/talos/pkg/machinery/resources/runtime" ) // DefaultPeerReconcileInterval is interval between peer status reconciliation on timer. 
@@ -108,6 +110,10 @@ func (ctrl *ManagerController) Outputs() []controller.Output { Type: kubespan.PeerStatusType, Kind: controller.OutputExclusive, }, + { + Type: runtime.KernelParamSpecType, + Kind: controller.OutputShared, + }, } } @@ -371,6 +377,7 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo spec.Policy = nethelpers.VerdictAccept spec.Rules = []network.NfTablesRule{ + // Accept outgoing WireGuard packets. { MatchMark: &network.NfTablesMark{ Mask: constants.KubeSpanDefaultFirewallMask, @@ -378,6 +385,7 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo }, Verdict: pointer.To(nethelpers.VerdictAccept), }, + // Mark packets to be sent over the KubeSpan link. { MatchDestinationAddress: &network.NfTablesAddressMatch{ IncludeSubnets: allowedIPsSet.Prefixes(), @@ -388,6 +396,35 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo }, Verdict: pointer.To(nethelpers.VerdictAccept), }, + // Remove KubeSpan mark from packets not sent to KubeSpan peers or received from them. + // This is typically the case when decapsulated VXLAN packets retain the envelope's fwmark, thus causing a routing loop. + { + MatchSourceAddress: &network.NfTablesAddressMatch{ + Invert: true, + IncludeSubnets: allowedIPsSet.Prefixes(), + }, + MatchMark: &network.NfTablesMark{ + Mask: constants.KubeSpanDefaultForceFirewallMark, + Value: constants.KubeSpanDefaultForceFirewallMark, + }, + SetMark: &network.NfTablesMark{ + Mask: 0xffffffff, + Xor: constants.KubeSpanDefaultForceFirewallMark, + }, + Verdict: pointer.To(nethelpers.VerdictAccept), + }, + // Mark incoming packets from the KubeSpan link for rp_filter to find the correct routing table. 
+ { + MatchIIfName: &network.NfTablesIfNameMatch{ + InterfaceNames: []string{constants.KubeSpanLinkName}, + Operator: nethelpers.OperatorEqual, + }, + SetMark: &network.NfTablesMark{ + Mask: ^uint32(constants.KubeSpanDefaultFirewallMask), + Xor: constants.KubeSpanDefaultForceFirewallMark, + }, + Verdict: pointer.To(nethelpers.VerdictAccept), + }, } return nil @@ -554,6 +591,17 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo return fmt.Errorf("error modifying link spec: %w", err) } + if err = safe.WriterModify(ctx, r, runtime.NewKernelParamSpec( + runtime.NamespaceName, + kernel.Sysctl+".net.ipv4.conf."+constants.KubeSpanLinkName+".src_valid_mark", + ), func(res *runtime.KernelParamSpec) error { + res.TypedSpec().Value = "1" + + return nil + }); err != nil { + return err + } + if rulesMgr == nil { rulesMgr = ctrl.RulesManagerFactory(constants.KubeSpanDefaultRoutingTable, constants.KubeSpanDefaultForceFirewallMark, constants.KubeSpanDefaultFirewallMask) @@ -591,6 +639,10 @@ func (ctrl *ManagerController) cleanup(ctx context.Context, r controller.Runtime namespace: kubespan.NamespaceName, typ: kubespan.PeerStatusType, }, + { + namespace: runtime.NamespaceName, + typ: runtime.KernelParamSpecType, + }, } { // list keys for cleanup list, err := r.List(ctx, resource.NewMetadata(item.namespace, item.typ, "", resource.VersionUndefined)) diff --git a/internal/app/machined/pkg/controllers/kubespan/manager_test.go b/internal/app/machined/pkg/controllers/kubespan/manager_test.go index 606c496709..269c66932c 100644 --- a/internal/app/machined/pkg/controllers/kubespan/manager_test.go +++ b/internal/app/machined/pkg/controllers/kubespan/manager_test.go @@ -25,6 +25,7 @@ import ( "github.com/siderolabs/talos/pkg/machinery/resources/config" "github.com/siderolabs/talos/pkg/machinery/resources/kubespan" "github.com/siderolabs/talos/pkg/machinery/resources/network" + "github.com/siderolabs/talos/pkg/machinery/resources/runtime" ) type ManagerSuite 
struct { @@ -40,6 +41,10 @@ func (suite *ManagerSuite) TestDisabled() { suite.Require().NoError(suite.State().Create(suite.Ctx(), cfg)) ctest.AssertNoResource[*network.NfTablesChain](suite, "kubespan_outgoing") + ctest.AssertNoResource[*runtime.KernelParamSpec]( + suite, + "proc.sys.net.ipv4.conf.kubespan.src_valid_mark", + ) } type mockWireguardClient struct { @@ -244,9 +249,9 @@ func (suite *ManagerSuite) TestReconcile() { asrt.Equal(nethelpers.ChainPriorityFilter, spec.Priority) asrt.Equal(nethelpers.VerdictAccept, spec.Policy) - asrt.Len(spec.Rules, 2) + asrt.Len(spec.Rules, 3) - if len(spec.Rules) != 2 { + if len(spec.Rules) != 3 { return } @@ -277,6 +282,21 @@ func (suite *ManagerSuite) TestReconcile() { }, spec.Rules[1], ) + + asrt.Equal( + network.NfTablesRule{ + MatchIIfName: &network.NfTablesIfNameMatch{ + InterfaceNames: []string{constants.KubeSpanLinkName}, + Operator: nethelpers.OperatorEqual, + }, + SetMark: &network.NfTablesMark{ + Mask: ^uint32(constants.KubeSpanDefaultFirewallMask), + Xor: constants.KubeSpanDefaultForceFirewallMark, + }, + Verdict: pointer.To(nethelpers.VerdictAccept), + }, + spec.Rules[2], + ) }, ) @@ -358,6 +378,14 @@ func (suite *ManagerSuite) TestReconcile() { }, ) + ctest.AssertResource( + suite, + "proc.sys.net.ipv4.conf.kubespan.src_valid_mark", + func(res *runtime.KernelParamSpec, asrt *assert.Assertions) { + asrt.Equal(res.TypedSpec().Value, "1") + }, + ) + // update config and disable wireguard, everything should be cleaned up cfg.TypedSpec().Enabled = false suite.Require().NoError(suite.State().Update(suite.Ctx(), cfg)) @@ -371,6 +399,10 @@ func (suite *ManagerSuite) TestReconcile() { suite, "kubespan_prerouting", ) + ctest.AssertNoResource[*runtime.KernelParamSpec]( + suite, + "proc.sys.net.ipv4.conf.kubespan.src_valid_mark", + ) } func asUDP(addr netip.AddrPort) *net.UDPAddr {