From 3eecb43ce9db9f4af94d92c53c7afafbc8d45dcb Mon Sep 17 00:00:00 2001 From: Aaron U'Ren Date: Tue, 1 Oct 2024 14:40:27 -0500 Subject: [PATCH 1/6] fact(routes): begin separating linux routing functionality --- .../routing/network_routes_controller.go | 25 +++++++-------- pkg/controllers/routing/utils.go | 18 ----------- pkg/routes/linux_routes.go | 32 +++++++++++++++++++ .../routing => routes}/route_sync.go | 29 +++++++++++------ .../routing => routes}/route_sync_test.go | 18 +++++------ 5 files changed, 72 insertions(+), 50 deletions(-) create mode 100644 pkg/routes/linux_routes.go rename pkg/{controllers/routing => routes}/route_sync.go (70%) rename pkg/{controllers/routing => routes}/route_sync_test.go (93%) diff --git a/pkg/controllers/routing/network_routes_controller.go b/pkg/controllers/routing/network_routes_controller.go index 77f1eec5d..b15ca3917 100644 --- a/pkg/controllers/routing/network_routes_controller.go +++ b/pkg/controllers/routing/network_routes_controller.go @@ -17,6 +17,7 @@ import ( "github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" "github.com/cloudnativelabs/kube-router/v2/pkg/metrics" "github.com/cloudnativelabs/kube-router/v2/pkg/options" + "github.com/cloudnativelabs/kube-router/v2/pkg/routes" "github.com/cloudnativelabs/kube-router/v2/pkg/utils" "github.com/coreos/go-iptables/iptables" gobgpapi "github.com/osrg/gobgp/v3/api" @@ -68,8 +69,6 @@ const ( bgpCommunityMaxPartSize = 16 routeReflectorMaxID = 32 ipv4MaskMinBits = 32 - // Taken from: https://github.com/torvalds/linux/blob/master/include/uapi/linux/rtnetlink.h#L284 - zebraRouteOriginator = 0x11 encapTypeFOU = "fou" encapTypeIPIP = "ipip" @@ -138,7 +137,7 @@ type NetworkRoutingController struct { podIPv6CIDRs []string CNIFirewallSetup *sync.Cond ipsetMutex *sync.Mutex - routeSyncer *routeSyncer + routeSyncer routes.RouteSyncer nodeLister cache.Indexer svcLister cache.Indexer @@ -306,7 +305,7 @@ func (nrc *NetworkRoutingController) Run(healthChan chan<- *healthcheck.Controll klog.Infof("Starting network route controller") // Start route syncer - nrc.routeSyncer.run(stopCh, wg) + nrc.routeSyncer.Run(stopCh, wg) // Wait till we are ready to launch BGP server for { @@ -621,14 +620,14 @@ func (nrc *NetworkRoutingController) injectRoute(path *gobgpapi.Path) error { klog.V(1).Infof("Peer '%s' was not found any longer, removing tunnel and routes", nextHop.String()) // Also delete route from state map so that it doesn't get re-synced after deletion - nrc.routeSyncer.delInjectedRoute(dst) + nrc.routeSyncer.DelInjectedRoute(dst) nrc.cleanupTunnel(dst, tunnelName) return nil } // Also delete route from state map so that it doesn't get re-synced after deletion - nrc.routeSyncer.delInjectedRoute(dst) - return deleteRoutesByDestination(dst) + nrc.routeSyncer.DelInjectedRoute(dst) + return routes.DeleteByDestination(dst) } shouldCreateTunnel := func() bool { @@ -676,7 +675,7 @@ func (nrc *NetworkRoutingController) injectRoute(path *gobgpapi.Path) error { LinkIndex: link.Attrs().Index, Src: bestIPForFamily, Dst: dst, - Protocol: zebraRouteOriginator, + Protocol: routes.ZebraOriginator, } case sameSubnet: // if the nextHop is within the same subnet, add a route for the destination so that traffic can bet routed @@ -692,7 +691,7 @@ func (nrc *NetworkRoutingController) injectRoute(path *gobgpapi.Path) error { route = &netlink.Route{ Dst: dst, Gw: nextHop, - Protocol: zebraRouteOriginator, + Protocol: routes.ZebraOriginator, } default: // otherwise, let BGP do its thing, nothing to do here @@ -701,9 +700,9 @@ func (nrc *NetworkRoutingController) injectRoute(path *gobgpapi.Path) error { // Alright, everything is in place, and we have our route configured, let's add it to the host's routing table klog.V(2).Infof("Inject route: '%s via %s' from peer to routing table", dst, nextHop) - nrc.routeSyncer.addInjectedRoute(dst, route) + nrc.routeSyncer.AddInjectedRoute(dst, route) // Immediately sync the local route table regardless of timer - nrc.routeSyncer.syncLocalRouteTable() + nrc.routeSyncer.SyncLocalRouteTable() return nil } @@ -728,7 +727,7 @@ func (nrc *NetworkRoutingController) isPeerEstablished(peerIP string) (bool, err // needed. All errors are logged only, as we want to attempt to perform all cleanup actions regardless of their success func (nrc *NetworkRoutingController) cleanupTunnel(destinationSubnet *net.IPNet, tunnelName string) { klog.V(1).Infof("Cleaning up old routes for %s if there are any", destinationSubnet.String()) - if err := deleteRoutesByDestination(destinationSubnet); err != nil { + if err := routes.DeleteByDestination(destinationSubnet); err != nil { klog.Errorf("Failed to cleanup routes: %v", err) } @@ -1442,7 +1441,7 @@ func NewNetworkRoutingController(clientset kubernetes.Interface, nrc.bgpServerStarted = false nrc.disableSrcDstCheck = kubeRouterConfig.DisableSrcDstCheck nrc.initSrcDstCheckDone = false - nrc.routeSyncer = newRouteSyncer(kubeRouterConfig.InjectedRoutesSyncPeriod) + nrc.routeSyncer = routes.NewRouteSyncer(kubeRouterConfig.InjectedRoutesSyncPeriod) nrc.bgpHoldtime = kubeRouterConfig.BGPHoldTime.Seconds() if nrc.bgpHoldtime > 65536 || nrc.bgpHoldtime < 3 { diff --git a/pkg/controllers/routing/utils.go b/pkg/controllers/routing/utils.go index 08d99e182..5de5257d5 100644 --- a/pkg/controllers/routing/utils.go +++ b/pkg/controllers/routing/utils.go @@ -17,7 +17,6 @@ import ( "github.com/cloudnativelabs/kube-router/v2/pkg/utils" gobgpapi "github.com/osrg/gobgp/v3/api" "github.com/osrg/gobgp/v3/pkg/packet/bgp" - "github.com/vishvananda/netlink/nl" v1core "k8s.io/api/core/v1" "k8s.io/klog/v2" @@ -240,23 +239,6 @@ func parseBGPPath(path *gobgpapi.Path) (*net.IPNet, net.IP, error) { return dstSubnet, nextHop, nil } -// deleteRoutesByDestination attempts to safely find all routes based upon its destination subnet and delete them -func deleteRoutesByDestination(destinationSubnet *net.IPNet) error { - routes, err := netlink.RouteListFiltered(nl.FAMILY_ALL, &netlink.Route{ - Dst: destinationSubnet, Protocol: zebraRouteOriginator, - }, netlink.RT_FILTER_DST|netlink.RT_FILTER_PROTOCOL) - if err != nil { - return fmt.Errorf("failed to get routes from netlink: %v", err) - } - for i, r := range routes { - klog.V(2).Infof("Found route to remove: %s", r.String()) - if err = netlink.RouteDel(&routes[i]); err != nil { - return fmt.Errorf("failed to remove route due to %v", err) - } - } - return nil -} - // getPodCIDRsFromAllNodeSources gets the pod CIDRs for all available sources on a given node in a specific order. The // order of preference is: // 1. From the kube-router.io/pod-cidr annotation (preserves backwards compatibility) diff --git a/pkg/routes/linux_routes.go b/pkg/routes/linux_routes.go new file mode 100644 index 000000000..1bd49aee1 --- /dev/null +++ b/pkg/routes/linux_routes.go @@ -0,0 +1,32 @@ +package routes + +import ( + "fmt" + "net" + + "github.com/vishvananda/netlink" + "github.com/vishvananda/netlink/nl" + "k8s.io/klog/v2" +) + +const ( + // Taken from: https://github.com/torvalds/linux/blob/master/include/uapi/linux/rtnetlink.h#L284 + ZebraOriginator = 0x11 +) + +// DeleteByDestination attempts to safely find all routes based upon its destination subnet and delete them +func DeleteByDestination(destinationSubnet *net.IPNet) error { + routes, err := netlink.RouteListFiltered(nl.FAMILY_ALL, &netlink.Route{ + Dst: destinationSubnet, Protocol: ZebraOriginator, + }, netlink.RT_FILTER_DST|netlink.RT_FILTER_PROTOCOL) + if err != nil { + return fmt.Errorf("failed to get routes from netlink: %v", err) + } + for i, r := range routes { + klog.V(2).Infof("Found route to remove: %s", r.String()) + if err = netlink.RouteDel(&routes[i]); err != nil { + return fmt.Errorf("failed to remove route due to %v", err) + } + } + return nil +} diff --git a/pkg/controllers/routing/route_sync.go b/pkg/routes/route_sync.go similarity index 70% rename from pkg/controllers/routing/route_sync.go rename to pkg/routes/route_sync.go index a8f60069d..2da0061a3 100644 --- a/pkg/controllers/routing/route_sync.go +++ b/pkg/routes/route_sync.go @@ -1,4 +1,4 @@ -package routing +package routes import ( "net" @@ -9,7 +9,16 @@ import ( "k8s.io/klog/v2" ) -type routeSyncer struct { +// RouteSyncer is an interface that defines the methods needed to sync routes to the kernel's routing table +type RouteSyncer interface { + AddInjectedRoute(dst *net.IPNet, route *netlink.Route) + DelInjectedRoute(dst *net.IPNet) + Run(stopCh <-chan struct{}, wg *sync.WaitGroup) + SyncLocalRouteTable() +} + +// RouteSync is a struct that holds all of the information needed for syncing routes to the kernel's routing table +type RouteSync struct { routeTableStateMap map[string]*netlink.Route injectedRoutesSyncPeriod time.Duration mutex sync.Mutex @@ -17,7 +26,7 @@ type routeSyncer struct { } // addInjectedRoute adds a route to the route map that is regularly synced to the kernel's routing table -func (rs *routeSyncer) addInjectedRoute(dst *net.IPNet, route *netlink.Route) { +func (rs *RouteSync) AddInjectedRoute(dst *net.IPNet, route *netlink.Route) { rs.mutex.Lock() defer rs.mutex.Unlock() klog.V(3).Infof("Adding route for destination: %s", dst) @@ -25,7 +34,7 @@ func (rs *routeSyncer) addInjectedRoute(dst *net.IPNet, route *netlink.Route) { } // delInjectedRoute delete a route from the route map that is regularly synced to the kernel's routing table -func (rs *routeSyncer) delInjectedRoute(dst *net.IPNet) { +func (rs *RouteSync) DelInjectedRoute(dst *net.IPNet) { rs.mutex.Lock() defer rs.mutex.Unlock() if _, ok := rs.routeTableStateMap[dst.String()]; ok { @@ -35,7 +44,7 @@ func (rs *routeSyncer) delInjectedRoute(dst *net.IPNet) { } // syncLocalRouteTable iterates over the local route state map and syncs all routes to the kernel's routing table -func (rs *routeSyncer) syncLocalRouteTable() { +func (rs *RouteSync) SyncLocalRouteTable() { rs.mutex.Lock() defer rs.mutex.Unlock() klog.V(2).Infof("Running local route table synchronization") @@ -49,7 +58,7 @@ func (rs *routeSyncer) syncLocalRouteTable() { } // run starts a goroutine that calls syncLocalRouteTable on interval injectedRoutesSyncPeriod -func (rs *routeSyncer) run(stopCh <-chan struct{}, wg *sync.WaitGroup) { +func (rs *RouteSync) Run(stopCh <-chan struct{}, wg *sync.WaitGroup) { // Start route synchronization routine wg.Add(1) go func(stopCh <-chan struct{}, wg *sync.WaitGroup) { @@ -59,7 +68,7 @@ func (rs *routeSyncer) run(stopCh <-chan struct{}, wg *sync.WaitGroup) { for { select { case <-t.C: - rs.syncLocalRouteTable() + rs.SyncLocalRouteTable() case <-stopCh: klog.Infof("Shutting down local route synchronization") return @@ -68,10 +77,10 @@ func (rs *routeSyncer) run(stopCh <-chan struct{}, wg *sync.WaitGroup) { }(stopCh, wg) } -// newRouteSyncer creates a new routeSyncer that, when run, will sync routes kept in its local state table every +// NewRouteSyncer creates a new routeSyncer that, when run, will sync routes kept in its local state table every // syncPeriod -func newRouteSyncer(syncPeriod time.Duration) *routeSyncer { - rs := routeSyncer{} +func NewRouteSyncer(syncPeriod time.Duration) *RouteSync { + rs := RouteSync{} rs.routeTableStateMap = make(map[string]*netlink.Route) rs.injectedRoutesSyncPeriod = syncPeriod rs.mutex = sync.Mutex{} diff --git a/pkg/controllers/routing/route_sync_test.go b/pkg/routes/route_sync_test.go similarity index 93% rename from pkg/controllers/routing/route_sync_test.go rename to pkg/routes/route_sync_test.go index ebf154e0b..be42a33db 100644 --- a/pkg/controllers/routing/route_sync_test.go +++ b/pkg/routes/route_sync_test.go @@ -1,4 +1,4 @@ -package routing +package routes import ( "net" @@ -55,13 +55,13 @@ func (mnl *mockNetlink) mockRouteReplace(route *netlink.Route) error { } func Test_syncLocalRouteTable(t *testing.T) { - prepSyncLocalTest := func() (*mockNetlink, *routeSyncer) { + prepSyncLocalTest := func() (*mockNetlink, *RouteSync) { // Create myNetlink so that it will wait 200 milliseconds on routeReplace and artificially hold its lock myNetlink := mockNetlink{} myNetlink.pause = time.Millisecond * 200 // Create a route replacer and seed it with some routes to iterate over - syncer := newRouteSyncer(15 * time.Second) + syncer := NewRouteSyncer(15 * time.Second) syncer.routeTableStateMap = generateTestRouteMap(testRoutes) // Replace the netlink.RouteReplace function with our own mock function that includes a WaitGroup for syncing @@ -71,13 +71,13 @@ func Test_syncLocalRouteTable(t *testing.T) { return &myNetlink, syncer } - waitForSyncLocalRouteToAcquireLock := func(myNetlink *mockNetlink, syncer *routeSyncer) { + waitForSyncLocalRouteToAcquireLock := func(myNetlink *mockNetlink, syncer *RouteSync) { // Launch syncLocalRouteTable in a separate goroutine so that we can try to inject a route into the map while it // is syncing. Then wait on the wait group so that we know that syncLocalRouteTable has a hold on the lock when // we try to use it in addInjectedRoute() below myNetlink.wg = &sync.WaitGroup{} myNetlink.wg.Add(1) - go syncer.syncLocalRouteTable() + go syncer.SyncLocalRouteTable() // Now we know that the syncLocalRouteTable() is paused on our artificial wait we added above myNetlink.wg.Wait() @@ -94,7 +94,7 @@ func Test_syncLocalRouteTable(t *testing.T) { // By measuring how much time it takes to inject the route we can understand whether addInjectedRoute waited // for the lock to be returned or not start := time.Now() - syncer.addInjectedRoute(testAddRouteIPNet, testAddRouteRoute) + syncer.AddInjectedRoute(testAddRouteIPNet, testAddRouteRoute) duration := time.Since(start) // We give ourselves a bit of leeway here, and say that if we were forced to wait for at least 190 milliseconds @@ -111,7 +111,7 @@ func Test_syncLocalRouteTable(t *testing.T) { // By measuring how much time it takes to inject the route we can understand whether addInjectedRoute waited // for the lock to be returned or not start := time.Now() - syncer.delInjectedRoute(testAddRouteIPNet) + syncer.DelInjectedRoute(testAddRouteIPNet) duration := time.Since(start) // We give ourselves a bit of leeway here, and say that if we were forced to wait for at least 190 milliseconds @@ -141,7 +141,7 @@ func Test_routeSyncer_run(t *testing.T) { t.Run("Ensure that run goroutine shuts down correctly on stop", func(t *testing.T) { // Setup routeSyncer to run 10 times a second - syncer := newRouteSyncer(100 * time.Millisecond) + syncer := NewRouteSyncer(100 * time.Millisecond) myNetLink := mockNetlink{} syncer.routeReplacer = myNetLink.mockRouteReplace syncer.routeTableStateMap = generateTestRouteMap(testRoutes) @@ -151,7 +151,7 @@ func Test_routeSyncer_run(t *testing.T) { // For a sanity check that the currentRoute on the mock object is nil to start with as we'll rely on this later assert.Nil(t, myNetLink.currentRoute, "currentRoute should be nil when the syncer hasn't run") - syncer.run(stopCh, &wg) + syncer.Run(stopCh, &wg) time.Sleep(110 * time.Millisecond) From e8f9d9db3288f25f5b90a47bc32f1d892bf2cccc Mon Sep 17 00:00:00 2001 From: Aaron U'Ren Date: Tue, 1 Oct 2024 15:11:51 -0500 Subject: [PATCH 2/6] fact(pbr): move to routes package --- .../routing/network_routes_controller.go | 17 +++--- pkg/{controllers/routing => routes}/pbr.go | 57 ++++++++++++++----- 2 files changed, 51 insertions(+), 23 deletions(-) rename pkg/{controllers/routing => routes}/pbr.go (58%) diff --git a/pkg/controllers/routing/network_routes_controller.go b/pkg/controllers/routing/network_routes_controller.go index b15ca3917..b85f44522 100644 --- a/pkg/controllers/routing/network_routes_controller.go +++ b/pkg/controllers/routing/network_routes_controller.go @@ -32,10 +32,8 @@ import ( const ( IfaceNotFound = "Link not found" - customRouteTableID = "77" - customRouteTableName = "kube-router" - podSubnetsIPSetName = "kube-router-pod-subnets" - nodeAddrsIPSetName = "kube-router-node-ips" + podSubnetsIPSetName = "kube-router-pod-subnets" + nodeAddrsIPSetName = "kube-router-node-ips" nodeASNAnnotation = "kube-router.io/node.asn" nodeCommunitiesAnnotation = "kube-router.io/node.bgp.communities" @@ -138,6 +136,7 @@ type NetworkRoutingController struct { CNIFirewallSetup *sync.Cond ipsetMutex *sync.Mutex routeSyncer routes.RouteSyncer + pbr routes.PBRer nodeLister cache.Indexer svcLister cache.Indexer @@ -176,11 +175,13 @@ func (nrc *NetworkRoutingController) Run(healthChan chan<- *healthcheck.Controll nrc.CNIFirewallSetup.Broadcast() + nrc.pbr = routes.NewPBR(nrc.krNode, nrc.podIPv4CIDRs, nrc.podIPv6CIDRs) + // Handle ipip tunnel overlay if nrc.enableOverlays { klog.V(1).Info("Tunnel Overlay enabled in configuration.") klog.V(1).Info("Setting up overlay networking.") - err = nrc.enablePolicyBasedRouting() + err = nrc.pbr.EnablePolicyBasedRouting() if err != nil { klog.Errorf("Failed to enable required policy based routing: %s", err.Error()) } @@ -196,7 +197,7 @@ func (nrc *NetworkRoutingController) Run(healthChan chan<- *healthcheck.Controll } else { klog.V(1).Info("Tunnel Overlay disabled in configuration.") klog.V(1).Info("Cleaning up old overlay networking if needed.") - err = nrc.disablePolicyBasedRouting() + err = nrc.pbr.DisablePolicyBasedRouting() if err != nil { klog.Errorf("Failed to disable policy based routing: %s", err.Error()) } @@ -865,14 +866,14 @@ func (nrc *NetworkRoutingController) setupOverlayTunnel(tunnelName string, nextH // Now that the tunnel link exists, we need to add a route to it, so the node knows where to send traffic bound for // this interface //nolint:gocritic // we understand that we are appending to a new slice - cmdArgs := append(ipBase, "route", "list", "table", customRouteTableID) + cmdArgs := append(ipBase, "route", "list", "table", routes.CustomTableID) out, err = exec.Command("ip", cmdArgs...).CombinedOutput() // This used to be "dev "+tunnelName+" scope" but this isn't consistent with IPv6's output, so we changed it to just // "dev "+tunnelName, but at this point I'm unsure if there was a good reason for adding scope on before, so that's // why this comment is here. if err != nil || !strings.Contains(string(out), "dev "+tunnelName) { //nolint:gocritic // we understand that we are appending to a new slice - cmdArgs = append(ipBase, "route", "add", nextHop.String(), "dev", tunnelName, "table", customRouteTableID) + cmdArgs = append(ipBase, "route", "add", nextHop.String(), "dev", tunnelName, "table", routes.CustomTableID) if out, err = exec.Command("ip", cmdArgs...).CombinedOutput(); err != nil { return nil, fmt.Errorf("failed to add route in custom route table, err: %s, output: %s", err, string(out)) } diff --git a/pkg/controllers/routing/pbr.go b/pkg/routes/pbr.go similarity index 58% rename from pkg/controllers/routing/pbr.go rename to pkg/routes/pbr.go index d345ccd33..2abf9429f 100644 --- a/pkg/controllers/routing/pbr.go +++ b/pkg/routes/pbr.go @@ -1,4 +1,4 @@ -package routing +package routes import ( "fmt" @@ -8,6 +8,33 @@ import ( "github.com/cloudnativelabs/kube-router/v2/pkg/utils" ) +const ( + // CustomTableID is the ID of the custom, iproute2 routing table that will be used for policy based routing + CustomTableID = "77" + // CustomTableName is the name of the custom, iproute2 routing table that will be used for policy based routing + CustomTableName = "kube-router" +) + +type PBR struct { + nfa utils.NodeFamilyAware + podIPv4CIDRs []string + podIPv6CIDRs []string +} + +type PBRer interface { + EnablePolicyBasedRouting() error + DisablePolicyBasedRouting() error +} + +// NewPBR creates a new PBR object which will be used to manipulate policy based routing rules +func NewPBR(nfa utils.NodeFamilyAware, podIPv4CIDRs, podIPv6CIDRs []string) *PBR { + return &PBR{ + nfa: nfa, + podIPv4CIDRs: podIPv4CIDRs, + podIPv6CIDRs: podIPv6CIDRs, + } +} + // ipRuleAbstraction used for abstracting iproute2 rule additions between IPv4 and IPv6 for both add and del operations. // ipProtocol is the iproute2 protocol specified as a string ("-4" or "-6"). ipOp is the rule operation specified as a // string ("add" or "del). The cidr is the IPv4 / IPv6 source CIDR string that when received will be used to lookup @@ -19,12 +46,12 @@ func ipRuleAbstraction(ipProtocol, ipOp, cidr string) error { } if strings.Contains(string(out), cidr) && ipOp == "del" { - err = exec.Command("ip", ipProtocol, "rule", ipOp, "from", cidr, "lookup", customRouteTableID).Run() + err = exec.Command("ip", ipProtocol, "rule", ipOp, "from", cidr, "lookup", CustomTableID).Run() if err != nil { return fmt.Errorf("failed to add ip rule due to: %s", err.Error()) } } else if !strings.Contains(string(out), cidr) && ipOp == "add" { - err = exec.Command("ip", ipProtocol, "rule", ipOp, "from", cidr, "lookup", customRouteTableID).Run() + err = exec.Command("ip", ipProtocol, "rule", ipOp, "from", cidr, "lookup", CustomTableID).Run() if err != nil { return fmt.Errorf("failed to add ip rule due to: %s", err.Error()) } @@ -35,21 +62,21 @@ func ipRuleAbstraction(ipProtocol, ipOp, cidr string) error { // setup a custom routing table that will be used for policy based routing to ensure traffic originating // on tunnel interface only leaves through tunnel interface irrespective rp_filter enabled/disabled -func (nrc *NetworkRoutingController) enablePolicyBasedRouting() error { - err := utils.RouteTableAdd(customRouteTableID, customRouteTableName) +func (pbr *PBR) EnablePolicyBasedRouting() error { + err := utils.RouteTableAdd(CustomTableID, CustomTableName) if err != nil { return fmt.Errorf("failed to update rt_tables file: %s", err) } - if nrc.krNode.IsIPv4Capable() { - for _, ipv4CIDR := range nrc.podIPv4CIDRs { + if pbr.nfa.IsIPv4Capable() { + for _, ipv4CIDR := range pbr.podIPv4CIDRs { if err := ipRuleAbstraction("-4", "add", ipv4CIDR); err != nil { return err } } } - if nrc.krNode.IsIPv6Capable() { - for _, ipv6CIDR := range nrc.podIPv6CIDRs { + if pbr.nfa.IsIPv6Capable() { + for _, ipv6CIDR := range pbr.podIPv6CIDRs { if err := ipRuleAbstraction("-6", "add", ipv6CIDR); err != nil { return err } @@ -59,21 +86,21 @@ func (nrc *NetworkRoutingController) enablePolicyBasedRouting() error { return nil } -func (nrc *NetworkRoutingController) disablePolicyBasedRouting() error { - err := utils.RouteTableAdd(customRouteTableID, customRouteTableName) +func (pbr *PBR) DisablePolicyBasedRouting() error { + err := utils.RouteTableAdd(CustomTableID, CustomTableName) if err != nil { return fmt.Errorf("failed to update rt_tables file: %s", err) } - if nrc.krNode.IsIPv4Capable() { - for _, ipv4CIDR := range nrc.podIPv4CIDRs { + if pbr.nfa.IsIPv4Capable() { + for _, ipv4CIDR := range pbr.podIPv4CIDRs { if err := ipRuleAbstraction("-4", "del", ipv4CIDR); err != nil { return err } } } - if nrc.krNode.IsIPv6Capable() { - for _, ipv6CIDR := range nrc.podIPv6CIDRs { + if pbr.nfa.IsIPv6Capable() { + for _, ipv6CIDR := range pbr.podIPv6CIDRs { if err := ipRuleAbstraction("-6", "del", ipv6CIDR); err != nil { return err } From 52ba7a25459d44b643b81655631b46ccabf316b9 Mon Sep 17 00:00:00 2001 From: Aaron U'Ren Date: Tue, 1 Oct 2024 16:52:34 -0500 Subject: [PATCH 3/6] fact(tunnels): separate linux tunneling functionality --- .../routing/network_routes_controller.go | 199 +--------- .../routing/network_routes_controller_test.go | 34 -- pkg/controllers/routing/utils.go | 98 ----- pkg/tunnels/linux_tunnels.go | 368 ++++++++++++++++++ pkg/tunnels/linux_tunnels_test.go | 40 ++ 5 files changed, 423 insertions(+), 316 deletions(-) create mode 100644 pkg/tunnels/linux_tunnels.go create mode 100644 pkg/tunnels/linux_tunnels_test.go diff --git a/pkg/controllers/routing/network_routes_controller.go b/pkg/controllers/routing/network_routes_controller.go index b85f44522..758976f68 100644 --- a/pkg/controllers/routing/network_routes_controller.go +++ b/pkg/controllers/routing/network_routes_controller.go @@ -18,6 +18,7 @@ import ( "github.com/cloudnativelabs/kube-router/v2/pkg/metrics" "github.com/cloudnativelabs/kube-router/v2/pkg/options" "github.com/cloudnativelabs/kube-router/v2/pkg/routes" + "github.com/cloudnativelabs/kube-router/v2/pkg/tunnels" "github.com/cloudnativelabs/kube-router/v2/pkg/utils" "github.com/coreos/go-iptables/iptables" gobgpapi "github.com/osrg/gobgp/v3/api" @@ -67,15 +68,6 @@ const ( bgpCommunityMaxPartSize = 16 routeReflectorMaxID = 32 ipv4MaskMinBits = 32 - - encapTypeFOU = "fou" - encapTypeIPIP = "ipip" - - ipipModev4 = "ipip" - ipipModev6 = "ip6ip6" - - maxPort = uint16(65535) - minPort = uint16(1024) ) // NetworkRoutingController is struct to hold necessary information required by controller @@ -111,8 +103,6 @@ type NetworkRoutingController struct { iptablesCmdHandlers map[v1core.IPFamily]utils.IPTablesHandler enableOverlays bool overlayType string - overlayEncap string - overlayEncapPort uint16 peerMultihopTTL uint8 MetricsEnabled bool bgpServerStarted bool @@ -137,6 +127,7 @@ type NetworkRoutingController struct { ipsetMutex *sync.Mutex routeSyncer routes.RouteSyncer pbr routes.PBRer + tunneler tunnels.Tunneler nodeLister cache.Indexer svcLister cache.Indexer @@ -185,7 +176,7 @@ func (nrc *NetworkRoutingController) Run(healthChan chan<- *healthcheck.Controll if err != nil { klog.Errorf("Failed to enable required policy based routing: %s", err.Error()) } - if nrc.overlayEncap == "fou" { + if nrc.tunneler.EncapType() == tunnels.EncapTypeFOU { // enable FoU module for the overlay tunnel if _, err := exec.Command("modprobe", "fou").CombinedOutput(); err != nil { klog.Errorf("Failed to enable FoU for tunnel overlay: %s", err.Error()) @@ -581,7 +572,7 @@ func (nrc *NetworkRoutingController) injectRoute(path *gobgpapi.Path) error { return err } - tunnelName := generateTunnelName(nextHop.String()) + tunnelName := tunnels.GenerateTunnelName(nextHop.String()) checkNHSameSubnet := func(needle net.IP, haystack []net.IP) bool { for _, nodeIP := range haystack { nodeSubnet, _, err := utils.GetNodeSubnet(nodeIP, nil) @@ -622,7 +613,7 @@ func (nrc *NetworkRoutingController) injectRoute(path *gobgpapi.Path) error { nextHop.String()) // Also delete route from state map so that it doesn't get re-synced after deletion nrc.routeSyncer.DelInjectedRoute(dst) - nrc.cleanupTunnel(dst, tunnelName) + tunnels.CleanupTunnel(dst, tunnelName) return nil } @@ -648,14 +639,14 @@ func (nrc *NetworkRoutingController) injectRoute(path *gobgpapi.Path) error { // if the user has disabled overlays, don't create tunnels. If we're not creating a tunnel, check to see if there is // any cleanup that needs to happen. if shouldCreateTunnel() { - link, err = nrc.setupOverlayTunnel(tunnelName, nextHop, dst) + link, err = nrc.tunneler.SetupOverlayTunnel(tunnelName, nextHop, dst) if err != nil { return err } } else { // knowing that a tunnel shouldn't exist for this route, check to see if there are any lingering tunnels / // routes that need to be cleaned up. - nrc.cleanupTunnel(dst, tunnelName) + tunnels.CleanupTunnel(dst, tunnelName) } switch { @@ -724,164 +715,6 @@ func (nrc *NetworkRoutingController) isPeerEstablished(peerIP string) (bool, err return peerConnected, nil } -// cleanupTunnel removes any traces of tunnels / routes that were setup by nrc.setupOverlayTunnel() and are no longer -// needed. All errors are logged only, as we want to attempt to perform all cleanup actions regardless of their success -func (nrc *NetworkRoutingController) cleanupTunnel(destinationSubnet *net.IPNet, tunnelName string) { - klog.V(1).Infof("Cleaning up old routes for %s if there are any", destinationSubnet.String()) - if err := routes.DeleteByDestination(destinationSubnet); err != nil { - klog.Errorf("Failed to cleanup routes: %v", err) - } - - klog.V(1).Infof("Cleaning up any lingering tunnel interfaces named: %s", tunnelName) - if link, err := netlink.LinkByName(tunnelName); err == nil { - if err = netlink.LinkDel(link); err != nil { - klog.Errorf("Failed to delete tunnel link for the node due to " + err.Error()) - } - } -} - -// setupOverlayTunnel attempts to create a tunnel link and corresponding routes for IPIP based overlay networks -func (nrc *NetworkRoutingController) setupOverlayTunnel(tunnelName string, nextHop net.IP, - nextHopSubnet *net.IPNet) (netlink.Link, error) { - var out []byte - link, err := netlink.LinkByName(tunnelName) - - var bestIPForFamily net.IP - var ipipMode, fouLinkType string - isIPv6 := false - ipBase := make([]string, 0) - strFormattedEncapPort := strconv.FormatInt(int64(nrc.overlayEncapPort), 10) - - if nextHop.To4() != nil { - bestIPForFamily = nrc.krNode.FindBestIPv4NodeAddress() - ipipMode = encapTypeIPIP - fouLinkType = ipipModev4 - } else { - // Need to activate the ip command in IPv6 mode - ipBase = append(ipBase, "-6") - bestIPForFamily = nrc.krNode.FindBestIPv6NodeAddress() - ipipMode = ipipModev6 - fouLinkType = "ip6tnl" - isIPv6 = true - } - if nil == bestIPForFamily { - return nil, fmt.Errorf("not able to find an appropriate configured IP address on node for destination "+ - "IP family: %s", nextHop.String()) - } - - // This indicated that the tunnel already exists, so it's possible that there might be nothing more needed. However, - // it is also possible that the user changed the encap type, so we need to make sure that the encap type matches - // and if it doesn't, create it - recreate := false - if err == nil { - klog.V(1).Infof("Tunnel interface: %s with encap type %s for the node %s already exists.", - tunnelName, link.Attrs().EncapType, nextHop.String()) - - switch nrc.overlayEncap { - case encapTypeIPIP: - if linkFOUEnabled(tunnelName) { - klog.Infof("Was configured to use ipip tunnels, but found existing fou tunnels in place, cleaning up") - recreate = true - - // Even though we are setup for IPIP tunels we have existing tunnels that are FoU tunnels, remove them - // so that we can recreate them as IPIP - nrc.cleanupTunnel(nextHopSubnet, tunnelName) - - // If we are transitioning from FoU to IPIP we also need to clean up the old FoU port if it exists - if fouPortAndProtoExist(nrc.overlayEncapPort, isIPv6) { - fouArgs := ipBase - fouArgs = append(fouArgs, "fou", "del", "port", strFormattedEncapPort) - out, err := exec.Command("ip", fouArgs...).CombinedOutput() - if err != nil { - klog.Warningf("failed to clean up previous FoU tunnel port (this is only a warning because it "+ - "won't stop kube-router from working for now, but still shouldn't have happened) - error: "+ - "%v, output %s", err, out) - } - } - } - case encapTypeFOU: - if !linkFOUEnabled(tunnelName) { - klog.Infof("Was configured to use fou tunnels, but found existing ipip tunnels in place, cleaning up") - recreate = true - // Even though we are setup for FoU tunels we have existing tunnels that are IPIP tunnels, remove them - // so that we can recreate them as IPIP - nrc.cleanupTunnel(nextHopSubnet, tunnelName) - } - } - } - - // an error here indicates that the tunnel didn't exist, so we need to create it, if it already exists there's - // nothing to do here - if err != nil || recreate { - klog.Infof("Creating tunnel %s of type %s with encap %s for destination %s", - tunnelName, fouLinkType, nrc.overlayEncap, nextHop.String()) - cmdArgs := ipBase - switch nrc.overlayEncap { - case encapTypeIPIP: - // Plain IPIP tunnel without any encapsulation - cmdArgs = append(cmdArgs, "tunnel", "add", tunnelName, "mode", ipipMode, "local", bestIPForFamily.String(), - "remote", nextHop.String()) - - case encapTypeFOU: - // Ensure that the FOU tunnel port is set correctly - if !fouPortAndProtoExist(nrc.overlayEncapPort, isIPv6) { - fouArgs := ipBase - fouArgs = append(fouArgs, "fou", "add", "port", strFormattedEncapPort, "gue") - out, err := exec.Command("ip", fouArgs...).CombinedOutput() - if err != nil { - //nolint:goconst // don't need to make error messages a constant - return nil, fmt.Errorf("route not injected for the route advertised by the node %s "+ - "Failed to set FoU tunnel port - error: %s, output: %s", tunnelName, err, string(out)) - } - } - - // Prep IPIP tunnel for FOU encapsulation - cmdArgs = append(cmdArgs, "link", "add", "name", tunnelName, "type", fouLinkType, "remote", nextHop.String(), - "local", bestIPForFamily.String(), "ttl", "225", "encap", "gue", "encap-sport", "auto", "encap-dport", - strFormattedEncapPort, "mode", ipipMode) - - default: - return nil, fmt.Errorf("unknown tunnel encapsulation was passed: %s, unable to continue with overlay "+ - "setup", nrc.overlayEncap) - } - - klog.V(2).Infof("Executing the following command to create tunnel: ip %s", cmdArgs) - out, err := exec.Command("ip", cmdArgs...).CombinedOutput() - if err != nil { - return nil, fmt.Errorf("route not injected for the route advertised by the node %s "+ - "Failed to create tunnel interface %s. error: %s, output: %s", - nextHop, tunnelName, err, string(out)) - } - - link, err = netlink.LinkByName(tunnelName) - if err != nil { - return nil, fmt.Errorf("route not injected for the route advertised by the node %s "+ - "Failed to get tunnel interface by name error: %s", tunnelName, err) - } - if err = netlink.LinkSetUp(link); err != nil { - return nil, errors.New("Failed to bring tunnel interface " + tunnelName + " up due to: " + err.Error()) - } - } - - // Now that the tunnel link exists, we need to add a route to it, so the node knows where to send traffic bound for - // this interface - //nolint:gocritic // we understand that we are appending to a new slice - cmdArgs := append(ipBase, "route", "list", "table", routes.CustomTableID) - out, err = exec.Command("ip", cmdArgs...).CombinedOutput() - // This used to be "dev "+tunnelName+" scope" but this isn't consistent with IPv6's output, so we changed it to just - // "dev "+tunnelName, but at this point I'm unsure if there was a good reason for adding scope on before, so that's - // why this comment is here. - if err != nil || !strings.Contains(string(out), "dev "+tunnelName) { - //nolint:gocritic // we understand that we are appending to a new slice - cmdArgs = append(ipBase, "route", "add", nextHop.String(), "dev", tunnelName, "table", routes.CustomTableID) - if out, err = exec.Command("ip", cmdArgs...).CombinedOutput(); err != nil { - return nil, fmt.Errorf("failed to add route in custom route table, err: %s, output: %s", err, string(out)) - } - } - - return link, nil -} - // Cleanup performs the cleanup of configurations done func (nrc *NetworkRoutingController) Cleanup() { klog.Infof("Cleaning up NetworkRoutesController configurations") @@ -1533,18 +1366,16 @@ func NewNetworkRoutingController(clientset kubernetes.Interface, nrc.autoMTU = kubeRouterConfig.AutoMTU nrc.enableOverlays = kubeRouterConfig.EnableOverlay nrc.overlayType = kubeRouterConfig.OverlayType - nrc.overlayEncap = kubeRouterConfig.OverlayEncap - switch nrc.overlayEncap { - case encapTypeIPIP: - case encapTypeFOU: - default: - return nil, fmt.Errorf("unknown --overlay-encap option '%s' selected, unable to continue", nrc.overlayEncap) + overlayEncap, err := tunnels.ParseEncapType(kubeRouterConfig.OverlayEncap) + if err != nil { + return nil, fmt.Errorf("unknown --overlay-encap option '%s' selected, unable to continue", overlayEncap) } - nrc.overlayEncapPort = kubeRouterConfig.OverlayEncapPort - if nrc.overlayEncapPort > maxPort || nrc.overlayEncapPort < minPort { - return nil, fmt.Errorf("specified encap port is out of range of valid ports: %d, valid range is from %d to %d", - nrc.overlayEncapPort, minPort, maxPort) + overlayEncapPort, err := tunnels.ParseEncapPort(kubeRouterConfig.OverlayEncapPort) + if err != nil { + return nil, fmt.Errorf("unknown --overlay-encap-port option '%d' selected, unable to continue, err: %v", + overlayEncapPort, err) } + nrc.tunneler = tunnels.NewOverlayTunnel(nrc.krNode, overlayEncap, overlayEncapPort) nrc.CNIFirewallSetup = sync.NewCond(&sync.Mutex{}) nrc.bgpPort = kubeRouterConfig.BGPPort diff --git a/pkg/controllers/routing/network_routes_controller_test.go b/pkg/controllers/routing/network_routes_controller_test.go index a55b4ab2c..c5e063c46 100644 --- a/pkg/controllers/routing/network_routes_controller_test.go +++ b/pkg/controllers/routing/network_routes_controller_test.go @@ -11,7 +11,6 @@ import ( "time" "github.com/cloudnativelabs/kube-router/v2/pkg/utils" - "github.com/stretchr/testify/assert" v1core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/informers" @@ -2822,39 +2821,6 @@ func Test_OnNodeUpdate(t *testing.T) { } */ -func Test_generateTunnelName(t *testing.T) { - testcases := []struct { - name string - nodeIP string - tunnelName string - }{ - { - "IP less than 12 characters after removing '.'", - "10.0.0.1", - "tun-e443169117a", - }, - { - "IP has 12 characters after removing '.'", - "100.200.300.400", - "tun-9033d7906c7", - }, - { - "IPv6 tunnel names are properly handled and consistent", - "2001:db8:42:2::/64", - "tun-ba56986ef05", - }, - } - - for _, testcase := range testcases { - t.Run(testcase.name, func(t *testing.T) { - tunnelName := generateTunnelName(testcase.nodeIP) - assert.Lessf(t, len(tunnelName), 16, "the maximum length of the tunnel name should never exceed"+ - "15 characters as 16 characters is the maximum length of a Unix interface name") - assert.Equal(t, testcase.tunnelName, tunnelName, "did not get expected tunnel interface name") - }) - } -} - func createServices(clientset kubernetes.Interface, svcs []*v1core.Service) error { for _, svc := range svcs { _, err := clientset.CoreV1().Services(svc.ObjectMeta.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) diff --git a/pkg/controllers/routing/utils.go b/pkg/controllers/routing/utils.go index 5de5257d5..79bff78ef 100644 --- a/pkg/controllers/routing/utils.go +++ b/pkg/controllers/routing/utils.go @@ -1,15 +1,12 @@ package routing import ( - "bufio" - "crypto/sha256" "encoding/base64" "encoding/binary" "errors" "fmt" "hash/fnv" "net" - "os/exec" "regexp" "strconv" "strings" @@ -142,23 +139,6 @@ func generateRouterID(nodeIPAware utils.NodeIPAware, configRouterID string) (str return configRouterID, nil } -// generateTunnelName will generate a name for a tunnel interface given a node IP -// Since linux restricts interface names to 15 characters, we take the sha-256 of the node IP after removing -// non-entropic characters like '.' and ':', and then use the first 12 bytes of it. This allows us to cater to both -// long IPv4 addresses and much longer IPv6 addresses. -func generateTunnelName(nodeIP string) string { - // remove dots from an IPv4 address - strippedIP := strings.ReplaceAll(nodeIP, ".", "") - // remove colons from an IPv6 address - strippedIP = strings.ReplaceAll(strippedIP, ":", "") - - h := sha256.New() - h.Write([]byte(strippedIP)) - sum := h.Sum(nil) - - return "tun-" + fmt.Sprintf("%x", sum)[0:11] -} - // validateCommunity takes in a string and attempts to parse a BGP community out of it in a way that is similar to // gobgp (internal/pkg/table/policy.go:ParseCommunity()). If it is not able to parse the community information it // returns an error. @@ -313,81 +293,3 @@ func (nrc *NetworkRoutingController) getBGPRouteInfoForVIP(vip string) (subnet u err = fmt.Errorf("could not convert IP to IPv4 or IPv6, unable to find subnet for: %s", vip) return } - -// fouPortAndProtoExist checks to see if the given FoU port is already configured on the system via iproute2 -// tooling for the given protocol -// -// fou show, shows both IPv4 and IPv6 ports in the same show command, they look like: -// port 5556 gue -// port 5556 gue -6 -// where the only thing that distinguishes them is the -6 or not on the end -// WARNING we're parsing a CLI tool here not an API, this may break at some point in the future -func fouPortAndProtoExist(port uint16, isIPv6 bool) bool { - const ipRoute2IPv6Prefix = "-6" - strPort := strconv.FormatInt(int64(port), 10) - fouArgs := make([]string, 0) - klog.V(2).Infof("Checking FOU Port and Proto... %s - %t", strPort, isIPv6) - - if isIPv6 { - fouArgs = append(fouArgs, ipRoute2IPv6Prefix) - } - fouArgs = append(fouArgs, "fou", "show") - - out, err := exec.Command("ip", fouArgs...).CombinedOutput() - // iproute2 returns an error if no fou configuration exists - if err != nil { - return false - } - - strOut := string(out) - klog.V(2).Infof("Combined output of ip fou show: %s", strOut) - scanner := bufio.NewScanner(strings.NewReader(strOut)) - - // loop over all lines of output - for scanner.Scan() { - scannedLine := scanner.Text() - // if the output doesn't contain our port at all, then continue - if !strings.Contains(scannedLine, strPort) { - continue - } - - // if this is IPv6 port and it has the correct IPv6 suffix (see example above) then return true - if isIPv6 && strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) { - return true - } - - // if this is not IPv6 and it does not have an IPv6 suffix (see example above) then return true - if !isIPv6 && !strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) { - return true - } - } - - return false -} - -// linkFOUEnabled checks to see whether the given link has FoU (Foo over Ethernet) enabled on it, specifically since -// kube-router only works with GUE (Generic UDP Encapsulation) we look for that and not just FoU in general. If the -// linkName is enabled with FoU GUE then we return true, otherwise false -// -// Output for a FoU Enabled GUE tunnel looks like: -// ipip ipip remote local dev ttl 225 pmtudisc encap gue encap-sport auto encap-dport 5555 ... -// Output for a normal IPIP tunnel looks like: -// ipip ipip remote local dev ttl inherit ... -func linkFOUEnabled(linkName string) bool { - const fouEncapEnabled = "encap gue" - cmdArgs := []string{"-details", "link", "show", linkName} - - out, err := exec.Command("ip", cmdArgs...).CombinedOutput() - - if err != nil { - klog.Warningf("recevied an error while trying to look at the link details of %s, this shouldn't have happened", - linkName) - return false - } - - if strings.Contains(string(out), fouEncapEnabled) { - return true - } - - return false -} diff --git a/pkg/tunnels/linux_tunnels.go b/pkg/tunnels/linux_tunnels.go new file mode 100644 index 000000000..37f98b08d --- /dev/null +++ b/pkg/tunnels/linux_tunnels.go @@ -0,0 +1,368 @@ +package tunnels + +import ( + "bufio" + "crypto/sha256" + "fmt" + "net" + "os/exec" + "strconv" + "strings" + + "github.com/cloudnativelabs/kube-router/v2/pkg/routes" + "github.com/cloudnativelabs/kube-router/v2/pkg/utils" + "github.com/vishvananda/netlink" + "k8s.io/klog/v2" +) + +const ( + EncapTypeFOU = EncapType("fou") + EncapTypeIPIP = EncapType("ipip") + + // FOU modes used for the iproute2 tooling + fouIPv4LinkMode = "ipip" + fouIPv6LinkMode = "ip6tnl" + + // IPIP modes used for the iproute2 tooling + ipipIPv4Mode = "ipip" + ipipIPv6Mode = "ip6ip6" + + // The maximum and minimum port numbers for encap ports + maxPort = uint16(65535) + minPort = uint16(1024) +) + +var ( + validEncapTypes = []EncapType{EncapTypeFOU, EncapTypeIPIP} +) + +// EncapType represents the type of encapsulation used for an overlay tunnel in kube-router. +type EncapType string + +// IsValid checks if the encapsulation type is valid by comparing it against a list of valid types. +// It returns true if the encapsulation type is valid, otherwise it returns false. +func (e EncapType) IsValid() bool { + for _, validType := range validEncapTypes { + if e == validType { + return true + } + } + return false +} + +// ParseEncapType parses the given string and returns an Encap type if valid. +// It returns an error if the encapsulation type is invalid. +// +// Parameters: +// - s: A string representing the encapsulation type. +// +// Returns: +// - Encap: The parsed encapsulation type. +// - error: An error if the encapsulation type is invalid. +func ParseEncapType(encapType string) (EncapType, error) { + encap := EncapType(encapType) + if !encap.IsValid() { + return "", fmt.Errorf("invalid encapsulation type: %s", encapType) + } + return encap, nil +} + +type EncapPort uint16 + +func (e EncapPort) checkWithinRange() error { + if uint16(e) > minPort && uint16(e) < maxPort { + return nil + } + return fmt.Errorf("specified encap port is out of range of valid ports: %d, valid range is from %d to %d", + e, minPort, maxPort) +} + +func ParseEncapPort(encapPort uint16) (EncapPort, error) { + port := EncapPort(encapPort) + if err := port.checkWithinRange(); err != nil { + return 0, err + } + return port, nil +} + +type Tunneler interface { + SetupOverlayTunnel(tunnelName string, nextHop net.IP, nextHopSubnet *net.IPNet) (netlink.Link, error) + EncapType() EncapType + EncapPort() EncapPort +} + +type OverlayTunnel struct { + krNode utils.NodeIPAndFamilyAware + overlayEncapPort EncapPort + overlayEncap EncapType +} + +func NewOverlayTunnel(krNode utils.NodeIPAndFamilyAware, overlayEncap EncapType, + overlayEncapPort EncapPort) *OverlayTunnel { + return &OverlayTunnel{ + krNode: krNode, + overlayEncapPort: overlayEncapPort, + overlayEncap: overlayEncap, + } +} + +func (o *OverlayTunnel) EncapType() EncapType { + return o.overlayEncap +} + +func (o *OverlayTunnel) EncapPort() EncapPort { + return o.overlayEncapPort +} + +// setupOverlayTunnel attempts to create a tunnel link and corresponding routes for IPIP based overlay networks +func (o *OverlayTunnel) SetupOverlayTunnel(tunnelName string, nextHop net.IP, + nextHopSubnet *net.IPNet) (netlink.Link, error) { + var out []byte + link, err := netlink.LinkByName(tunnelName) + + var bestIPForFamily net.IP + var ipipMode, fouLinkType string + isIPv6 := false + ipBase := make([]string, 0) + strFormattedEncapPort := strconv.FormatInt(int64(o.overlayEncapPort), 10) + + if nextHop.To4() != nil { + bestIPForFamily = o.krNode.FindBestIPv4NodeAddress() + ipipMode = ipipIPv4Mode + fouLinkType = fouIPv4LinkMode + } else { + // Need to activate the ip command in IPv6 mode + ipBase = append(ipBase, "-6") + bestIPForFamily = o.krNode.FindBestIPv6NodeAddress() + ipipMode = ipipIPv6Mode + fouLinkType = fouIPv6LinkMode + isIPv6 = true + } + if nil == bestIPForFamily { + return nil, fmt.Errorf("not able to find an appropriate configured IP address on node for destination "+ + "IP family: %s", nextHop.String()) + } + + // This indicated that the tunnel already exists, so it's possible that there might be nothing more needed. However, + // it is also possible that the user changed the encap type, so we need to make sure that the encap type matches + // and if it doesn't, create it + recreate := false + if err == nil { + klog.V(1).Infof("Tunnel interface: %s with encap type %s for the node %s already exists.", + tunnelName, link.Attrs().EncapType, nextHop.String()) + + switch o.overlayEncap { + case EncapTypeIPIP: + if linkFOUEnabled(tunnelName) { + klog.Infof("Was configured to use ipip tunnels, but found existing fou tunnels in place, cleaning up") + recreate = true + + // Even though we are setup for IPIP tunels we have existing tunnels that are FoU tunnels, remove them + // so that we can recreate them as IPIP + CleanupTunnel(nextHopSubnet, tunnelName) + + // If we are transitioning from FoU to IPIP we also need to clean up the old FoU port if it exists + if fouPortAndProtoExist(o.overlayEncapPort, isIPv6) { + fouArgs := ipBase + fouArgs = append(fouArgs, "fou", "del", "port", strFormattedEncapPort) + out, err := exec.Command("ip", fouArgs...).CombinedOutput() + if err != nil { + klog.Warningf("failed to clean up previous FoU tunnel port (this is only a warning because it "+ + "won't stop kube-router from working for now, but still shouldn't have happened) - error: "+ + "%v, output %s", err, out) + } + } + } + case EncapTypeFOU: + if !linkFOUEnabled(tunnelName) { + klog.Infof("Was configured to use fou tunnels, but found existing ipip tunnels in place, cleaning up") + recreate = true + // Even though we are setup for FoU tunels we have existing tunnels that are IPIP tunnels, remove them + // so that we can recreate them as IPIP + CleanupTunnel(nextHopSubnet, tunnelName) + } + } + } + + // an error here indicates that the tunnel didn't exist, so we need to create it, if it already exists there's + // nothing to do here + if err != nil || recreate { + klog.Infof("Creating tunnel %s of type %s with encap %s for destination %s", + tunnelName, fouLinkType, o.overlayEncap, nextHop.String()) + cmdArgs := ipBase + switch o.overlayEncap { + case EncapTypeIPIP: + // Plain IPIP tunnel without any encapsulation + cmdArgs = append(cmdArgs, "tunnel", "add", tunnelName, "mode", ipipMode, "local", bestIPForFamily.String(), + "remote", nextHop.String()) + + case EncapTypeFOU: + // Ensure that the FOU tunnel port is set correctly + if !fouPortAndProtoExist(o.overlayEncapPort, isIPv6) { + fouArgs := ipBase + fouArgs = append(fouArgs, "fou", "add", "port", strFormattedEncapPort, "gue") + out, err := exec.Command("ip", fouArgs...).CombinedOutput() + if err != nil { + //nolint:goconst // don't need to make error messages a constant + return nil, fmt.Errorf("route not injected for the route advertised by the node %s "+ + "Failed to set FoU tunnel port - error: %s, output: %s", tunnelName, err, string(out)) + } + } + + // Prep IPIP tunnel for FOU encapsulation + cmdArgs = append(cmdArgs, "link", "add", "name", tunnelName, "type", fouLinkType, "remote", nextHop.String(), + "local", bestIPForFamily.String(), "ttl", "225", "encap", "gue", "encap-sport", "auto", "encap-dport", + strFormattedEncapPort, "mode", ipipMode) + + default: + return nil, fmt.Errorf("unknown tunnel encapsulation was passed: %s, unable to continue with overlay "+ + "setup", o.overlayEncap) + } + + klog.V(2).Infof("Executing the following command to create tunnel: ip %s", cmdArgs) + out, err := exec.Command("ip", cmdArgs...).CombinedOutput() + if err != nil { + return nil, fmt.Errorf("route not injected for the route advertised by the node %s "+ + "Failed to create tunnel interface %s. error: %s, output: %s", + nextHop, tunnelName, err, string(out)) + } + + link, err = netlink.LinkByName(tunnelName) + if err != nil { + return nil, fmt.Errorf("route not injected for the route advertised by the node %s "+ + "Failed to get tunnel interface by name error: %s", tunnelName, err) + } + if err = netlink.LinkSetUp(link); err != nil { + return nil, fmt.Errorf("failed to bring tunnel interface %s up due to: %v", tunnelName, err) + } + } + + // Now that the tunnel link exists, we need to add a route to it, so the node knows where to send traffic bound for + // this interface + //nolint:gocritic // we understand that we are appending to a new slice + cmdArgs := append(ipBase, "route", "list", "table", routes.CustomTableID) + out, err = exec.Command("ip", cmdArgs...).CombinedOutput() + // This used to be "dev "+tunnelName+" scope" but this isn't consistent with IPv6's output, so we changed it to just + // "dev "+tunnelName, but at this point I'm unsure if there was a good reason for adding scope on before, so that's + // why this comment is here. + if err != nil || !strings.Contains(string(out), "dev "+tunnelName) { + //nolint:gocritic // we understand that we are appending to a new slice + cmdArgs = append(ipBase, "route", "add", nextHop.String(), "dev", tunnelName, "table", routes.CustomTableID) + if out, err = exec.Command("ip", cmdArgs...).CombinedOutput(); err != nil { + return nil, fmt.Errorf("failed to add route in custom route table, err: %s, output: %s", err, string(out)) + } + } + + return link, nil +} + +// cleanupTunnel removes any traces of tunnels / routes that were setup by nrc.setupOverlayTunnel() and are no longer +// needed. All errors are logged only, as we want to attempt to perform all cleanup actions regardless of their success +func CleanupTunnel(destinationSubnet *net.IPNet, tunnelName string) { + klog.V(1).Infof("Cleaning up old routes for %s if there are any", destinationSubnet.String()) + if err := routes.DeleteByDestination(destinationSubnet); err != nil { + klog.Errorf("Failed to cleanup routes: %v", err) + } + + klog.V(1).Infof("Cleaning up any lingering tunnel interfaces named: %s", tunnelName) + if link, err := netlink.LinkByName(tunnelName); err == nil { + if err = netlink.LinkDel(link); err != nil { + klog.Errorf("Failed to delete tunnel link for the node due to " + err.Error()) + } + } +} + +// GenerateTunnelName will generate a name for a tunnel interface given a node IP +// Since linux restricts interface names to 15 characters, we take the sha-256 of the node IP after removing +// non-entropic characters like '.' and ':', and then use the first 12 bytes of it. This allows us to cater to both +// long IPv4 addresses and much longer IPv6 addresses. +func GenerateTunnelName(nodeIP string) string { + // remove dots from an IPv4 address + strippedIP := strings.ReplaceAll(nodeIP, ".", "") + // remove colons from an IPv6 address + strippedIP = strings.ReplaceAll(strippedIP, ":", "") + + h := sha256.New() + h.Write([]byte(strippedIP)) + sum := h.Sum(nil) + + return "tun-" + fmt.Sprintf("%x", sum)[0:11] +} + +// fouPortAndProtoExist checks to see if the given FoU port is already configured on the system via iproute2 +// tooling for the given protocol +// +// fou show, shows both IPv4 and IPv6 ports in the same show command, they look like: +// port 5556 gue +// port 5556 gue -6 +// where the only thing that distinguishes them is the -6 or not on the end +// WARNING we're parsing a CLI tool here not an API, this may break at some point in the future +func fouPortAndProtoExist(port EncapPort, isIPv6 bool) bool { + const ipRoute2IPv6Prefix = "-6" + strPort := strconv.FormatInt(int64(port), 10) + fouArgs := make([]string, 0) + klog.V(2).Infof("Checking FOU Port and Proto... %s - %t", strPort, isIPv6) + + if isIPv6 { + fouArgs = append(fouArgs, ipRoute2IPv6Prefix) + } + fouArgs = append(fouArgs, "fou", "show") + + out, err := exec.Command("ip", fouArgs...).CombinedOutput() + // iproute2 returns an error if no fou configuration exists + if err != nil { + return false + } + + strOut := string(out) + klog.V(2).Infof("Combined output of ip fou show: %s", strOut) + scanner := bufio.NewScanner(strings.NewReader(strOut)) + + // loop over all lines of output + for scanner.Scan() { + scannedLine := scanner.Text() + // if the output doesn't contain our port at all, then continue + if !strings.Contains(scannedLine, strPort) { + continue + } + + // if this is IPv6 port and it has the correct IPv6 suffix (see example above) then return true + if isIPv6 && strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) { + return true + } + + // if this is not IPv6 and it does not have an IPv6 suffix (see example above) then return true + if !isIPv6 && !strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) { + return true + } + } + + return false +} + +// linkFOUEnabled checks to see whether the given link has FoU (Foo over Ethernet) enabled on it, specifically since +// kube-router only works with GUE (Generic UDP Encapsulation) we look for that and not just FoU in general. If the +// linkName is enabled with FoU GUE then we return true, otherwise false +// +// Output for a FoU Enabled GUE tunnel looks like: +// ipip ipip remote local dev ttl 225 pmtudisc encap gue encap-sport auto encap-dport 5555 ... +// Output for a normal IPIP tunnel looks like: +// ipip ipip remote local dev ttl inherit ... +func linkFOUEnabled(linkName string) bool { + const fouEncapEnabled = "encap gue" + cmdArgs := []string{"-details", "link", "show", linkName} + + out, err := exec.Command("ip", cmdArgs...).CombinedOutput() + + if err != nil { + klog.Warningf("recevied an error while trying to look at the link details of %s, this shouldn't have happened", + linkName) + return false + } + + if strings.Contains(string(out), fouEncapEnabled) { + return true + } + + return false +} diff --git a/pkg/tunnels/linux_tunnels_test.go b/pkg/tunnels/linux_tunnels_test.go new file mode 100644 index 000000000..24e2a2458 --- /dev/null +++ b/pkg/tunnels/linux_tunnels_test.go @@ -0,0 +1,40 @@ +package tunnels + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_GenerateTunnelName(t *testing.T) { + testcases := []struct { + name string + nodeIP string + tunnelName string + }{ + { + "IP less than 12 characters after removing '.'", + "10.0.0.1", + "tun-e443169117a", + }, + { + "IP has 12 characters after removing '.'", + "100.200.300.400", + "tun-9033d7906c7", + }, + { + "IPv6 tunnel names are properly handled and consistent", + "2001:db8:42:2::/64", + "tun-ba56986ef05", + }, + } + + for _, testcase := range testcases { + t.Run(testcase.name, func(t *testing.T) { + tunnelName := GenerateTunnelName(testcase.nodeIP) + assert.Lessf(t, len(tunnelName), 16, "the maximum length of the tunnel name should never exceed"+ + "15 characters as 16 characters is the maximum length of a Unix interface name") + assert.Equal(t, testcase.tunnelName, tunnelName, "did not get expected tunnel interface name") + }) + } +} From 463a514a3dfc8e640416ed515885379f208edd39 Mon Sep 17 00:00:00 2001 From: Aaron U'Ren Date: Wed, 2 Oct 2024 16:46:02 -0500 Subject: [PATCH 4/6] fact(bgp): separate bgp functionality into new package --- pkg/bgp/id.go | 67 +++++++++++ pkg/bgp/id_test.go | 39 ++++++ pkg/bgp/parse.go | 63 ++++++++++ .../routing/network_routes_controller.go | 17 ++- pkg/controllers/routing/utils.go | 111 +----------------- pkg/controllers/routing/utils_test.go | 32 ----- 6 files changed, 178 insertions(+), 151 deletions(-) create mode 100644 pkg/bgp/id.go create mode 100644 pkg/bgp/id_test.go create mode 100644 pkg/bgp/parse.go diff --git a/pkg/bgp/id.go b/pkg/bgp/id.go new file mode 100644 index 000000000..3d3a0a3d7 --- /dev/null +++ b/pkg/bgp/id.go @@ -0,0 +1,67 @@ +package bgp + +import ( + "encoding/binary" + "errors" + "fmt" + "hash/fnv" + "net" + "regexp" + "strconv" + + "github.com/cloudnativelabs/kube-router/v2/pkg/utils" + gobgp "github.com/osrg/gobgp/v3/pkg/packet/bgp" +) + +const ( + CommunityMaxSize = 32 + CommunityMaxPartSize = 16 +) + +// GenerateRouterID will generate a router ID based upon the user's configuration (or lack there of) and the node's +// primary IP address if the user has not specified. If the user has configured the router ID as "generate" then we +// will generate a router ID based upon fnv hashing the node's primary IP address. +func GenerateRouterID(nodeIPAware utils.NodeIPAware, configRouterID string) (string, error) { + switch { + case configRouterID == "generate": + h := fnv.New32a() + h.Write(nodeIPAware.GetPrimaryNodeIP()) + hs := h.Sum32() + gip := make(net.IP, 4) + binary.BigEndian.PutUint32(gip, hs) + return gip.String(), nil + case configRouterID != "": + return configRouterID, nil + } + + if nodeIPAware.GetPrimaryNodeIP().To4() == nil { + return "", errors.New("router-id must be specified when primary node IP is an IPv6 address") + } + return configRouterID, nil +} + +// ValidateCommunity takes in a string and attempts to parse a BGP community out of it in a way that is similar to +// gobgp (internal/pkg/table/policy.go:ParseCommunity()). If it is not able to parse the community information it +// returns an error. +func ValidateCommunity(arg string) error { + _, err := strconv.ParseUint(arg, 10, CommunityMaxSize) + if err == nil { + return nil + } + + _regexpCommunity := regexp.MustCompile(`(\d+):(\d+)`) + elems := _regexpCommunity.FindStringSubmatch(arg) + if len(elems) == 3 { + if _, err := strconv.ParseUint(elems[1], 10, CommunityMaxPartSize); err == nil { + if _, err = strconv.ParseUint(elems[2], 10, CommunityMaxPartSize); err == nil { + return nil + } + } + } + for _, v := range gobgp.WellKnownCommunityNameMap { + if arg == v { + return nil + } + } + return fmt.Errorf("failed to parse %s as community", arg) +} diff --git a/pkg/bgp/id_test.go b/pkg/bgp/id_test.go new file mode 100644 index 000000000..e2de4a992 --- /dev/null +++ b/pkg/bgp/id_test.go @@ -0,0 +1,39 @@ +package bgp + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_ValidateCommunity(t *testing.T) { + t.Run("BGP community specified as a 32-bit integer should pass validation", func(t *testing.T) { + assert.Nil(t, ValidateCommunity("4294967041")) + assert.Nil(t, ValidateCommunity("4294967295")) + }) + t.Run("BGP community specified as 2 16-bit integers should pass validation", func(t *testing.T) { + assert.Nil(t, ValidateCommunity("65535:65281")) + assert.Nil(t, ValidateCommunity("65535:65535")) + }) + t.Run("Well known BGP communities passed as a string should pass validation", func(t *testing.T) { + assert.Nil(t, ValidateCommunity("no-export")) + assert.Nil(t, ValidateCommunity("internet")) + assert.Nil(t, ValidateCommunity("planned-shut")) + assert.Nil(t, ValidateCommunity("accept-own")) + assert.Nil(t, ValidateCommunity("blackhole")) + assert.Nil(t, ValidateCommunity("no-advertise")) + assert.Nil(t, ValidateCommunity("no-peer")) + }) + t.Run("BGP community that is greater than 32-bit integer should fail validation", func(t *testing.T) { + assert.Error(t, ValidateCommunity("4294967296")) + }) + t.Run("BGP community that is greater than 2 16-bit integers should fail validation", func(t *testing.T) { + assert.Error(t, ValidateCommunity("65536:65535")) + assert.Error(t, ValidateCommunity("65535:65536")) + assert.Error(t, ValidateCommunity("65536:65536")) + }) + t.Run("BGP community that is not a number should fail validation", func(t *testing.T) { + assert.Error(t, ValidateCommunity("0xFFFFFFFF")) + assert.Error(t, ValidateCommunity("community")) + }) +} diff --git a/pkg/bgp/parse.go b/pkg/bgp/parse.go new file mode 100644 index 000000000..9ec2ddff4 --- /dev/null +++ b/pkg/bgp/parse.go @@ -0,0 +1,63 @@ +package bgp + +import ( + "fmt" + "net" + + gobgpapi "github.com/osrg/gobgp/v3/api" + "github.com/vishvananda/netlink" +) + +// ParseNextHop takes in a GoBGP Path and parses out the destination's next hop from its attributes. If it +// can't parse a next hop IP from the GoBGP Path, it returns an error. +func ParseNextHop(path *gobgpapi.Path) (net.IP, error) { + for _, pAttr := range path.GetPattrs() { + unmarshalNew, err := pAttr.UnmarshalNew() + if err != nil { + return nil, fmt.Errorf("failed to unmarshal path attribute: %s", err) + } + switch t := unmarshalNew.(type) { + case *gobgpapi.NextHopAttribute: + // This is the primary way that we receive NextHops and happens when both the client and the server exchange + // next hops on the same IP family that they negotiated BGP on + nextHopIP := net.ParseIP(t.NextHop) + if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { + return nextHopIP, nil + } + return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHop) + case *gobgpapi.MpReachNLRIAttribute: + // in the case where the server and the client are exchanging next-hops that don't relate to their primary + // IP family, we get MpReachNLRIAttribute instead of NextHopAttributes + // TODO: here we only take the first next hop, at some point in the future it would probably be best to + // consider multiple next hops + nextHopIP := net.ParseIP(t.NextHops[0]) + if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { + return nextHopIP, nil + } + return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHops[0]) + } + } + return nil, fmt.Errorf("could not parse next hop received from GoBGP for path: %s", path) +} + +// ParsePath takes in a GoBGP Path and parses out the destination subnet and the next hop from its attributes. +// If successful, it will return the destination of the BGP path as a subnet form and the next hop. If it +// can't parse the destination or the next hop IP, it returns an error. +func ParsePath(path *gobgpapi.Path) (*net.IPNet, net.IP, error) { + nextHop, err := ParseNextHop(path) + if err != nil { + return nil, nil, err + } + + nlri := path.GetNlri() + var prefix gobgpapi.IPAddressPrefix + err = nlri.UnmarshalTo(&prefix) + if err != nil { + return nil, nil, fmt.Errorf("invalid nlri in advertised path") + } + dstSubnet, err := netlink.ParseIPNet(prefix.Prefix + "/" + fmt.Sprint(prefix.PrefixLen)) + if err != nil { + return nil, nil, fmt.Errorf("couldn't parse IP subnet from nlri advertised path") + } + return dstSubnet, nextHop, nil +} diff --git a/pkg/controllers/routing/network_routes_controller.go b/pkg/controllers/routing/network_routes_controller.go index 758976f68..db2478898 100644 --- a/pkg/controllers/routing/network_routes_controller.go +++ b/pkg/controllers/routing/network_routes_controller.go @@ -14,6 +14,7 @@ import ( "google.golang.org/protobuf/types/known/anypb" + "github.com/cloudnativelabs/kube-router/v2/pkg/bgp" "github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" "github.com/cloudnativelabs/kube-router/v2/pkg/metrics" "github.com/cloudnativelabs/kube-router/v2/pkg/options" @@ -62,12 +63,10 @@ const ( ClusterIPST = "ClusterIP" NodePortST = "NodePort" - prependPathMaxBits = 8 - asnMaxBitSize = 32 - bgpCommunityMaxSize = 32 - bgpCommunityMaxPartSize = 16 - routeReflectorMaxID = 32 - ipv4MaskMinBits = 32 + prependPathMaxBits = 8 + asnMaxBitSize = 32 + routeReflectorMaxID = 32 + ipv4MaskMinBits = 32 ) // NetworkRoutingController is struct to hold necessary information required by controller @@ -567,7 +566,7 @@ func (nrc *NetworkRoutingController) injectRoute(path *gobgpapi.Path) error { var route *netlink.Route var link netlink.Link - dst, nextHop, err := parseBGPPath(path) + dst, nextHop, err := bgp.ParsePath(path) if err != nil { return err } @@ -965,7 +964,7 @@ func (nrc *NetworkRoutingController) startBgpServer(grpcServer bool) error { } else { nodeCommunities = stringToSlice(nodeBGPCommunitiesAnnotation, ",") for _, nodeCommunity := range nodeCommunities { - if err = validateCommunity(nodeCommunity); err != nil { + if err = bgp.ValidateCommunity(nodeCommunity); err != nil { klog.Warningf("cannot add BGP community '%s' from node annotation as it does not appear "+ "to be a valid community identifier", nodeCommunity) continue @@ -1302,7 +1301,7 @@ func NewNetworkRoutingController(clientset kubernetes.Interface, } } - nrc.routerID, err = generateRouterID(nrc.krNode, kubeRouterConfig.RouterID) + nrc.routerID, err = bgp.GenerateRouterID(nrc.krNode, kubeRouterConfig.RouterID) if err != nil { return nil, err } diff --git a/pkg/controllers/routing/utils.go b/pkg/controllers/routing/utils.go index 79bff78ef..5c904afe5 100644 --- a/pkg/controllers/routing/utils.go +++ b/pkg/controllers/routing/utils.go @@ -2,22 +2,15 @@ package routing import ( "encoding/base64" - "encoding/binary" - "errors" "fmt" - "hash/fnv" "net" - "regexp" "strconv" "strings" - "github.com/cloudnativelabs/kube-router/v2/pkg/utils" gobgpapi "github.com/osrg/gobgp/v3/api" - "github.com/osrg/gobgp/v3/pkg/packet/bgp" + v1core "k8s.io/api/core/v1" "k8s.io/klog/v2" - - "github.com/vishvananda/netlink" ) // Used for processing Annotations that may contain multiple items @@ -117,108 +110,6 @@ func statementsEqualByName(a, b []*gobgpapi.Statement) bool { return true } -// generateRouterID will generate a router ID based upon the user's configuration (or lack there of) and the node's -// primary IP address if the user has not specified. If the user has configured the router ID as "generate" then we -// will generate a router ID based upon fnv hashing the node's primary IP address. -func generateRouterID(nodeIPAware utils.NodeIPAware, configRouterID string) (string, error) { - switch { - case configRouterID == "generate": - h := fnv.New32a() - h.Write(nodeIPAware.GetPrimaryNodeIP()) - hs := h.Sum32() - gip := make(net.IP, 4) - binary.BigEndian.PutUint32(gip, hs) - return gip.String(), nil - case configRouterID != "": - return configRouterID, nil - } - - if nodeIPAware.GetPrimaryNodeIP().To4() == nil { - return "", errors.New("router-id must be specified when primary node IP is an IPv6 address") - } - return configRouterID, nil -} - -// validateCommunity takes in a string and attempts to parse a BGP community out of it in a way that is similar to -// gobgp (internal/pkg/table/policy.go:ParseCommunity()). If it is not able to parse the community information it -// returns an error. -func validateCommunity(arg string) error { - _, err := strconv.ParseUint(arg, 10, bgpCommunityMaxSize) - if err == nil { - return nil - } - - _regexpCommunity := regexp.MustCompile(`(\d+):(\d+)`) - elems := _regexpCommunity.FindStringSubmatch(arg) - if len(elems) == 3 { - if _, err := strconv.ParseUint(elems[1], 10, bgpCommunityMaxPartSize); err == nil { - if _, err = strconv.ParseUint(elems[2], 10, bgpCommunityMaxPartSize); err == nil { - return nil - } - } - } - for _, v := range bgp.WellKnownCommunityNameMap { - if arg == v { - return nil - } - } - return fmt.Errorf("failed to parse %s as community", arg) -} - -// parseBGPNextHop takes in a GoBGP Path and parses out the destination's next hop from its attributes. If it -// can't parse a next hop IP from the GoBGP Path, it returns an error. -func parseBGPNextHop(path *gobgpapi.Path) (net.IP, error) { - for _, pAttr := range path.GetPattrs() { - unmarshalNew, err := pAttr.UnmarshalNew() - if err != nil { - return nil, fmt.Errorf("failed to unmarshal path attribute: %s", err) - } - switch t := unmarshalNew.(type) { - case *gobgpapi.NextHopAttribute: - // This is the primary way that we receive NextHops and happens when both the client and the server exchange - // next hops on the same IP family that they negotiated BGP on - nextHopIP := net.ParseIP(t.NextHop) - if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { - return nextHopIP, nil - } - return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHop) - case *gobgpapi.MpReachNLRIAttribute: - // in the case where the server and the client are exchanging next-hops that don't relate to their primary - // IP family, we get MpReachNLRIAttribute instead of NextHopAttributes - // TODO: here we only take the first next hop, at some point in the future it would probably be best to - // consider multiple next hops - nextHopIP := net.ParseIP(t.NextHops[0]) - if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { - return nextHopIP, nil - } - return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHops[0]) - } - } - return nil, fmt.Errorf("could not parse next hop received from GoBGP for path: %s", path) -} - -// parseBGPPath takes in a GoBGP Path and parses out the destination subnet and the next hop from its attributes. -// If successful, it will return the destination of the BGP path as a subnet form and the next hop. If it -// can't parse the destination or the next hop IP, it returns an error. -func parseBGPPath(path *gobgpapi.Path) (*net.IPNet, net.IP, error) { - nextHop, err := parseBGPNextHop(path) - if err != nil { - return nil, nil, err - } - - nlri := path.GetNlri() - var prefix gobgpapi.IPAddressPrefix - err = nlri.UnmarshalTo(&prefix) - if err != nil { - return nil, nil, fmt.Errorf("invalid nlri in advertised path") - } - dstSubnet, err := netlink.ParseIPNet(prefix.Prefix + "/" + fmt.Sprint(prefix.PrefixLen)) - if err != nil { - return nil, nil, fmt.Errorf("couldn't parse IP subnet from nlri advertised path") - } - return dstSubnet, nextHop, nil -} - // getPodCIDRsFromAllNodeSources gets the pod CIDRs for all available sources on a given node in a specific order. The // order of preference is: // 1. From the kube-router.io/pod-cidr annotation (preserves backwards compatibility) diff --git a/pkg/controllers/routing/utils_test.go b/pkg/controllers/routing/utils_test.go index aa7c34541..38891f830 100644 --- a/pkg/controllers/routing/utils_test.go +++ b/pkg/controllers/routing/utils_test.go @@ -50,35 +50,3 @@ func Test_stringSliceToIPNets(t *testing.T) { assert.Nil(t, ips) }) } - -func Test_validateCommunity(t *testing.T) { - t.Run("BGP community specified as a 32-bit integer should pass validation", func(t *testing.T) { - assert.Nil(t, validateCommunity("4294967041")) - assert.Nil(t, validateCommunity("4294967295")) - }) - t.Run("BGP community specified as 2 16-bit integers should pass validation", func(t *testing.T) { - assert.Nil(t, validateCommunity("65535:65281")) - assert.Nil(t, validateCommunity("65535:65535")) - }) - t.Run("Well known BGP communities passed as a string should pass validation", func(t *testing.T) { - assert.Nil(t, validateCommunity("no-export")) - assert.Nil(t, validateCommunity("internet")) - assert.Nil(t, validateCommunity("planned-shut")) - assert.Nil(t, validateCommunity("accept-own")) - assert.Nil(t, validateCommunity("blackhole")) - assert.Nil(t, validateCommunity("no-advertise")) - assert.Nil(t, validateCommunity("no-peer")) - }) - t.Run("BGP community that is greater than 32-bit integer should fail validation", func(t *testing.T) { - assert.Error(t, validateCommunity("4294967296")) - }) - t.Run("BGP community that is greater than 2 16-bit integers should fail validation", func(t *testing.T) { - assert.Error(t, validateCommunity("65536:65535")) - assert.Error(t, validateCommunity("65535:65536")) - assert.Error(t, validateCommunity("65536:65536")) - }) - t.Run("BGP community that is not a number should fail validation", func(t *testing.T) { - assert.Error(t, validateCommunity("0xFFFFFFFF")) - assert.Error(t, validateCommunity("community")) - }) -} From 2a01df3888c8907ba5490137707172ca5d5cca2f Mon Sep 17 00:00:00 2001 From: Aaron U'Ren Date: Tue, 8 Oct 2024 15:33:23 -0500 Subject: [PATCH 5/6] fix(tunnels): fix encap port validation Make new logic similar to previous logic and remove superfluous check Co-authored-by: Tom Wieczorek --- pkg/tunnels/linux_tunnels.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/tunnels/linux_tunnels.go b/pkg/tunnels/linux_tunnels.go index 37f98b08d..241644ece 100644 --- a/pkg/tunnels/linux_tunnels.go +++ b/pkg/tunnels/linux_tunnels.go @@ -70,7 +70,7 @@ func ParseEncapType(encapType string) (EncapType, error) { type EncapPort uint16 func (e EncapPort) checkWithinRange() error { - if uint16(e) > minPort && uint16(e) < maxPort { + if uint16(e) >= minPort { return nil } return fmt.Errorf("specified encap port is out of range of valid ports: %d, valid range is from %d to %d", From 67e6d8e821628d90550bd01da21f2d205cca267a Mon Sep 17 00:00:00 2001 From: Aaron U'Ren Date: Tue, 8 Oct 2024 15:47:42 -0500 Subject: [PATCH 6/6] fix: add PR review suggestions --- pkg/bgp/id.go | 11 ++-- .../routing/network_routes_controller.go | 28 ++++++--- pkg/routes/pbr.go | 23 ++++---- pkg/routes/route_sync.go | 8 --- pkg/tunnels/linux_tunnels.go | 59 ++++++++----------- 5 files changed, 62 insertions(+), 67 deletions(-) diff --git a/pkg/bgp/id.go b/pkg/bgp/id.go index 3d3a0a3d7..3396fe714 100644 --- a/pkg/bgp/id.go +++ b/pkg/bgp/id.go @@ -6,8 +6,8 @@ import ( "fmt" "hash/fnv" "net" - "regexp" "strconv" + "strings" "github.com/cloudnativelabs/kube-router/v2/pkg/utils" gobgp "github.com/osrg/gobgp/v3/pkg/packet/bgp" @@ -49,11 +49,10 @@ func ValidateCommunity(arg string) error { return nil } - _regexpCommunity := regexp.MustCompile(`(\d+):(\d+)`) - elems := _regexpCommunity.FindStringSubmatch(arg) - if len(elems) == 3 { - if _, err := strconv.ParseUint(elems[1], 10, CommunityMaxPartSize); err == nil { - if _, err = strconv.ParseUint(elems[2], 10, CommunityMaxPartSize); err == nil { + elem1, elem2, found := strings.Cut(arg, ":") + if found { + if _, err := strconv.ParseUint(elem1, 10, CommunityMaxPartSize); err == nil { + if _, err = strconv.ParseUint(elem2, 10, CommunityMaxPartSize); err == nil { return nil } } diff --git a/pkg/controllers/routing/network_routes_controller.go b/pkg/controllers/routing/network_routes_controller.go index db2478898..9bb69a670 100644 --- a/pkg/controllers/routing/network_routes_controller.go +++ b/pkg/controllers/routing/network_routes_controller.go @@ -69,6 +69,20 @@ const ( ipv4MaskMinBits = 32 ) +// RouteSyncer is an interface that defines the methods needed to sync routes to the kernel's routing table +type RouteSyncer interface { + AddInjectedRoute(dst *net.IPNet, route *netlink.Route) + DelInjectedRoute(dst *net.IPNet) + Run(stopCh <-chan struct{}, wg *sync.WaitGroup) + SyncLocalRouteTable() +} + +// PolicyBasedRouting is an interface that defines the methods needed to enable/disable policy based routing +type PolicyBasedRouter interface { + Enable() error + Disable() error +} + // NetworkRoutingController is struct to hold necessary information required by controller type NetworkRoutingController struct { krNode utils.NodeAware @@ -124,8 +138,8 @@ type NetworkRoutingController struct { podIPv6CIDRs []string CNIFirewallSetup *sync.Cond ipsetMutex *sync.Mutex - routeSyncer routes.RouteSyncer - pbr routes.PBRer + routeSyncer RouteSyncer + pbr PolicyBasedRouter tunneler tunnels.Tunneler nodeLister cache.Indexer @@ -165,13 +179,13 @@ func (nrc *NetworkRoutingController) Run(healthChan chan<- *healthcheck.Controll nrc.CNIFirewallSetup.Broadcast() - nrc.pbr = routes.NewPBR(nrc.krNode, nrc.podIPv4CIDRs, nrc.podIPv6CIDRs) + nrc.pbr = routes.NewPolicyBasedRules(nrc.krNode, nrc.podIPv4CIDRs, nrc.podIPv6CIDRs) // Handle ipip tunnel overlay if nrc.enableOverlays { klog.V(1).Info("Tunnel Overlay enabled in configuration.") klog.V(1).Info("Setting up overlay networking.") - err = nrc.pbr.EnablePolicyBasedRouting() + err = nrc.pbr.Enable() if err != nil { klog.Errorf("Failed to enable required policy based routing: %s", err.Error()) } @@ -187,7 +201,7 @@ func (nrc *NetworkRoutingController) Run(healthChan chan<- *healthcheck.Controll } else { klog.V(1).Info("Tunnel Overlay disabled in configuration.") klog.V(1).Info("Cleaning up old overlay networking if needed.") - err = nrc.pbr.DisablePolicyBasedRouting() + err = nrc.pbr.Disable() if err != nil { klog.Errorf("Failed to disable policy based routing: %s", err.Error()) } @@ -1365,8 +1379,8 @@ func NewNetworkRoutingController(clientset kubernetes.Interface, nrc.autoMTU = kubeRouterConfig.AutoMTU nrc.enableOverlays = kubeRouterConfig.EnableOverlay nrc.overlayType = kubeRouterConfig.OverlayType - overlayEncap, err := tunnels.ParseEncapType(kubeRouterConfig.OverlayEncap) - if err != nil { + overlayEncap, ok := tunnels.ParseEncapType(kubeRouterConfig.OverlayEncap) + if !ok { return nil, fmt.Errorf("unknown --overlay-encap option '%s' selected, unable to continue", overlayEncap) } overlayEncapPort, err := tunnels.ParseEncapPort(kubeRouterConfig.OverlayEncapPort) diff --git a/pkg/routes/pbr.go b/pkg/routes/pbr.go index 2abf9429f..50b913b64 100644 --- a/pkg/routes/pbr.go +++ b/pkg/routes/pbr.go @@ -15,20 +15,16 @@ const ( CustomTableName = "kube-router" ) -type PBR struct { +// PolicyBasedRules is a struct that holds all of the information needed for manipulating policy based routing rules +type PolicyBasedRules struct { nfa utils.NodeFamilyAware podIPv4CIDRs []string podIPv6CIDRs []string } -type PBRer interface { - EnablePolicyBasedRouting() error - DisablePolicyBasedRouting() error -} - -// NewPBR creates a new PBR object which will be used to manipulate policy based routing rules -func NewPBR(nfa utils.NodeFamilyAware, podIPv4CIDRs, podIPv6CIDRs []string) *PBR { - return &PBR{ +// NewPolicyBasedRules creates a new PBR object which will be used to manipulate policy based routing rules +func NewPolicyBasedRules(nfa utils.NodeFamilyAware, podIPv4CIDRs, podIPv6CIDRs []string) *PolicyBasedRules { + return &PolicyBasedRules{ nfa: nfa, podIPv4CIDRs: podIPv4CIDRs, podIPv6CIDRs: podIPv6CIDRs, @@ -60,9 +56,9 @@ func ipRuleAbstraction(ipProtocol, ipOp, cidr string) error { return nil } -// setup a custom routing table that will be used for policy based routing to ensure traffic originating -// on tunnel interface only leaves through tunnel interface irrespective rp_filter enabled/disabled -func (pbr *PBR) EnablePolicyBasedRouting() error { +// Enable setup a custom routing table that will be used for policy based routing to ensure traffic +// originating on tunnel interface only leaves through tunnel interface irrespective rp_filter enabled/disabled +func (pbr *PolicyBasedRules) Enable() error { err := utils.RouteTableAdd(CustomTableID, CustomTableName) if err != nil { return fmt.Errorf("failed to update rt_tables file: %s", err) @@ -86,7 +82,8 @@ func (pbr *PBR) EnablePolicyBasedRouting() error { return nil } -func (pbr *PBR) DisablePolicyBasedRouting() error { +// Disable removes the custom routing table that was used for policy based routing +func (pbr *PolicyBasedRules) Disable() error { err := utils.RouteTableAdd(CustomTableID, CustomTableName) if err != nil { return fmt.Errorf("failed to update rt_tables file: %s", err) diff --git a/pkg/routes/route_sync.go b/pkg/routes/route_sync.go index 2da0061a3..6a5ded040 100644 --- a/pkg/routes/route_sync.go +++ b/pkg/routes/route_sync.go @@ -9,14 +9,6 @@ import ( "k8s.io/klog/v2" ) -// RouteSyncer is an interface that defines the methods needed to sync routes to the kernel's routing table -type RouteSyncer interface { - AddInjectedRoute(dst *net.IPNet, route *netlink.Route) - DelInjectedRoute(dst *net.IPNet) - Run(stopCh <-chan struct{}, wg *sync.WaitGroup) - SyncLocalRouteTable() -} - // RouteSync is a struct that holds all of the information needed for syncing routes to the kernel's routing table type RouteSync struct { routeTableStateMap map[string]*netlink.Route diff --git a/pkg/tunnels/linux_tunnels.go b/pkg/tunnels/linux_tunnels.go index 241644ece..1da4f3d58 100644 --- a/pkg/tunnels/linux_tunnels.go +++ b/pkg/tunnels/linux_tunnels.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "os/exec" + "slices" "strconv" "strings" @@ -39,17 +40,6 @@ var ( // EncapType represents the type of encapsulation used for an overlay tunnel in kube-router. type EncapType string -// IsValid checks if the encapsulation type is valid by comparing it against a list of valid types. -// It returns true if the encapsulation type is valid, otherwise it returns false. -func (e EncapType) IsValid() bool { - for _, validType := range validEncapTypes { - if e == validType { - return true - } - } - return false -} - // ParseEncapType parses the given string and returns an Encap type if valid. // It returns an error if the encapsulation type is invalid. // @@ -58,13 +48,13 @@ func (e EncapType) IsValid() bool { // // Returns: // - Encap: The parsed encapsulation type. -// - error: An error if the encapsulation type is invalid. -func ParseEncapType(encapType string) (EncapType, error) { +// - bool: A boolean indicating whether the encapsulation type is valid. +func ParseEncapType(encapType string) (EncapType, bool) { encap := EncapType(encapType) - if !encap.IsValid() { - return "", fmt.Errorf("invalid encapsulation type: %s", encapType) + if !slices.Contains(validEncapTypes, encap) { + return "", false } - return encap, nil + return encap, true } type EncapPort uint16 @@ -92,26 +82,25 @@ type Tunneler interface { } type OverlayTunnel struct { - krNode utils.NodeIPAndFamilyAware - overlayEncapPort EncapPort - overlayEncap EncapType + krNode utils.NodeIPAware + encapPort EncapPort + encapType EncapType } -func NewOverlayTunnel(krNode utils.NodeIPAndFamilyAware, overlayEncap EncapType, - overlayEncapPort EncapPort) *OverlayTunnel { +func NewOverlayTunnel(krNode utils.NodeIPAware, encapType EncapType, encapPort EncapPort) *OverlayTunnel { return &OverlayTunnel{ - krNode: krNode, - overlayEncapPort: overlayEncapPort, - overlayEncap: overlayEncap, + krNode: krNode, + encapPort: encapPort, + encapType: encapType, } } func (o *OverlayTunnel) EncapType() EncapType { - return o.overlayEncap + return o.encapType } func (o *OverlayTunnel) EncapPort() EncapPort { - return o.overlayEncapPort + return o.encapPort } // setupOverlayTunnel attempts to create a tunnel link and corresponding routes for IPIP based overlay networks @@ -124,7 +113,7 @@ func (o *OverlayTunnel) SetupOverlayTunnel(tunnelName string, nextHop net.IP, var ipipMode, fouLinkType string isIPv6 := false ipBase := make([]string, 0) - strFormattedEncapPort := strconv.FormatInt(int64(o.overlayEncapPort), 10) + strFormattedEncapPort := strconv.FormatInt(int64(o.encapPort), 10) if nextHop.To4() != nil { bestIPForFamily = o.krNode.FindBestIPv4NodeAddress() @@ -151,7 +140,7 @@ func (o *OverlayTunnel) SetupOverlayTunnel(tunnelName string, nextHop net.IP, klog.V(1).Infof("Tunnel interface: %s with encap type %s for the node %s already exists.", tunnelName, link.Attrs().EncapType, nextHop.String()) - switch o.overlayEncap { + switch o.encapType { case EncapTypeIPIP: if linkFOUEnabled(tunnelName) { klog.Infof("Was configured to use ipip tunnels, but found existing fou tunnels in place, cleaning up") @@ -162,7 +151,7 @@ func (o *OverlayTunnel) SetupOverlayTunnel(tunnelName string, nextHop net.IP, CleanupTunnel(nextHopSubnet, tunnelName) // If we are transitioning from FoU to IPIP we also need to clean up the old FoU port if it exists - if fouPortAndProtoExist(o.overlayEncapPort, isIPv6) { + if fouPortAndProtoExist(o.encapPort, isIPv6) { fouArgs := ipBase fouArgs = append(fouArgs, "fou", "del", "port", strFormattedEncapPort) out, err := exec.Command("ip", fouArgs...).CombinedOutput() @@ -188,9 +177,9 @@ func (o *OverlayTunnel) SetupOverlayTunnel(tunnelName string, nextHop net.IP, // nothing to do here if err != nil || recreate { klog.Infof("Creating tunnel %s of type %s with encap %s for destination %s", - tunnelName, fouLinkType, o.overlayEncap, nextHop.String()) + tunnelName, fouLinkType, o.encapType, nextHop.String()) cmdArgs := ipBase - switch o.overlayEncap { + switch o.encapType { case EncapTypeIPIP: // Plain IPIP tunnel without any encapsulation cmdArgs = append(cmdArgs, "tunnel", "add", tunnelName, "mode", ipipMode, "local", bestIPForFamily.String(), @@ -198,7 +187,7 @@ func (o *OverlayTunnel) SetupOverlayTunnel(tunnelName string, nextHop net.IP, case EncapTypeFOU: // Ensure that the FOU tunnel port is set correctly - if !fouPortAndProtoExist(o.overlayEncapPort, isIPv6) { + if !fouPortAndProtoExist(o.encapPort, isIPv6) { fouArgs := ipBase fouArgs = append(fouArgs, "fou", "add", "port", strFormattedEncapPort, "gue") out, err := exec.Command("ip", fouArgs...).CombinedOutput() @@ -216,7 +205,7 @@ func (o *OverlayTunnel) SetupOverlayTunnel(tunnelName string, nextHop net.IP, default: return nil, fmt.Errorf("unknown tunnel encapsulation was passed: %s, unable to continue with overlay "+ - "setup", o.overlayEncap) + "setup", o.encapType) } klog.V(2).Infof("Executing the following command to create tunnel: ip %s", cmdArgs) @@ -276,6 +265,10 @@ func CleanupTunnel(destinationSubnet *net.IPNet, tunnelName string) { // Since linux restricts interface names to 15 characters, we take the sha-256 of the node IP after removing // non-entropic characters like '.' and ':', and then use the first 12 bytes of it. This allows us to cater to both // long IPv4 addresses and much longer IPv6 addresses. +// +// TODO: In the future, we should consider using the hexadecimal byte representation of IPv4 addresses and using a the +// SHA256 of the hash. Additionally, we should not remove non-entropic characters as it can cause hash collisions as +// "21.3.0.4" would has the same as "2.13.0.4" without "."'s. func GenerateTunnelName(nodeIP string) string { // remove dots from an IPv4 address strippedIP := strings.ReplaceAll(nodeIP, ".", "")