From 659d380b594c80a82c442304e9821b62bc498eb5 Mon Sep 17 00:00:00 2001 From: Dennis Marttinen Date: Sat, 18 Jan 2025 17:34:29 +0200 Subject: [PATCH] feat: add Cilium netkit device mode support This allows for enabling the [netkit device mode](https://docs.cilium.io/en/latest/operations/performance/tuning/#netkit-device-mode) of Cilium for [even higher performance](https://isovalent.com/blog/post/cilium-netkit-a-new-container-networking-paradigm-for-the-ai-era/). Note that this absolutely requires kernel 6.8 or newer (that is, Talos v1.9 or newer), otherwise you *will* break the networking of your cluster. Ask me how I know. Signed-off-by: Dennis Marttinen --- bootstrap.py | 7 ++++++- clusters/example.yaml | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/bootstrap.py b/bootstrap.py index 61339fa..68248b8 100755 --- a/bootstrap.py +++ b/bootstrap.py @@ -53,6 +53,7 @@ "ipv4-cidr": str_schema, "direct-routes": bool, }, + Optional("netkit"): bool, Optional("bgp"): { "enabled": bool, }, @@ -423,7 +424,6 @@ def apply_configuration(node_set, configuration_file, global_patches): "ipam.mode=kubernetes", "kubeProxyReplacement=true", "bpf.masquerade=true", # eBPF-based masquerading - # "bpf.datapathMode=netkit", # netkit device mode, requires kernel 6.8 (not yet in Talos) "securityContext.capabilities.ciliumAgent={CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK," "SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}", "securityContext.capabilities.cleanCiliumState={NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}", @@ -499,6 +499,11 @@ def apply_configuration(node_set, configuration_file, global_patches): f"autoDirectNodeRoutes={'true' if native_routing["direct-routes"] else 'false'}", ] + if config["cluster"]["cilium"].get("netkit"): + cilium_opts += [ + "bpf.datapathMode=netkit", # netkit device mode, REQUIRES kernel >= 6.8 (Talos v1.9) + ] + if bgp := config["cluster"]["cilium"].get("bgp"): if bgp["enabled"]: cilium_opts += [ diff --git a/clusters/example.yaml b/clusters/example.yaml index ad88f38..235ded2 100644 --- a/clusters/example.yaml +++ b/clusters/example.yaml @@ -26,6 +26,10 @@ cluster: enabled: true # Enable Cilium native routing datapath ipv4-cidr: 10.244.0.0/16 # IPv4 CIDR used for native routing direct-routes: true # Enable if you have L2 connectivity between all nodes + # Enable Cilium netkit device mode instead of veth (optional) + # WARNING: REQUIRES kernel 6.8 or newer (Talos v1.9), cluster WILL BECOME INACCESSIBLE if enabled on older kernels! + # For details, see https://docs.cilium.io/en/latest/operations/performance/tuning/#netkit-device-mode + netkit: true bgp: # Configure Cilium BGP Control Plane support (optional) enabled: true # Enable Cilium BGP Control Plane sops: my-cluster.example.com # GPG ID/fingerprint of Mozilla SOPS key (https://github.com/mozilla/sops) (optional)