-
Notifications
You must be signed in to change notification settings - Fork 473
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(hairpin): set hairpin_mode for veth iface
It used to be that the kubelet handled setting hairpin mode for us: kubernetes/kubernetes#13628. Then this functionality moved to the dockershim: kubernetes/kubernetes#62212. Then the functionality was removed entirely: kubernetes/kubernetes@83265c9171f. Unfortunately, the fact that we depended on this in order for our hairpin implementation to work was lost, if we ever knew it at all. Additionally, I suspect that the containerd and cri-o implementations never worked correctly with hairpinning. Without this, the NAT rules that we implement for hairpinning don't work correctly. Because hairpin_mode isn't enabled on the virtual interface of the container on the host, the packet bubbles up to the kube-bridge. At some point in the traffic flow, the route back to the pod gets resolved to the MAC address inside the container; at that point, the packet's source MAC and destination MAC don't match the kube-bridge interface and the packet is black-holed. This can also be fixed by putting the kube-bridge interface into promiscuous mode so that it accepts all MAC addresses, but I think that going back to the original functionality of enabling hairpin_mode on the veth interface of the container is likely the lesser of two evils here, as putting the kube-bridge interface into promiscuous mode will likely have unintended consequences.
- Loading branch information
Showing
6 changed files
with
437 additions
and
218 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
package proxy | ||
|
||
import ( | ||
"fmt" | ||
"net" | ||
"os" | ||
"path" | ||
"runtime" | ||
"sync" | ||
"time" | ||
|
||
"github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" | ||
"github.com/cloudnativelabs/kube-router/v2/pkg/utils" | ||
"github.com/vishvananda/netns" | ||
"k8s.io/klog/v2" | ||
) | ||
|
||
type hairpinController struct { | ||
epC <-chan string | ||
nsc *NetworkServicesController | ||
} | ||
|
||
func (hpc *hairpinController) Run(stopCh <-chan struct{}, wg *sync.WaitGroup, | ||
healthChan chan<- *healthcheck.ControllerHeartbeat) { | ||
defer wg.Done() | ||
klog.Infof("Starting hairping controller (handles setting hairpin_mode for veth interfaces)") | ||
|
||
t := time.NewTicker(healthcheck.HPCSyncPeriod) | ||
defer t.Stop() | ||
for { | ||
// Add an additional non-blocking select to ensure that if the stopCh channel is closed it is handled first | ||
select { | ||
case <-stopCh: | ||
klog.Info("Shutting down Hairpin Controller goroutine") | ||
return | ||
default: | ||
} | ||
select { | ||
case <-stopCh: | ||
klog.Info("Shutting down Hairpin Controller goroutine") | ||
return | ||
case endpointIP := <-hpc.epC: | ||
klog.V(1).Infof("Received request for hairpin setup of endpoint %s, processing", endpointIP) | ||
err := hpc.ensureHairpinEnabledForPodInterface(endpointIP) | ||
if err != nil { | ||
klog.Errorf("unable to set hairpin mode for endpoint %s, its possible that hairpinning will not "+ | ||
"work as expected. Error was: %v", | ||
endpointIP, err) | ||
} | ||
case <-t.C: | ||
healthcheck.SendHeartBeat(healthChan, "HPC") | ||
} | ||
} | ||
} | ||
|
||
func (hpc *hairpinController) ensureHairpinEnabledForPodInterface(endpointIP string) error { | ||
klog.V(2).Infof("Attempting to enable hairpin mode for endpoint IP %s", endpointIP) | ||
crRuntime, containerID, err := hpc.nsc.findContainerRuntimeReferences(endpointIP) | ||
if err != nil { | ||
return err | ||
} | ||
klog.V(2).Infof("Detected runtime %s and container ID %s for endpoint IP %s", crRuntime, containerID, endpointIP) | ||
|
||
runtime.LockOSThread() | ||
defer runtime.UnlockOSThread() | ||
|
||
hostNetworkNSHandle, err := netns.Get() | ||
if err != nil { | ||
return fmt.Errorf("failed to get namespace due to %v", err) | ||
} | ||
defer utils.CloseCloserDisregardError(&hostNetworkNSHandle) | ||
|
||
var pid int | ||
if crRuntime == "docker" { | ||
// WARN: This method is deprecated and will be removed once docker-shim is removed from kubelet. | ||
pid, err = hpc.nsc.ln.getContainerPidWithDocker(containerID) | ||
if err != nil { | ||
return fmt.Errorf("failed to prepare endpoint %s to do direct server return due to %v", | ||
endpointIP, err) | ||
} | ||
} else { | ||
// We expect CRI compliant runtimes here | ||
// ugly workaround, refactoring of pkg/Proxy is required | ||
pid, err = hpc.nsc.ln.getContainerPidWithCRI(hpc.nsc.dsr.runtimeEndpoint, containerID) | ||
if err != nil { | ||
return fmt.Errorf("failed to prepare endpoint %s to do DSR due to: %v", endpointIP, err) | ||
} | ||
} | ||
klog.V(2).Infof("Found PID %d for endpoint IP %s", pid, endpointIP) | ||
|
||
// Get the interface link ID from inside the container so that we can link it to the veth on the host namespace | ||
ifaceID, err := hpc.nsc.ln.findIfaceLinkForPid(pid) | ||
if err != nil { | ||
return fmt.Errorf("failed to find the interface ID inside the container NS for endpoint IP: %s, due to: %v", | ||
endpointIP, err) | ||
} | ||
klog.V(2).Infof("Found Interface Link ID %d for endpoint IP %s", ifaceID, endpointIP) | ||
|
||
ifaceName, err := net.InterfaceByIndex(ifaceID) | ||
if err != nil { | ||
return fmt.Errorf("failed to get the interface name from the link ID inside the container for endpoint IP: "+ | ||
"%s and Interface ID: %d due to: %v", endpointIP, ifaceID, err) | ||
} | ||
|
||
klog.V(1).Infof("Enabling hairpin for interface %s for endpoint IP %s", ifaceName.Name, endpointIP) | ||
hpPath := path.Join(sysFSVirtualNetPath, ifaceName.Name, sysFSHairpinRelPath) | ||
if _, err := os.Stat(hpPath); err != nil { | ||
return fmt.Errorf("hairpin path %s doesn't appear to exist for us to set", hpPath) | ||
} | ||
|
||
return os.WriteFile(hpPath, []byte(hairpinEnable), 0644) | ||
} | ||
|
||
func NewHairpinController(nsc *NetworkServicesController, endpointCh <-chan string) *hairpinController { | ||
hpc := hairpinController{ | ||
nsc: nsc, | ||
epC: endpointCh, | ||
} | ||
|
||
return &hpc | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.