diff --git a/decorator/all/all.go b/decorator/all/all.go index aa17725..af957b7 100644 --- a/decorator/all/all.go +++ b/decorator/all/all.go @@ -9,4 +9,5 @@ import ( _ "github.com/siemens/ghostwire/v2/decorator/dockerproxy" // activate nerdctl-managed CNI network alias name decoration. _ "github.com/siemens/ghostwire/v2/decorator/ieappicon" // include (on-demand) IE App icon decoration. _ "github.com/siemens/ghostwire/v2/decorator/nerdctlnet" // activate nerdctl-managed CNI network alias name decoration. + _ "github.com/siemens/ghostwire/v2/decorator/podmannet" // activate podman-managed network alias name decoration. ) diff --git a/decorator/dockernet/dockernet.go b/decorator/dockernet/dockernet.go index 01b86aa..b274b13 100644 --- a/decorator/dockernet/dockernet.go +++ b/decorator/dockernet/dockernet.go @@ -61,35 +61,37 @@ type dockerNetworks struct { engineNetns *network.NetworkNamespace // ...of the managing Docker engine. } -// makeDockerNetworks returns a dockerNetworks with the networks managed by the -// specified Docker engine. If discovery failed, a zero dockerNetworks will be -// returned instead, to be used in the engine map to signal that the asked the -// engine, but it failed, so no more attempts to talk to it, please. +// makeDockerNetworks returns a dockerNetworks object with the networks managed +// by the specified Docker engine. If discovery failed, a zero-value +// dockerNetworks object will be returned instead, to be used in the engine map +// to signal that we asked the engine, but it failed, so no more attempts to +// talk to it, please. 
func makeDockerNetworks(ctx context.Context, engine *model.ContainerEngine, allnetns network.NetworkNamespaces) ( docknets dockerNetworks, ) { dockerclient, err := client.NewClientWithOpts( client.WithHost(engine.API), client.WithAPIVersionNegotiation()) - if err == nil { - networks, _ := dockerclient.NetworkList(ctx, types.NetworkListOptions{}) - _ = dockerclient.Close() - netnsid, _ := ops.NamespacePath(fmt.Sprintf("/proc/%d/ns/net", engine.PID)).ID() - docknets.networks = networks - docknets.engine = engine - docknets.engineNetns = allnetns[netnsid] - log.Infof("found %d Docker networks related to net:[%d] %s", - len(networks), docknets.engineNetns.ID().Ino, docknets.engineNetns.DisplayName()) - } else { - log.Warnf("cannot discover Docker-managed networks, API %s", engine.API) + if err != nil { + log.Warnf("cannot discover Docker-managed networks from API %s, reason: %s", + engine.API, err.Error()) + return } + networks, _ := dockerclient.NetworkList(ctx, types.NetworkListOptions{}) + _ = dockerclient.Close() + netnsid, _ := ops.NamespacePath(fmt.Sprintf("/proc/%d/ns/net", engine.PID)).ID() + docknets.networks = networks + docknets.engine = engine + docknets.engineNetns = allnetns[netnsid] + log.Infof("found %d Docker networks related to net:[%d] %s", + len(networks), docknets.engineNetns.ID().Ino, docknets.engineNetns.DisplayName()) return } -// Decorate decorates bridge network interfaces with alias names that are the -// names of their corresponding Docker "bridge" networks, where applicable (a -// copy is stored also in the labels in Gostwire's key namespace). Additionally, -// it copies over any user-defined network labels. +// Decorate decorates bridge and macvlan master network interfaces with alias +// names that are the names of their corresponding Docker “bridge” or “macvlan” +// networks, where applicable (a copy is stored also in the labels in Gostwire's +// key namespace). Additionally, it copies over any user-defined network labels. 
func Decorate( ctx context.Context, allnetns network.NetworkNamespaces, diff --git a/decorator/podmannet/_test/pind/Dockerfile b/decorator/podmannet/_test/pind/Dockerfile new file mode 100644 index 0000000..bf0b1a1 --- /dev/null +++ b/decorator/podmannet/_test/pind/Dockerfile @@ -0,0 +1,11 @@ +# Based on https://www.redhat.com/sysadmin/podman-inside-container +ARG FEDORA_TAG + +FROM fedora:${FEDORA_TAG} +RUN dnf -y install \ + procps systemd podman fuse-overlayfs \ + --exclude container-selinux && \ + dnf clean all && \ + rm -rf /var/cache /var/log/dnf* /var/log/yum.* && \ + systemctl enable podman.socket +CMD [ "/usr/sbin/init" ] diff --git a/decorator/podmannet/doc.go b/decorator/podmannet/doc.go new file mode 100644 index 0000000..af383be --- /dev/null +++ b/decorator/podmannet/doc.go @@ -0,0 +1,29 @@ +/* +Package podmannet implements a Gostwire decorator that discovers podman (v4+) +managed networks and then decorates their corresponding Linux-kernel network +interfaces. Supported types of podman networks are “bridge” and “macvlan”. + +In case of “bridge” networks this decorator assigns network names as alias names +to the corresponding Linux-kernel bridges and also as a Gostwire-specific label. + +For “MACVLAN” networks this decorator assigns the network names as alias names +to the “parent” network interface (or “master” in Linux parlance). + +This decorator also copies any network labels it finds into the corresponding +network.Interface instances in a Gostwire discovery information model. + +# Note + +The Docker-compatible podman API is subtly incompatible: it uses a different +bridge name-allocating method, and it doesn't reveal the bridge and macvlan +master names. + +In consequence, we need to resort to a self-rolled minimal HTTP-over-UDS client +that supports a minimal subset of the podman-proprietary libpod API. As of +podman v4 the libpod API endpoint returns network information. 
As a nice +benefit, the network information endpoint abstracts from the different podmen +networking mechanisms, that is, CNI-based and/or [netavark]-based. + +[netavark]: https://github.com/containers/netavark +*/ +package podmannet diff --git a/decorator/podmannet/libpodclient.go b/decorator/podmannet/libpodclient.go new file mode 100644 index 0000000..ba05be9 --- /dev/null +++ b/decorator/podmannet/libpodclient.go @@ -0,0 +1,167 @@ +// (c) Siemens AG 2024 +// +// SPDX-License-Identifier: MIT + +package podmannet + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "net/url" + "path" + "time" +) + +// UserAgent specifies the HTTP agent string used when talking to podman's +// libpod API. +const UserAgent = "Gostwire (The Sequel)" + +// Client is a minimalist HTTP-over-UDS (unix domain socket) client for +// conversing with podmen libpod API endpoints. +type Client struct { + httpClient *http.Client + endpointURL *url.URL + libpodVersion string // libpod API semver, without "v" prefix. +} + +// newLibpodClient returns a new podman libpod API client. The endpoint must be +// using the "unix" protocol. +// +// Please note that this libpod API client is absolutely minimalist and just +// suffices for querying the podman-managed networks. 
+func newLibpodClient(endpoint string) (*Client, error) { + epurl, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("invalid endpoint, reason: %w", err) + } + if epurl.Scheme != "unix" { + return nil, fmt.Errorf("unsupported endpoint protocol '%s'", epurl.Scheme) + } + c := &Client{ + httpClient: &http.Client{ + Transport: &http.Transport{ + DisableCompression: true, + }, + }, + endpointURL: epurl, + } + dialer := &net.Dialer{ + // same as Docker's unix socket default transport configuration, see + // also + // https://github.com/docker/go-connections/blob/fa09c952e3eadbffaf8afc5b8a1667158ba38ace/sockets/sockets.go#L11 + Timeout: 10 * time.Second, + } + c.httpClient.Transport.(*http.Transport).DialContext = func(ctx context.Context, _ string, _ string) (net.Conn, error) { + // we ignore the network and address derived from the request URL and + // instead always dial the engine's unix domain socket path. + return dialer.DialContext(ctx, epurl.Scheme, epurl.Path) + } + return c, nil +} + +// Close closes idle connections; it always returns nil. +func (c *Client) Close() error { + if c.httpClient == nil { + return nil + } + c.httpClient.CloseIdleConnections() + return nil +} + +// apiPath takes a non-versioned libpod API endpoint, such as “/info” or +// “/networks/json”; it then returns a versioned libpod path when the libpod +// version is already known, such as “/v1.2.3/libpod/networks/json”. Otherwise +// it returns a “/v0/libpod/...”-based path. In consequence, without the +// libpodVersion set on the Client, only use the “/info” service endpoint, as +// this seems to be version-independent, but still needs any version in its +// endpoint path. +func (c *Client) apiPath(apipath string) string { + if c.libpodVersion == "" { + // use only for initial libpod info (API version) retrieval; please note + // that all libpod API endpoints are versioned, so there are no + // un-versioned endpoints, unlike with the Docker API. 
+ return path.Join("/v0/libpod", apipath) + } + return path.Join("/v"+c.libpodVersion+"/libpod", apipath) +} + +// get issues an HTTP GET request for the specified (yet unversioned) API +// endpoint, such as “/networks/json”. It then returns the HTTP response, whose +// body the caller must close, or an error; for a non-2xx status the body is drained and closed here, as the caller never gets to see that response. +func (c *Client) get(ctx context.Context, apipath string) (*http.Response, error) { + req, err := http.NewRequestWithContext( + ctx, http.MethodGet, + "http://localhost"+c.apiPath(apipath), + nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", UserAgent) + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices { + ensureReaderClosed(resp) + return nil, fmt.Errorf("podman service returned status code %d", resp.StatusCode) + } + return resp, nil +} + +// ensureReaderClosed drains up to 512 bytes of a service response body and then closes it; a nil response or a nil body is a no-op. +func ensureReaderClosed(resp *http.Response) { + if resp == nil || resp.Body == nil { + return + } + _, _ = io.CopyN(io.Discard, resp.Body, 512) + resp.Body.Close() +} + +// essentialLibpodInformation grabs just the API version information from the +// JSON salad returned by a “/vX/libpod/info” endpoint. +type essentialLibpodInformation struct { + Version struct { + APIVersion string // major.minor.patch, without "v" prefix + } `json:"version"` +} + +// info returns the “essential” libpod information, that is, the libpod API +// version; the response body is always drained and closed before returning. +func (c *Client) info(ctx context.Context) (info essentialLibpodInformation, err error) { + resp, err := c.get(ctx, "/info") + if err != nil { + return info, err + } + defer ensureReaderClosed(resp) + err = json.NewDecoder(resp.Body).Decode(&info) + return info, err +} + +// NetworkResource grabs just the few things from a podman network we're +// interested here for the purposes of correctly decorating network interfaces +// with podman network names. We simply ignore all the other JSON salad returned +// from the “/vX/libpod/networks/json” endpoint. 
+type NetworkResource struct { + Name string `json:"name"` // name of the network + ID string `json:"id"` // unique ID within the particular podman engine instance + Driver string `json:"driver"` // name of the driver; "bridge", "macvlan", "ipvlan" + NetworkInterface string `json:"network_interface"` // name of the associated (master) network interface + Internal bool `json:"internal"` // network is host-internal only, without external connectivity + Labels map[string]string `json:"labels"` +} + +// networkList returns the list of managed podman networks as reported by the “/networks/json” endpoint, or an error if the request or the JSON decoding failed; the response body is only scheduled for closing after the request succeeded, as there is no response to close otherwise. +func (c *Client) networkList(ctx context.Context) ([]NetworkResource, error) { + var netrscs []NetworkResource + resp, err := c.get(ctx, "/networks/json") + if err != nil { + return nil, err + } + defer ensureReaderClosed(resp) + err = json.NewDecoder(resp.Body).Decode(&netrscs) + return netrscs, err +} diff --git a/decorator/podmannet/package_test.go b/decorator/podmannet/package_test.go new file mode 100644 index 0000000..a680cb2 --- /dev/null +++ b/decorator/podmannet/package_test.go @@ -0,0 +1,17 @@ +// (c) Siemens AG 2023 +// +// SPDX-License-Identifier: MIT + +package podmannet + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestGostwireDecoratorPodmannet(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "ghostwire/decorator/podmannet package") +} diff --git a/decorator/podmannet/podmannet.go b/decorator/podmannet/podmannet.go new file mode 100644 index 0000000..5ba14ec --- /dev/null +++ b/decorator/podmannet/podmannet.go @@ -0,0 +1,132 @@ +// (c) Siemens AG 2024 +// +// SPDX-License-Identifier: MIT + +package podmannet + +import ( + "context" + "fmt" + + "github.com/siemens/ghostwire/v2/decorator" + "github.com/siemens/ghostwire/v2/decorator/dockernet" + "github.com/siemens/ghostwire/v2/network" + "github.com/siemens/turtlefinder/activator/podman" + + "github.com/thediveo/go-plugger/v3" + "github.com/thediveo/lxkns/log" + "github.com/thediveo/lxkns/model" + "github.com/thediveo/lxkns/ops" +) + +// GostwireNetworkNameKey defines the label key for storing the podman network +// name; it is the same key as used by the dockernet decorator. +const GostwireNetworkNameKey = dockernet.GostwireNetworkNameKey + +// Register this Decorator plugin. +func init() { + plugger.Group[decorator.Decorate]().Register( + Decorate, plugger.WithPlugin("podmannet-v4+")) +} + +// podmanNetworks describes the networks managed by a podman engine from +// Gostwire's perspective: the podman-related information, as well as the +// NetworkNamespace these networks are managed in, which is the network +// namespace of the particular managing podman engine. +type podmanNetworks struct { + networks []NetworkResource // podman-managed network information. + engine *model.ContainerEngine // corresponding podman engine. + engineNetns *network.NetworkNamespace // ...of the managing podman engine. +} + +// makePodmanNetworks returns a podmanNetworks object with the networks managed +// by the specified podman engine. 
If discovery failed, a zero-valued +// podmanNetworks object will be returned instead, to be used in the engine map +// to signal that we asked the engine, but it failed, so no more attempts to +// talk to it, please. The libpod client is closed on every return path once it has been created, so a failed info query does not leave connections behind. +func makePodmanNetworks(ctx context.Context, engine *model.ContainerEngine, allnetns network.NetworkNamespaces) ( + podmannets podmanNetworks, +) { + libpodclient, err := newLibpodClient(engine.API) + if err != nil { + log.Warnf("cannot discover podman-managed networks from API %s, reason: %s", + engine.API, err.Error()) + return + } + defer func() { _ = libpodclient.Close() }() + info, err := libpodclient.info(ctx) + if err != nil { + log.Warnf("cannot discover podman-managed networks from API %s, reason: %s", + engine.API, err.Error()) + return + } + libpodclient.libpodVersion = info.Version.APIVersion + networks, _ := libpodclient.networkList(ctx) + netnsid, _ := ops.NamespacePath(fmt.Sprintf("/proc/%d/ns/net", engine.PID)).ID() + podmannets.networks = networks + podmannets.engine = engine + podmannets.engineNetns = allnetns[netnsid] + log.Infof("found %d podman networks related to net:[%d] %s", + len(networks), podmannets.engineNetns.ID().Ino, podmannets.engineNetns.DisplayName()) + return +} + +// Decorate decorates bridge and macvlan master network interfaces with alias +// names that are the names of their corresponding podman “bridge” or “macvlan” +// networks, where applicable (a copy is stored also in the labels in Gostwire's +// key namespace). Additionally, it copies over any user-defined network labels. +func Decorate( + ctx context.Context, + allnetns network.NetworkNamespaces, + allprocs model.ProcessTable, + engines []*model.ContainerEngine, +) { + log.Debugf("discovering podman-managed networks") + // As some container engines currently might not manage any container + // workload, we will prime the container engine networks cache with the + // networks discovered from the engines we're told are under supervision. 
+ // This way, we ensure to discover networks even for engines without any + // workload, because otherwise we won't see them at all in the containers + // attached to the network namespaces. + podmanNets := map[model.PIDType]podmanNetworks{} + for _, engine := range engines { + if engine.Type != podman.Type { + continue + } + podmanNets[engine.PID] = makePodmanNetworks(ctx, engine, allnetns) + } + // Now that we know about the podman networks, try to locate the matching + // Linux-kernel network interfaces so we can set/override the alias names of + // the interfaces. + for _, podmannet := range podmanNets { + for _, netw := range podmannet.networks { + var nifname string + switch netw.Driver { + case "bridge", "macvlan": + nifname = netw.NetworkInterface + default: + continue + } + // Try to locate the Linux network interface related to this podman + // network, and if successful, set its alias name. + netif, ok := podmannet.engineNetns.NamedNifs[nifname] + if !ok { + continue + } + nif := netif.Nif() + nif.Alias = netw.Name + // We additionally also label the network interface with the Docker + // network name. + nif.Labels[GostwireNetworkNameKey] = netw.Name + nif.AddLabels(netw.Labels) + // In case this is an "internal" podman bridge network, then label + // it as being internal. While this only applies to "bridge"-driver + // networks, the flag is always present in libpod's API structure, + // so we don't need to differentiate here. + if netw.Internal { + nif.Labels[network.GostwireInternalBridgeKey] = "true" + } + // Is there something like Docker's default network...? 
+ } + } +} diff --git a/decorator/podmannet/podmannet_test.go b/decorator/podmannet/podmannet_test.go new file mode 100644 index 0000000..369211c --- /dev/null +++ b/decorator/podmannet/podmannet_test.go @@ -0,0 +1,196 @@ +// (c) Siemens AG 2024 +// +// SPDX-License-Identifier: MIT + +package podmannet + +import ( + "context" + "io" + "os" + "time" + + "github.com/ory/dockertest/v3" + "github.com/ory/dockertest/v3/docker" + "github.com/siemens/ghostwire/v2/internal/discover" + "github.com/siemens/turtlefinder" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + . "github.com/onsi/gomega/gleak" + . "github.com/thediveo/fdooze" + "github.com/thediveo/lxkns/model" + . "github.com/thediveo/success" +) + +const ( + fedoraTag = "39" + + pindName = "ghostwire-pind" + pindImageName = "siemens/ghostwire-pind" + + spinupTimeout = 10 * time.Second + spinupPolling = 500 * time.Millisecond + + goroutinesUnwindTimeout = 2 * time.Second + goroutinesUnwindPolling = 250 * time.Millisecond +) + +var _ = Describe("turtle finder", Ordered, Serial, func() { + + var pindCntr *dockertest.Resource + + BeforeAll(func() { + if os.Getuid() != 0 { + Skip("needs root") + } + + goodfds := Filedescriptors() + goodgos := Goroutines() // avoid other failed goroutine tests to spill over + DeferCleanup(func() { + Eventually(Goroutines).WithTimeout(goroutinesUnwindTimeout).WithPolling(goroutinesUnwindPolling). + ShouldNot(HaveLeaked(goodgos)) + Expect(Filedescriptors()).NotTo(HaveLeakedFds(goodfds)) + }) + + By("spinning up a Docker container with a podman system service") + pool := Successful(dockertest.NewPool("unix:///run/docker.sock")) + _ = pool.RemoveContainerByName(pindName) + // The necessary container start arguments loosely base on + // https://www.redhat.com/sysadmin/podman-inside-container but had to be + // heavily modified because they didn't work out as is, for whatever + // reasons. 
This is now a mash-up of the args used to get the KinD + // base-based images correctly working and some "spirit" of the before + // mentioned RedHat blog post. + // + // Lesson learnt: podman in Docker is much more fragile than the podmen + // want us to believe. + // + // docker run -it --rm --name pind + // --privileged \ + // --cgroupns=private \ + // --tmpfs /tmp \ + // --tmpfs /run \ + // --volume /var \ + // --device=/dev/fuse \ + // pind + // + // Please note that the initial build of the podman-in-Docker image is + // really slow, as fedora installs lots of things. + Expect(pool.Client.BuildImage(docker.BuildImageOptions{ + Name: pindImageName, + ContextDir: "./_test/pind", + Dockerfile: "Dockerfile", + BuildArgs: []docker.BuildArg{ + {Name: "FEDORA_TAG", Value: fedoraTag}, + }, + OutputStream: io.Discard, + })).To(Succeed()) + pindCntr = Successful(pool.RunWithOptions( + &dockertest.RunOptions{ + Name: pindName, + Repository: pindImageName, + Privileged: true, + Mounts: []string{ + "/var", // well, this actually is an unnamed volume + }, + Tty: true, + }, func(hc *docker.HostConfig) { + hc.Init = false + hc.Tmpfs = map[string]string{ + "/tmp": "", + "/run": "", + } + hc.Devices = []docker.Device{ + {PathOnHost: "/dev/fuse"}, + } + })) + + By("waiting for systemd default target to be reached") + // We need to wait for the container "contents" to have fully "booted", + // because otherwise trying to pull a container image and run it gets + // flaky. So we want to wait for systemd to reach its default target. To + // slightly complicate things, we might be too fast so that the system + // dbus inside the container isn't created yet and that would make + // systemctl fail. We thus first wait for the system dbus socket to + // appear and only then use systemctl for the container contents to + // fully boot up... + Expect(pindCntr.Exec([]string{ + "/bin/bash", "-c", + "while [ ! 
-S \"/var/run/dbus/system_bus_socket\" ]; do sleep 1; done" + + " && systemctl is-system-running --wait", + }, dockertest.ExecOptions{ + StdOut: GinkgoWriter, + StdErr: GinkgoWriter, + })).Error().To(Succeed()) + + By("creating a podman MACVLAN network") + Expect(pindCntr.Exec([]string{ + "podman", + "network", "create", + "-d", "macvlan", + "mcwielahm", + "-o", "parent=eth0", + }, dockertest.ExecOptions{ + StdOut: GinkgoWriter, + StdErr: GinkgoWriter, + })).Error().To(Succeed()) + + By("running a canary container connected to the default 'podman' network") + Expect(pindCntr.Exec([]string{ + "podman", "run", "-d", "-it", "--rm", "--name", "canary", "busybox", + }, dockertest.ExecOptions{ + StdOut: GinkgoWriter, + StdErr: GinkgoWriter, + })).Error().To(Succeed()) + + DeferCleanup(func() { + By("removing the podman-in-Docker container") + Expect(pool.Purge(pindCntr)).To(Succeed()) + }) + }) + + BeforeEach(func() { + goodfds := Filedescriptors() + goodgos := Goroutines() // avoid other failed goroutine tests to spill over + DeferCleanup(func() { + Eventually(Goroutines).WithTimeout(goroutinesUnwindTimeout).WithPolling(goroutinesUnwindPolling). + ShouldNot(HaveLeaked(goodgos)) + Expect(Filedescriptors()).NotTo(HaveLeakedFds(goodfds)) + }) + }) + + It("decorates podman-managed network interfaces", func(ctx context.Context) { + if os.Getuid() != 0 { + Skip("needs root") + } + + By("creating a turtlefinder") + ctx, cancel := context.WithCancel(ctx) + cizer := turtlefinder.New(func() context.Context { return ctx }) + defer cancel() + defer cizer.Close() + + By("running a full Ghostwire discovery that should pick up the podman networks") + allnetns, lxknsdisco := discover.Discover(ctx, cizer, nil) + Expect(lxknsdisco.Processes).To(HaveKey(model.PIDType(pindCntr.Container.State.Pid))) + pindNetnsID := lxknsdisco.Processes[model.PIDType(pindCntr.Container.State.Pid)]. 
+ Namespaces[model.NetNS].ID() + Expect(pindNetnsID).NotTo(BeZero()) + Expect(allnetns).To(HaveKey(pindNetnsID)) + pindNetns := allnetns[pindNetnsID] + // We expect the following network interfaces to be present inside our + // podman-in-docker container: + // - eth0 ... a.k.a. the "mcwielahm" network + // - podman0 ... a.k.a. the "podman" network + Expect(pindNetns.Nifs).To(ContainElements( + HaveField("Nif()", And( + HaveField("Name", "eth0"), + HaveField("Alias", "mcwielahm"))), + HaveField("Nif()", And( + HaveField("Name", "podman0"), + HaveField("Alias", "podman"))), + )) + }) + +}) diff --git a/defs_version.go b/defs_version.go index b6ace12..843e2b9 100644 --- a/defs_version.go +++ b/defs_version.go @@ -4,4 +4,4 @@ package gostwire // SemVersion is the semantic version string of the ghostwire module. -const SemVersion = "2.1.18-7-g6dab913" +const SemVersion = "2.1.18-12-gede2f48"