From ab5c4247e515928535ab584ad7a49de8d7219d91 Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Fri, 12 Jan 2024 10:58:03 +0100 Subject: [PATCH 01/20] Improve DNS-sanity check and ensure that it never reaches the cluster. Signed-off-by: Thomas Hallgren --- pkg/client/rootd/dns/resolved_linux.go | 25 +++++++---- pkg/client/rootd/dns/server.go | 57 ++++++++++++++++++++++---- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/pkg/client/rootd/dns/resolved_linux.go b/pkg/client/rootd/dns/resolved_linux.go index 4c7c4f5d65..22ed1ce191 100644 --- a/pkg/client/rootd/dns/resolved_linux.go +++ b/pkg/client/rootd/dns/resolved_linux.go @@ -83,16 +83,23 @@ func (s *Server) tryResolveD(c context.Context, dev vif.Device, configureDNS fun cmdC, cmdCancel := context.WithTimeout(c, 2*time.Second) defer cmdCancel() for cmdC.Err() == nil { - _, _ = net.DefaultResolver.LookupHost(cmdC, "jhfweoitnkgyeta."+tel2SubDomain) - if s.RequestCount() > 0 { - // The query went all way through. Start processing search paths systemd-resolved style - // and return nil for successful validation. - s.processSearchPaths(g, s.updateLinkDomains, dev) - return nil - } - s.flushDNS() - dtime.SleepWithContext(cmdC, 100*time.Millisecond) + go func() { + dlog.Debug(cmdC, "sanity-check lookup") + _, _ = net.DefaultResolver.LookupHost(cmdC, santiyCheck) + if s.RequestCount() > 0 { + cmdCancel() + } + }() + dtime.SleepWithContext(cmdC, 200*time.Millisecond) + } + <-cmdC.Done() + if s.RequestCount() > 0 { + // The query went all way through. Start processing search paths systemd-resolved style + // and return nil for successful validation. + s.processSearchPaths(g, s.updateLinkDomains, dev) + return nil } + s.flushDNS() dlog.Error(c, "resolver did not receive requests from systemd-resolved") return errResolveDNotConfigured }) diff --git a/pkg/client/rootd/dns/server.go b/pkg/client/rootd/dns/server.go index c4b1d7b0e7..82adecb62b 100644 --- a/pkg/client/rootd/dns/server.go +++ b/pkg/client/rootd/dns/server.go @@ -32,13 +32,20 @@ import ( type Resolver func(context.Context, *dns.Question) (dnsproxy.RRs, int, error) -// recursionCheck is a special host name in a well known namespace that isn't expected to exist. It -// is used once for determining if the cluster's DNS resolver will call the Telepresence DNS resolver -// recursively. This is common when the cluster is running on the local host (k3s in docker for instance). -const recursionCheck = "tel2-recursion-check.kube-system." - -// defaultClusterDomain used unless traffic-manager reports otherwise. -const defaultClusterDomain = "cluster.local." +const ( + // recursionCheck is a special host name in a well known namespace that isn't expected to exist. It + // is used once for determining if the cluster's DNS resolver will call the Telepresence DNS resolver + // recursively. This is common when the cluster is running on the local host (k3s in docker for instance). + recursionCheck = "tel2-recursion-check.kube-system." + + // defaultClusterDomain used unless traffic-manager reports otherwise. + defaultClusterDomain = "cluster.local." + + // sanityCheck is the query used when verifying that a DNS query reaches our DNS server. It should result + // in an increase of the requestCount but always yield an NXDOMAIN reply. + santiyCheck = "jhfweoitnkgyeta." + tel2SubDomain + santiyCheckDot = santiyCheck + "." +) type FallbackPool interface { Exchange(context.Context, *dns.Client, *dns.Msg) (*dns.Msg, time.Duration, error) @@ -702,6 +709,42 @@ func (s *Server) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { }() q := &r.Question[0] + atomic.AddInt64(&s.requestCount, 1) + if q.Name == santiyCheckDot { + msg := new(dns.Msg) + msg.SetRcode(r, dns.RcodeSuccess) + var rr dns.RR + switch q.Qtype { + case dns.TypeA: + rr = &dns.A{ + Hdr: dns.RR_Header{ + Name: q.Name, + Rrtype: q.Qtype, + Class: q.Qclass, + }, + A: localhostIPv4, + } + case dns.TypeAAAA: + rr = &dns.AAAA{ + Hdr: dns.RR_Header{ + Name: q.Name, + Rrtype: q.Qtype, + Class: q.Qclass, + }, + AAAA: localhostIPv6, + } + default: + msg = new(dns.Msg) + msg.SetRcode(r, dns.RcodeNotImplemented) + return + } + msg.Answer = dnsproxy.RRs{rr} + msg.Authoritative = true + dlog.Debug(c, "sanity-check OK") + _ = w.WriteMsg(msg) + return + } + qts := dns.TypeToString[q.Qtype] dlog.Debugf(c, "ServeDNS %5d %-6s %s", r.Id, qts, q.Name) From c4e3af5d660882de3b75296b053641408027468e Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Fri, 12 Jan 2024 11:03:16 +0100 Subject: [PATCH 02/20] Remove search-path handling from the overriding DNS-resolver. It's no longer used and will just collide with the search path used by the cluster's own DNS-resolver. Signed-off-by: Thomas Hallgren --- pkg/client/rootd/dns/server.go | 40 +++------------ pkg/client/rootd/dns/server_linux.go | 76 +--------------------------- pkg/client/rootd/session.go | 2 +- 3 files changed, 8 insertions(+), 110 deletions(-) diff --git a/pkg/client/rootd/dns/server.go b/pkg/client/rootd/dns/server.go index 82adecb62b..47d2ca0f50 100644 --- a/pkg/client/rootd/dns/server.go +++ b/pkg/client/rootd/dns/server.go @@ -99,10 +99,6 @@ type Server struct { // Function that sends a lookup request to the traffic-manager clusterLookup Resolver - // onlyNames is set to true when using a legacy traffic-manager incapable of - // using query types - onlyNames bool - error string // ready is closed when the DNS server is fully configured @@ -133,7 +129,7 @@ func (dv *cacheEntry) close() { } // NewServer returns a new dns.Server. -func NewServer(config *rpc.DNSConfig, clusterLookup Resolver, onlyNames bool) *Server { +func NewServer(config *rpc.DNSConfig, clusterLookup Resolver) *Server { if config == nil { config = &rpc.DNSConfig{} } @@ -163,7 +159,6 @@ func NewServer(config *rpc.DNSConfig, clusterLookup Resolver, onlyNames bool) *S searchPathCh: make(chan []string, 5), clusterDomain: defaultClusterDomain, clusterLookup: clusterLookup, - onlyNames: onlyNames, ready: make(chan struct{}), } if lt := config.LookupTimeout; lt != nil { @@ -765,35 +760,12 @@ func (s *Server) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { _ = w.WriteMsg(msg) }() - if s.onlyNames { - switch q.Qtype { - case dns.TypeA: - answer, rCode, err = s.cacheResolve(q) - case dns.TypeAAAA: - if atomic.LoadInt32(&s.recursive) == recursionDetected || q.Name == recursionCheck { - rCode = dns.RcodeNameError - break - } - q.Qtype = dns.TypeA - answer, rCode, err = s.cacheResolve(q) - q.Qtype = dns.TypeAAAA - if rCode == dns.RcodeSuccess { - // return EMPTY to indicate that dns.TypeA exists - answer = nil - } - default: - msg = new(dns.Msg) - msg.SetRcode(r, dns.RcodeNotImplemented) - return - } - } else { - if !dnsproxy.SupportedType(q.Qtype) { - msg = new(dns.Msg) - msg.SetRcode(r, dns.RcodeNotImplemented) - return - } - answer, rCode, err = s.cacheResolve(q) + if !dnsproxy.SupportedType(q.Qtype) { + msg = new(dns.Msg) + msg.SetRcode(r, dns.RcodeNotImplemented) + return } + answer, rCode, err = s.cacheResolve(q) if err == nil && rCode == dns.RcodeSuccess { msg = new(dns.Msg) diff --git a/pkg/client/rootd/dns/server_linux.go b/pkg/client/rootd/dns/server_linux.go index 2d950ab5d8..5ba51430e0 100644 --- a/pkg/client/rootd/dns/server_linux.go +++ b/pkg/client/rootd/dns/server_linux.go @@ -10,13 +10,10 @@ import ( "strings" "time" - "github.com/miekg/dns" - "github.com/datawire/dlib/dexec" "github.com/datawire/dlib/dgroup" "github.com/datawire/dlib/dlog" "github.com/datawire/dlib/dtime" - "github.com/telepresenceio/telepresence/v2/pkg/dnsproxy" "github.com/telepresenceio/telepresence/v2/pkg/forwarder" "github.com/telepresenceio/telepresence/v2/pkg/iputil" "github.com/telepresenceio/telepresence/v2/pkg/proc" @@ -48,77 +45,6 @@ func (s *Server) Worker(c context.Context, dev vif.Device, configureDNS func(net return err } -// shouldApplySearch returns true if search path should be applied. -func (s *Server) shouldApplySearch(query string) bool { - if len(s.search) == 0 { - return false - } - - if query == "localhost." { - return false - } - - // Don't apply search paths to the kubernetes zone - if strings.HasSuffix(query, "."+s.clusterDomain) { - return false - } - - // Don't apply search paths if one is already there - for _, s := range s.search { - if strings.HasSuffix(query, s) { - return false - } - } - - // Don't apply search path to namespaces or "svc". - query = query[:len(query)-1] - if lastDot := strings.LastIndexByte(query, '.'); lastDot >= 0 { - tld := query[lastDot+1:] - if _, ok := s.namespaces[tld]; ok || tld == "svc" { - return false - } - } - return true -} - -// resolveInSearch is only used by the overriding resolver. It is needed because unlike other resolvers, this -// resolver does not hook into a DNS system that handles search paths prior to the arrival of the request. -// -// TODO: With the DNS lookups now being done in the cluster, there's only one reason left to have a search path, -// and that's the local-only intercepts which means that using search-paths really should be limited to that -// use-case. -func (s *Server) resolveInSearch(c context.Context, q *dns.Question) (dnsproxy.RRs, int, error) { - query := strings.ToLower(q.Name) - - // Drop all known search path suffixes before sending the query to the cluster. The - // cluster has its own DNS resolver and its own set of search paths. - query = strings.TrimSuffix(query, tel2SubDomainDot) - for _, sfx := range s.dropSuffixes { - query = strings.TrimSuffix(query, sfx) - } - - s.RLock() - if !s.shouldDoClusterLookup(query) { - s.RUnlock() - return nil, dns.RcodeNameError, nil - } - applySearch := s.shouldApplySearch(query) - s.RUnlock() - - if applySearch { - origQuery := q.Name - for _, sp := range s.search { - q.Name = query + sp - if rrs, rCode, err := s.resolveInCluster(c, q); err != nil || len(rrs) > 0 { - q.Name = origQuery - return rrs, rCode, err - } - } - q.Name = origQuery - } - return s.resolveInCluster(c, q) -} - func (s *Server) runOverridingServer(c context.Context, dev vif.Device) error { if s.localIP == nil { rf, err := readResolveFile("/etc/resolv.conf") @@ -186,7 +112,7 @@ func (s *Server) runOverridingServer(c context.Context, dev vif.Device) error { s.flushDNS() return nil }, dev) - return s.Run(c, serverStarted, listeners, pool, s.resolveInSearch) + return s.Run(c, serverStarted, listeners, pool, s.resolveInCluster) }) if proc.RunningInContainer() { diff --git a/pkg/client/rootd/session.go b/pkg/client/rootd/session.go index 6024ec290b..0ab2d486df 100644 --- a/pkg/client/rootd/session.go +++ b/pkg/client/rootd/session.go @@ -363,7 +363,7 @@ func newSession(c context.Context, mi *rpc.OutboundInfo, mc connector.ManagerPro config: cfg, done: make(chan struct{}), } - s.dnsServer = dns.NewServer(mi.Dns, s.clusterLookup, false) + s.dnsServer = dns.NewServer(mi.Dns, s.clusterLookup) s.SetSearchPath(c, nil, nil) dlog.Infof(c, "also-proxy subnets %v", as) dlog.Infof(c, "never-proxy subnets %v", ns) From 3b38ebe7aeb99dc49dba905a7fbe0cf2a54eac03 Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Fri, 12 Jan 2024 13:23:00 +0100 Subject: [PATCH 03/20] Fix correct DNS response when A exists but AAAA doesn't, or vice-versa. Signed-off-by: Thomas Hallgren --- pkg/client/rootd/dns/server.go | 47 +++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/pkg/client/rootd/dns/server.go b/pkg/client/rootd/dns/server.go index 47d2ca0f50..df6b3edf16 100644 --- a/pkg/client/rootd/dns/server.go +++ b/pkg/client/rootd/dns/server.go @@ -276,17 +276,24 @@ func (s *Server) resolveInCluster(c context.Context, q *dns.Question) (result dn // But it does, so I need this in order to be // productive at home. We should really // root-cause this, because it's weird. + hdr := dns.RR_Header{ + Name: q.Name, + Rrtype: q.Qtype, + Class: q.Qclass, + } switch q.Qtype { case dns.TypeA: return dnsproxy.RRs{&dns.A{ - Hdr: dns.RR_Header{}, + Hdr: hdr, A: localhostIPv4, }}, dns.RcodeSuccess, nil case dns.TypeAAAA: return dnsproxy.RRs{&dns.AAAA{ - Hdr: dns.RR_Header{}, + Hdr: hdr, AAAA: localhostIPv6, }}, dns.RcodeSuccess, nil + default: + return nil, dns.RcodeNameError, nil } } @@ -299,9 +306,28 @@ func (s *Server) resolveInCluster(c context.Context, q *dns.Question) (result dn defer cancel() result, rCode, err = s.clusterLookup(c, q) + if err != nil || rCode != dns.RcodeSuccess { + // For A and AAAA queries, we check if we have a successful counterpart in the cache. If we + // do, then this query must return NOERROR EMPTY + ck := cacheKey{name: q.Name, qType: dns.TypeNone} + switch q.Qtype { + case dns.TypeA: + ck.qType = dns.TypeAAAA + case dns.TypeAAAA: + ck.qType = dns.TypeA + } + if ck.qType != dns.TypeNone { + if ce, ok := s.cache.Load(ck); ok && !ce.expired() { + err = nil + rCode = dns.RcodeSuccess + } + } + } if err != nil { + dlog.Debugf(c, "clusterLookup returned error: %v", err) return nil, rCode, client.CheckTimeout(c, err) } + // Keep the TTLs of requests resolved in the cluster low. We // cache them locally anyway, but our cache is flushed when things are // intercepted or the namespaces change. @@ -560,12 +586,17 @@ func (s *Server) resolveThruCache(q *dns.Question) (dnsproxy.RRs, int, error) { } <-oldDv.wait if !oldDv.expired() { - copyQType := q.Qtype - // If answer is a mapping, the copy type should be a CNAME. - if len(oldDv.answer) == 1 && oldDv.answer[0].Header().Rrtype == dns.TypeCNAME { - copyQType = dns.TypeCNAME + qTypes := []uint16{q.Qtype} + if q.Qtype != dns.TypeCNAME { + // Allow additional CNAME records if they are present. + for _, rr := range oldDv.answer { + if rr.Header().Rrtype == dns.TypeCNAME { + qTypes = append(qTypes, dns.TypeCNAME) + break + } + } } - return copyRRs(oldDv.answer, []uint16{copyQType}), oldDv.rCode, nil + return copyRRs(oldDv.answer, qTypes), oldDv.rCode, nil } s.cache.Store(key, newDv) } @@ -743,8 +774,6 @@ func (s *Server) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { qts := dns.TypeToString[q.Qtype] dlog.Debugf(c, "ServeDNS %5d %-6s %s", r.Id, qts, q.Name) - atomic.AddInt64(&s.requestCount, 1) - var err error var rCode int var answer dnsproxy.RRs From 4ec63798f02ef4a7feb4a5ed55130aca253792dd Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Wed, 17 Jan 2024 13:15:43 +0100 Subject: [PATCH 04/20] Use the name "routes" instead of "namespaces" for DNS routes. Signed-off-by: Thomas Hallgren --- pkg/client/rootd/dns/resolved_linux.go | 6 +++--- pkg/client/rootd/dns/server.go | 17 ++++++++++++----- pkg/client/rootd/dns/server_darwin.go | 10 +++++----- pkg/client/rootd/dns/server_linux.go | 6 +++--- pkg/client/rootd/dns/server_windows.go | 6 +++--- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/pkg/client/rootd/dns/resolved_linux.go b/pkg/client/rootd/dns/resolved_linux.go index 22ed1ce191..347b43680b 100644 --- a/pkg/client/rootd/dns/resolved_linux.go +++ b/pkg/client/rootd/dns/resolved_linux.go @@ -107,13 +107,13 @@ func (s *Server) tryResolveD(c context.Context, dev vif.Device, configureDNS fun } func (s *Server) updateLinkDomains(c context.Context, paths []string, dev vif.Device) error { - namespaces := make(map[string]struct{}) + routes := make(map[string]struct{}) search := make([]string, 0) for i, path := range paths { if strings.ContainsRune(path, '.') { search = append(search, path) } else { - namespaces[path] = struct{}{} + routes[path] = struct{}{} // Turn namespace into a route paths[i] = "~" + path } @@ -125,7 +125,7 @@ func (s *Server) updateLinkDomains(c context.Context, paths []string, dev vif.De s.Lock() paths = append(paths, "~"+s.clusterDomain, tel2SubDomainDot+s.clusterDomain) - s.namespaces = namespaces + s.routes = routes s.search = search s.Unlock() diff --git a/pkg/client/rootd/dns/server.go b/pkg/client/rootd/dns/server.go index df6b3edf16..e3e5187021 100644 --- a/pkg/client/rootd/dns/server.go +++ b/pkg/client/rootd/dns/server.go @@ -73,10 +73,17 @@ type Server struct { cacheResolve func(*dns.Question) (dnsproxy.RRs, int, error) dropSuffixes []string //nolint:unused // only used on linux - // Namespaces, accessible using . - namespaces map[string]struct{} - domains map[string]struct{} - search []string + // routes are typically namespaces, accessible using .. + routes map[string]struct{} + + // search are appended to a query to form new names that are then dispatched to the + // DNS resolver. The act of appending is not performed by this server, but rather + // by the system's DNS resolver before calling on this server. + search []string + + // domains contains the sum of the include-suffixes and routes. It is currently only + // used by the darwin resolver to keep track of files to add or remove. + domains map[string]struct{} // searchPathCh receives requests to change the search path. searchPathCh chan []string @@ -147,7 +154,7 @@ func NewServer(config *rpc.DNSConfig, clusterLookup Resolver) *Server { } s := &Server{ cache: xsync.NewMapOf[cacheKey, *cacheEntry](), - namespaces: make(map[string]struct{}), + routes: make(map[string]struct{}), domains: make(map[string]struct{}), excludes: config.Excludes, excludeSuffixes: config.ExcludeSuffixes, diff --git a/pkg/client/rootd/dns/server_darwin.go b/pkg/client/rootd/dns/server_darwin.go index 3e081ef9d1..dd7c649af1 100644 --- a/pkg/client/rootd/dns/server_darwin.go +++ b/pkg/client/rootd/dns/server_darwin.go @@ -140,19 +140,19 @@ func (s *Server) updateResolverFiles(c context.Context, resolverDirName, resolve } // paths that contain a dot are search paths, the ones that don't are namespaces. - namespaces := make(map[string]struct{}) + routes := make(map[string]struct{}) search := make([]string, 0) for _, path := range paths { if strings.ContainsRune(path, '.') { search = append(search, path) } else if path != "" { - namespaces[path] = struct{}{} + routes[path] = struct{}{} } } // All namespaces and include suffixes become domains - domains := make(map[string]struct{}, len(namespaces)+len(s.includeSuffixes)) - maps.Merge(domains, namespaces) + domains := make(map[string]struct{}, len(routes)+len(s.includeSuffixes)) + maps.Merge(domains, routes) for _, sfx := range s.includeSuffixes { domains[strings.TrimPrefix(sfx, ".")] = struct{}{} } @@ -180,7 +180,7 @@ func (s *Server) updateResolverFiles(c context.Context, resolverDirName, resolve search = append([]string{tel2SubDomainDot + s.clusterDomain}, search...) s.search = search - s.namespaces = namespaces + s.routes = routes s.domains = domains for _, domain := range removals { diff --git a/pkg/client/rootd/dns/server_linux.go b/pkg/client/rootd/dns/server_linux.go index 5ba51430e0..573a32367a 100644 --- a/pkg/client/rootd/dns/server_linux.go +++ b/pkg/client/rootd/dns/server_linux.go @@ -96,17 +96,17 @@ func (s *Server) runOverridingServer(c context.Context, dev vif.Device) error { defer close(serverDone) // Server will close the listener, so no need to close it here. s.processSearchPaths(g, func(c context.Context, paths []string, _ vif.Device) error { - namespaces := make(map[string]struct{}) + routes := make(map[string]struct{}) search := make([]string, 0) for _, path := range paths { if strings.ContainsRune(path, '.') { search = append(search, path) } else if path != "" { - namespaces[path] = struct{}{} + routes[path] = struct{}{} } } s.Lock() - s.namespaces = namespaces + s.routes = routes s.search = search s.Unlock() s.flushDNS() diff --git a/pkg/client/rootd/dns/server_windows.go b/pkg/client/rootd/dns/server_windows.go index 66e0c86d3d..22fd18b993 100644 --- a/pkg/client/rootd/dns/server_windows.go +++ b/pkg/client/rootd/dns/server_windows.go @@ -43,7 +43,7 @@ func (s *Server) Worker(c context.Context, dev vif.Device, configureDNS func(net } func (s *Server) updateRouterDNS(c context.Context, paths []string, dev vif.Device) error { - namespaces := make(map[string]struct{}) + routes := make(map[string]struct{}) search := []string{ tel2SubDomain + "." + s.clusterDomain, } @@ -52,11 +52,11 @@ func (s *Server) updateRouterDNS(c context.Context, paths []string, dev vif.Devi if strings.ContainsRune(path, '.') { search = append(search, path) } else if path != "" { - namespaces[path] = struct{}{} + routes[path] = struct{}{} } } s.Lock() - s.namespaces = namespaces + s.routes = routes s.search = search err := dev.SetDNS(c, s.clusterDomain, s.remoteIP, search) s.Unlock() From b48a4efe6ded64e8b7fde87ca46a61031db4f082 Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Wed, 17 Jan 2024 13:32:29 +0100 Subject: [PATCH 05/20] Simplify the flow of calls during DNS resolution. Gets rid of a reassignment of function pointer which wasn't concurrency protected. Signed-off-by: Thomas Hallgren --- pkg/client/rootd/dns/server.go | 67 +++++++++++----------------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/pkg/client/rootd/dns/server.go b/pkg/client/rootd/dns/server.go index e3e5187021..2781d8f024 100644 --- a/pkg/client/rootd/dns/server.go +++ b/pkg/client/rootd/dns/server.go @@ -69,8 +69,7 @@ type Server struct { resolve Resolver requestCount int64 cache *xsync.MapOf[cacheKey, *cacheEntry] - recursive int32 // one of the recursionXXX constants declared above (unique type avoided because it just gets messy with the atomic calls) - cacheResolve func(*dns.Question) (dnsproxy.RRs, int, error) + recursive int32 // one of the recursionXXX constants declared above (unique type avoided because it just gets messy with the atomic calls) dropSuffixes []string //nolint:unused // only used on linux // routes are typically namespaces, accessible using .. @@ -171,7 +170,6 @@ func NewServer(config *rpc.DNSConfig, clusterLookup Resolver) *Server { if lt := config.LookupTimeout; lt != nil { s.lookupTimeout = lt.AsDuration() } - s.cacheResolve = s.resolveWithRecursionCheck return s } @@ -580,15 +578,17 @@ type cacheKey struct { qType uint16 } -// resolveThruCache resolves the given query by first performing a cache lookup. If a cached -// entry is found that hasn't expired, it's returned. If not, this function will call -// resolveQuery() to resolve and store in the case. -func (s *Server) resolveThruCache(q *dns.Question) (dnsproxy.RRs, int, error) { +// resolveWithRecursionCheck is a special version of resolveThruCache which is only used until the +// recursionCheck query has completed, and it has been determined whether a query that is propagated +// to the cluster will recurse back to this resolver or not. +func (s *Server) resolveWithRecursionCheck(q *dns.Question) (dnsproxy.RRs, int, error) { newDv := &cacheEntry{wait: make(chan struct{}), created: time.Now()} key := cacheKey{name: q.Name, qType: q.Qtype} if oldDv, loaded := s.cache.LoadOrStore(key, newDv); loaded { - if atomic.LoadInt32(&s.recursive) == recursionDetected && atomic.LoadInt32(&oldDv.currentQType) == int32(q.Qtype) { - // We have to assume that this is a recursion from the cluster. + if strings.HasPrefix(q.Name, recursionCheck) { + atomic.StoreInt32(&s.recursive, recursionDetected) + } + if atomic.LoadInt32(&s.recursive) == recursionDetected { return nil, dns.RcodeNameError, nil } <-oldDv.wait @@ -607,7 +607,17 @@ func (s *Server) resolveThruCache(q *dns.Question) (dnsproxy.RRs, int, error) { } s.cache.Store(key, newDv) } - return s.resolveQuery(q, newDv) + + answer, rCode, err := s.resolveQuery(q, newDv) + if strings.HasPrefix(q.Name, recursionCheck) { + if atomic.LoadInt32(&s.recursive) == recursionDetected { + dlog.Debug(s.ctx, "DNS resolver is recursive") + } else { + atomic.StoreInt32(&s.recursive, recursionNotDetected) + dlog.Debug(s.ctx, "DNS resolver is not recursive") + } + } + return answer, rCode, err } func (s *Server) resolveThruCacheWithUnqualifiedHostName(q *dns.Question) (dnsproxy.RRs, int, error) { @@ -626,7 +636,7 @@ func (s *Server) resolveThruCacheWithUnqualifiedHostName(q *dns.Question) (dnspr q.Name = uhm } - rrs, rCode, err := s.resolveThruCache(q) + rrs, rCode, err := s.resolveWithRecursionCheck(q) // Restore the original query name which was stripped before, or the response won't be formed correctly. if uhm != "" { @@ -639,39 +649,6 @@ func (s *Server) resolveThruCacheWithUnqualifiedHostName(q *dns.Question) (dnspr return rrs, rCode, err } -// resolveWithRecursionCheck is a special version of resolveThruCache which is only used until the -// recursionCheck query has completed, and it has been determined whether a query that is propagated -// to the cluster will recurse back to this resolver or not. -func (s *Server) resolveWithRecursionCheck(q *dns.Question) (dnsproxy.RRs, int, error) { - newDv := &cacheEntry{wait: make(chan struct{}), created: time.Now()} - key := cacheKey{name: q.Name, qType: q.Qtype} - if oldDv, loaded := s.cache.LoadOrStore(key, newDv); loaded { - if strings.HasPrefix(q.Name, recursionCheck) { - atomic.StoreInt32(&s.recursive, recursionDetected) - } - if atomic.LoadInt32(&s.recursive) == recursionDetected { - return nil, dns.RcodeNameError, nil - } - <-oldDv.wait - if !oldDv.expired() { - return copyRRs(oldDv.answer, []uint16{q.Qtype}), oldDv.rCode, nil - } - s.cache.Store(key, newDv) - } - - answer, rCode, err := s.resolveQuery(q, newDv) - if strings.HasPrefix(q.Name, recursionCheck) { - if atomic.LoadInt32(&s.recursive) == recursionDetected { - dlog.Debug(s.ctx, "DNS resolver is recursive") - } else { - atomic.StoreInt32(&s.recursive, recursionNotDetected) - dlog.Debug(s.ctx, "DNS resolver is not recursive") - } - s.cacheResolve = s.resolveThruCacheWithUnqualifiedHostName - } - return answer, rCode, err -} - // dfs is a func that implements the fmt.Stringer interface. Used in log statements to ensure // that the function isn't evaluated until the log output is formatted (which will happen only // if the given loglevel is enabled). @@ -801,7 +778,7 @@ func (s *Server) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { msg.SetRcode(r, dns.RcodeNotImplemented) return } - answer, rCode, err = s.cacheResolve(q) + answer, rCode, err = s.resolveThruCacheWithUnqualifiedHostName(q) if err == nil && rCode == dns.RcodeSuccess { msg = new(dns.Msg) From 187a6d28b05edc4cf43a6bf07a3710ffd6de9511 Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Wed, 17 Jan 2024 13:37:37 +0100 Subject: [PATCH 06/20] Use an exported constant for DNS DefaultExcludeSuffixes. Signed-off-by: Thomas Hallgren --- pkg/client/rootd/dns/server.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/client/rootd/dns/server.go b/pkg/client/rootd/dns/server.go index 2781d8f024..dd7891f854 100644 --- a/pkg/client/rootd/dns/server.go +++ b/pkg/client/rootd/dns/server.go @@ -61,6 +61,15 @@ const ( recursionTestInProgress ) +//nolint:gochecknoglobals // constant +var DefaultExcludeSuffixes = []string{ + ".com", + ".io", + ".net", + ".org", + ".ru", +} + // Server is a DNS server which implements the github.com/miekg/dns Handler interface. type Server struct { sync.RWMutex @@ -140,13 +149,7 @@ func NewServer(config *rpc.DNSConfig, clusterLookup Resolver) *Server { config = &rpc.DNSConfig{} } if len(config.ExcludeSuffixes) == 0 { - config.ExcludeSuffixes = []string{ - ".com", - ".io", - ".net", - ".org", - ".ru", - } + config.ExcludeSuffixes = DefaultExcludeSuffixes } if config.LookupTimeout.AsDuration() <= 0 { config.LookupTimeout = durationpb.New(8 * time.Second) From 49a0878d467e37f37d9d2aeac632d06d3730c019 Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Wed, 17 Jan 2024 13:49:05 +0100 Subject: [PATCH 07/20] Let clusters LookupIP return NXNAME when it finds zero IPs or errors Signed-off-by: Thomas Hallgren --- pkg/client/rootd/session.go | 14 ++++++++++++-- pkg/dnsproxy/lookup.go | 11 +++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pkg/client/rootd/session.go b/pkg/client/rootd/session.go index 0ab2d486df..81c10ee663 100644 --- a/pkg/client/rootd/session.go +++ b/pkg/client/rootd/session.go @@ -383,10 +383,20 @@ func (s *Session) clusterLookup(ctx context.Context, q *dns2.Question) (dnsproxy }) if err != nil { s.dnsFailures++ - return nil, dns2.RcodeServerFailure, err + rCode := dns2.RcodeServerFailure + switch status.Code(err) { + case codes.Unavailable, codes.DeadlineExceeded: + rCode = dns2.RcodeNameError + err = nil + } + return nil, rCode, err } answer, rCode, err := dnsproxy.FromRPC(r) - if err == nil && len(s.localTranslationSubnets) > 0 { + if err != nil { + s.dnsFailures++ + return nil, dns2.RcodeServerFailure, err + } + if len(s.localTranslationSubnets) > 0 { for _, rr := range answer { switch rr := rr.(type) { case *dns2.A: diff --git a/pkg/dnsproxy/lookup.go b/pkg/dnsproxy/lookup.go index d8521d1c05..4a3daded45 100644 --- a/pkg/dnsproxy/lookup.go +++ b/pkg/dnsproxy/lookup.go @@ -170,6 +170,13 @@ func lookupIP(ctx context.Context, network, qName string, r *net.Resolver) ([]ne dlog.Errorf(ctx, "LookupIP failed, trying LookupIP %q", qName) ips, err = r.LookupIP(ctx, network, qName) } + if err == nil && len(ips) == 0 { + err = &net.DNSError{ + Err: "no such host", + Name: name, + IsNotFound: true, + } + } return ips, err } @@ -180,9 +187,9 @@ func makeError(err error) (RRs, int, error) { case dnsErr.IsNotFound: return nil, dns.RcodeNameError, nil case dnsErr.IsTemporary: - return nil, dns.RcodeServerFailure, status.Error(codes.Unavailable, dnsErr.Error()) + return nil, dns.RcodeNameError, status.Error(codes.Unavailable, dnsErr.Error()) case dnsErr.IsTimeout: - return nil, dns.RcodeServerFailure, status.Error(codes.DeadlineExceeded, dnsErr.Error()) + return nil, dns.RcodeNameError, status.Error(codes.DeadlineExceeded, dnsErr.Error()) } } return nil, dns.RcodeServerFailure, status.Error(codes.Internal, err.Error()) From ca316141c06a86f995d5b55b7cf17d668f1d494e Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Wed, 17 Jan 2024 13:50:29 +0100 Subject: [PATCH 08/20] An intercept no longer changes DNS, so there's no point to wait for it. Historically, the Telepresence DNS would not find single-label service names until an intercept was active, and code was in place to verify that this DNS-change took place. Now, with a connection tied to a specific namespace, this no longer happens and the wait is therefore pointless. Signed-off-by: Thomas Hallgren --- pkg/client/userd/trafficmgr/intercept.go | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/pkg/client/userd/trafficmgr/intercept.go b/pkg/client/userd/trafficmgr/intercept.go index 011b01b470..7d83839fc8 100644 --- a/pkg/client/userd/trafficmgr/intercept.go +++ b/pkg/client/userd/trafficmgr/intercept.go @@ -18,7 +18,6 @@ import ( "github.com/datawire/dlib/dgroup" "github.com/datawire/dlib/dlog" - "github.com/datawire/dlib/dtime" "github.com/telepresenceio/telepresence/rpc/v2/common" rpc "github.com/telepresenceio/telepresence/rpc/v2/connector" "github.com/telepresenceio/telepresence/rpc/v2/daemon" @@ -28,7 +27,6 @@ import ( "github.com/telepresenceio/telepresence/v2/pkg/client/docker" "github.com/telepresenceio/telepresence/v2/pkg/client/remotefs" "github.com/telepresenceio/telepresence/v2/pkg/client/userd" - "github.com/telepresenceio/telepresence/v2/pkg/dnsproxy" "github.com/telepresenceio/telepresence/v2/pkg/errcat" "github.com/telepresenceio/telepresence/v2/pkg/forwarder" "github.com/telepresenceio/telepresence/v2/pkg/iputil" @@ -641,9 +639,6 @@ func (s *session) AddIntercept(c context.Context, ir *rpc.CreateInterceptRequest continue } result.InterceptInfo = ii - if !waitForDNS(c, spec.ServiceName) { - dlog.Warningf(c, "DNS cannot resolve name of intercepted %q service", spec.ServiceName) - } select { case <-c.Done(): return InterceptError(common.InterceptError_FAILED_TO_ESTABLISH, client.CheckTimeout(c, c.Err())) @@ -666,21 +661,6 @@ func (s *session) InterceptEpilog(context.Context, *rpc.CreateInterceptRequest, return nil } -func waitForDNS(c context.Context, host string) bool { - c, cancel := context.WithTimeout(c, 12*time.Second) - defer cancel() - for c.Err() == nil { - dtime.SleepWithContext(c, 200*time.Millisecond) - dlog.Debugf(c, "Attempting to resolve DNS for %s", host) - ips := dnsproxy.TimedExternalLookup(c, host, 5*time.Second) - if len(ips) > 0 { - dlog.Debugf(c, "Attempt succeeded, DNS for %s is %v", host, ips) - return true - } - } - return false -} - // RemoveIntercept removes one intercept by name. func (s *session) RemoveIntercept(c context.Context, name string) error { dlog.Debugf(c, "Removing intercept %s", name) From ed464d764c84a94af1335b250c070193b9f9f07d Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Wed, 17 Jan 2024 14:07:44 +0100 Subject: [PATCH 09/20] Unify handling of search paths for all OSes. Signed-off-by: Thomas Hallgren --- pkg/client/rootd/dns/resolved_linux.go | 42 +++++++++++----------- pkg/client/rootd/dns/server.go | 49 +++++++++++++------------- pkg/client/rootd/dns/server_darwin.go | 35 +++++------------- pkg/client/rootd/dns/server_linux.go | 20 +++-------- pkg/client/rootd/dns/server_windows.go | 24 ++++--------- 5 files changed, 68 insertions(+), 102 deletions(-) diff --git a/pkg/client/rootd/dns/resolved_linux.go b/pkg/client/rootd/dns/resolved_linux.go index 347b43680b..971cb8caed 100644 --- a/pkg/client/rootd/dns/resolved_linux.go +++ b/pkg/client/rootd/dns/resolved_linux.go @@ -64,8 +64,11 @@ func (s *Server) tryResolveD(c context.Context, dev vif.Device, configureDNS fun // Some installation have default DNS configured with ~. routing path. // If two interfaces with DefaultRoute: yes present, the one with the // routing key used and SanityCheck fails. Hence, tel2SubDomain - // must be used as a routing key. - if err = s.updateLinkDomains(c, []string{tel2SubDomain}, dev); err != nil { + // must be used as a route. + s.Lock() + s.routes = map[string]struct{}{tel2SubDomain: {}} + s.Unlock() + if err = s.updateLinkDomains(c, dev); err != nil { dlog.Error(c, err) initDone <- struct{}{} return errResolveDNotConfigured @@ -106,27 +109,26 @@ func (s *Server) tryResolveD(c context.Context, dev vif.Device, configureDNS fun return g.Wait() } -func (s *Server) updateLinkDomains(c context.Context, paths []string, dev vif.Device) error { - routes := make(map[string]struct{}) - search := make([]string, 0) - for i, path := range paths { - if strings.ContainsRune(path, '.') { - search = append(search, path) - } else { - routes[path] = struct{}{} - // Turn namespace into a route - paths[i] = "~" + path - } +func (s *Server) updateLinkDomains(c context.Context, dev vif.Device) error { + s.Lock() + paths := make([]string, len(s.routes)+len(s.search)+len(s.includeSuffixes)+1) + + // Namespaces are copied verbatim. Entries that aren't prefixed with "~" are considered search path entries. + copy(paths, s.search) + i := len(s.search) + for ns := range s.routes { + paths[i] = "~" + ns + i++ } + + // Include-suffixes are routes, i.e. in contrast to search paths, they are never appended to the name, but + // used as a filter that will direct queries for names ending with them to this resolver. Routes must be + // prefixed with "~". for _, sfx := range s.includeSuffixes { - paths = append(paths, "~"+strings.TrimPrefix(sfx, ".")) + paths[i] = "~" + strings.TrimPrefix(sfx, ".") + i++ } - - s.Lock() - paths = append(paths, "~"+s.clusterDomain, tel2SubDomainDot+s.clusterDomain) - - s.routes = routes - s.search = search + paths[i] = "~" + s.clusterDomain s.Unlock() if err := dbus.SetLinkDomains(dcontext.HardContext(c), int(dev.Index()), paths...); err != nil { diff --git a/pkg/client/rootd/dns/server.go b/pkg/client/rootd/dns/server.go index dd7891f854..dd52a68a00 100644 --- a/pkg/client/rootd/dns/server.go +++ b/pkg/client/rootd/dns/server.go @@ -488,23 +488,10 @@ func newLocalUDPListener(c context.Context) (net.PacketConn, error) { return lc.ListenPacket(c, "udp", "127.0.0.1:0") } -func (s *Server) processSearchPaths(g *dgroup.Group, processor func(context.Context, []string, vif.Device) error, dev vif.Device) { - g.Go("RecursionCheck", func(c context.Context) error { - if dev != nil { - s.RLock() - _ = dev.SetDNS(c, s.clusterDomain, s.remoteIP, []string{tel2SubDomain}) - s.RUnlock() - } - if runtime.GOOS == "windows" { - // Give the DNS setting some time to take effect. - dtime.SleepWithContext(c, 500*time.Millisecond) - } - s.performRecursionCheck(c) - return nil - }) - +func (s *Server) processSearchPaths(g *dgroup.Group, processor func(context.Context, vif.Device) error, dev vif.Device) { g.Go("SearchPaths", func(c context.Context) error { - var prevPaths []string + s.performRecursionCheck(c) + prevPaths := s.search unchanged := func(paths []string) bool { if len(paths) != len(prevPaths) { return false @@ -522,18 +509,32 @@ func (s *Server) processSearchPaths(g *dgroup.Group, processor func(context.Cont case <-c.Done(): return nil case paths := <-s.searchPathCh: - if len(s.searchPathCh) > 0 { - // Only interested in the last one + // Only interested in the last one, and only if it differs + if len(s.searchPathCh) > 0 || unchanged(paths) { continue } - if !unchanged(paths) { - dlog.Debugf(c, "%v -> %v", prevPaths, paths) - prevPaths = make([]string, len(paths)) - copy(prevPaths, paths) - if err := processor(c, paths, dev); err != nil { - return err + + dlog.Debugf(c, "%v -> %v", prevPaths, paths) + prevPaths = make([]string, len(paths)) + copy(prevPaths, paths) + + routes := make(map[string]struct{}) + search := make([]string, 0) + for _, path := range paths { + if path == tel2SubDomain || strings.ContainsRune(path, '.') { + search = append(search, path) + } else if path != "" { + routes[path] = struct{}{} } } + s.Lock() + s.routes = routes + s.search = search + s.Unlock() + + if err := processor(c, dev); err != nil { + return err + } } } }) diff --git a/pkg/client/rootd/dns/server_darwin.go b/pkg/client/rootd/dns/server_darwin.go index dd7c649af1..04123c9d54 100644 --- a/pkg/client/rootd/dns/server_darwin.go +++ b/pkg/client/rootd/dns/server_darwin.go @@ -105,8 +105,8 @@ func (s *Server) Worker(c context.Context, dev vif.Device, configureDNS func(net // Start local DNS server g := dgroup.NewGroup(c, dgroup.GroupConfig{}) g.Go("Server", func(c context.Context) error { - s.processSearchPaths(g, func(c context.Context, paths []string, _ vif.Device) error { - return s.updateResolverFiles(c, resolverDirName, resolverFileName, dnsAddr, paths) + s.processSearchPaths(g, func(c context.Context, _ vif.Device) error { + return s.updateResolverFiles(c, resolverDirName, resolverFileName, dnsAddr) }, dev) // Server will close the listener, so no need to close it here. return s.Run(c, make(chan struct{}), []net.PacketConn{listener}, nil, s.resolveInCluster) @@ -132,34 +132,22 @@ func (s *Server) removeResolverFiles(c context.Context, resolverDirName string) return nil } -func (s *Server) updateResolverFiles(c context.Context, resolverDirName, resolverFileName string, dnsAddr *net.UDPAddr, paths []string) error { - dlog.Infof(c, "setting search paths %s", strings.Join(paths, " ")) +func (s *Server) updateResolverFiles(c context.Context, resolverDirName, resolverFileName string, dnsAddr *net.UDPAddr) error { rf, err := readResolveFile(resolverFileName) if err != nil { return err } + s.Lock() + defer s.Unlock() - // paths that contain a dot are search paths, the ones that don't are namespaces. - routes := make(map[string]struct{}) - search := make([]string, 0) - for _, path := range paths { - if strings.ContainsRune(path, '.') { - search = append(search, path) - } else if path != "" { - routes[path] = struct{}{} - } - } + // All routes and include suffixes become domains - // All namespaces and include suffixes become domains - domains := make(map[string]struct{}, len(routes)+len(s.includeSuffixes)) - maps.Merge(domains, routes) + domains := make(map[string]struct{}, len(s.routes)+len(s.includeSuffixes)) + maps.Merge(domains, s.routes) for _, sfx := range s.includeSuffixes { domains[strings.TrimPrefix(sfx, ".")] = struct{}{} } - s.Lock() - defer s.Unlock() - // On Darwin, we provide resolution of NAME.NAMESPACE by adding one domain // for each namespace in its own domain file under /etc/resolver. Each file // is named "telepresence..local" @@ -176,11 +164,6 @@ func (s *Server) updateResolverFiles(c context.Context, resolverDirName, resolve additions = append(additions, domain) } } - - search = append([]string{tel2SubDomainDot + s.clusterDomain}, search...) - - s.search = search - s.routes = routes s.domains = domains for _, domain := range removals { @@ -203,7 +186,7 @@ func (s *Server) updateResolverFiles(c context.Context, resolverDirName, resolve } } - rf.setSearchPaths(search...) + rf.setSearchPaths(s.search...) // Versions prior to Big Sur will not trigger an update unless the resolver file // is removed and recreated. diff --git a/pkg/client/rootd/dns/server_linux.go b/pkg/client/rootd/dns/server_linux.go index 573a32367a..eea0d92fbf 100644 --- a/pkg/client/rootd/dns/server_linux.go +++ b/pkg/client/rootd/dns/server_linux.go @@ -23,7 +23,10 @@ import ( const ( maxRecursionTestRetries = 10 - recursionTestTimeout = 500 * time.Millisecond + + // We use a fairly short delay here because if DNS recursion is a thing, then the cluster's DNS-server + // has access to the caller host's network, so it runs locally in a Docker container or similar. + recursionTestTimeout = 200 * time.Millisecond ) var errResolveDNotConfigured = errors.New("resolved not configured") @@ -95,20 +98,7 @@ func (s *Server) runOverridingServer(c context.Context, dev vif.Device) error { g.Go("Server", func(c context.Context) error { defer close(serverDone) // Server will close the listener, so no need to close it here. - s.processSearchPaths(g, func(c context.Context, paths []string, _ vif.Device) error { - routes := make(map[string]struct{}) - search := make([]string, 0) - for _, path := range paths { - if strings.ContainsRune(path, '.') { - search = append(search, path) - } else if path != "" { - routes[path] = struct{}{} - } - } - s.Lock() - s.routes = routes - s.search = search - s.Unlock() + s.processSearchPaths(g, func(c context.Context, _ vif.Device) error { s.flushDNS() return nil }, dev) diff --git a/pkg/client/rootd/dns/server_windows.go b/pkg/client/rootd/dns/server_windows.go index 22fd18b993..080891c1db 100644 --- a/pkg/client/rootd/dns/server_windows.go +++ b/pkg/client/rootd/dns/server_windows.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "net" - "strings" "time" "github.com/datawire/dlib/dgroup" @@ -33,32 +32,23 @@ func (s *Server) Worker(c context.Context, dev vif.Device, configureDNS func(net // No need to close listener. It's closed by the dns server. defer func() { c, cancel := context.WithTimeout(context.WithoutCancel(c), 5*time.Second) + s.Lock() _ = dev.SetDNS(c, s.clusterDomain, s.remoteIP, nil) + s.Unlock() cancel() }() + if err := s.updateRouterDNS(c, dev); err != nil { + return err + } s.processSearchPaths(g, s.updateRouterDNS, dev) return s.Run(c, make(chan struct{}), []net.PacketConn{listener}, nil, s.resolveInCluster) }) return g.Wait() } -func (s *Server) updateRouterDNS(c context.Context, paths []string, dev vif.Device) error { - routes := make(map[string]struct{}) - search := []string{ - tel2SubDomain + "." + s.clusterDomain, - } - - for _, path := range paths { - if strings.ContainsRune(path, '.') { - search = append(search, path) - } else if path != "" { - routes[path] = struct{}{} - } - } +func (s *Server) updateRouterDNS(c context.Context, dev vif.Device) error { s.Lock() - s.routes = routes - s.search = search - err := dev.SetDNS(c, s.clusterDomain, s.remoteIP, search) + err := dev.SetDNS(c, s.clusterDomain, s.remoteIP, s.search) s.Unlock() s.flushDNS() if err != nil { From 9a0320489201812f22934fe58e70d2ae6c40a41a Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Wed, 17 Jan 2024 14:13:19 +0100 Subject: [PATCH 10/20] Ensure that iptables-legacy is imported to the telepresence client image Also adds logging to IP-tables command used by overriding DNS resolver. Signed-off-by: Thomas Hallgren --- build-aux/docker/images/Dockerfile.client | 4 +++- pkg/client/rootd/dns/server_linux.go | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/build-aux/docker/images/Dockerfile.client b/build-aux/docker/images/Dockerfile.client index 87b41b7ca8..87064d0d90 100644 --- a/build-aux/docker/images/Dockerfile.client +++ b/build-aux/docker/images/Dockerfile.client @@ -41,7 +41,9 @@ RUN setcap 'cap_net_bind_service+ep' /usr/local/bin/telepresence # The telepresence target is the one that gets published. It aims to be a small as possible. FROM alpine as telepresence -RUN apk add --no-cache ca-certificates iptables bash +RUN apk add --no-cache ca-certificates iptables iptables-legacy bash +RUN rm /sbin/iptables && ln -s /sbin/iptables-legacy /sbin/iptables +RUN rm /sbin/ip6tables && ln -s /sbin/ip6tables-legacy /sbin/ip6tables # the telepresence binary COPY --from=telepresence-build /usr/local/bin/telepresence /usr/local/bin diff --git a/pkg/client/rootd/dns/server_linux.go b/pkg/client/rootd/dns/server_linux.go index eea0d92fbf..f5f0d95366 100644 --- a/pkg/client/rootd/dns/server_linux.go +++ b/pkg/client/rootd/dns/server_linux.go @@ -239,7 +239,7 @@ func runNatTableCmd(c context.Context, args ...string) error { // want to leave things in a half-cleaned-up state. args = append([]string{"-t", "nat"}, args...) cmd := dexec.CommandContext(c, "iptables", args...) - cmd.DisableLogging = true + cmd.DisableLogging = dlog.MaxLogLevel(c) < dlog.LogLevelDebug dlog.Debug(c, shellquote.ShellString("iptables", args)) return cmd.Run() } From bc7cb29fd3e67de8ca5bfaafb9bdad2bba103d70 Mon Sep 17 00:00:00 2001 From: Thomas Hallgren Date: Wed, 17 Jan 2024 14:16:46 +0100 Subject: [PATCH 11/20] Improve DNS-servers handling of recursion and the tel2-search domain. Rewrites part of the logic to make it more consistent. The tel2-search domain is now caught early on and removed so that the remaining logic can safely disregard it. More states were added to the recursion detection logic to make it more reliable. Signed-off-by: Thomas Hallgren --- pkg/client/rootd/dns/server.go | 662 +++++++++++++++------------- pkg/client/rootd/dns/server_test.go | 71 ++- 2 files changed, 404 insertions(+), 329 deletions(-) diff --git a/pkg/client/rootd/dns/server.go b/pkg/client/rootd/dns/server.go index dd52a68a00..e5b85627de 100644 --- a/pkg/client/rootd/dns/server.go +++ b/pkg/client/rootd/dns/server.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "net" - "runtime" "slices" "strings" "sync" @@ -33,11 +32,6 @@ import ( type Resolver func(context.Context, *dns.Question) (dnsproxy.RRs, int, error) const ( - // recursionCheck is a special host name in a well known namespace that isn't expected to exist. It - // is used once for determining if the cluster's DNS resolver will call the Telepresence DNS resolver - // recursively. This is common when the cluster is running on the local host (k3s in docker for instance). - recursionCheck = "tel2-recursion-check.kube-system." - // defaultClusterDomain used unless traffic-manager reports otherwise. defaultClusterDomain = "cluster.local." @@ -45,6 +39,10 @@ const ( // in an increase of the requestCount but always yield an NXDOMAIN reply. santiyCheck = "jhfweoitnkgyeta." + tel2SubDomain santiyCheckDot = santiyCheck + "." + + // dnsTTL is the number of seconds that a found DNS record should be allowed to live in the callers cache. We + // keep this low to avoid such caching. + dnsTTL = 4 ) type FallbackPool interface { @@ -56,9 +54,10 @@ type FallbackPool interface { const ( _ = int32(iota) + recursionQueryNotYetReceived + recursionQueryReceived recursionNotDetected recursionDetected - recursionTestInProgress ) //nolint:gochecknoglobals // constant @@ -199,71 +198,89 @@ var ( localhostIPv6 = net.IP{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} //nolint:gochecknoglobals // constant ) -func (s *Server) tel2SubDomainHostname(query string) string { - // Has to be trimmed since value for an unqualified hostname can be : - // - .tel2-search.cluster.local - s.RLock() - uqHostSuffix := "." + tel2SubDomainDot + s.clusterDomain - s.RUnlock() - if strings.HasSuffix(query, uqHostSuffix) { - return strings.TrimSuffix(query, uqHostSuffix) + "." - } - return "" -} - func (s *Server) shouldDoClusterLookup(query string) bool { + name := query[:len(query)-1] // skip last dot if strings.HasPrefix(query, wpadDot) { // Reject "wpad.*" + dlog.Debugf(s.ctx, `Cluster DNS excluded by exclude-prefix "wpad." for name %q`, name) return false } - if strings.HasSuffix(query, "."+s.clusterDomain) && strings.Count(query, ".") < 4 { - // Reject "