istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/dns/client/dns.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package client
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"net"
    21  	"net/netip"
    22  	"os"
    23  	"strings"
    24  	"sync/atomic"
    25  	"time"
    26  
    27  	"github.com/google/uuid"
    28  	"github.com/miekg/dns"
    29  
    30  	"istio.io/istio/pilot/pkg/serviceregistry/provider"
    31  	"istio.io/istio/pkg/config/host"
    32  	dnsProto "istio.io/istio/pkg/dns/proto"
    33  	istiolog "istio.io/istio/pkg/log"
    34  	netutil "istio.io/istio/pkg/util/net"
    35  	"istio.io/istio/pkg/util/sets"
    36  )
    37  
// log is the package-level logging scope, registered under the name "dns".
var log = istiolog.RegisterScope("dns", "Istio DNS proxy")
    39  
// LocalDNSServer holds configurations for the DNS downstreamUDPServer in Istio Agent
type LocalDNSServer struct {
	// Holds the pointer to the DNS lookup table (stored as *LookupTable).
	// It stays empty until UpdateLookupTable has been called at least once.
	lookupTable atomic.Value

	// nameTable holds the original NameTable (stored as *dnsProto.NameTable), for debugging
	nameTable atomic.Value

	// dnsProxies holds one proxy per listen address and protocol ("udp" and "tcp").
	dnsProxies []*dnsProxy

	// resolvConfServers are the upstream servers from resolv.conf in host:port form;
	// searchNamespaces is the search list from the same file.
	resolvConfServers []string
	searchNamespaces  []string
	// The namespace where the proxy resides
	// determines the hosts used for shortname resolution
	proxyNamespace string
	// Optimizations to save space and time
	// proxyDomain is the proxy domain with a leading namespace label removed;
	// proxyDomainParts is the same value split on ".".
	proxyDomain      string
	proxyDomainParts []string

	// respondBeforeSync makes ServeDNS forward queries upstream while no lookup table
	// has been stored yet, instead of answering SERVFAIL.
	// forwardToUpstreamParallel makes queryUpstream query all upstream servers
	// concurrently instead of one after the other.
	respondBeforeSync         bool
	forwardToUpstreamParallel bool
}
    62  
// LookupTable is borrowed from https://github.com/coredns/coredns/blob/master/plugin/hosts/hostsfile.go
type LookupTable struct {
	// This table will be first looked up to see if the host is something that we got a Nametable entry for
	// (i.e. came from istiod's service registry). If it is, then we can answer authoritatively ourselves:
	// when only A records exist for such a host, an AAAA query (or vice versa) gets a NOERROR reply with an
	// empty answer section rather than being forwarded. If the host does not exist in this map, the lookup
	// reports "not found", causing the caller to query the upstream DNS server to resolve the host.
	// Without this map, we would end up making unnecessary upstream DNS queries
	// for hosts that will never resolve (e.g., AAAA for svc1.ns1.svc.cluster.local.svc.cluster.local.)
	allHosts sets.String

	// The key is a FQDN matching a DNS query (like example.com.), the value is pre-created DNS RR records
	// of A or AAAA type as appropriate.
	name4 map[string][]dns.RR
	name6 map[string][]dns.RR
	// The cname records here (comprised of different variants of the hosts above,
	// expanded by the first search namespace) pointing to the actual host.
	cname map[string][]dns.RR
}
    81  
const (
	// defaultTTLInSeconds is the TTL put on every A, AAAA and CNAME record built in this file.
	// In case the client decides to honor the TTL, keep it low so that we can always serve
	// the latest IP for a host.
	// TODO: make it configurable
	defaultTTLInSeconds = 30
)
    88  
    89  func NewLocalDNSServer(proxyNamespace, proxyDomain string, addr string, forwardToUpstreamParallel bool) (*LocalDNSServer, error) {
    90  	h := &LocalDNSServer{
    91  		proxyNamespace:            proxyNamespace,
    92  		forwardToUpstreamParallel: forwardToUpstreamParallel,
    93  	}
    94  
    95  	// proxyDomain could contain the namespace making it redundant.
    96  	// we just need the .svc.cluster.local piece
    97  	parts := strings.Split(proxyDomain, ".")
    98  	if len(parts) > 0 {
    99  		if parts[0] == proxyNamespace {
   100  			parts = parts[1:]
   101  		}
   102  		h.proxyDomainParts = parts
   103  		h.proxyDomain = strings.Join(parts, ".")
   104  	}
   105  
   106  	resolvConf := "/etc/resolv.conf"
   107  	// If running as root and the alternate resolv.conf file exists, use it instead.
   108  	// This is used when running in Docker or VMs, without iptables DNS interception.
   109  	if strings.HasSuffix(addr, ":53") {
   110  		if os.Getuid() == 0 {
   111  			h.respondBeforeSync = true
   112  			// TODO: we can also copy /etc/resolv.conf to /var/lib/istio/resolv.conf and
   113  			// replace it with 'nameserver 127.0.0.1'
   114  			if _, err := os.Stat("/var/lib/istio/resolv.conf"); !os.IsNotExist(err) {
   115  				resolvConf = "/var/lib/istio/resolv.conf"
   116  			}
   117  		} else {
   118  			log.Error("DNS address :53 and not running as root, use default")
   119  			addr = "localhost:15053"
   120  		}
   121  	}
   122  
   123  	// We will use the local resolv.conf for resolving unknown names.
   124  	dnsConfig, err := dns.ClientConfigFromFile(resolvConf)
   125  	if err != nil {
   126  		log.Warnf("failed to load %s: %v", resolvConf, err)
   127  		return nil, err
   128  	}
   129  
   130  	// Unlike traditional DNS resolvers, we do not need to append the search
   131  	// namespace to a given query and try to resolve it. This is because the
   132  	// agent acts as a DNS interceptor for DNS queries made by the application.
   133  	// The application's resolver is already sending us DNS queries, one for each
   134  	// of the DNS search namespaces. We simply need to check the existence of this
   135  	// name in our local nametable. If not, we will forward the query to the
   136  	// upstream resolvers as is.
   137  	if dnsConfig != nil {
   138  		for _, s := range dnsConfig.Servers {
   139  			h.resolvConfServers = append(h.resolvConfServers, net.JoinHostPort(s, dnsConfig.Port))
   140  		}
   141  		h.searchNamespaces = dnsConfig.Search
   142  	}
   143  
   144  	log.WithLabels("search", h.searchNamespaces, "servers", h.resolvConfServers).Debugf("initialized DNS")
   145  
   146  	if addr == "" {
   147  		addr = "localhost:15053"
   148  	}
   149  	v4, v6 := netutil.ParseIPsSplitToV4V6(dnsConfig.Servers)
   150  	host, port, err := net.SplitHostPort(addr)
   151  	if err != nil {
   152  		return nil, fmt.Errorf("dns address must be a valid host:port: %v", err)
   153  	}
   154  	addresses := []string{addr}
   155  	if host == "localhost" && len(v4)+len(v6) > 0 {
   156  		addresses = []string{}
   157  		// When binding to "localhost", go will pick v4 OR v6. In dual stack, we may need v4 AND v6.
   158  		// If we are in this situation, explicitly listen to v4, v6, or both.
   159  		if len(v4) > 0 {
   160  			addresses = append(addresses, net.JoinHostPort("127.0.0.1", port))
   161  		}
   162  		if len(v6) > 0 {
   163  			addresses = append(addresses, net.JoinHostPort("::1", port))
   164  		}
   165  	}
   166  	for _, ipAddr := range addresses {
   167  		for _, proto := range []string{"udp", "tcp"} {
   168  			proxy, err := newDNSProxy(proto, ipAddr, h)
   169  			if err != nil {
   170  				return nil, err
   171  			}
   172  			h.dnsProxies = append(h.dnsProxies, proxy)
   173  
   174  		}
   175  	}
   176  
   177  	return h, nil
   178  }
   179  
   180  // StartDNS starts DNS-over-UDP and DNS-over-TCP servers.
   181  func (h *LocalDNSServer) StartDNS() {
   182  	for _, p := range h.dnsProxies {
   183  		go p.start()
   184  	}
   185  }
   186  
   187  func (h *LocalDNSServer) UpdateLookupTable(nt *dnsProto.NameTable) {
   188  	lookupTable := &LookupTable{
   189  		allHosts: sets.String{},
   190  		name4:    map[string][]dns.RR{},
   191  		name6:    map[string][]dns.RR{},
   192  		cname:    map[string][]dns.RR{},
   193  	}
   194  	h.BuildAlternateHosts(nt, lookupTable.buildDNSAnswers)
   195  	h.lookupTable.Store(lookupTable)
   196  	h.nameTable.Store(nt)
   197  	log.Debugf("updated lookup table with %d hosts", len(lookupTable.allHosts))
   198  }
   199  
   200  // BuildAlternateHosts builds alternate hosts for Kubernetes services in the name table and
   201  // calls the passed in function with the built alternate hosts.
   202  func (h *LocalDNSServer) BuildAlternateHosts(nt *dnsProto.NameTable,
   203  	apply func(map[string]struct{}, []netip.Addr, []netip.Addr, []string),
   204  ) {
   205  	for hostname, ni := range nt.Table {
   206  		// Given a host
   207  		// if its a non-k8s host, store the host+. as the key with the pre-computed DNS RR records
   208  		// if its a k8s host, store all variants (i.e. shortname+., shortname+namespace+., fqdn+., etc.)
   209  		// shortname+. is only for hosts in current namespace
   210  		var altHosts sets.String
   211  		if ni.Registry == string(provider.Kubernetes) {
   212  			altHosts = generateAltHosts(hostname, ni, h.proxyNamespace, h.proxyDomain, h.proxyDomainParts)
   213  		} else {
   214  			if !strings.HasSuffix(hostname, ".") {
   215  				hostname += "."
   216  			}
   217  			altHosts = sets.New(hostname)
   218  		}
   219  		ipv4, ipv6 := netutil.ParseIPsSplitToV4V6(ni.Ips)
   220  		if len(ipv6) == 0 && len(ipv4) == 0 {
   221  			// malformed ips
   222  			continue
   223  		}
   224  		apply(altHosts, ipv4, ipv6, h.searchNamespaces)
   225  	}
   226  }
   227  
   228  // upstream sends the request to the upstream server, with associated logs and metrics
   229  func (h *LocalDNSServer) upstream(proxy *dnsProxy, req *dns.Msg, hostname string) *dns.Msg {
   230  	upstreamRequests.Increment()
   231  	start := time.Now()
   232  	// We did not find the host in our internal cache. Query upstream and return the response as is.
   233  	log.Debugf("response for hostname %q not found in dns proxy, querying upstream", hostname)
   234  	response := h.queryUpstream(proxy.upstreamClient, req, log)
   235  	requestDuration.Record(time.Since(start).Seconds())
   236  	log.Debugf("upstream response for hostname %q : %v", hostname, response)
   237  	return response
   238  }
   239  
   240  // ServeDNS is the implementation of DNS interface
   241  func (h *LocalDNSServer) ServeDNS(proxy *dnsProxy, w dns.ResponseWriter, req *dns.Msg) {
   242  	requests.Increment()
   243  	var response *dns.Msg
   244  	log := log.WithLabels("protocol", proxy.protocol, "edns", req.IsEdns0() != nil)
   245  	if log.DebugEnabled() {
   246  		id := uuid.New()
   247  		log = log.WithLabels("id", id)
   248  	}
   249  	log.Debugf("request %v", req)
   250  
   251  	if len(req.Question) == 0 {
   252  		response = new(dns.Msg)
   253  		response.SetReply(req)
   254  		response.Rcode = dns.RcodeServerFailure
   255  		_ = w.WriteMsg(response)
   256  		return
   257  	}
   258  
   259  	lp := h.lookupTable.Load()
   260  	hostname := strings.ToLower(req.Question[0].Name)
   261  	if lp == nil {
   262  		if h.respondBeforeSync {
   263  			response = h.upstream(proxy, req, hostname)
   264  			response.Truncate(size(proxy.protocol, req))
   265  			_ = w.WriteMsg(response)
   266  		} else {
   267  			log.Debugf("dns request for host %q before lookup table is loaded", hostname)
   268  			response = new(dns.Msg)
   269  			response.SetReply(req)
   270  			response.Rcode = dns.RcodeServerFailure
   271  			_ = w.WriteMsg(response)
   272  		}
   273  		return
   274  	}
   275  	lookupTable := lp.(*LookupTable)
   276  	var answers []dns.RR
   277  
   278  	// This name will always end in a dot.
   279  	// We expect only one question in the query even though the spec allows many
   280  	// clients usually do not do more than one query either.
   281  	answers, hostFound := lookupTable.lookupHost(req.Question[0].Qtype, hostname)
   282  
   283  	if hostFound {
   284  		response = new(dns.Msg)
   285  		response.SetReply(req)
   286  		// We are the authority here, since we control DNS for known hostnames
   287  		response.Authoritative = true
   288  		// Even if answers is empty, we still return NOERROR. This matches expected behavior of DNS
   289  		// servers. NXDOMAIN means we do not know *anything* about the domain; if we set it here then
   290  		// a client (ie curl, see https://github.com/istio/istio/issues/31250) sending parallel
   291  		// requests for A and AAAA may get NXDOMAIN for AAAA and treat the entire thing as a NXDOMAIN
   292  		response.Answer = answers
   293  		// Randomize the responses; this ensures for things like headless services we can do DNS-LB
   294  		// This matches standard kube-dns behavior. We only do this for cached responses as the
   295  		// upstream DNS server would already round robin if desired.
   296  		if len(answers) > 0 {
   297  			roundRobinResponse(response)
   298  		}
   299  		log.Debugf("response for hostname %q (found=true): %v", hostname, response)
   300  	} else {
   301  		response = h.upstream(proxy, req, hostname)
   302  	}
   303  	// Compress the response - we don't know if the incoming response was compressed or not. If it was,
   304  	// but we don't compress on the outbound, we will run into issues. For example, if the compressed
   305  	// size is 450 bytes but uncompressed 1000 bytes now we are outside of the non-eDNS UDP size limits
   306  	response.Truncate(size(proxy.protocol, req))
   307  	_ = w.WriteMsg(response)
   308  }
   309  
   310  // IsReady returns true if DNS lookup table is updated at least once.
   311  func (h *LocalDNSServer) IsReady() bool {
   312  	return h.lookupTable.Load() != nil
   313  }
   314  
   315  func (h *LocalDNSServer) NameTable() *dnsProto.NameTable {
   316  	lt := h.nameTable.Load()
   317  	if lt == nil {
   318  		return nil
   319  	}
   320  	return lt.(*dnsProto.NameTable)
   321  }
   322  
   323  // Inspired by https://github.com/coredns/coredns/blob/master/plugin/loadbalance/loadbalance.go
   324  func roundRobinResponse(res *dns.Msg) {
   325  	if res.Rcode != dns.RcodeSuccess {
   326  		return
   327  	}
   328  
   329  	if res.Question[0].Qtype == dns.TypeAXFR || res.Question[0].Qtype == dns.TypeIXFR {
   330  		return
   331  	}
   332  
   333  	res.Answer = roundRobin(res.Answer)
   334  	res.Ns = roundRobin(res.Ns)
   335  	res.Extra = roundRobin(res.Extra)
   336  }
   337  
   338  func roundRobin(in []dns.RR) []dns.RR {
   339  	cname := make([]dns.RR, 0)
   340  	address := make([]dns.RR, 0)
   341  	mx := make([]dns.RR, 0)
   342  	rest := make([]dns.RR, 0)
   343  	for _, r := range in {
   344  		switch r.Header().Rrtype {
   345  		case dns.TypeCNAME:
   346  			cname = append(cname, r)
   347  		case dns.TypeA, dns.TypeAAAA:
   348  			address = append(address, r)
   349  		case dns.TypeMX:
   350  			mx = append(mx, r)
   351  		default:
   352  			rest = append(rest, r)
   353  		}
   354  	}
   355  
   356  	roundRobinShuffle(address)
   357  	roundRobinShuffle(mx)
   358  
   359  	out := append(cname, rest...)
   360  	out = append(out, address...)
   361  	out = append(out, mx...)
   362  	return out
   363  }
   364  
   365  func roundRobinShuffle(records []dns.RR) {
   366  	switch l := len(records); l {
   367  	case 0, 1:
   368  		break
   369  	case 2:
   370  		if dns.Id()%2 == 0 {
   371  			records[0], records[1] = records[1], records[0]
   372  		}
   373  	default:
   374  		for j := 0; j < l*(int(dns.Id())%4+1); j++ {
   375  			q := int(dns.Id()) % l
   376  			p := int(dns.Id()) % l
   377  			if q == p {
   378  				p = (p + 1) % l
   379  			}
   380  			records[q], records[p] = records[p], records[q]
   381  		}
   382  	}
   383  }
   384  
   385  func (h *LocalDNSServer) Close() {
   386  	for _, p := range h.dnsProxies {
   387  		p.close()
   388  	}
   389  }
   390  
   391  func (h *LocalDNSServer) queryUpstream(upstreamClient *dns.Client, req *dns.Msg, scope *istiolog.Scope) *dns.Msg {
   392  	if h.forwardToUpstreamParallel {
   393  		return h.queryUpstreamParallel(upstreamClient, req, scope)
   394  	}
   395  
   396  	var response *dns.Msg
   397  
   398  	for _, upstream := range h.resolvConfServers {
   399  		cResponse, _, err := upstreamClient.Exchange(req, upstream)
   400  		if err == nil {
   401  			response = cResponse
   402  			break
   403  		}
   404  		scope.Infof("upstream failure: %v", err)
   405  	}
   406  
   407  	if response == nil {
   408  		response = serverFailure(req)
   409  	}
   410  	return response
   411  }
   412  
   413  // queryUpstreamParallel will send parallel queries to all nameservers and return first successful response immediately.
   414  // The overall approach of parallel resolution is likely not widespread, but there are already some widely used
   415  // clients support it:
   416  //
   417  //   - dnsmasq: setting flag '--all-servers' forces dnsmasq to send all queries to all available servers. The reply from
   418  //     the server which answers first will be returned to the original requester.
   419  //   - tailscale: will either proxy all DNS requests—in which case we query all nameservers in parallel and use the quickest
   420  //     response—or defer to the operating system, which we have no control over.
   421  //   - systemd-resolved: which is used as a default resolver in many Linux distributions nowadays also performs parallel
   422  //     lookups for multiple DNS servers and returns the first successful response.
   423  func (h *LocalDNSServer) queryUpstreamParallel(upstreamClient *dns.Client, req *dns.Msg, scope *istiolog.Scope) *dns.Msg {
   424  	// Guarantee that the ctx we use below is done when this function returns.
   425  	ctx, cancel := context.WithCancel(context.Background())
   426  	defer cancel()
   427  
   428  	responseCh := make(chan *dns.Msg)
   429  	errCh := make(chan error)
   430  
   431  	queryOne := func(upstream string) {
   432  		// Note: After DialContext in ExchangeContext is called, this function cannot be cancelled by context.
   433  		cResponse, _, err := upstreamClient.ExchangeContext(ctx, req, upstream)
   434  		if err == nil {
   435  			// Only reserve first response and ignore others.
   436  			select {
   437  			case responseCh <- cResponse:
   438  			case <-ctx.Done():
   439  			}
   440  			return
   441  		}
   442  		scope.Infof("parallel querying upstream failure: %v", err)
   443  		select {
   444  		case errCh <- err:
   445  		case <-ctx.Done():
   446  		}
   447  	}
   448  
   449  	for _, upstream := range h.resolvConfServers {
   450  		go queryOne(upstream)
   451  	}
   452  
   453  	errorsCount := 0
   454  	for {
   455  		select {
   456  		case response := <-responseCh:
   457  			// We got the first response.
   458  			return response
   459  		case <-errCh:
   460  			errorsCount++
   461  			// All servers returned error - return failure.
   462  			if errorsCount == len(h.resolvConfServers) {
   463  				scope.Infof("all upstream failed")
   464  				return serverFailure(req)
   465  			}
   466  		}
   467  	}
   468  }
   469  
   470  func serverFailure(req *dns.Msg) *dns.Msg {
   471  	failures.Increment()
   472  	response := new(dns.Msg)
   473  	response.SetReply(req)
   474  	response.Rcode = dns.RcodeServerFailure
   475  	return response
   476  }
   477  
   478  func generateAltHosts(hostname string, nameinfo *dnsProto.NameTable_NameInfo, proxyNamespace, proxyDomain string,
   479  	proxyDomainParts []string,
   480  ) sets.String {
   481  	out := sets.New[string]()
   482  	if strings.HasSuffix(hostname, ".") {
   483  		return out
   484  	}
   485  	out.Insert(hostname + ".")
   486  	// do not generate alt hostnames if the service is in a different domain (i.e. cluster) than the proxy
   487  	// as we have no way to resolve conflicts on name.namespace entries across clusters of different domains
   488  	if proxyDomain == "" || !strings.HasSuffix(hostname, proxyDomain) {
   489  		return out
   490  	}
   491  	out.Insert(nameinfo.Shortname + "." + nameinfo.Namespace + ".")
   492  	if proxyNamespace == nameinfo.Namespace {
   493  		out.Insert(nameinfo.Shortname + ".")
   494  	}
   495  	// Do we need to generate entries for name.namespace.svc, name.namespace.svc.cluster, etc. ?
   496  	// If these are not that frequently used, then not doing so here will save some space and time
   497  	// as some people have very long proxy domains with multiple dots
   498  	// For now, we will generate just one more domain (which is usually the .svc piece).
   499  	out.Insert(nameinfo.Shortname + "." + nameinfo.Namespace + "." + proxyDomainParts[0] + ".")
   500  
   501  	// Add any additional alt hostnames.
   502  	// nolint: staticcheck
   503  	for _, altHost := range nameinfo.AltHosts {
   504  		out.Insert(altHost + ".")
   505  	}
   506  	return out
   507  }
   508  
// lookupHost resolves hostname for the given query type against the table.
//
// It first decides whether the host is part of our service registry, either by an
// exact match in allHosts or by a wildcard entry ("*.<parent domain>.") covering it.
//
// Returns:
//   - (nil, false) when the host is unknown, or when qtype is neither A nor AAAA.
//     ServeDNS then forwards the query upstream.
//   - (records, true) when the host is known. records holds any CNAME records first,
//     followed by the A/AAAA records. It is empty when the host has no record of the
//     requested type; ServeDNS then replies NOERROR with an empty answer section
//     (not NXDOMAIN).
func (table *LookupTable) lookupHost(qtype uint16, hostname string) ([]dns.RR, bool) {
	// Remember the name exactly as asked; hostname itself may be rewritten below.
	question := string(host.Name(hostname))
	wildcard := false
	// First check if host exists in all hosts.
	hostFound := table.allHosts.Contains(hostname)
	// If it is not found, check if a wildcard host exists for it.
	// For example for "*.example.com", with the question "svc.svcns.example.com",
	// we check if we have entries for "*.svcns.example.com", "*.example.com" etc.
	if !hostFound {
		labels := dns.SplitDomainName(hostname)
		for idx := range labels {
			// Drop the first idx+1 labels and put a wildcard in front of the rest.
			qhost := "*." + strings.Join(labels[idx+1:], ".") + "."
			if hostFound = table.allHosts.Contains(qhost); hostFound {
				wildcard = true
				// From here on, look up records under the wildcard entry.
				hostname = qhost
				break
			}
		}
	}

	if !hostFound {
		return nil, false
	}

	var out []dns.RR
	var ipAnswers []dns.RR
	var wcAnswers []dns.RR
	var cnAnswers []dns.RR

	// Odds are, the first query will always be an expanded hostname
	// (productpage.ns1.svc.cluster.local.ns1.svc.cluster.local)
	// So lookup the cname table first
	for _, cn := range table.cname[hostname] {
		// this was a cname match
		// Copy before editing so the record stored in the table stays untouched.
		copied := dns.Copy(cn).(*dns.CNAME)
		copied.Header().Name = question
		cnAnswers = append(cnAnswers, copied)
		// Continue the address lookup with the CNAME target.
		hostname = copied.Target
	}

	switch qtype {
	case dns.TypeA:
		ipAnswers = table.name4[hostname]
	case dns.TypeAAAA:
		ipAnswers = table.name6[hostname]
	default:
		// TODO: handle PTR records for reverse dns lookups
		// Any other query type is reported as "not found" so that the caller asks upstream.
		return nil, false
	}

	if len(ipAnswers) > 0 {
		// For wildcard hosts, set the host that is being queried for.
		if wildcard {
			for _, answer := range ipAnswers {
				copied := dns.Copy(answer)
				// If there is a CNAME record for the wildcard host, we will send a chained response of CNAME + A/AAAA pointer
				// Otherwise we expand the wildcard to the original question domain
				if len(cnAnswers) > 0 {
					copied.Header().Name = hostname
				} else {
					copied.Header().Name = question
				}
				wcAnswers = append(wcAnswers, copied)
			}
		}

		// We will return a chained response. In a chained response, the first entry is the cname record,
		// and the second one is the A/AAAA record itself. Some clients do not follow cname redirects
		// with additional DNS queries. Instead, they expect all the resolved records to be in the same
		// big DNS response (presumably assuming that a recursive DNS query should do the deed, resolve
		// cname et al and return the composite response).
		out = append(out, cnAnswers...)
		if wildcard {
			out = append(out, wcAnswers...)
		} else {
			out = append(out, ipAnswers...)
		}
	}
	// Note: when no address record exists, out stays empty even if a CNAME matched.
	return out, hostFound
}
   592  
   593  // This function stores the list of hostnames along with the precomputed DNS response for that hostname.
   594  // Most hostnames have a DNS response containing the A/AAAA records. In addition, this function stores a
   595  // variant of the host+ the first search domain in resolv.conf as the first query
   596  // is likely to be host.ns.svc.cluster.local (e.g., www.google.com.ns1.svc.cluster.local) due to
   597  // the list of search namespaces in resolv.conf (unless the app explicitly does www.google.com. which is unlikely).
   598  // We will resolve www.google.com.ns1.svc.cluster.local with a CNAME record pointing to www.google.com.
   599  // which will cause the client's resolver to automatically resolve www.google.com. , and short circuit the lengthy
   600  // search process down to just two DNS queries. This will eliminate unnecessary upstream DNS queries from the
   601  // agent, reduce load on DNS servers and improve overall latency. This idea was borrowed and adapted from
   602  // the autopath plugin in coredns. The implementation here is very different from auto path though.
   603  // Autopath does inline computation to see if the given query could potentially match something else
   604  // and then returns a CNAME record. In our case, we preemptively store these random dns names as a host
   605  // in the lookup table with a CNAME record as the DNS response. This technique eliminates the need
   606  // to do string parsing, memory allocations, etc. at query time at the cost of Nx number of entries (i.e. memory) to store
   607  // the lookup table, where N is number of search namespaces.
   608  func (table *LookupTable) buildDNSAnswers(altHosts map[string]struct{}, ipv4 []netip.Addr, ipv6 []netip.Addr, searchNamespaces []string) {
   609  	for h := range altHosts {
   610  		h = strings.ToLower(h)
   611  		table.allHosts.Insert(h)
   612  		if len(ipv4) > 0 {
   613  			table.name4[h] = a(h, ipv4)
   614  		}
   615  		if len(ipv6) > 0 {
   616  			table.name6[h] = aaaa(h, ipv6)
   617  		}
   618  		if len(searchNamespaces) > 0 {
   619  			// NOTE: Right now, rather than storing one expanded host for each one of the search namespace
   620  			// entries, we are going to store just the first one (assuming that most clients will
   621  			// do sequential dns resolution, starting with the first search namespace)
   622  
   623  			// host h already ends with a .
   624  			// search namespace might not. So we append one in the end if needed
   625  			expandedHost := strings.ToLower(h + searchNamespaces[0])
   626  			if !strings.HasSuffix(searchNamespaces[0], ".") {
   627  				expandedHost += "."
   628  			}
   629  			// make sure this is not a proper hostname
   630  			// if host is productpage, and search namespace is ns1.svc.cluster.local
   631  			// then the expanded host productpage.ns1.svc.cluster.local is a valid hostname
   632  			// that is likely to be already present in the altHosts
   633  			if _, exists := altHosts[expandedHost]; !exists {
   634  				table.cname[expandedHost] = cname(expandedHost, h)
   635  				table.allHosts.Insert(expandedHost)
   636  			}
   637  		}
   638  	}
   639  }
   640  
   641  // Borrowed from https://github.com/coredns/coredns/blob/master/plugin/hosts/hosts.go
   642  // a takes a slice of ip string and returns a slice of A RRs.
   643  func a(host string, ips []netip.Addr) []dns.RR {
   644  	answers := make([]dns.RR, len(ips))
   645  	for i, ip := range ips {
   646  		r := new(dns.A)
   647  		r.Hdr = dns.RR_Header{Name: host, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: defaultTTLInSeconds}
   648  		r.A = ip.AsSlice()
   649  		answers[i] = r
   650  	}
   651  	return answers
   652  }
   653  
   654  // aaaa takes a slice of ip string and returns a slice of AAAA RRs.
   655  func aaaa(host string, ips []netip.Addr) []dns.RR {
   656  	answers := make([]dns.RR, len(ips))
   657  	for i, ip := range ips {
   658  		r := new(dns.AAAA)
   659  		r.Hdr = dns.RR_Header{Name: host, Rrtype: dns.TypeAAAA, Class: dns.ClassINET, Ttl: defaultTTLInSeconds}
   660  		r.AAAA = ip.AsSlice()
   661  		answers[i] = r
   662  	}
   663  	return answers
   664  }
   665  
   666  func cname(host string, targetHost string) []dns.RR {
   667  	answer := new(dns.CNAME)
   668  	answer.Hdr = dns.RR_Header{
   669  		Name:   host,
   670  		Rrtype: dns.TypeCNAME,
   671  		Class:  dns.ClassINET,
   672  		Ttl:    defaultTTLInSeconds,
   673  	}
   674  	answer.Target = targetHost
   675  	return []dns.RR{answer}
   676  }
   677  
   678  // Size returns if buffer size *advertised* in the requests OPT record.
   679  // Or when the request was over TCP, we return the maximum allowed size of 64K.
   680  func size(proto string, r *dns.Msg) int {
   681  	size := uint16(0)
   682  	if o := r.IsEdns0(); o != nil {
   683  		size = o.UDPSize()
   684  	}
   685  
   686  	// normalize size
   687  	size = ednsSize(proto, size)
   688  	return int(size)
   689  }
   690  
   691  // ednsSize returns a normalized size based on proto.
   692  func ednsSize(proto string, size uint16) uint16 {
   693  	if proto == "tcp" {
   694  		return dns.MaxMsgSize
   695  	}
   696  	if size < dns.MinMsgSize {
   697  		return dns.MinMsgSize
   698  	}
   699  	return size
   700  }