istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/model/network.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package model
    16  
    17  import (
    18  	"cmp"
    19  	"fmt"
    20  	"net"
    21  	"sort"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/hashicorp/go-multierror"
    26  	"github.com/miekg/dns"
    27  
    28  	"istio.io/istio/pilot/pkg/features"
    29  	"istio.io/istio/pkg/cluster"
    30  	"istio.io/istio/pkg/network"
    31  	"istio.io/istio/pkg/slices"
    32  	"istio.io/istio/pkg/util/istiomultierror"
    33  	netutil "istio.io/istio/pkg/util/net"
    34  	"istio.io/istio/pkg/util/sets"
    35  )
    36  
    37  // NetworkGateway is the gateway of a network
    38  type NetworkGateway struct {
    39  	// Network is the ID of the network where this Gateway resides.
    40  	Network network.ID
    41  	// Cluster is the ID of the k8s cluster where this Gateway resides.
    42  	Cluster cluster.ID
    43  	// gateway ip address
    44  	Addr string
    45  	// gateway port
    46  	Port uint32
    47  }
    48  
    49  type NetworkGatewaysWatcher interface {
    50  	NetworkGateways() []NetworkGateway
    51  	AppendNetworkGatewayHandler(h func())
    52  }
    53  
    54  // NetworkGatewaysHandler can be embedded to easily implement NetworkGatewaysWatcher.
    55  type NetworkGatewaysHandler struct {
    56  	handlers []func()
    57  }
    58  
    59  func (ngh *NetworkGatewaysHandler) AppendNetworkGatewayHandler(h func()) {
    60  	ngh.handlers = append(ngh.handlers, h)
    61  }
    62  
    63  func (ngh *NetworkGatewaysHandler) NotifyGatewayHandlers() {
    64  	for _, handler := range ngh.handlers {
    65  		handler()
    66  	}
    67  }
    68  
    69  type NetworkGateways struct {
    70  	mu *sync.RWMutex
    71  	// least common multiple of gateway number of {per network, per cluster}
    72  	lcm                 uint32
    73  	byNetwork           map[network.ID][]NetworkGateway
    74  	byNetworkAndCluster map[networkAndCluster][]NetworkGateway
    75  }
    76  
    77  // NetworkManager provides gateway details for accessing remote networks.
    78  type NetworkManager struct {
    79  	env *Environment
    80  	// exported for test
    81  	NameCache  *networkGatewayNameCache
    82  	xdsUpdater XDSUpdater
    83  
    84  	// just to ensure NetworkGateways and Unresolved are updated together
    85  	mu sync.RWMutex
    86  	// embedded NetworkGateways only includes gateways with IPs
    87  	// hostnames are resolved in control plane (or filtered out if feature is disabled)
    88  	*NetworkGateways
    89  	// includes all gateways with no DNS resolution or filtering, regardless of feature flags
    90  	Unresolved *NetworkGateways
    91  }
    92  
    93  // NewNetworkManager creates a new NetworkManager from the Environment by merging
    94  // together the MeshNetworks and ServiceRegistry-specific gateways.
    95  func NewNetworkManager(env *Environment, xdsUpdater XDSUpdater) (*NetworkManager, error) {
    96  	nameCache, err := newNetworkGatewayNameCache()
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	mgr := &NetworkManager{
   101  		env:             env,
   102  		NameCache:       nameCache,
   103  		xdsUpdater:      xdsUpdater,
   104  		NetworkGateways: &NetworkGateways{},
   105  		Unresolved:      &NetworkGateways{},
   106  	}
   107  
   108  	// share lock with root NetworkManager
   109  	mgr.NetworkGateways.mu = &mgr.mu
   110  	mgr.Unresolved.mu = &mgr.mu
   111  
   112  	env.AddNetworksHandler(mgr.reloadGateways)
   113  	// register to per registry, will be called when gateway service changed
   114  	env.AppendNetworkGatewayHandler(mgr.reloadGateways)
   115  	nameCache.AppendNetworkGatewayHandler(mgr.reloadGateways)
   116  	mgr.reload()
   117  	return mgr, nil
   118  }
   119  
   120  // reloadGateways reloads NetworkGateways and triggers a push if they change.
   121  func (mgr *NetworkManager) reloadGateways() {
   122  	changed := mgr.reload()
   123  
   124  	if changed && mgr.xdsUpdater != nil {
   125  		log.Infof("gateways changed, triggering push")
   126  		mgr.xdsUpdater.ConfigUpdate(&PushRequest{Full: true, Reason: NewReasonStats(NetworksTrigger)})
   127  	}
   128  }
   129  
   130  func (mgr *NetworkManager) reload() bool {
   131  	mgr.mu.Lock()
   132  	defer mgr.mu.Unlock()
   133  	log.Infof("reloading network gateways")
   134  
   135  	// Generate a snapshot of the state of gateways by merging the contents of
   136  	// MeshNetworks and the ServiceRegistries.
   137  
   138  	// Store all gateways in a set initially to eliminate duplicates.
   139  	gatewaySet := make(NetworkGatewaySet)
   140  
   141  	// First, load gateways from the static MeshNetworks config.
   142  	meshNetworks := mgr.env.NetworksWatcher.Networks()
   143  	if meshNetworks != nil {
   144  		for nw, networkConf := range meshNetworks.Networks {
   145  			for _, gw := range networkConf.Gateways {
   146  				if gw.GetAddress() == "" {
   147  					// registryServiceName addresses will be populated via kube service registry
   148  					continue
   149  				}
   150  				gatewaySet.Insert(NetworkGateway{
   151  					Cluster: "", /* TODO(nmittler): Add Cluster to the API */
   152  					Network: network.ID(nw),
   153  					Addr:    gw.GetAddress(),
   154  					Port:    gw.Port,
   155  				})
   156  			}
   157  		}
   158  	}
   159  
   160  	// Second, load registry-specific gateways.
   161  	// - the internal map of label gateways - these get deleted if the service is deleted, updated if the ip changes etc.
   162  	// - the computed map from meshNetworks (triggered by reloadNetworkLookup, the ported logic from getGatewayAddresses)
   163  	gatewaySet.InsertAll(mgr.env.NetworkGateways()...)
   164  	resolvedGatewaySet := mgr.resolveHostnameGateways(gatewaySet)
   165  
   166  	return mgr.NetworkGateways.update(resolvedGatewaySet) || mgr.Unresolved.update(gatewaySet)
   167  }
   168  
   169  // update calls should with the lock held
   170  func (gws *NetworkGateways) update(gatewaySet NetworkGatewaySet) bool {
   171  	if gatewaySet.Equals(sets.New(gws.allGateways()...)) {
   172  		return false
   173  	}
   174  
   175  	// index by network or network+cluster for quick lookup
   176  	byNetwork := make(map[network.ID][]NetworkGateway)
   177  	byNetworkAndCluster := make(map[networkAndCluster][]NetworkGateway)
   178  	for gw := range gatewaySet {
   179  		byNetwork[gw.Network] = append(byNetwork[gw.Network], gw)
   180  		nc := networkAndClusterForGateway(&gw)
   181  		byNetworkAndCluster[nc] = append(byNetworkAndCluster[nc], gw)
   182  	}
   183  
   184  	var gwNum []int
   185  	// Sort the gateways in byNetwork, and also calculate the max number
   186  	// of gateways per network.
   187  	for k, gws := range byNetwork {
   188  		byNetwork[k] = SortGateways(gws)
   189  		gwNum = append(gwNum, len(gws))
   190  	}
   191  
   192  	// Sort the gateways in byNetworkAndCluster.
   193  	for k, gws := range byNetworkAndCluster {
   194  		byNetworkAndCluster[k] = SortGateways(gws)
   195  		gwNum = append(gwNum, len(gws))
   196  	}
   197  
   198  	lcmVal := 1
   199  	// calculate lcm
   200  	for _, num := range gwNum {
   201  		lcmVal = lcm(lcmVal, num)
   202  	}
   203  
   204  	gws.lcm = uint32(lcmVal)
   205  	gws.byNetwork = byNetwork
   206  	gws.byNetworkAndCluster = byNetworkAndCluster
   207  
   208  	return true
   209  }
   210  
   211  // resolveHostnameGateway either resolves or removes gateways that use a non-IP Address
   212  func (mgr *NetworkManager) resolveHostnameGateways(gatewaySet NetworkGatewaySet) NetworkGatewaySet {
   213  	resolvedGatewaySet := make(NetworkGatewaySet, len(gatewaySet))
   214  	// filter the list of gateways to resolve
   215  	hostnameGateways := map[string][]NetworkGateway{}
   216  	names := sets.New[string]()
   217  	for gw := range gatewaySet {
   218  		if netutil.IsValidIPAddress(gw.Addr) {
   219  			resolvedGatewaySet.Insert(gw)
   220  			continue
   221  		}
   222  		if !features.ResolveHostnameGateways {
   223  			log.Warnf("Failed parsing gateway address %s from Service Registry. "+
   224  				"Set RESOLVE_HOSTNAME_GATEWAYS on istiod to enable resolving hostnames in the control plane.",
   225  				gw.Addr)
   226  			continue
   227  		}
   228  		hostnameGateways[gw.Addr] = append(hostnameGateways[gw.Addr], gw)
   229  		names.Insert(gw.Addr)
   230  	}
   231  
   232  	if !features.ResolveHostnameGateways {
   233  		return resolvedGatewaySet
   234  	}
   235  	// resolve each hostname
   236  	for host, addrs := range mgr.NameCache.Resolve(names) {
   237  		gwsForHost := hostnameGateways[host]
   238  		if len(addrs) == 0 {
   239  			log.Warnf("could not resolve hostname %q for %d gateways", host, len(gwsForHost))
   240  		}
   241  		// expand each resolved address into a NetworkGateway
   242  		for _, gw := range gwsForHost {
   243  			for _, resolved := range addrs {
   244  				// copy the base gateway to preserve the port/network, but update with the resolved IP
   245  				resolvedGw := gw
   246  				resolvedGw.Addr = resolved
   247  				resolvedGatewaySet.Insert(resolvedGw)
   248  			}
   249  		}
   250  	}
   251  	return resolvedGatewaySet
   252  }
   253  
   254  func (gws *NetworkGateways) IsMultiNetworkEnabled() bool {
   255  	if gws == nil {
   256  		return false
   257  	}
   258  	gws.mu.RLock()
   259  	defer gws.mu.RUnlock()
   260  	return len(gws.byNetwork) > 0
   261  }
   262  
   263  // GetLBWeightScaleFactor returns the least common multiple of the number of gateways per network.
   264  func (gws *NetworkGateways) GetLBWeightScaleFactor() uint32 {
   265  	gws.mu.RLock()
   266  	defer gws.mu.RUnlock()
   267  	return gws.lcm
   268  }
   269  
   270  func (gws *NetworkGateways) AllGateways() []NetworkGateway {
   271  	gws.mu.RLock()
   272  	defer gws.mu.RUnlock()
   273  	return gws.allGateways()
   274  }
   275  
   276  func (gws *NetworkGateways) allGateways() []NetworkGateway {
   277  	if gws.byNetwork == nil {
   278  		return nil
   279  	}
   280  	out := make([]NetworkGateway, 0)
   281  	for _, gateways := range gws.byNetwork {
   282  		out = append(out, gateways...)
   283  	}
   284  	return SortGateways(out)
   285  }
   286  
   287  func (gws *NetworkGateways) GatewaysForNetwork(nw network.ID) []NetworkGateway {
   288  	gws.mu.RLock()
   289  	defer gws.mu.RUnlock()
   290  	if gws.byNetwork == nil {
   291  		return nil
   292  	}
   293  	return gws.byNetwork[nw]
   294  }
   295  
   296  func (gws *NetworkGateways) GatewaysForNetworkAndCluster(nw network.ID, c cluster.ID) []NetworkGateway {
   297  	gws.mu.RLock()
   298  	defer gws.mu.RUnlock()
   299  	if gws.byNetworkAndCluster == nil {
   300  		return nil
   301  	}
   302  	return gws.byNetworkAndCluster[networkAndClusterFor(nw, c)]
   303  }
   304  
   305  type networkAndCluster struct {
   306  	network network.ID
   307  	cluster cluster.ID
   308  }
   309  
   310  func networkAndClusterForGateway(g *NetworkGateway) networkAndCluster {
   311  	return networkAndClusterFor(g.Network, g.Cluster)
   312  }
   313  
   314  func networkAndClusterFor(nw network.ID, c cluster.ID) networkAndCluster {
   315  	return networkAndCluster{
   316  		network: nw,
   317  		cluster: c,
   318  	}
   319  }
   320  
   321  // SortGateways sorts the array so that it's stable.
   322  func SortGateways(gws []NetworkGateway) []NetworkGateway {
   323  	return slices.SortFunc(gws, func(a, b NetworkGateway) int {
   324  		if r := cmp.Compare(a.Addr, b.Addr); r != 0 {
   325  			return r
   326  		}
   327  		return cmp.Compare(a.Port, b.Port)
   328  	})
   329  }
   330  
   331  // greatest common divisor of x and y
   332  func gcd(x, y int) int {
   333  	var tmp int
   334  	for {
   335  		tmp = x % y
   336  		if tmp > 0 {
   337  			x = y
   338  			y = tmp
   339  		} else {
   340  			return y
   341  		}
   342  	}
   343  }
   344  
   345  // least common multiple of x and y
   346  func lcm(x, y int) int {
   347  	return x * y / gcd(x, y)
   348  }
   349  
   350  // NetworkGatewaySet is a helper to manage a set of NetworkGateway instances.
   351  type NetworkGatewaySet = sets.Set[NetworkGateway]
   352  
   353  var (
   354  	// MinGatewayTTL is exported for testing
   355  	MinGatewayTTL = 30 * time.Second
   356  
   357  	// https://github.com/coredns/coredns/blob/v1.10.1/plugin/pkg/dnsutil/ttl.go#L51
   358  	MaxGatewayTTL = 1 * time.Hour
   359  )
   360  
   361  type networkGatewayNameCache struct {
   362  	NetworkGatewaysHandler
   363  	client *dnsClient
   364  
   365  	sync.Mutex
   366  	cache map[string]nameCacheEntry
   367  }
   368  
   369  type nameCacheEntry struct {
   370  	value  []string
   371  	expiry time.Time
   372  	timer  *time.Timer
   373  }
   374  
   375  func newNetworkGatewayNameCache() (*networkGatewayNameCache, error) {
   376  	c, err := newClient()
   377  	if err != nil {
   378  		return nil, err
   379  	}
   380  	return newNetworkGatewayNameCacheWithClient(c), nil
   381  }
   382  
   383  // newNetworkGatewayNameCacheWithClient exported for test
   384  func newNetworkGatewayNameCacheWithClient(c *dnsClient) *networkGatewayNameCache {
   385  	return &networkGatewayNameCache{client: c, cache: map[string]nameCacheEntry{}}
   386  }
   387  
   388  // Resolve takes a list of hostnames and returns a map of names to addresses
   389  func (n *networkGatewayNameCache) Resolve(names sets.String) map[string][]string {
   390  	n.Lock()
   391  	defer n.Unlock()
   392  
   393  	n.cleanupWatches(names)
   394  
   395  	out := make(map[string][]string, len(names))
   396  	for name := range names {
   397  		out[name] = n.resolveFromCache(name)
   398  	}
   399  
   400  	return out
   401  }
   402  
   403  // cleanupWatches cancels any scheduled re-resolve for names we no longer care about
   404  func (n *networkGatewayNameCache) cleanupWatches(names sets.String) {
   405  	for name, entry := range n.cache {
   406  		if names.Contains(name) {
   407  			continue
   408  		}
   409  		entry.timer.Stop()
   410  		delete(n.cache, name)
   411  	}
   412  }
   413  
   414  func (n *networkGatewayNameCache) resolveFromCache(name string) []string {
   415  	if entry, ok := n.cache[name]; ok && entry.expiry.After(time.Now()) {
   416  		return entry.value
   417  	}
   418  	// ideally this will not happen more than once for each name and the cache auto-updates in the background
   419  	// even if it does, this happens on the SotW ingestion path (kube or meshnetworks changes) and not xds push path.
   420  	return n.resolveAndCache(name)
   421  }
   422  
   423  func (n *networkGatewayNameCache) resolveAndCache(name string) []string {
   424  	entry, ok := n.cache[name]
   425  	if ok {
   426  		entry.timer.Stop()
   427  	}
   428  	delete(n.cache, name)
   429  	addrs, ttl, err := n.resolve(name)
   430  	// avoid excessive pushes due to small TTL
   431  	if ttl < MinGatewayTTL {
   432  		ttl = MinGatewayTTL
   433  	}
   434  	expiry := time.Now().Add(ttl)
   435  	if err != nil {
   436  		// gracefully retain old addresses in case the DNS server is unavailable
   437  		addrs = entry.value
   438  	}
   439  	n.cache[name] = nameCacheEntry{
   440  		value:  addrs,
   441  		expiry: expiry,
   442  		// TTL expires, try to refresh TODO should this be < ttl?
   443  		timer: time.AfterFunc(ttl, n.refreshAndNotify(name)),
   444  	}
   445  
   446  	return addrs
   447  }
   448  
   449  // refreshAndNotify is triggered via time.AfterFunc and will recursively schedule itself that way until timer is cleaned
   450  // up via cleanupWatches.
   451  func (n *networkGatewayNameCache) refreshAndNotify(name string) func() {
   452  	return func() {
   453  		log.Debugf("network gateways: refreshing DNS for %s", name)
   454  		n.Lock()
   455  		old := n.cache[name]
   456  		addrs := n.resolveAndCache(name)
   457  		n.Unlock()
   458  
   459  		if !slices.Equal(old.value, addrs) {
   460  			log.Debugf("network gateways: DNS for %s changed: %v -> %v", name, old.value, addrs)
   461  			n.NotifyGatewayHandlers()
   462  		}
   463  	}
   464  }
   465  
   466  // resolve gets all the A and AAAA records for the given name
   467  func (n *networkGatewayNameCache) resolve(name string) ([]string, time.Duration, error) {
   468  	ttl := MaxGatewayTTL
   469  	var out []string
   470  	errs := istiomultierror.New()
   471  
   472  	var mu sync.Mutex
   473  	var wg sync.WaitGroup
   474  	doResolve := func(dnsType uint16) {
   475  		defer wg.Done()
   476  
   477  		res := n.client.Query(new(dns.Msg).SetQuestion(dns.Fqdn(name), dnsType))
   478  
   479  		mu.Lock()
   480  		defer mu.Unlock()
   481  		if res.Rcode == dns.RcodeServerFailure {
   482  			errs = multierror.Append(errs, fmt.Errorf("upstream dns failure, qtype: %v", dnsType))
   483  			return
   484  		}
   485  		for _, rr := range res.Answer {
   486  			switch record := rr.(type) {
   487  			case *dns.A:
   488  				out = append(out, record.A.String())
   489  			case *dns.AAAA:
   490  				out = append(out, record.AAAA.String())
   491  			}
   492  		}
   493  		if nextTTL := minimalTTL(res); nextTTL < ttl {
   494  			ttl = nextTTL
   495  		}
   496  	}
   497  
   498  	wg.Add(2)
   499  	go doResolve(dns.TypeA)
   500  	go doResolve(dns.TypeAAAA)
   501  	wg.Wait()
   502  
   503  	sort.Strings(out)
   504  	if errs.Len() == 2 {
   505  		// return error only if all requests are failed
   506  		return out, MinGatewayTTL, errs
   507  	}
   508  	return out, ttl, nil
   509  }
   510  
   511  // https://github.com/coredns/coredns/blob/v1.10.1/plugin/pkg/dnsutil/ttl.go
   512  func minimalTTL(m *dns.Msg) time.Duration {
   513  	// No records or OPT is the only record, return a short ttl as a fail safe.
   514  	if len(m.Answer)+len(m.Ns) == 0 &&
   515  		(len(m.Extra) == 0 || (len(m.Extra) == 1 && m.Extra[0].Header().Rrtype == dns.TypeOPT)) {
   516  		return MinGatewayTTL
   517  	}
   518  
   519  	minTTL := MaxGatewayTTL
   520  	for _, r := range m.Answer {
   521  		if r.Header().Ttl < uint32(minTTL.Seconds()) {
   522  			minTTL = time.Duration(r.Header().Ttl) * time.Second
   523  		}
   524  	}
   525  	for _, r := range m.Ns {
   526  		if r.Header().Ttl < uint32(minTTL.Seconds()) {
   527  			minTTL = time.Duration(r.Header().Ttl) * time.Second
   528  		}
   529  	}
   530  
   531  	for _, r := range m.Extra {
   532  		if r.Header().Rrtype == dns.TypeOPT {
   533  			// OPT records use TTL field for extended rcode and flags
   534  			continue
   535  		}
   536  		if r.Header().Ttl < uint32(minTTL.Seconds()) {
   537  			minTTL = time.Duration(r.Header().Ttl) * time.Second
   538  		}
   539  	}
   540  	return minTTL
   541  }
   542  
   543  // TODO share code with pkg/dns
   544  type dnsClient struct {
   545  	*dns.Client
   546  	resolvConfServers []string
   547  }
   548  
   549  // NetworkGatewayTestDNSServers if set will ignore resolv.conf and use the given DNS servers for tests.
   550  var NetworkGatewayTestDNSServers []string
   551  
   552  func newClient() (*dnsClient, error) {
   553  	servers := NetworkGatewayTestDNSServers
   554  	if len(servers) == 0 {
   555  		dnsConfig, err := dns.ClientConfigFromFile("/etc/resolv.conf")
   556  		if err != nil {
   557  			return nil, err
   558  		}
   559  		if dnsConfig != nil {
   560  			for _, s := range dnsConfig.Servers {
   561  				servers = append(servers, net.JoinHostPort(s, dnsConfig.Port))
   562  			}
   563  		}
   564  		// TODO take search namespaces into account
   565  		// TODO what about /etc/hosts?
   566  	}
   567  
   568  	c := &dnsClient{
   569  		Client: &dns.Client{
   570  			DialTimeout:  5 * time.Second,
   571  			ReadTimeout:  5 * time.Second,
   572  			WriteTimeout: 5 * time.Second,
   573  		},
   574  	}
   575  	c.resolvConfServers = append(c.resolvConfServers, servers...)
   576  	return c, nil
   577  }
   578  
   579  // for more informative logging of dns errors
   580  func getReqNames(req *dns.Msg) []string {
   581  	names := make([]string, 0, 1)
   582  	for _, qq := range req.Question {
   583  		names = append(names, qq.Name)
   584  	}
   585  	return names
   586  }
   587  
   588  func (c *dnsClient) Query(req *dns.Msg) *dns.Msg {
   589  	var response *dns.Msg
   590  	for _, upstream := range c.resolvConfServers {
   591  		cResponse, _, err := c.Exchange(req, upstream)
   592  		rcode := dns.RcodeServerFailure
   593  		if err == nil && cResponse != nil {
   594  			rcode = cResponse.Rcode
   595  		}
   596  		if rcode == dns.RcodeServerFailure {
   597  			// RcodeServerFailure means the upstream cannot serve the request
   598  			// https://github.com/coredns/coredns/blob/v1.10.1/plugin/forward/forward.go#L193
   599  			log.Infof("upstream dns failure: %v: %v: %v", upstream, getReqNames(req), err)
   600  			continue
   601  		}
   602  		response = cResponse
   603  		if rcode == dns.RcodeSuccess {
   604  			break
   605  		}
   606  		codeString := dns.RcodeToString[rcode]
   607  		log.Debugf("upstream dns error: %v: %v: %v", upstream, getReqNames(req), codeString)
   608  	}
   609  	if response == nil {
   610  		response = new(dns.Msg)
   611  		response.SetReply(req)
   612  		response.Rcode = dns.RcodeServerFailure
   613  	}
   614  	return response
   615  }