istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/model/network.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package model 16 17 import ( 18 "cmp" 19 "fmt" 20 "net" 21 "sort" 22 "sync" 23 "time" 24 25 "github.com/hashicorp/go-multierror" 26 "github.com/miekg/dns" 27 28 "istio.io/istio/pilot/pkg/features" 29 "istio.io/istio/pkg/cluster" 30 "istio.io/istio/pkg/network" 31 "istio.io/istio/pkg/slices" 32 "istio.io/istio/pkg/util/istiomultierror" 33 netutil "istio.io/istio/pkg/util/net" 34 "istio.io/istio/pkg/util/sets" 35 ) 36 37 // NetworkGateway is the gateway of a network 38 type NetworkGateway struct { 39 // Network is the ID of the network where this Gateway resides. 40 Network network.ID 41 // Cluster is the ID of the k8s cluster where this Gateway resides. 42 Cluster cluster.ID 43 // gateway ip address 44 Addr string 45 // gateway port 46 Port uint32 47 } 48 49 type NetworkGatewaysWatcher interface { 50 NetworkGateways() []NetworkGateway 51 AppendNetworkGatewayHandler(h func()) 52 } 53 54 // NetworkGatewaysHandler can be embedded to easily implement NetworkGatewaysWatcher. 55 type NetworkGatewaysHandler struct { 56 handlers []func() 57 } 58 59 func (ngh *NetworkGatewaysHandler) AppendNetworkGatewayHandler(h func()) { 60 ngh.handlers = append(ngh.handlers, h) 61 } 62 63 func (ngh *NetworkGatewaysHandler) NotifyGatewayHandlers() { 64 for _, handler := range ngh.handlers { 65 handler() 66 } 67 } 68 69 type NetworkGateways struct { 70 mu *sync.RWMutex 71 // least common multiple of gateway number of {per network, per cluster} 72 lcm uint32 73 byNetwork map[network.ID][]NetworkGateway 74 byNetworkAndCluster map[networkAndCluster][]NetworkGateway 75 } 76 77 // NetworkManager provides gateway details for accessing remote networks. 78 type NetworkManager struct { 79 env *Environment 80 // exported for test 81 NameCache *networkGatewayNameCache 82 xdsUpdater XDSUpdater 83 84 // just to ensure NetworkGateways and Unresolved are updated together 85 mu sync.RWMutex 86 // embedded NetworkGateways only includes gateways with IPs 87 // hostnames are resolved in control plane (or filtered out if feature is disabled) 88 *NetworkGateways 89 // includes all gateways with no DNS resolution or filtering, regardless of feature flags 90 Unresolved *NetworkGateways 91 } 92 93 // NewNetworkManager creates a new NetworkManager from the Environment by merging 94 // together the MeshNetworks and ServiceRegistry-specific gateways. 95 func NewNetworkManager(env *Environment, xdsUpdater XDSUpdater) (*NetworkManager, error) { 96 nameCache, err := newNetworkGatewayNameCache() 97 if err != nil { 98 return nil, err 99 } 100 mgr := &NetworkManager{ 101 env: env, 102 NameCache: nameCache, 103 xdsUpdater: xdsUpdater, 104 NetworkGateways: &NetworkGateways{}, 105 Unresolved: &NetworkGateways{}, 106 } 107 108 // share lock with root NetworkManager 109 mgr.NetworkGateways.mu = &mgr.mu 110 mgr.Unresolved.mu = &mgr.mu 111 112 env.AddNetworksHandler(mgr.reloadGateways) 113 // register to per registry, will be called when gateway service changed 114 env.AppendNetworkGatewayHandler(mgr.reloadGateways) 115 nameCache.AppendNetworkGatewayHandler(mgr.reloadGateways) 116 mgr.reload() 117 return mgr, nil 118 } 119 120 // reloadGateways reloads NetworkGateways and triggers a push if they change. 121 func (mgr *NetworkManager) reloadGateways() { 122 changed := mgr.reload() 123 124 if changed && mgr.xdsUpdater != nil { 125 log.Infof("gateways changed, triggering push") 126 mgr.xdsUpdater.ConfigUpdate(&PushRequest{Full: true, Reason: NewReasonStats(NetworksTrigger)}) 127 } 128 } 129 130 func (mgr *NetworkManager) reload() bool { 131 mgr.mu.Lock() 132 defer mgr.mu.Unlock() 133 log.Infof("reloading network gateways") 134 135 // Generate a snapshot of the state of gateways by merging the contents of 136 // MeshNetworks and the ServiceRegistries. 137 138 // Store all gateways in a set initially to eliminate duplicates. 139 gatewaySet := make(NetworkGatewaySet) 140 141 // First, load gateways from the static MeshNetworks config. 142 meshNetworks := mgr.env.NetworksWatcher.Networks() 143 if meshNetworks != nil { 144 for nw, networkConf := range meshNetworks.Networks { 145 for _, gw := range networkConf.Gateways { 146 if gw.GetAddress() == "" { 147 // registryServiceName addresses will be populated via kube service registry 148 continue 149 } 150 gatewaySet.Insert(NetworkGateway{ 151 Cluster: "", /* TODO(nmittler): Add Cluster to the API */ 152 Network: network.ID(nw), 153 Addr: gw.GetAddress(), 154 Port: gw.Port, 155 }) 156 } 157 } 158 } 159 160 // Second, load registry-specific gateways. 161 // - the internal map of label gateways - these get deleted if the service is deleted, updated if the ip changes etc. 162 // - the computed map from meshNetworks (triggered by reloadNetworkLookup, the ported logic from getGatewayAddresses) 163 gatewaySet.InsertAll(mgr.env.NetworkGateways()...) 164 resolvedGatewaySet := mgr.resolveHostnameGateways(gatewaySet) 165 166 return mgr.NetworkGateways.update(resolvedGatewaySet) || mgr.Unresolved.update(gatewaySet) 167 } 168 169 // update calls should with the lock held 170 func (gws *NetworkGateways) update(gatewaySet NetworkGatewaySet) bool { 171 if gatewaySet.Equals(sets.New(gws.allGateways()...)) { 172 return false 173 } 174 175 // index by network or network+cluster for quick lookup 176 byNetwork := make(map[network.ID][]NetworkGateway) 177 byNetworkAndCluster := make(map[networkAndCluster][]NetworkGateway) 178 for gw := range gatewaySet { 179 byNetwork[gw.Network] = append(byNetwork[gw.Network], gw) 180 nc := networkAndClusterForGateway(&gw) 181 byNetworkAndCluster[nc] = append(byNetworkAndCluster[nc], gw) 182 } 183 184 var gwNum []int 185 // Sort the gateways in byNetwork, and also calculate the max number 186 // of gateways per network. 187 for k, gws := range byNetwork { 188 byNetwork[k] = SortGateways(gws) 189 gwNum = append(gwNum, len(gws)) 190 } 191 192 // Sort the gateways in byNetworkAndCluster. 193 for k, gws := range byNetworkAndCluster { 194 byNetworkAndCluster[k] = SortGateways(gws) 195 gwNum = append(gwNum, len(gws)) 196 } 197 198 lcmVal := 1 199 // calculate lcm 200 for _, num := range gwNum { 201 lcmVal = lcm(lcmVal, num) 202 } 203 204 gws.lcm = uint32(lcmVal) 205 gws.byNetwork = byNetwork 206 gws.byNetworkAndCluster = byNetworkAndCluster 207 208 return true 209 } 210 211 // resolveHostnameGateway either resolves or removes gateways that use a non-IP Address 212 func (mgr *NetworkManager) resolveHostnameGateways(gatewaySet NetworkGatewaySet) NetworkGatewaySet { 213 resolvedGatewaySet := make(NetworkGatewaySet, len(gatewaySet)) 214 // filter the list of gateways to resolve 215 hostnameGateways := map[string][]NetworkGateway{} 216 names := sets.New[string]() 217 for gw := range gatewaySet { 218 if netutil.IsValidIPAddress(gw.Addr) { 219 resolvedGatewaySet.Insert(gw) 220 continue 221 } 222 if !features.ResolveHostnameGateways { 223 log.Warnf("Failed parsing gateway address %s from Service Registry. "+ 224 "Set RESOLVE_HOSTNAME_GATEWAYS on istiod to enable resolving hostnames in the control plane.", 225 gw.Addr) 226 continue 227 } 228 hostnameGateways[gw.Addr] = append(hostnameGateways[gw.Addr], gw) 229 names.Insert(gw.Addr) 230 } 231 232 if !features.ResolveHostnameGateways { 233 return resolvedGatewaySet 234 } 235 // resolve each hostname 236 for host, addrs := range mgr.NameCache.Resolve(names) { 237 gwsForHost := hostnameGateways[host] 238 if len(addrs) == 0 { 239 log.Warnf("could not resolve hostname %q for %d gateways", host, len(gwsForHost)) 240 } 241 // expand each resolved address into a NetworkGateway 242 for _, gw := range gwsForHost { 243 for _, resolved := range addrs { 244 // copy the base gateway to preserve the port/network, but update with the resolved IP 245 resolvedGw := gw 246 resolvedGw.Addr = resolved 247 resolvedGatewaySet.Insert(resolvedGw) 248 } 249 } 250 } 251 return resolvedGatewaySet 252 } 253 254 func (gws *NetworkGateways) IsMultiNetworkEnabled() bool { 255 if gws == nil { 256 return false 257 } 258 gws.mu.RLock() 259 defer gws.mu.RUnlock() 260 return len(gws.byNetwork) > 0 261 } 262 263 // GetLBWeightScaleFactor returns the least common multiple of the number of gateways per network. 264 func (gws *NetworkGateways) GetLBWeightScaleFactor() uint32 { 265 gws.mu.RLock() 266 defer gws.mu.RUnlock() 267 return gws.lcm 268 } 269 270 func (gws *NetworkGateways) AllGateways() []NetworkGateway { 271 gws.mu.RLock() 272 defer gws.mu.RUnlock() 273 return gws.allGateways() 274 } 275 276 func (gws *NetworkGateways) allGateways() []NetworkGateway { 277 if gws.byNetwork == nil { 278 return nil 279 } 280 out := make([]NetworkGateway, 0) 281 for _, gateways := range gws.byNetwork { 282 out = append(out, gateways...) 283 } 284 return SortGateways(out) 285 } 286 287 func (gws *NetworkGateways) GatewaysForNetwork(nw network.ID) []NetworkGateway { 288 gws.mu.RLock() 289 defer gws.mu.RUnlock() 290 if gws.byNetwork == nil { 291 return nil 292 } 293 return gws.byNetwork[nw] 294 } 295 296 func (gws *NetworkGateways) GatewaysForNetworkAndCluster(nw network.ID, c cluster.ID) []NetworkGateway { 297 gws.mu.RLock() 298 defer gws.mu.RUnlock() 299 if gws.byNetworkAndCluster == nil { 300 return nil 301 } 302 return gws.byNetworkAndCluster[networkAndClusterFor(nw, c)] 303 } 304 305 type networkAndCluster struct { 306 network network.ID 307 cluster cluster.ID 308 } 309 310 func networkAndClusterForGateway(g *NetworkGateway) networkAndCluster { 311 return networkAndClusterFor(g.Network, g.Cluster) 312 } 313 314 func networkAndClusterFor(nw network.ID, c cluster.ID) networkAndCluster { 315 return networkAndCluster{ 316 network: nw, 317 cluster: c, 318 } 319 } 320 321 // SortGateways sorts the array so that it's stable. 322 func SortGateways(gws []NetworkGateway) []NetworkGateway { 323 return slices.SortFunc(gws, func(a, b NetworkGateway) int { 324 if r := cmp.Compare(a.Addr, b.Addr); r != 0 { 325 return r 326 } 327 return cmp.Compare(a.Port, b.Port) 328 }) 329 } 330 331 // greatest common divisor of x and y 332 func gcd(x, y int) int { 333 var tmp int 334 for { 335 tmp = x % y 336 if tmp > 0 { 337 x = y 338 y = tmp 339 } else { 340 return y 341 } 342 } 343 } 344 345 // least common multiple of x and y 346 func lcm(x, y int) int { 347 return x * y / gcd(x, y) 348 } 349 350 // NetworkGatewaySet is a helper to manage a set of NetworkGateway instances. 351 type NetworkGatewaySet = sets.Set[NetworkGateway] 352 353 var ( 354 // MinGatewayTTL is exported for testing 355 MinGatewayTTL = 30 * time.Second 356 357 // https://github.com/coredns/coredns/blob/v1.10.1/plugin/pkg/dnsutil/ttl.go#L51 358 MaxGatewayTTL = 1 * time.Hour 359 ) 360 361 type networkGatewayNameCache struct { 362 NetworkGatewaysHandler 363 client *dnsClient 364 365 sync.Mutex 366 cache map[string]nameCacheEntry 367 } 368 369 type nameCacheEntry struct { 370 value []string 371 expiry time.Time 372 timer *time.Timer 373 } 374 375 func newNetworkGatewayNameCache() (*networkGatewayNameCache, error) { 376 c, err := newClient() 377 if err != nil { 378 return nil, err 379 } 380 return newNetworkGatewayNameCacheWithClient(c), nil 381 } 382 383 // newNetworkGatewayNameCacheWithClient exported for test 384 func newNetworkGatewayNameCacheWithClient(c *dnsClient) *networkGatewayNameCache { 385 return &networkGatewayNameCache{client: c, cache: map[string]nameCacheEntry{}} 386 } 387 388 // Resolve takes a list of hostnames and returns a map of names to addresses 389 func (n *networkGatewayNameCache) Resolve(names sets.String) map[string][]string { 390 n.Lock() 391 defer n.Unlock() 392 393 n.cleanupWatches(names) 394 395 out := make(map[string][]string, len(names)) 396 for name := range names { 397 out[name] = n.resolveFromCache(name) 398 } 399 400 return out 401 } 402 403 // cleanupWatches cancels any scheduled re-resolve for names we no longer care about 404 func (n *networkGatewayNameCache) cleanupWatches(names sets.String) { 405 for name, entry := range n.cache { 406 if names.Contains(name) { 407 continue 408 } 409 entry.timer.Stop() 410 delete(n.cache, name) 411 } 412 } 413 414 func (n *networkGatewayNameCache) resolveFromCache(name string) []string { 415 if entry, ok := n.cache[name]; ok && entry.expiry.After(time.Now()) { 416 return entry.value 417 } 418 // ideally this will not happen more than once for each name and the cache auto-updates in the background 419 // even if it does, this happens on the SotW ingestion path (kube or meshnetworks changes) and not xds push path. 420 return n.resolveAndCache(name) 421 } 422 423 func (n *networkGatewayNameCache) resolveAndCache(name string) []string { 424 entry, ok := n.cache[name] 425 if ok { 426 entry.timer.Stop() 427 } 428 delete(n.cache, name) 429 addrs, ttl, err := n.resolve(name) 430 // avoid excessive pushes due to small TTL 431 if ttl < MinGatewayTTL { 432 ttl = MinGatewayTTL 433 } 434 expiry := time.Now().Add(ttl) 435 if err != nil { 436 // gracefully retain old addresses in case the DNS server is unavailable 437 addrs = entry.value 438 } 439 n.cache[name] = nameCacheEntry{ 440 value: addrs, 441 expiry: expiry, 442 // TTL expires, try to refresh TODO should this be < ttl? 443 timer: time.AfterFunc(ttl, n.refreshAndNotify(name)), 444 } 445 446 return addrs 447 } 448 449 // refreshAndNotify is triggered via time.AfterFunc and will recursively schedule itself that way until timer is cleaned 450 // up via cleanupWatches. 451 func (n *networkGatewayNameCache) refreshAndNotify(name string) func() { 452 return func() { 453 log.Debugf("network gateways: refreshing DNS for %s", name) 454 n.Lock() 455 old := n.cache[name] 456 addrs := n.resolveAndCache(name) 457 n.Unlock() 458 459 if !slices.Equal(old.value, addrs) { 460 log.Debugf("network gateways: DNS for %s changed: %v -> %v", name, old.value, addrs) 461 n.NotifyGatewayHandlers() 462 } 463 } 464 } 465 466 // resolve gets all the A and AAAA records for the given name 467 func (n *networkGatewayNameCache) resolve(name string) ([]string, time.Duration, error) { 468 ttl := MaxGatewayTTL 469 var out []string 470 errs := istiomultierror.New() 471 472 var mu sync.Mutex 473 var wg sync.WaitGroup 474 doResolve := func(dnsType uint16) { 475 defer wg.Done() 476 477 res := n.client.Query(new(dns.Msg).SetQuestion(dns.Fqdn(name), dnsType)) 478 479 mu.Lock() 480 defer mu.Unlock() 481 if res.Rcode == dns.RcodeServerFailure { 482 errs = multierror.Append(errs, fmt.Errorf("upstream dns failure, qtype: %v", dnsType)) 483 return 484 } 485 for _, rr := range res.Answer { 486 switch record := rr.(type) { 487 case *dns.A: 488 out = append(out, record.A.String()) 489 case *dns.AAAA: 490 out = append(out, record.AAAA.String()) 491 } 492 } 493 if nextTTL := minimalTTL(res); nextTTL < ttl { 494 ttl = nextTTL 495 } 496 } 497 498 wg.Add(2) 499 go doResolve(dns.TypeA) 500 go doResolve(dns.TypeAAAA) 501 wg.Wait() 502 503 sort.Strings(out) 504 if errs.Len() == 2 { 505 // return error only if all requests are failed 506 return out, MinGatewayTTL, errs 507 } 508 return out, ttl, nil 509 } 510 511 // https://github.com/coredns/coredns/blob/v1.10.1/plugin/pkg/dnsutil/ttl.go 512 func minimalTTL(m *dns.Msg) time.Duration { 513 // No records or OPT is the only record, return a short ttl as a fail safe. 514 if len(m.Answer)+len(m.Ns) == 0 && 515 (len(m.Extra) == 0 || (len(m.Extra) == 1 && m.Extra[0].Header().Rrtype == dns.TypeOPT)) { 516 return MinGatewayTTL 517 } 518 519 minTTL := MaxGatewayTTL 520 for _, r := range m.Answer { 521 if r.Header().Ttl < uint32(minTTL.Seconds()) { 522 minTTL = time.Duration(r.Header().Ttl) * time.Second 523 } 524 } 525 for _, r := range m.Ns { 526 if r.Header().Ttl < uint32(minTTL.Seconds()) { 527 minTTL = time.Duration(r.Header().Ttl) * time.Second 528 } 529 } 530 531 for _, r := range m.Extra { 532 if r.Header().Rrtype == dns.TypeOPT { 533 // OPT records use TTL field for extended rcode and flags 534 continue 535 } 536 if r.Header().Ttl < uint32(minTTL.Seconds()) { 537 minTTL = time.Duration(r.Header().Ttl) * time.Second 538 } 539 } 540 return minTTL 541 } 542 543 // TODO share code with pkg/dns 544 type dnsClient struct { 545 *dns.Client 546 resolvConfServers []string 547 } 548 549 // NetworkGatewayTestDNSServers if set will ignore resolv.conf and use the given DNS servers for tests. 550 var NetworkGatewayTestDNSServers []string 551 552 func newClient() (*dnsClient, error) { 553 servers := NetworkGatewayTestDNSServers 554 if len(servers) == 0 { 555 dnsConfig, err := dns.ClientConfigFromFile("/etc/resolv.conf") 556 if err != nil { 557 return nil, err 558 } 559 if dnsConfig != nil { 560 for _, s := range dnsConfig.Servers { 561 servers = append(servers, net.JoinHostPort(s, dnsConfig.Port)) 562 } 563 } 564 // TODO take search namespaces into account 565 // TODO what about /etc/hosts? 566 } 567 568 c := &dnsClient{ 569 Client: &dns.Client{ 570 DialTimeout: 5 * time.Second, 571 ReadTimeout: 5 * time.Second, 572 WriteTimeout: 5 * time.Second, 573 }, 574 } 575 c.resolvConfServers = append(c.resolvConfServers, servers...) 576 return c, nil 577 } 578 579 // for more informative logging of dns errors 580 func getReqNames(req *dns.Msg) []string { 581 names := make([]string, 0, 1) 582 for _, qq := range req.Question { 583 names = append(names, qq.Name) 584 } 585 return names 586 } 587 588 func (c *dnsClient) Query(req *dns.Msg) *dns.Msg { 589 var response *dns.Msg 590 for _, upstream := range c.resolvConfServers { 591 cResponse, _, err := c.Exchange(req, upstream) 592 rcode := dns.RcodeServerFailure 593 if err == nil && cResponse != nil { 594 rcode = cResponse.Rcode 595 } 596 if rcode == dns.RcodeServerFailure { 597 // RcodeServerFailure means the upstream cannot serve the request 598 // https://github.com/coredns/coredns/blob/v1.10.1/plugin/forward/forward.go#L193 599 log.Infof("upstream dns failure: %v: %v: %v", upstream, getReqNames(req), err) 600 continue 601 } 602 response = cResponse 603 if rcode == dns.RcodeSuccess { 604 break 605 } 606 codeString := dns.RcodeToString[rcode] 607 log.Debugf("upstream dns error: %v: %v: %v", upstream, getReqNames(req), codeString) 608 } 609 if response == nil { 610 response = new(dns.Msg) 611 response.SetReply(req) 612 response.Rcode = dns.RcodeServerFailure 613 } 614 return response 615 }