// Source: google.golang.org/grpc@v1.72.2/xds/internal/resolver/xds_resolver.go

/*
 * Copyright 2019 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package resolver implements the xds resolver, that does LDS and RDS to find
// the cluster to use.
package resolver

import (
	"context"
	"fmt"
	rand "math/rand/v2"
	"sync/atomic"

	estats "google.golang.org/grpc/experimental/stats"
	"google.golang.org/grpc/internal"
	"google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/grpcsync"
	"google.golang.org/grpc/internal/pretty"
	iresolver "google.golang.org/grpc/internal/resolver"
	"google.golang.org/grpc/internal/wrr"
	"google.golang.org/grpc/internal/xds/bootstrap"
	"google.golang.org/grpc/resolver"
	rinternal "google.golang.org/grpc/xds/internal/resolver/internal"
	"google.golang.org/grpc/xds/internal/xdsclient"
	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
)

// Scheme is the xDS resolver's scheme.
//
// TODO(easwars): Rename this package as xdsresolver so that this is accessed as
// xdsresolver.Scheme
const Scheme = "xds"

// newBuilderWithConfigForTesting creates a new xds resolver builder using a
// specific xds bootstrap config, so tests can use multiple xDS clients in
// different ClientConns at the same time. The builder creates a new pool with
// the provided config and a new xDS client in that pool.
52 func newBuilderWithConfigForTesting(config []byte) (resolver.Builder, error) { 53 return &xdsResolverBuilder{ 54 newXDSClient: func(name string, mr estats.MetricsRecorder) (xdsclient.XDSClient, func(), error) { 55 config, err := bootstrap.NewConfigFromContents(config) 56 if err != nil { 57 return nil, nil, err 58 } 59 pool := xdsclient.NewPool(config) 60 return pool.NewClientForTesting(xdsclient.OptionsForTesting{ 61 Name: name, 62 MetricsRecorder: mr, 63 }) 64 }, 65 }, nil 66 } 67 68 // newBuilderWithPoolForTesting creates a new xds resolver builder using the 69 // specific xds client pool, so that tests have complete control over the exact 70 // specific xds client pool being used. 71 func newBuilderWithPoolForTesting(pool *xdsclient.Pool) (resolver.Builder, error) { 72 return &xdsResolverBuilder{ 73 newXDSClient: func(name string, mr estats.MetricsRecorder) (xdsclient.XDSClient, func(), error) { 74 return pool.NewClientForTesting(xdsclient.OptionsForTesting{ 75 Name: name, 76 MetricsRecorder: mr, 77 }) 78 }, 79 }, nil 80 } 81 82 // newBuilderWithClientForTesting creates a new xds resolver builder using the 83 // specific xDS client, so that tests have complete control over the exact 84 // specific xDS client being used. 85 func newBuilderWithClientForTesting(client xdsclient.XDSClient) (resolver.Builder, error) { 86 return &xdsResolverBuilder{ 87 newXDSClient: func(string, estats.MetricsRecorder) (xdsclient.XDSClient, func(), error) { 88 // Returning an empty close func here means that the responsibility 89 // of closing the client lies with the caller. 
90 return client, func() {}, nil 91 }, 92 }, nil 93 } 94 95 func init() { 96 resolver.Register(&xdsResolverBuilder{}) 97 internal.NewXDSResolverWithConfigForTesting = newBuilderWithConfigForTesting 98 internal.NewXDSResolverWithPoolForTesting = newBuilderWithPoolForTesting 99 internal.NewXDSResolverWithClientForTesting = newBuilderWithClientForTesting 100 101 rinternal.NewWRR = wrr.NewRandom 102 rinternal.NewXDSClient = xdsclient.DefaultPool.NewClient 103 } 104 105 type xdsResolverBuilder struct { 106 newXDSClient func(string, estats.MetricsRecorder) (xdsclient.XDSClient, func(), error) 107 } 108 109 // Build helps implement the resolver.Builder interface. 110 // 111 // The xds bootstrap process is performed (and a new xDS client is built) every 112 // time an xds resolver is built. 113 func (b *xdsResolverBuilder) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOptions) (_ resolver.Resolver, retErr error) { 114 r := &xdsResolver{ 115 cc: cc, 116 activeClusters: make(map[string]*clusterInfo), 117 channelID: rand.Uint64(), 118 } 119 defer func() { 120 if retErr != nil { 121 r.Close() 122 } 123 }() 124 r.logger = prefixLogger(r) 125 r.logger.Infof("Creating resolver for target: %+v", target) 126 127 // Initialize the serializer used to synchronize the following: 128 // - updates from the xDS client. This could lead to generation of new 129 // service config if resolution is complete. 130 // - completion of an RPC to a removed cluster causing the associated ref 131 // count to become zero, resulting in generation of new service config. 132 // - stopping of a config selector that results in generation of new service 133 // config. 134 ctx, cancel := context.WithCancel(context.Background()) 135 r.serializer = grpcsync.NewCallbackSerializer(ctx) 136 r.serializerCancel = cancel 137 138 // Initialize the xDS client. 
139 newXDSClient := rinternal.NewXDSClient.(func(string, estats.MetricsRecorder) (xdsclient.XDSClient, func(), error)) 140 if b.newXDSClient != nil { 141 newXDSClient = b.newXDSClient 142 } 143 client, closeFn, err := newXDSClient(target.String(), opts.MetricsRecorder) 144 if err != nil { 145 return nil, fmt.Errorf("xds: failed to create xds-client: %v", err) 146 } 147 r.xdsClient = client 148 r.xdsClientClose = closeFn 149 150 // Determine the listener resource name and start a watcher for it. 151 template, err := r.sanityChecksOnBootstrapConfig(target, opts, r.xdsClient) 152 if err != nil { 153 return nil, err 154 } 155 r.dataplaneAuthority = opts.Authority 156 r.ldsResourceName = bootstrap.PopulateResourceTemplate(template, target.Endpoint()) 157 r.listenerWatcher = newListenerWatcher(r.ldsResourceName, r) 158 return r, nil 159 } 160 161 // Performs the following sanity checks: 162 // - Verifies that the bootstrap configuration is not empty. 163 // - Verifies that if xDS credentials are specified by the user, the 164 // bootstrap configuration contains certificate providers. 165 // - Verifies that if the provided dial target contains an authority, the 166 // bootstrap configuration contains server config for that authority. 167 // 168 // Returns the listener resource name template to use. If any of the above 169 // validations fail, a non-nil error is returned. 170 func (r *xdsResolver) sanityChecksOnBootstrapConfig(target resolver.Target, _ resolver.BuildOptions, client xdsclient.XDSClient) (string, error) { 171 bootstrapConfig := client.BootstrapConfig() 172 if bootstrapConfig == nil { 173 // This is never expected to happen after a successful xDS client 174 // creation. Defensive programming. 
175 return "", fmt.Errorf("xds: bootstrap configuration is empty") 176 } 177 178 // Find the client listener template to use from the bootstrap config: 179 // - If authority is not set in the target, use the top level template 180 // - If authority is set, use the template from the authority map. 181 template := bootstrapConfig.ClientDefaultListenerResourceNameTemplate() 182 if authority := target.URL.Host; authority != "" { 183 authorities := bootstrapConfig.Authorities() 184 if authorities == nil { 185 return "", fmt.Errorf("xds: authority %q specified in dial target %q is not found in the bootstrap file", authority, target) 186 } 187 a := authorities[authority] 188 if a == nil { 189 return "", fmt.Errorf("xds: authority %q specified in dial target %q is not found in the bootstrap file", authority, target) 190 } 191 if a.ClientListenerResourceNameTemplate != "" { 192 // This check will never be false, because 193 // ClientListenerResourceNameTemplate is required to start with 194 // xdstp://, and has a default value (not an empty string) if unset. 195 template = a.ClientListenerResourceNameTemplate 196 } 197 } 198 return template, nil 199 } 200 201 // Name helps implement the resolver.Builder interface. 202 func (*xdsResolverBuilder) Scheme() string { 203 return Scheme 204 } 205 206 // xdsResolver implements the resolver.Resolver interface. 207 // 208 // It registers a watcher for ServiceConfig updates with the xdsClient object 209 // (which performs LDS/RDS queries for the same), and passes the received 210 // updates to the ClientConn. 211 type xdsResolver struct { 212 cc resolver.ClientConn 213 logger *grpclog.PrefixLogger 214 // The underlying xdsClient which performs all xDS requests and responses. 215 xdsClient xdsclient.XDSClient 216 xdsClientClose func() 217 // A random number which uniquely identifies the channel which owns this 218 // resolver. 
219 channelID uint64 220 221 // All methods on the xdsResolver type except for the ones invoked by gRPC, 222 // i.e ResolveNow() and Close(), are guaranteed to execute in the context of 223 // this serializer's callback. And since the serializer guarantees mutual 224 // exclusion among these callbacks, we can get by without any mutexes to 225 // access all of the below defined state. The only exception is Close(), 226 // which does access some of this shared state, but it does so after 227 // cancelling the context passed to the serializer. 228 serializer *grpcsync.CallbackSerializer 229 serializerCancel context.CancelFunc 230 231 // dataplaneAuthority is the authority used for the data plane connections, 232 // which is also used to select the VirtualHost within the xDS 233 // RouteConfiguration. This is %-encoded to match with VirtualHost Domain 234 // in xDS RouteConfiguration. 235 dataplaneAuthority string 236 237 ldsResourceName string 238 listenerWatcher *listenerWatcher 239 listenerUpdateRecvd bool 240 currentListener xdsresource.ListenerUpdate 241 242 rdsResourceName string 243 routeConfigWatcher *routeConfigWatcher 244 routeConfigUpdateRecvd bool 245 currentRouteConfig xdsresource.RouteConfigUpdate 246 currentVirtualHost *xdsresource.VirtualHost // Matched virtual host for quick access. 247 248 // activeClusters is a map from cluster name to information about the 249 // cluster that includes a ref count and load balancing configuration. 250 activeClusters map[string]*clusterInfo 251 252 curConfigSelector stoppableConfigSelector 253 } 254 255 // ResolveNow is a no-op at this point. 256 func (*xdsResolver) ResolveNow(resolver.ResolveNowOptions) {} 257 258 func (r *xdsResolver) Close() { 259 // Cancel the context passed to the serializer and wait for any scheduled 260 // callbacks to complete. Canceling the context ensures that no new 261 // callbacks will be scheduled. 
262 r.serializerCancel() 263 <-r.serializer.Done() 264 265 // Note that Close needs to check for nils even if some of them are always 266 // set in the constructor. This is because the constructor defers Close() in 267 // error cases, and the fields might not be set when the error happens. 268 269 if r.listenerWatcher != nil { 270 r.listenerWatcher.stop() 271 } 272 if r.routeConfigWatcher != nil { 273 r.routeConfigWatcher.stop() 274 } 275 if r.xdsClientClose != nil { 276 r.xdsClientClose() 277 } 278 r.logger.Infof("Shutdown") 279 } 280 281 // sendNewServiceConfig prunes active clusters, generates a new service config 282 // based on the current set of active clusters, and sends an update to the 283 // channel with that service config and the provided config selector. Returns 284 // false if an error occurs while sending an update to the channel. 285 // 286 // Only executed in the context of a serializer callback. 287 func (r *xdsResolver) sendNewServiceConfig(cs stoppableConfigSelector) bool { 288 // Delete entries from r.activeClusters with zero references; 289 // otherwise serviceConfigJSON will generate a config including 290 // them. 291 r.pruneActiveClusters() 292 293 errCS, ok := cs.(*erroringConfigSelector) 294 if ok && len(r.activeClusters) == 0 { 295 // There are no clusters and we are sending a failing configSelector. 296 // Send an empty config, which picks pick-first, with no address, and 297 // puts the ClientConn into transient failure. 298 // 299 // This call to UpdateState is expected to return ErrBadResolverState 300 // since pick_first doesn't like an update with no addresses. 301 r.cc.UpdateState(resolver.State{ServiceConfig: r.cc.ParseServiceConfig("{}")}) 302 303 // Send a resolver error to pick_first so that RPCs will fail with a 304 // more meaningful error, as opposed to one that says that pick_first 305 // received no addresses. 
306 r.cc.ReportError(errCS.err) 307 return true 308 } 309 310 sc := serviceConfigJSON(r.activeClusters) 311 r.logger.Infof("For Listener resource %q and RouteConfiguration resource %q, generated service config: %v", r.ldsResourceName, r.rdsResourceName, pretty.FormatJSON(sc)) 312 313 // Send the update to the ClientConn. 314 state := iresolver.SetConfigSelector(resolver.State{ 315 ServiceConfig: r.cc.ParseServiceConfig(string(sc)), 316 }, cs) 317 if err := r.cc.UpdateState(xdsclient.SetClient(state, r.xdsClient)); err != nil { 318 if r.logger.V(2) { 319 r.logger.Infof("Channel rejected new state: %+v with error: %v", state, err) 320 } 321 return false 322 } 323 return true 324 } 325 326 // newConfigSelector creates a new config selector using the most recently 327 // received listener and route config updates. May add entries to 328 // r.activeClusters for previously-unseen clusters. 329 // 330 // Only executed in the context of a serializer callback. 331 func (r *xdsResolver) newConfigSelector() *configSelector { 332 cs := &configSelector{ 333 r: r, 334 xdsNodeID: r.xdsClient.BootstrapConfig().Node().GetId(), 335 virtualHost: virtualHost{ 336 httpFilterConfigOverride: r.currentVirtualHost.HTTPFilterConfigOverride, 337 retryConfig: r.currentVirtualHost.RetryConfig, 338 }, 339 routes: make([]route, len(r.currentVirtualHost.Routes)), 340 clusters: make(map[string]*clusterInfo), 341 httpFilterConfig: r.currentListener.HTTPFilters, 342 } 343 344 for i, rt := range r.currentVirtualHost.Routes { 345 clusters := rinternal.NewWRR.(func() wrr.WRR)() 346 if rt.ClusterSpecifierPlugin != "" { 347 clusterName := clusterSpecifierPluginPrefix + rt.ClusterSpecifierPlugin 348 clusters.Add(&routeCluster{ 349 name: clusterName, 350 }, 1) 351 ci := r.addOrGetActiveClusterInfo(clusterName) 352 ci.cfg = xdsChildConfig{ChildPolicy: balancerConfig(r.currentRouteConfig.ClusterSpecifierPlugins[rt.ClusterSpecifierPlugin])} 353 cs.clusters[clusterName] = ci 354 } else { 355 for cluster, wc := 
range rt.WeightedClusters { 356 clusterName := clusterPrefix + cluster 357 clusters.Add(&routeCluster{ 358 name: clusterName, 359 httpFilterConfigOverride: wc.HTTPFilterConfigOverride, 360 }, int64(wc.Weight)) 361 ci := r.addOrGetActiveClusterInfo(clusterName) 362 ci.cfg = xdsChildConfig{ChildPolicy: newBalancerConfig(cdsName, cdsBalancerConfig{Cluster: cluster})} 363 cs.clusters[clusterName] = ci 364 } 365 } 366 cs.routes[i].clusters = clusters 367 368 cs.routes[i].m = xdsresource.RouteToMatcher(rt) 369 cs.routes[i].actionType = rt.ActionType 370 if rt.MaxStreamDuration == nil { 371 cs.routes[i].maxStreamDuration = r.currentListener.MaxStreamDuration 372 } else { 373 cs.routes[i].maxStreamDuration = *rt.MaxStreamDuration 374 } 375 376 cs.routes[i].httpFilterConfigOverride = rt.HTTPFilterConfigOverride 377 cs.routes[i].retryConfig = rt.RetryConfig 378 cs.routes[i].hashPolicies = rt.HashPolicies 379 } 380 381 // Account for this config selector's clusters. Do this after no further 382 // errors may occur. Note: cs.clusters are pointers to entries in 383 // activeClusters. 384 for _, ci := range cs.clusters { 385 atomic.AddInt32(&ci.refCount, 1) 386 } 387 388 return cs 389 } 390 391 // pruneActiveClusters deletes entries in r.activeClusters with zero 392 // references. 393 func (r *xdsResolver) pruneActiveClusters() { 394 for cluster, ci := range r.activeClusters { 395 if atomic.LoadInt32(&ci.refCount) == 0 { 396 delete(r.activeClusters, cluster) 397 } 398 } 399 } 400 401 func (r *xdsResolver) addOrGetActiveClusterInfo(name string) *clusterInfo { 402 ci := r.activeClusters[name] 403 if ci != nil { 404 return ci 405 } 406 407 ci = &clusterInfo{refCount: 0} 408 r.activeClusters[name] = ci 409 return ci 410 } 411 412 type clusterInfo struct { 413 // number of references to this cluster; accessed atomically 414 refCount int32 415 // cfg is the child configuration for this cluster, containing either the 416 // csp config or the cds cluster config. 
417 cfg xdsChildConfig 418 } 419 420 // Determines if the xdsResolver has received all required configuration, i.e 421 // Listener and RouteConfiguration resources, from the management server, and 422 // whether a matching virtual host was found in the RouteConfiguration resource. 423 func (r *xdsResolver) resolutionComplete() bool { 424 return r.listenerUpdateRecvd && r.routeConfigUpdateRecvd && r.currentVirtualHost != nil 425 } 426 427 // onResolutionComplete performs the following actions when resolution is 428 // complete, i.e Listener and RouteConfiguration resources have been received 429 // from the management server and a matching virtual host is found in the 430 // latter. 431 // - creates a new config selector (this involves incrementing references to 432 // clusters owned by this config selector). 433 // - stops the old config selector (this involves decrementing references to 434 // clusters owned by this config selector). 435 // - prunes active clusters and pushes a new service config to the channel. 436 // - updates the current config selector used by the resolver. 437 // 438 // Only executed in the context of a serializer callback. 439 func (r *xdsResolver) onResolutionComplete() { 440 if !r.resolutionComplete() { 441 return 442 } 443 444 cs := r.newConfigSelector() 445 if !r.sendNewServiceConfig(cs) { 446 // Channel didn't like the update we provided (unexpected); erase 447 // this config selector and ignore this update, continuing with 448 // the previous config selector. 
449 cs.stop() 450 return 451 } 452 453 if r.curConfigSelector != nil { 454 r.curConfigSelector.stop() 455 } 456 r.curConfigSelector = cs 457 } 458 459 func (r *xdsResolver) applyRouteConfigUpdate(update xdsresource.RouteConfigUpdate) { 460 matchVh := xdsresource.FindBestMatchingVirtualHost(r.dataplaneAuthority, update.VirtualHosts) 461 if matchVh == nil { 462 r.onError(fmt.Errorf("no matching virtual host found for %q", r.dataplaneAuthority)) 463 return 464 } 465 r.currentRouteConfig = update 466 r.currentVirtualHost = matchVh 467 r.routeConfigUpdateRecvd = true 468 469 r.onResolutionComplete() 470 } 471 472 // onError propagates the error up to the channel. And since this is invoked 473 // only for non resource-not-found errors, we don't have to update resolver 474 // state and we can keep using the old config. 475 // 476 // Only executed in the context of a serializer callback. 477 func (r *xdsResolver) onError(err error) { 478 r.cc.ReportError(err) 479 } 480 481 // Contains common functionality to be executed when resources of either type 482 // are removed. 483 // 484 // Only executed in the context of a serializer callback. 485 func (r *xdsResolver) onResourceNotFound() { 486 // We cannot remove clusters from the service config that have ongoing RPCs. 487 // Instead, what we can do is to send an erroring config selector 488 // along with normal service config. This will ensure that new RPCs will 489 // fail, and once the active RPCs complete, the reference counts on the 490 // clusters will come down to zero. At that point, we will send an empty 491 // service config with no addresses. This results in the pick-first 492 // LB policy being configured on the channel, and since there are no 493 // address, pick-first will put the channel in TRANSIENT_FAILURE. 494 cs := newErroringConfigSelector(r.xdsClient.BootstrapConfig().Node().GetId()) 495 r.sendNewServiceConfig(cs) 496 497 // Stop and dereference the active config selector, if one exists. 
498 if r.curConfigSelector != nil { 499 r.curConfigSelector.stop() 500 } 501 r.curConfigSelector = cs 502 } 503 504 // Only executed in the context of a serializer callback. 505 func (r *xdsResolver) onListenerResourceUpdate(update xdsresource.ListenerUpdate) { 506 if r.logger.V(2) { 507 r.logger.Infof("Received update for Listener resource %q: %v", r.ldsResourceName, pretty.ToJSON(update)) 508 } 509 510 r.currentListener = update 511 r.listenerUpdateRecvd = true 512 513 if update.InlineRouteConfig != nil { 514 // If there was a previous route config watcher because of a non-inline 515 // route configuration, cancel it. 516 r.rdsResourceName = "" 517 if r.routeConfigWatcher != nil { 518 r.routeConfigWatcher.stop() 519 r.routeConfigWatcher = nil 520 } 521 522 r.applyRouteConfigUpdate(*update.InlineRouteConfig) 523 return 524 } 525 526 // We get here only if there was no inline route configuration. 527 528 // If the route config name has not changed, send an update with existing 529 // route configuration and the newly received listener configuration. 530 if r.rdsResourceName == update.RouteConfigName { 531 r.onResolutionComplete() 532 return 533 } 534 535 // If the route config name has changed, cancel the old watcher and start a 536 // new one. At this point, since we have not yet resolved the new route 537 // config name, we don't send an update to the channel, and therefore 538 // continue using the old route configuration (if received) until the new 539 // one is received. 
540 r.rdsResourceName = update.RouteConfigName 541 if r.routeConfigWatcher != nil { 542 r.routeConfigWatcher.stop() 543 r.currentVirtualHost = nil 544 r.routeConfigUpdateRecvd = false 545 } 546 r.routeConfigWatcher = newRouteConfigWatcher(r.rdsResourceName, r) 547 } 548 549 func (r *xdsResolver) onListenerResourceError(err error) { 550 if r.logger.V(2) { 551 r.logger.Infof("Received error for Listener resource %q: %v", r.ldsResourceName, err) 552 } 553 r.onError(err) 554 } 555 556 // Only executed in the context of a serializer callback. 557 func (r *xdsResolver) onListenerResourceNotFound() { 558 r.logger.Warningf("Received resource-not-found-error for Listener resource %q", r.ldsResourceName) 559 560 r.listenerUpdateRecvd = false 561 if r.routeConfigWatcher != nil { 562 r.routeConfigWatcher.stop() 563 } 564 r.rdsResourceName = "" 565 r.currentVirtualHost = nil 566 r.routeConfigUpdateRecvd = false 567 r.routeConfigWatcher = nil 568 569 r.onResourceNotFound() 570 } 571 572 // Only executed in the context of a serializer callback. 573 func (r *xdsResolver) onRouteConfigResourceUpdate(name string, update xdsresource.RouteConfigUpdate) { 574 if r.logger.V(2) { 575 r.logger.Infof("Received update for RouteConfiguration resource %q: %v", name, pretty.ToJSON(update)) 576 } 577 578 if r.rdsResourceName != name { 579 // Drop updates from canceled watchers. 580 return 581 } 582 583 r.applyRouteConfigUpdate(update) 584 } 585 586 // Only executed in the context of a serializer callback. 587 func (r *xdsResolver) onRouteConfigResourceError(name string, err error) { 588 if r.logger.V(2) { 589 r.logger.Infof("Received error for RouteConfiguration resource %q: %v", name, err) 590 } 591 r.onError(err) 592 } 593 594 // Only executed in the context of a serializer callback. 
595 func (r *xdsResolver) onRouteConfigResourceNotFound(name string) { 596 r.logger.Warningf("Received resource-not-found-error for RouteConfiguration resource %q", name) 597 598 if r.rdsResourceName != name { 599 return 600 } 601 r.onResourceNotFound() 602 } 603 604 // Only executed in the context of a serializer callback. 605 func (r *xdsResolver) onClusterRefDownToZero() { 606 r.sendNewServiceConfig(r.curConfigSelector) 607 }