// Source: google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/authority.go

/*
 *
 * Copyright 2025 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package xdsclient

import (
	"context"
	"fmt"
	"sync"
	"sync/atomic"

	"google.golang.org/grpc/grpclog"
	igrpclog "google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/xds/internal/clients"
	"google.golang.org/grpc/xds/internal/clients/internal/syncutil"
	"google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
	"google.golang.org/grpc/xds/internal/clients/xdsclient/metrics"
	"google.golang.org/protobuf/types/known/anypb"
	"google.golang.org/protobuf/types/known/timestamppb"

	v3adminpb "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
	v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3"
)

// resourceState is the per-resource bookkeeping record: the watchers
// registered for the resource, the cached resource data (nil when nothing is
// cached), the metadata of the most recent update, and the set of xdsChannels
// the resource has been subscribed on.
type resourceState struct {
	watchers          map[ResourceWatcher]bool       // Set of watchers for this resource.
	cache             ResourceData                   // Most recent ACKed update for this resource.
	md                xdsresource.UpdateMetadata     // Metadata for the most recent update.
	deletionIgnored   bool                           // True, if resource deletion was ignored for a prior update.
	xdsChannelConfigs map[*xdsChannelWithConfig]bool // Set of xdsChannels where this resource is subscribed.
}

// xdsChannelForADS is used to acquire a reference to an xdsChannel. This
// functionality is provided by the xdsClient.
50 // 51 // The arguments to the function are as follows: 52 // - the server config for the xdsChannel 53 // - the calling authority on which a set of callbacks are invoked by the 54 // xdsChannel on ADS stream events 55 // 56 // Returns a reference to the xdsChannel and a function to release the same. A 57 // non-nil error is returned if the channel creation fails and the first two 58 // return values are meaningless in this case. 59 type xdsChannelForADS func(*ServerConfig, *authority) (*xdsChannel, func(), error) 60 61 // xdsChannelWithConfig is a struct that holds an xdsChannel and its associated 62 // ServerConfig, along with a cleanup function to release the xdsChannel. 63 type xdsChannelWithConfig struct { 64 channel *xdsChannel 65 serverConfig *ServerConfig 66 cleanup func() 67 } 68 69 // authority provides the functionality required to communicate with a 70 // management server corresponding to an authority name specified in the 71 // xDS client configuration. 72 // 73 // It holds references to one or more xdsChannels, one for each server 74 // configuration in the config, to allow fallback from a primary management 75 // server to a secondary management server. Authorities that contain similar 76 // server configuration entries will end up sharing the xdsChannel for that 77 // server configuration. The xdsChannels are owned and managed by the xdsClient. 78 // 79 // It also contains a cache of resource state for resources requested from 80 // management server(s). This cache contains the list of registered watchers and 81 // the most recent resource configuration received from the management server. 82 type authority struct { 83 // The following fields are initialized at creation time and are read-only 84 // afterwards, and therefore don't need to be protected with a mutex. 85 name string // Name of the authority from xDS client configuration. 
86 watcherCallbackSerializer *syncutil.CallbackSerializer // Serializer to run watcher callbacks, owned by the xDS client implementation. 87 getChannelForADS xdsChannelForADS // Function to get an xdsChannel for ADS, provided by the xDS client implementation. 88 xdsClientSerializer *syncutil.CallbackSerializer // Serializer to run call ins from the xDS client, owned by this authority. 89 xdsClientSerializerClose func() // Function to close the above serializer. 90 logger *igrpclog.PrefixLogger // Logger for this authority. 91 target string // The gRPC Channel target. 92 metricsReporter clients.MetricsReporter 93 94 // The below defined fields must only be accessed in the context of the 95 // serializer callback, owned by this authority. 96 97 // A two level map containing the state of all the resources being watched. 98 // 99 // The first level map key is the ResourceType (Listener, Route etc). This 100 // allows us to have a single map for all resources instead of having per 101 // resource-type maps. 102 // 103 // The second level map key is the resource name, with the value being the 104 // actual state of the resource. 105 resources map[ResourceType]map[string]*resourceState 106 107 // An ordered list of xdsChannels corresponding to the list of server 108 // configurations specified for this authority in the config. The 109 // ordering specifies the order in which these channels are preferred for 110 // fallback. 111 xdsChannelConfigs []*xdsChannelWithConfig 112 113 // The current active xdsChannel. Here, active does not mean that the 114 // channel has a working connection to the server. It simply points to the 115 // channel that we are trying to work with, based on fallback logic. 116 activeXDSChannel *xdsChannelWithConfig 117 } 118 119 // authorityBuildOptions wraps arguments required to create a new authority. 
120 type authorityBuildOptions struct { 121 serverConfigs []ServerConfig // Server configs for the authority 122 name string // Name of the authority 123 serializer *syncutil.CallbackSerializer // Callback serializer for invoking watch callbacks 124 getChannelForADS xdsChannelForADS // Function to acquire a reference to an xdsChannel 125 logPrefix string // Prefix for logging 126 target string // Target for the gRPC Channel that owns xDS Client/Authority 127 metricsReporter clients.MetricsReporter // Metrics reporter for the authority 128 } 129 130 // newAuthority creates a new authority instance with the provided 131 // configuration. The authority is responsible for managing the state of 132 // resources requested from the management server, as well as acquiring and 133 // releasing references to channels used to communicate with the management 134 // server. 135 // 136 // Note that no channels to management servers are created at this time. Instead 137 // a channel to the first server configuration is created when the first watch 138 // is registered, and more channels are created as needed by the fallback logic. 139 func newAuthority(args authorityBuildOptions) *authority { 140 ctx, cancel := context.WithCancel(context.Background()) 141 l := grpclog.Component("xds") 142 logPrefix := args.logPrefix + fmt.Sprintf("[authority %q] ", args.name) 143 ret := &authority{ 144 name: args.name, 145 watcherCallbackSerializer: args.serializer, 146 getChannelForADS: args.getChannelForADS, 147 xdsClientSerializer: syncutil.NewCallbackSerializer(ctx), 148 xdsClientSerializerClose: cancel, 149 logger: igrpclog.NewPrefixLogger(l, logPrefix), 150 resources: make(map[ResourceType]map[string]*resourceState), 151 target: args.target, 152 metricsReporter: args.metricsReporter, 153 } 154 155 // Create an ordered list of xdsChannels with their server configs. 
The 156 // actual channel to the first server configuration is created when the 157 // first watch is registered, and channels to other server configurations 158 // are created as needed to support fallback. 159 for _, sc := range args.serverConfigs { 160 ret.xdsChannelConfigs = append(ret.xdsChannelConfigs, &xdsChannelWithConfig{serverConfig: &sc}) 161 } 162 return ret 163 } 164 165 // adsStreamFailure is called to notify the authority about an ADS stream 166 // failure on an xdsChannel to the management server identified by the provided 167 // server config. The error is forwarded to all the resource watchers. 168 // 169 // This method is called by the xDS client implementation (on all interested 170 // authorities) when a stream error is reported by an xdsChannel. 171 // 172 // Errors of type xdsresource.ErrTypeStreamFailedAfterRecv are ignored. 173 func (a *authority) adsStreamFailure(serverConfig *ServerConfig, err error) { 174 a.xdsClientSerializer.TrySchedule(func(context.Context) { 175 a.handleADSStreamFailure(serverConfig, err) 176 }) 177 } 178 179 // Handles ADS stream failure by invoking watch callbacks and triggering 180 // fallback if the associated conditions are met. 181 // 182 // Only executed in the context of a serializer callback. 183 func (a *authority) handleADSStreamFailure(serverConfig *ServerConfig, err error) { 184 if a.logger.V(2) { 185 a.logger.Infof("Connection to server %s failed with error: %v", serverConfig, err) 186 } 187 188 // We do not consider it an error if the ADS stream was closed after having 189 // received a response on the stream. This is because there are legitimate 190 // reasons why the server may need to close the stream during normal 191 // operations, such as needing to rebalance load or the underlying 192 // connection hitting its max connection age limit. See gRFC A57 for more 193 // details. 
194 if xdsresource.ErrType(err) == xdsresource.ErrTypeStreamFailedAfterRecv { 195 a.logger.Warningf("Watchers not notified since ADS stream failed after having received at least one response: %v", err) 196 return 197 } 198 199 // Two conditions need to be met for fallback to be triggered: 200 // 1. There is a connectivity failure on the ADS stream, as described in 201 // gRFC A57. For us, this means that the ADS stream was closed before the 202 // first server response was received. We already checked that condition 203 // earlier in this method. 204 // 2. There is at least one watcher for a resource that is not cached. 205 // Cached resources include ones that 206 // - have been successfully received and can be used. 207 // - are considered non-existent according to xDS Protocol Specification. 208 if !a.watcherExistsForUncachedResource() { 209 if a.logger.V(2) { 210 a.logger.Infof("No watchers for uncached resources. Not triggering fallback") 211 } 212 // Since we are not triggering fallback, propagate the connectivity 213 // error to all watchers and return early. 214 a.propagateConnectivityErrorToAllWatchers(err) 215 return 216 } 217 218 // Attempt to fallback to servers with lower priority than the failing one. 219 currentServerIdx := a.serverIndexForConfig(serverConfig) 220 for i := currentServerIdx + 1; i < len(a.xdsChannelConfigs); i++ { 221 if a.fallbackToServer(a.xdsChannelConfigs[i]) { 222 // Since we have successfully triggered fallback, we don't have to 223 // notify watchers about the connectivity error. 224 return 225 } 226 } 227 228 // Having exhausted all available servers, we must notify watchers of the 229 // connectivity error - A71. 230 a.propagateConnectivityErrorToAllWatchers(err) 231 } 232 233 // propagateConnectivityErrorToAllWatchers propagates the given connection error 234 // to all watchers of all resources. 235 // 236 // Only executed in the context of a serializer callback. 
237 func (a *authority) propagateConnectivityErrorToAllWatchers(err error) { 238 for _, rType := range a.resources { 239 for _, state := range rType { 240 for watcher := range state.watchers { 241 if state.cache == nil { 242 a.watcherCallbackSerializer.TrySchedule(func(context.Context) { 243 watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) 244 }) 245 } else { 246 a.watcherCallbackSerializer.TrySchedule(func(context.Context) { 247 watcher.AmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) 248 }) 249 } 250 } 251 } 252 } 253 } 254 255 // serverIndexForConfig returns the index of the xdsChannelConfig matching the 256 // provided server config, panicking if no match is found (which indicates a 257 // programming error). 258 func (a *authority) serverIndexForConfig(sc *ServerConfig) int { 259 for i, cfg := range a.xdsChannelConfigs { 260 if isServerConfigEqual(sc, cfg.serverConfig) { 261 return i 262 } 263 } 264 panic(fmt.Sprintf("no server config matching %v found", sc)) 265 } 266 267 // Determines the server to fallback to and triggers fallback to the same. If 268 // required, creates an xdsChannel to that server, and re-subscribes to all 269 // existing resources. 270 // 271 // Only executed in the context of a serializer callback. 
272 func (a *authority) fallbackToServer(xc *xdsChannelWithConfig) bool { 273 if a.logger.V(2) { 274 a.logger.Infof("Attempting to initiate fallback to server %q", xc.serverConfig) 275 } 276 277 if xc.channel != nil { 278 if a.logger.V(2) { 279 a.logger.Infof("Channel to the next server in the list %q already exists", xc.serverConfig) 280 } 281 return false 282 } 283 284 channel, cleanup, err := a.getChannelForADS(xc.serverConfig, a) 285 if err != nil { 286 a.logger.Errorf("Failed to create xDS channel: %v", err) 287 return false 288 } 289 xc.channel = channel 290 xc.cleanup = cleanup 291 a.activeXDSChannel = xc 292 293 // Subscribe to all existing resources from the new management server. 294 for typ, resources := range a.resources { 295 for name, state := range resources { 296 if a.logger.V(2) { 297 a.logger.Infof("Resubscribing to resource of type %q and name %q", typ.TypeName, name) 298 } 299 xc.channel.subscribe(typ, name) 300 301 // Add the new channel to the list of xdsChannels from which this 302 // resource has been requested from. Retain the cached resource and 303 // the set of existing watchers (and other metadata fields) in the 304 // resource state. 305 state.xdsChannelConfigs[xc] = true 306 } 307 } 308 return true 309 } 310 311 // adsResourceUpdate is called to notify the authority about a resource update 312 // received on the ADS stream. 313 // 314 // This method is called by the xDS client implementation (on all interested 315 // authorities) when a stream error is reported by an xdsChannel. 
316 func (a *authority) adsResourceUpdate(serverConfig *ServerConfig, rType ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) { 317 a.xdsClientSerializer.TrySchedule(func(context.Context) { 318 a.handleADSResourceUpdate(serverConfig, rType, updates, md, onDone) 319 }) 320 } 321 322 // handleADSResourceUpdate processes an update from the xDS client, updating the 323 // resource cache and notifying any registered watchers of the update. 324 // 325 // If the update is received from a higher priority xdsChannel that was 326 // previously down, we revert to it and close all lower priority xdsChannels. 327 // 328 // Once the update has been processed by all watchers, the authority is expected 329 // to invoke the onDone callback. 330 // 331 // Only executed in the context of a serializer callback. 332 func (a *authority) handleADSResourceUpdate(serverConfig *ServerConfig, rType ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) { 333 a.handleRevertingToPrimaryOnUpdate(serverConfig) 334 335 // We build a list of callback funcs to invoke, and invoke them at the end 336 // of this method instead of inline (when handling the update for a 337 // particular resource), because we want to make sure that all calls to 338 // increment watcherCnt happen before any callbacks are invoked. This will 339 // ensure that the onDone callback is never invoked before all watcher 340 // callbacks are invoked, and the watchers have processed the update. 341 watcherCnt := new(atomic.Int64) 342 done := func() { 343 if watcherCnt.Add(-1) == 0 { 344 onDone() 345 } 346 } 347 funcsToSchedule := []func(context.Context){} 348 defer func() { 349 if len(funcsToSchedule) == 0 { 350 // When there are no watchers for the resources received as part of 351 // this update, invoke onDone explicitly to unblock the next read on 352 // the ADS stream. 
353 onDone() 354 return 355 } 356 for _, f := range funcsToSchedule { 357 a.watcherCallbackSerializer.ScheduleOr(f, onDone) 358 } 359 }() 360 361 resourceStates := a.resources[rType] 362 for name, uErr := range updates { 363 state, ok := resourceStates[name] 364 if !ok { 365 continue 366 } 367 368 // On error, keep previous version of the resource. But update status 369 // and error. 370 if uErr.Err != nil { 371 if a.metricsReporter != nil { 372 a.metricsReporter.ReportMetric(&metrics.ResourceUpdateInvalid{ 373 ServerURI: serverConfig.ServerIdentifier.ServerURI, ResourceType: rType.TypeName, 374 }) 375 } 376 state.md.ErrState = md.ErrState 377 state.md.Status = md.Status 378 for watcher := range state.watchers { 379 watcher := watcher 380 err := uErr.Err 381 watcherCnt.Add(1) 382 if state.cache == nil { 383 funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceError(err, done) }) 384 } else { 385 funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.AmbientError(err, done) }) 386 } 387 } 388 continue 389 } 390 391 if a.metricsReporter != nil { 392 a.metricsReporter.ReportMetric(&metrics.ResourceUpdateValid{ 393 ServerURI: serverConfig.ServerIdentifier.ServerURI, ResourceType: rType.TypeName, 394 }) 395 } 396 397 if state.deletionIgnored { 398 state.deletionIgnored = false 399 a.logger.Infof("A valid update was received for resource %q of type %q after previously ignoring a deletion", name, rType.TypeName) 400 } 401 // Notify watchers if any of these conditions are met: 402 // - this is the first update for this resource 403 // - this update is different from the one currently cached 404 // - the previous update for this resource was NACKed, but the update 405 // before that was the same as this update. 406 if state.cache == nil || !state.cache.Equal(uErr.Resource) || state.md.ErrState != nil { 407 // Update the resource cache. 
408 if a.logger.V(2) { 409 a.logger.Infof("Resource type %q with name %q added to cache", rType.TypeName, name) 410 } 411 state.cache = uErr.Resource 412 413 for watcher := range state.watchers { 414 watcher := watcher 415 resource := uErr.Resource 416 watcherCnt.Add(1) 417 funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceChanged(resource, done) }) 418 } 419 } 420 421 // Set status to ACK, and clear error state. The metadata might be a 422 // NACK metadata because some other resources in the same response 423 // are invalid. 424 state.md = md 425 state.md.ErrState = nil 426 state.md.Status = xdsresource.ServiceStatusACKed 427 if md.ErrState != nil { 428 state.md.Version = md.ErrState.Version 429 } 430 } 431 432 // If this resource type requires that all resources be present in every 433 // SotW response from the server, a response that does not include a 434 // previously seen resource will be interpreted as a deletion of that 435 // resource unless ignore_resource_deletion option was set in the server 436 // config. 437 if !rType.AllResourcesRequiredInSotW { 438 return 439 } 440 for name, state := range resourceStates { 441 if state.cache == nil { 442 // If the resource state does not contain a cached update, which can 443 // happen when: 444 // - resource was newly requested but has not yet been received, or, 445 // - resource was removed as part of a previous update, 446 // we don't want to generate an error for the watchers. 447 // 448 // For the first of the above two conditions, this ADS response may 449 // be in reaction to an earlier request that did not yet request the 450 // new resource, so its absence from the response does not 451 // necessarily indicate that the resource does not exist. For that 452 // case, we rely on the request timeout instead. 453 // 454 // For the second of the above two conditions, we already generated 455 // an error when we received the first response which removed this 456 // resource. 
So, there is no need to generate another one. 457 continue 458 } 459 if _, ok := updates[name]; ok { 460 // If the resource was present in the response, move on. 461 continue 462 } 463 if state.md.Status == xdsresource.ServiceStatusNotExist { 464 // The metadata status is set to "ServiceStatusNotExist" if a 465 // previous update deleted this resource, in which case we do not 466 // want to repeatedly call the watch callbacks with a 467 // "resource-not-found" error. 468 continue 469 } 470 if serverConfig.IgnoreResourceDeletion { 471 // Per A53, resource deletions are ignored if the 472 // `ignore_resource_deletion` server feature is enabled through the 473 // xDS client configuration. If the resource deletion is to be 474 // ignored, the resource is not removed from the cache and the 475 // corresponding OnResourceDoesNotExist() callback is not invoked on 476 // the watchers. 477 if !state.deletionIgnored { 478 state.deletionIgnored = true 479 a.logger.Warningf("Ignoring resource deletion for resource %q of type %q", name, rType.TypeName) 480 } 481 continue 482 } 483 484 // If we get here, it means that the resource exists in cache, but not 485 // in the new update. Delete the resource from cache, and send a 486 // resource not found error to indicate that the resource has been 487 // removed. Metadata for the resource is still maintained, as this is 488 // required by CSDS. 
489 state.cache = nil 490 state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist} 491 for watcher := range state.watchers { 492 watcher := watcher 493 watcherCnt.Add(1) 494 funcsToSchedule = append(funcsToSchedule, func(context.Context) { 495 watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", name, rType.TypeName), done) 496 }) 497 } 498 } 499 } 500 501 // adsResourceDoesNotExist is called by the xDS client implementation (on all 502 // interested authorities) to notify the authority that a subscribed resource 503 // does not exist. 504 func (a *authority) adsResourceDoesNotExist(rType ResourceType, resourceName string) { 505 a.xdsClientSerializer.TrySchedule(func(context.Context) { 506 a.handleADSResourceDoesNotExist(rType, resourceName) 507 }) 508 } 509 510 // handleADSResourceDoesNotExist is called when a subscribed resource does not 511 // exist. It removes the resource from the cache, updates the metadata status 512 // to ServiceStatusNotExist, and notifies all watchers that the resource does 513 // not exist. 
514 func (a *authority) handleADSResourceDoesNotExist(rType ResourceType, resourceName string) { 515 if a.logger.V(2) { 516 a.logger.Infof("Watch for resource %q of type %s timed out", resourceName, rType.TypeName) 517 } 518 519 resourceStates := a.resources[rType] 520 if resourceStates == nil { 521 if a.logger.V(2) { 522 a.logger.Infof("Resource %q of type %s currently not being watched", resourceName, rType.TypeName) 523 } 524 return 525 } 526 state, ok := resourceStates[resourceName] 527 if !ok { 528 if a.logger.V(2) { 529 a.logger.Infof("Resource %q of type %s currently not being watched", resourceName, rType.TypeName) 530 } 531 return 532 } 533 534 state.cache = nil 535 state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist} 536 for watcher := range state.watchers { 537 watcher := watcher 538 a.watcherCallbackSerializer.TrySchedule(func(context.Context) { 539 watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", resourceName, rType.TypeName), func() {}) 540 }) 541 } 542 } 543 544 // handleRevertingToPrimaryOnUpdate is called when a resource update is received 545 // from the xDS client. 546 // 547 // If the update is from the currently active server, nothing is done. Else, all 548 // lower priority servers are closed and the active server is reverted to the 549 // highest priority server that sent the update. 550 // 551 // This method is only executed in the context of a serializer callback. 552 func (a *authority) handleRevertingToPrimaryOnUpdate(serverConfig *ServerConfig) { 553 if a.activeXDSChannel != nil && isServerConfigEqual(serverConfig, a.activeXDSChannel.serverConfig) { 554 // If the resource update is from the current active server, nothing 555 // needs to be done from fallback point of view. 
556 return 557 } 558 559 if a.logger.V(2) { 560 a.logger.Infof("Received update from non-active server %q", serverConfig) 561 } 562 563 // If the resource update is not from the current active server, it means 564 // that we have received an update from a higher priority server and we need 565 // to revert back to it. This method guarantees that when an update is 566 // received from a server, all lower priority servers are closed. 567 serverIdx := a.serverIndexForConfig(serverConfig) 568 a.activeXDSChannel = a.xdsChannelConfigs[serverIdx] 569 570 // Close all lower priority channels. 571 // 572 // But before closing any channel, we need to unsubscribe from any resources 573 // that were subscribed to on this channel. Resources could be subscribed to 574 // from multiple channels as we fallback to lower priority servers. But when 575 // a higher priority one comes back up, we need to unsubscribe from all 576 // lower priority ones before releasing the reference to them. 577 for i := serverIdx + 1; i < len(a.xdsChannelConfigs); i++ { 578 cfg := a.xdsChannelConfigs[i] 579 580 for rType, rState := range a.resources { 581 for resourceName, state := range rState { 582 for xcc := range state.xdsChannelConfigs { 583 if xcc != cfg { 584 continue 585 } 586 // If the current resource is subscribed to on this channel, 587 // unsubscribe, and remove the channel from the list of 588 // channels that this resource is subscribed to. 589 xcc.channel.unsubscribe(rType, resourceName) 590 delete(state.xdsChannelConfigs, xcc) 591 } 592 } 593 } 594 595 // Release the reference to the channel. 596 if cfg.cleanup != nil { 597 if a.logger.V(2) { 598 a.logger.Infof("Closing lower priority server %q", cfg.serverConfig) 599 } 600 cfg.cleanup() 601 cfg.cleanup = nil 602 } 603 cfg.channel = nil 604 } 605 } 606 607 // watchResource registers a new watcher for the specified resource type and 608 // name. It returns a function that can be called to cancel the watch. 
609 // 610 // If this is the first watch for any resource on this authority, an xdsChannel 611 // to the first management server (from the list of server configurations) will 612 // be created. 613 // 614 // If this is the first watch for the given resource name, it will subscribe to 615 // the resource with the xdsChannel. If a cached copy of the resource exists, it 616 // will immediately notify the new watcher. When the last watcher for a resource 617 // is removed, it will unsubscribe the resource from the xdsChannel. 618 func (a *authority) watchResource(rType ResourceType, resourceName string, watcher ResourceWatcher) func() { 619 cleanup := func() {} 620 done := make(chan struct{}) 621 622 a.xdsClientSerializer.ScheduleOr(func(context.Context) { 623 defer close(done) 624 625 if a.logger.V(2) { 626 a.logger.Infof("New watch for type %q, resource name %q", rType.TypeName, resourceName) 627 } 628 629 xdsChannel, err := a.xdsChannelToUse() 630 if err != nil { 631 a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) 632 return 633 } 634 635 // Lookup the entry for the resource type in the top-level map. If there is 636 // no entry for this resource type, create one. 637 resources := a.resources[rType] 638 if resources == nil { 639 resources = make(map[string]*resourceState) 640 a.resources[rType] = resources 641 } 642 643 // Lookup the resource state for the particular resource name that the watch 644 // is being registered for. If this is the first watch for this resource 645 // name, request it from the management server. 
646 state := resources[resourceName] 647 if state == nil { 648 if a.logger.V(2) { 649 a.logger.Infof("First watch for type %q, resource name %q", rType.TypeName, resourceName) 650 } 651 state = &resourceState{ 652 watchers: make(map[ResourceWatcher]bool), 653 md: xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusRequested}, 654 xdsChannelConfigs: map[*xdsChannelWithConfig]bool{xdsChannel: true}, 655 } 656 resources[resourceName] = state 657 xdsChannel.channel.subscribe(rType, resourceName) 658 } 659 // Always add the new watcher to the set of watchers. 660 state.watchers[watcher] = true 661 662 // If we have a cached copy of the resource, notify the new watcher 663 // immediately. 664 if state.cache != nil { 665 if a.logger.V(2) { 666 a.logger.Infof("Resource type %q with resource name %q found in cache: %v", rType.TypeName, resourceName, state.cache) 667 } 668 // state can only be accessed in the context of an 669 // xdsClientSerializer callback. Hence making a copy of the cached 670 // resource here for watchCallbackSerializer. 671 resource := state.cache 672 a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceChanged(resource, func() {}) }) 673 } 674 // If last update was NACK'd, notify the new watcher of error 675 // immediately as well. 676 if state.md.Status == xdsresource.ServiceStatusNACKed { 677 if a.logger.V(2) { 678 a.logger.Infof("Resource type %q with resource name %q was NACKed", rType.TypeName, resourceName) 679 } 680 // state can only be accessed in the context of an 681 // xdsClientSerializer callback. Hence making a copy of the error 682 // here for watchCallbackSerializer. 
683 err := state.md.ErrState.Err 684 if state.cache == nil { 685 a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) 686 } else { 687 a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.AmbientError(err, func() {}) }) 688 } 689 } 690 // If the metadata field is updated to indicate that the management 691 // server does not have this resource, notify the new watcher. 692 if state.md.Status == xdsresource.ServiceStatusNotExist { 693 a.watcherCallbackSerializer.TrySchedule(func(context.Context) { 694 watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", resourceName, rType.TypeName), func() {}) 695 }) 696 } 697 cleanup = a.unwatchResource(rType, resourceName, watcher) 698 }, func() { 699 if a.logger.V(2) { 700 a.logger.Infof("Failed to schedule a watch for type %q, resource name %q, because the xDS client is closed", rType.TypeName, resourceName) 701 } 702 close(done) 703 }) 704 <-done 705 return cleanup 706 } 707 708 func (a *authority) unwatchResource(rType ResourceType, resourceName string, watcher ResourceWatcher) func() { 709 return sync.OnceFunc(func() { 710 done := make(chan struct{}) 711 a.xdsClientSerializer.ScheduleOr(func(context.Context) { 712 defer close(done) 713 714 if a.logger.V(2) { 715 a.logger.Infof("Canceling a watch for type %q, resource name %q", rType.TypeName, resourceName) 716 } 717 718 // Lookup the resource type from the resource cache. The entry is 719 // guaranteed to be present, since *we* were the ones who added it in 720 // there when the watch was registered. 721 resources := a.resources[rType] 722 state := resources[resourceName] 723 724 // Delete this particular watcher from the list of watchers, so that its 725 // callback will not be invoked in the future. 
726 delete(state.watchers, watcher) 727 if len(state.watchers) > 0 { 728 if a.logger.V(2) { 729 a.logger.Infof("Other watchers exist for type %q, resource name %q", rType.TypeName, resourceName) 730 } 731 return 732 } 733 734 // There are no more watchers for this resource. Unsubscribe this 735 // resource from all channels where it was subscribed to and delete 736 // the state associated with it. 737 if a.logger.V(2) { 738 a.logger.Infof("Removing last watch for resource name %q", resourceName) 739 } 740 for xcc := range state.xdsChannelConfigs { 741 xcc.channel.unsubscribe(rType, resourceName) 742 } 743 delete(resources, resourceName) 744 745 // If there are no more watchers for this resource type, delete the 746 // resource type from the top-level map. 747 if len(resources) == 0 { 748 if a.logger.V(2) { 749 a.logger.Infof("Removing last watch for resource type %q", rType.TypeName) 750 } 751 delete(a.resources, rType) 752 } 753 // If there are no more watchers for any resource type, release the 754 // reference to the xdsChannels. 755 if len(a.resources) == 0 { 756 if a.logger.V(2) { 757 a.logger.Infof("Removing last watch for for any resource type, releasing reference to the xdsChannel") 758 } 759 a.closeXDSChannels() 760 } 761 }, func() { close(done) }) 762 <-done 763 }) 764 } 765 766 // xdsChannelToUse returns the xdsChannel to use for communicating with the 767 // management server. If an active channel is available, it returns that. 768 // Otherwise, it creates a new channel using the first server configuration in 769 // the list of configurations, and returns that. 770 // 771 // A non-nil error is returned if the channel creation fails. 772 // 773 // Only executed in the context of a serializer callback. 
774 func (a *authority) xdsChannelToUse() (*xdsChannelWithConfig, error) { 775 if a.activeXDSChannel != nil { 776 return a.activeXDSChannel, nil 777 } 778 779 sc := a.xdsChannelConfigs[0].serverConfig 780 xc, cleanup, err := a.getChannelForADS(sc, a) 781 if err != nil { 782 return nil, err 783 } 784 a.xdsChannelConfigs[0].channel = xc 785 a.xdsChannelConfigs[0].cleanup = cleanup 786 a.activeXDSChannel = a.xdsChannelConfigs[0] 787 return a.activeXDSChannel, nil 788 } 789 790 // closeXDSChannels closes all the xDS channels associated with this authority, 791 // when there are no more watchers for any resource type. 792 // 793 // Only executed in the context of a serializer callback. 794 func (a *authority) closeXDSChannels() { 795 for _, xcc := range a.xdsChannelConfigs { 796 if xcc.cleanup != nil { 797 xcc.cleanup() 798 xcc.cleanup = nil 799 } 800 xcc.channel = nil 801 } 802 a.activeXDSChannel = nil 803 } 804 805 // watcherExistsForUncachedResource returns true if there is at least one 806 // watcher for a resource that has not yet been cached. 807 // 808 // Only executed in the context of a serializer callback. 809 func (a *authority) watcherExistsForUncachedResource() bool { 810 for _, resourceStates := range a.resources { 811 for _, state := range resourceStates { 812 if state.md.Status == xdsresource.ServiceStatusRequested { 813 return true 814 } 815 } 816 } 817 return false 818 } 819 820 // dumpResources returns a dump of the resource configuration cached by this 821 // authority, for CSDS purposes. 
822 func (a *authority) dumpResources() []*v3statuspb.ClientConfig_GenericXdsConfig { 823 var ret []*v3statuspb.ClientConfig_GenericXdsConfig 824 done := make(chan struct{}) 825 826 a.xdsClientSerializer.ScheduleOr(func(context.Context) { 827 defer close(done) 828 ret = a.resourceConfig() 829 }, func() { close(done) }) 830 <-done 831 return ret 832 } 833 834 // resourceConfig returns a slice of GenericXdsConfig objects representing the 835 // current state of all resources managed by this authority. This is used for 836 // reporting the current state of the xDS client. 837 // 838 // Only executed in the context of a serializer callback. 839 func (a *authority) resourceConfig() []*v3statuspb.ClientConfig_GenericXdsConfig { 840 var ret []*v3statuspb.ClientConfig_GenericXdsConfig 841 for rType, resourceStates := range a.resources { 842 typeURL := rType.TypeURL 843 for name, state := range resourceStates { 844 var raw *anypb.Any 845 if state.cache != nil { 846 raw = &anypb.Any{TypeUrl: typeURL, Value: state.cache.Bytes()} 847 } 848 config := &v3statuspb.ClientConfig_GenericXdsConfig{ 849 TypeUrl: typeURL, 850 Name: name, 851 VersionInfo: state.md.Version, 852 XdsConfig: raw, 853 LastUpdated: timestamppb.New(state.md.Timestamp), 854 ClientStatus: serviceStatusToProto(state.md.Status), 855 } 856 if errState := state.md.ErrState; errState != nil { 857 config.ErrorState = &v3adminpb.UpdateFailureState{ 858 LastUpdateAttempt: timestamppb.New(errState.Timestamp), 859 Details: errState.Err.Error(), 860 VersionInfo: errState.Version, 861 } 862 } 863 ret = append(ret, config) 864 } 865 } 866 return ret 867 } 868 869 func (a *authority) close() { 870 a.xdsClientSerializerClose() 871 <-a.xdsClientSerializer.Done() 872 if a.logger.V(2) { 873 a.logger.Infof("Closed") 874 } 875 } 876 877 func serviceStatusToProto(serviceStatus xdsresource.ServiceStatus) v3adminpb.ClientResourceStatus { 878 switch serviceStatus { 879 case xdsresource.ServiceStatusUnknown: 880 return 
v3adminpb.ClientResourceStatus_UNKNOWN 881 case xdsresource.ServiceStatusRequested: 882 return v3adminpb.ClientResourceStatus_REQUESTED 883 case xdsresource.ServiceStatusNotExist: 884 return v3adminpb.ClientResourceStatus_DOES_NOT_EXIST 885 case xdsresource.ServiceStatusACKed: 886 return v3adminpb.ClientResourceStatus_ACKED 887 case xdsresource.ServiceStatusNACKed: 888 return v3adminpb.ClientResourceStatus_NACKED 889 default: 890 return v3adminpb.ClientResourceStatus_UNKNOWN 891 } 892 }