google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/authority.go (about)

     1  /*
     2   *
     3   * Copyright 2025 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package xdsclient
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"sync"
    25  	"sync/atomic"
    26  
    27  	"google.golang.org/grpc/grpclog"
    28  	igrpclog "google.golang.org/grpc/internal/grpclog"
    29  	"google.golang.org/grpc/xds/internal/clients"
    30  	"google.golang.org/grpc/xds/internal/clients/internal/syncutil"
    31  	"google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
    32  	"google.golang.org/grpc/xds/internal/clients/xdsclient/metrics"
    33  	"google.golang.org/protobuf/types/known/anypb"
    34  	"google.golang.org/protobuf/types/known/timestamppb"
    35  
    36  	v3adminpb "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
    37  	v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3"
    38  )
    39  
// resourceState is the per-resource bookkeeping kept by an authority: the
// registered watchers, the cached resource along with its metadata, and the
// set of xdsChannels on which the resource is currently subscribed.
type resourceState struct {
	watchers          map[ResourceWatcher]bool       // Set of watchers for this resource.
	cache             ResourceData                   // Most recent ACKed update for this resource.
	md                xdsresource.UpdateMetadata     // Metadata for the most recent update.
	deletionIgnored   bool                           // True, if resource deletion was ignored for a prior update.
	xdsChannelConfigs map[*xdsChannelWithConfig]bool // Set of xdsChannels where this resource is subscribed.
}
    47  
// xdsChannelForADS is the signature of the function used by an authority to
// acquire a reference to an xdsChannel. The implementation is provided by the
// xdsClient.
//
// The arguments to the function are as follows:
//   - the server config for the xdsChannel
//   - the calling authority, on which a set of callbacks are invoked by the
//     xdsChannel on ADS stream events
//
// It returns a reference to the xdsChannel and a function to release that
// reference. A non-nil error is returned if channel creation fails, in which
// case the first two return values are meaningless.
type xdsChannelForADS func(*ServerConfig, *authority) (*xdsChannel, func(), error)
    60  
// xdsChannelWithConfig is a struct that holds an xdsChannel and its associated
// ServerConfig, along with a cleanup function to release the xdsChannel.
//
// Entries are created by newAuthority with only serverConfig populated; the
// channel and cleanup fields are filled in when a channel is acquired and are
// reset to nil when a lower priority channel is released.
type xdsChannelWithConfig struct {
	// The acquired xdsChannel, or nil if no channel is currently held.
	channel      *xdsChannel
	// The server configuration this entry corresponds to.
	serverConfig *ServerConfig
	// Releases the reference to channel; nil when no channel is held.
	cleanup      func()
}
    68  
// authority provides the functionality required to communicate with a
// management server corresponding to an authority name specified in the
// xDS client configuration.
//
// It holds references to one or more xdsChannels, one for each server
// configuration in the config, to allow fallback from a primary management
// server to a secondary management server. Authorities that contain similar
// server configuration entries will end up sharing the xdsChannel for that
// server configuration. The xdsChannels are owned and managed by the xdsClient.
//
// It also contains a cache of resource state for resources requested from
// management server(s). This cache contains the list of registered watchers and
// the most recent resource configuration received from the management server.
type authority struct {
	// The following fields are initialized at creation time and are read-only
	// afterwards, and therefore don't need to be protected with a mutex.
	name                      string                       // Name of the authority from xDS client configuration.
	watcherCallbackSerializer *syncutil.CallbackSerializer // Serializer to run watcher callbacks, owned by the xDS client implementation.
	getChannelForADS          xdsChannelForADS             // Function to get an xdsChannel for ADS, provided by the xDS client implementation.
	xdsClientSerializer       *syncutil.CallbackSerializer // Serializer to run calls coming in from the xDS client, owned by this authority.
	xdsClientSerializerClose  func()                       // Function to close the above serializer.
	logger                    *igrpclog.PrefixLogger       // Logger for this authority.
	target                    string                       // The gRPC Channel target.
	metricsReporter           clients.MetricsReporter      // Reporter for resource update metrics; may be nil, in which case no metrics are reported.

	// The below defined fields must only be accessed in the context of the
	// serializer callback, owned by this authority.

	// A two level map containing the state of all the resources being watched.
	//
	// The first level map key is the ResourceType (Listener, Route etc). This
	// allows us to have a single map for all resources instead of having per
	// resource-type maps.
	//
	// The second level map key is the resource name, with the value being the
	// actual state of the resource.
	resources map[ResourceType]map[string]*resourceState

	// An ordered list of xdsChannels corresponding to the list of server
	// configurations specified for this authority in the config. The
	// ordering specifies the order in which these channels are preferred for
	// fallback.
	xdsChannelConfigs []*xdsChannelWithConfig

	// The current active xdsChannel. Here, active does not mean that the
	// channel has a working connection to the server. It simply points to the
	// channel that we are trying to work with, based on fallback logic.
	activeXDSChannel *xdsChannelWithConfig
}
   118  
// authorityBuildOptions wraps the arguments required to create a new
// authority via newAuthority.
type authorityBuildOptions struct {
	serverConfigs    []ServerConfig               // Server configs for the authority, in order of preference.
	name             string                       // Name of the authority.
	serializer       *syncutil.CallbackSerializer // Callback serializer for invoking watch callbacks.
	getChannelForADS xdsChannelForADS             // Function to acquire a reference to an xdsChannel.
	logPrefix        string                       // Prefix for logging.
	target           string                       // Target for the gRPC Channel that owns xDS Client/Authority.
	metricsReporter  clients.MetricsReporter      // Metrics reporter for the authority.
}
   129  
   130  // newAuthority creates a new authority instance with the provided
   131  // configuration. The authority is responsible for managing the state of
   132  // resources requested from the management server, as well as acquiring and
   133  // releasing references to channels used to communicate with the management
   134  // server.
   135  //
   136  // Note that no channels to management servers are created at this time. Instead
   137  // a channel to the first server configuration is created when the first watch
   138  // is registered, and more channels are created as needed by the fallback logic.
   139  func newAuthority(args authorityBuildOptions) *authority {
   140  	ctx, cancel := context.WithCancel(context.Background())
   141  	l := grpclog.Component("xds")
   142  	logPrefix := args.logPrefix + fmt.Sprintf("[authority %q] ", args.name)
   143  	ret := &authority{
   144  		name:                      args.name,
   145  		watcherCallbackSerializer: args.serializer,
   146  		getChannelForADS:          args.getChannelForADS,
   147  		xdsClientSerializer:       syncutil.NewCallbackSerializer(ctx),
   148  		xdsClientSerializerClose:  cancel,
   149  		logger:                    igrpclog.NewPrefixLogger(l, logPrefix),
   150  		resources:                 make(map[ResourceType]map[string]*resourceState),
   151  		target:                    args.target,
   152  		metricsReporter:           args.metricsReporter,
   153  	}
   154  
   155  	// Create an ordered list of xdsChannels with their server configs. The
   156  	// actual channel to the first server configuration is created when the
   157  	// first watch is registered, and channels to other server configurations
   158  	// are created as needed to support fallback.
   159  	for _, sc := range args.serverConfigs {
   160  		ret.xdsChannelConfigs = append(ret.xdsChannelConfigs, &xdsChannelWithConfig{serverConfig: &sc})
   161  	}
   162  	return ret
   163  }
   164  
   165  // adsStreamFailure is called to notify the authority about an ADS stream
   166  // failure on an xdsChannel to the management server identified by the provided
   167  // server config. The error is forwarded to all the resource watchers.
   168  //
   169  // This method is called by the xDS client implementation (on all interested
   170  // authorities) when a stream error is reported by an xdsChannel.
   171  //
   172  // Errors of type xdsresource.ErrTypeStreamFailedAfterRecv are ignored.
   173  func (a *authority) adsStreamFailure(serverConfig *ServerConfig, err error) {
   174  	a.xdsClientSerializer.TrySchedule(func(context.Context) {
   175  		a.handleADSStreamFailure(serverConfig, err)
   176  	})
   177  }
   178  
   179  // Handles ADS stream failure by invoking watch callbacks and triggering
   180  // fallback if the associated conditions are met.
   181  //
   182  // Only executed in the context of a serializer callback.
   183  func (a *authority) handleADSStreamFailure(serverConfig *ServerConfig, err error) {
   184  	if a.logger.V(2) {
   185  		a.logger.Infof("Connection to server %s failed with error: %v", serverConfig, err)
   186  	}
   187  
   188  	// We do not consider it an error if the ADS stream was closed after having
   189  	// received a response on the stream. This is because there are legitimate
   190  	// reasons why the server may need to close the stream during normal
   191  	// operations, such as needing to rebalance load or the underlying
   192  	// connection hitting its max connection age limit. See gRFC A57 for more
   193  	// details.
   194  	if xdsresource.ErrType(err) == xdsresource.ErrTypeStreamFailedAfterRecv {
   195  		a.logger.Warningf("Watchers not notified since ADS stream failed after having received at least one response: %v", err)
   196  		return
   197  	}
   198  
   199  	// Two conditions need to be met for fallback to be triggered:
   200  	// 1. There is a connectivity failure on the ADS stream, as described in
   201  	//    gRFC A57. For us, this means that the ADS stream was closed before the
   202  	//    first server response was received. We already checked that condition
   203  	//    earlier in this method.
   204  	// 2. There is at least one watcher for a resource that is not cached.
   205  	//    Cached resources include ones that
   206  	//    - have been successfully received and can be used.
   207  	//    - are considered non-existent according to xDS Protocol Specification.
   208  	if !a.watcherExistsForUncachedResource() {
   209  		if a.logger.V(2) {
   210  			a.logger.Infof("No watchers for uncached resources. Not triggering fallback")
   211  		}
   212  		// Since we are not triggering fallback, propagate the connectivity
   213  		// error to all watchers and return early.
   214  		a.propagateConnectivityErrorToAllWatchers(err)
   215  		return
   216  	}
   217  
   218  	// Attempt to fallback to servers with lower priority than the failing one.
   219  	currentServerIdx := a.serverIndexForConfig(serverConfig)
   220  	for i := currentServerIdx + 1; i < len(a.xdsChannelConfigs); i++ {
   221  		if a.fallbackToServer(a.xdsChannelConfigs[i]) {
   222  			// Since we have successfully triggered fallback, we don't have to
   223  			// notify watchers about the connectivity error.
   224  			return
   225  		}
   226  	}
   227  
   228  	// Having exhausted all available servers, we must notify watchers of the
   229  	// connectivity error - A71.
   230  	a.propagateConnectivityErrorToAllWatchers(err)
   231  }
   232  
   233  // propagateConnectivityErrorToAllWatchers propagates the given connection error
   234  // to all watchers of all resources.
   235  //
   236  // Only executed in the context of a serializer callback.
   237  func (a *authority) propagateConnectivityErrorToAllWatchers(err error) {
   238  	for _, rType := range a.resources {
   239  		for _, state := range rType {
   240  			for watcher := range state.watchers {
   241  				if state.cache == nil {
   242  					a.watcherCallbackSerializer.TrySchedule(func(context.Context) {
   243  						watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {})
   244  					})
   245  				} else {
   246  					a.watcherCallbackSerializer.TrySchedule(func(context.Context) {
   247  						watcher.AmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {})
   248  					})
   249  				}
   250  			}
   251  		}
   252  	}
   253  }
   254  
   255  // serverIndexForConfig returns the index of the xdsChannelConfig matching the
   256  // provided server config, panicking if no match is found (which indicates a
   257  // programming error).
   258  func (a *authority) serverIndexForConfig(sc *ServerConfig) int {
   259  	for i, cfg := range a.xdsChannelConfigs {
   260  		if isServerConfigEqual(sc, cfg.serverConfig) {
   261  			return i
   262  		}
   263  	}
   264  	panic(fmt.Sprintf("no server config matching %v found", sc))
   265  }
   266  
   267  // Determines the server to fallback to and triggers fallback to the same. If
   268  // required, creates an xdsChannel to that server, and re-subscribes to all
   269  // existing resources.
   270  //
   271  // Only executed in the context of a serializer callback.
   272  func (a *authority) fallbackToServer(xc *xdsChannelWithConfig) bool {
   273  	if a.logger.V(2) {
   274  		a.logger.Infof("Attempting to initiate fallback to server %q", xc.serverConfig)
   275  	}
   276  
   277  	if xc.channel != nil {
   278  		if a.logger.V(2) {
   279  			a.logger.Infof("Channel to the next server in the list %q already exists", xc.serverConfig)
   280  		}
   281  		return false
   282  	}
   283  
   284  	channel, cleanup, err := a.getChannelForADS(xc.serverConfig, a)
   285  	if err != nil {
   286  		a.logger.Errorf("Failed to create xDS channel: %v", err)
   287  		return false
   288  	}
   289  	xc.channel = channel
   290  	xc.cleanup = cleanup
   291  	a.activeXDSChannel = xc
   292  
   293  	// Subscribe to all existing resources from the new management server.
   294  	for typ, resources := range a.resources {
   295  		for name, state := range resources {
   296  			if a.logger.V(2) {
   297  				a.logger.Infof("Resubscribing to resource of type %q and name %q", typ.TypeName, name)
   298  			}
   299  			xc.channel.subscribe(typ, name)
   300  
   301  			// Add the new channel to the list of xdsChannels from which this
   302  			// resource has been requested from. Retain the cached resource and
   303  			// the set of existing watchers (and other metadata fields) in the
   304  			// resource state.
   305  			state.xdsChannelConfigs[xc] = true
   306  		}
   307  	}
   308  	return true
   309  }
   310  
   311  // adsResourceUpdate is called to notify the authority about a resource update
   312  // received on the ADS stream.
   313  //
   314  // This method is called by the xDS client implementation (on all interested
   315  // authorities) when a stream error is reported by an xdsChannel.
   316  func (a *authority) adsResourceUpdate(serverConfig *ServerConfig, rType ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) {
   317  	a.xdsClientSerializer.TrySchedule(func(context.Context) {
   318  		a.handleADSResourceUpdate(serverConfig, rType, updates, md, onDone)
   319  	})
   320  }
   321  
// handleADSResourceUpdate processes an update from the xDS client, updating the
// resource cache and notifying any registered watchers of the update.
//
// If the update is received from a higher priority xdsChannel that was
// previously down, we revert to it and close all lower priority xdsChannels.
//
// Once the update has been processed by all watchers, the authority is expected
// to invoke the onDone callback.
//
// Parameters:
//   - serverConfig: the server from which the update was received
//   - rType: the type of the resources contained in the update
//   - updates: per-resource-name result, holding either the resource or the
//     error encountered for it
//   - md: metadata of the response carrying these updates
//   - onDone: invoked once all scheduled watcher callbacks have called back,
//     or immediately if there is nothing to schedule
//
// Only executed in the context of a serializer callback.
func (a *authority) handleADSResourceUpdate(serverConfig *ServerConfig, rType ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) {
	// An update from a server other than the active one makes that server the
	// active one, and releases all servers of lower priority.
	a.handleRevertingToPrimaryOnUpdate(serverConfig)

	// We build a list of callback funcs to invoke, and invoke them at the end
	// of this method instead of inline (when handling the update for a
	// particular resource), because we want to make sure that all calls to
	// increment watcherCnt happen before any callbacks are invoked. This will
	// ensure that the onDone callback is never invoked before all watcher
	// callbacks are invoked, and the watchers have processed the update.
	watcherCnt := new(atomic.Int64)
	// done is handed to every watcher callback. The call that brings the
	// counter down to zero is the one that invokes onDone.
	done := func() {
		if watcherCnt.Add(-1) == 0 {
			onDone()
		}
	}
	funcsToSchedule := []func(context.Context){}
	defer func() {
		if len(funcsToSchedule) == 0 {
			// When there are no watchers for the resources received as part of
			// this update, invoke onDone explicitly to unblock the next read on
			// the ADS stream.
			onDone()
			return
		}
		// NOTE(review): onDone (not done) is passed as the second argument
		// here. Presumably ScheduleOr runs it when the callback cannot be
		// scheduled, in which case onDone could be invoked once per
		// unscheduled callback - confirm that onDone tolerates repeated calls.
		for _, f := range funcsToSchedule {
			a.watcherCallbackSerializer.ScheduleOr(f, onDone)
		}
	}()

	resourceStates := a.resources[rType]
	for name, uErr := range updates {
		state, ok := resourceStates[name]
		if !ok {
			// No state is tracked for this resource on this authority, so
			// there is nothing to update and nobody to notify.
			continue
		}

		// On error, keep previous version of the resource. But update status
		// and error.
		if uErr.Err != nil {
			if a.metricsReporter != nil {
				a.metricsReporter.ReportMetric(&metrics.ResourceUpdateInvalid{
					ServerURI: serverConfig.ServerIdentifier.ServerURI, ResourceType: rType.TypeName,
				})
			}
			state.md.ErrState = md.ErrState
			state.md.Status = md.Status
			// Watchers of a resource with no cached copy get a resource
			// error; watchers of a cached resource get an ambient error.
			for watcher := range state.watchers {
				watcher := watcher
				err := uErr.Err
				watcherCnt.Add(1)
				if state.cache == nil {
					funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceError(err, done) })
				} else {
					funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.AmbientError(err, done) })
				}
			}
			continue
		}

		if a.metricsReporter != nil {
			a.metricsReporter.ReportMetric(&metrics.ResourceUpdateValid{
				ServerURI: serverConfig.ServerIdentifier.ServerURI, ResourceType: rType.TypeName,
			})
		}

		if state.deletionIgnored {
			state.deletionIgnored = false
			a.logger.Infof("A valid update was received for resource %q of type %q after previously ignoring a deletion", name, rType.TypeName)
		}
		// Notify watchers if any of these conditions are met:
		//   - this is the first update for this resource
		//   - this update is different from the one currently cached
		//   - the previous update for this resource was NACKed, but the update
		//     before that was the same as this update.
		if state.cache == nil || !state.cache.Equal(uErr.Resource) || state.md.ErrState != nil {
			// Update the resource cache.
			if a.logger.V(2) {
				a.logger.Infof("Resource type %q with name %q added to cache", rType.TypeName, name)
			}
			state.cache = uErr.Resource

			for watcher := range state.watchers {
				watcher := watcher
				resource := uErr.Resource
				watcherCnt.Add(1)
				funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceChanged(resource, done) })
			}
		}

		// Set status to ACK, and clear error state. The metadata might be a
		// NACK metadata because some other resources in the same response
		// are invalid.
		state.md = md
		state.md.ErrState = nil
		state.md.Status = xdsresource.ServiceStatusACKed
		if md.ErrState != nil {
			state.md.Version = md.ErrState.Version
		}
	}

	// If this resource type requires that all resources be present in every
	// SotW response from the server, a response that does not include a
	// previously seen resource will be interpreted as a deletion of that
	// resource unless ignore_resource_deletion option was set in the server
	// config.
	if !rType.AllResourcesRequiredInSotW {
		return
	}
	for name, state := range resourceStates {
		if state.cache == nil {
			// If the resource state does not contain a cached update, which can
			// happen when:
			// - resource was newly requested but has not yet been received, or,
			// - resource was removed as part of a previous update,
			// we don't want to generate an error for the watchers.
			//
			// For the first of the above two conditions, this ADS response may
			// be in reaction to an earlier request that did not yet request the
			// new resource, so its absence from the response does not
			// necessarily indicate that the resource does not exist. For that
			// case, we rely on the request timeout instead.
			//
			// For the second of the above two conditions, we already generated
			// an error when we received the first response which removed this
			// resource. So, there is no need to generate another one.
			continue
		}
		if _, ok := updates[name]; ok {
			// If the resource was present in the response, move on.
			continue
		}
		if state.md.Status == xdsresource.ServiceStatusNotExist {
			// The metadata status is set to "ServiceStatusNotExist" if a
			// previous update deleted this resource, in which case we do not
			// want to repeatedly call the watch callbacks with a
			// "resource-not-found" error.
			continue
		}
		if serverConfig.IgnoreResourceDeletion {
			// Per A53, resource deletions are ignored if the
			// `ignore_resource_deletion` server feature is enabled through the
			// xDS client configuration. If the resource deletion is to be
			// ignored, the resource is not removed from the cache and the
			// watchers are not notified. The warning is logged only the first
			// time a deletion is ignored for the resource.
			if !state.deletionIgnored {
				state.deletionIgnored = true
				a.logger.Warningf("Ignoring resource deletion for resource %q of type %q", name, rType.TypeName)
			}
			continue
		}

		// If we get here, it means that the resource exists in cache, but not
		// in the new update. Delete the resource from cache, and send a
		// resource not found error to indicate that the resource has been
		// removed. Metadata for the resource is still maintained, as this is
		// required by CSDS.
		state.cache = nil
		state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist}
		for watcher := range state.watchers {
			watcher := watcher
			watcherCnt.Add(1)
			funcsToSchedule = append(funcsToSchedule, func(context.Context) {
				watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", name, rType.TypeName), done)
			})
		}
	}
}
   500  
   501  // adsResourceDoesNotExist is called by the xDS client implementation (on all
   502  // interested authorities) to notify the authority that a subscribed resource
   503  // does not exist.
   504  func (a *authority) adsResourceDoesNotExist(rType ResourceType, resourceName string) {
   505  	a.xdsClientSerializer.TrySchedule(func(context.Context) {
   506  		a.handleADSResourceDoesNotExist(rType, resourceName)
   507  	})
   508  }
   509  
   510  // handleADSResourceDoesNotExist is called when a subscribed resource does not
   511  // exist. It removes the resource from the cache, updates the metadata status
   512  // to ServiceStatusNotExist, and notifies all watchers that the resource does
   513  // not exist.
   514  func (a *authority) handleADSResourceDoesNotExist(rType ResourceType, resourceName string) {
   515  	if a.logger.V(2) {
   516  		a.logger.Infof("Watch for resource %q of type %s timed out", resourceName, rType.TypeName)
   517  	}
   518  
   519  	resourceStates := a.resources[rType]
   520  	if resourceStates == nil {
   521  		if a.logger.V(2) {
   522  			a.logger.Infof("Resource %q of type %s currently not being watched", resourceName, rType.TypeName)
   523  		}
   524  		return
   525  	}
   526  	state, ok := resourceStates[resourceName]
   527  	if !ok {
   528  		if a.logger.V(2) {
   529  			a.logger.Infof("Resource %q of type %s currently not being watched", resourceName, rType.TypeName)
   530  		}
   531  		return
   532  	}
   533  
   534  	state.cache = nil
   535  	state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist}
   536  	for watcher := range state.watchers {
   537  		watcher := watcher
   538  		a.watcherCallbackSerializer.TrySchedule(func(context.Context) {
   539  			watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", resourceName, rType.TypeName), func() {})
   540  		})
   541  	}
   542  }
   543  
   544  // handleRevertingToPrimaryOnUpdate is called when a resource update is received
   545  // from the xDS client.
   546  //
   547  // If the update is from the currently active server, nothing is done. Else, all
   548  // lower priority servers are closed and the active server is reverted to the
   549  // highest priority server that sent the update.
   550  //
   551  // This method is only executed in the context of a serializer callback.
   552  func (a *authority) handleRevertingToPrimaryOnUpdate(serverConfig *ServerConfig) {
   553  	if a.activeXDSChannel != nil && isServerConfigEqual(serverConfig, a.activeXDSChannel.serverConfig) {
   554  		// If the resource update is from the current active server, nothing
   555  		// needs to be done from fallback point of view.
   556  		return
   557  	}
   558  
   559  	if a.logger.V(2) {
   560  		a.logger.Infof("Received update from non-active server %q", serverConfig)
   561  	}
   562  
   563  	// If the resource update is not from the current active server, it means
   564  	// that we have received an update from a higher priority server and we need
   565  	// to revert back to it. This method guarantees that when an update is
   566  	// received from a server, all lower priority servers are closed.
   567  	serverIdx := a.serverIndexForConfig(serverConfig)
   568  	a.activeXDSChannel = a.xdsChannelConfigs[serverIdx]
   569  
   570  	// Close all lower priority channels.
   571  	//
   572  	// But before closing any channel, we need to unsubscribe from any resources
   573  	// that were subscribed to on this channel. Resources could be subscribed to
   574  	// from multiple channels as we fallback to lower priority servers. But when
   575  	// a higher priority one comes back up, we need to unsubscribe from all
   576  	// lower priority ones before releasing the reference to them.
   577  	for i := serverIdx + 1; i < len(a.xdsChannelConfigs); i++ {
   578  		cfg := a.xdsChannelConfigs[i]
   579  
   580  		for rType, rState := range a.resources {
   581  			for resourceName, state := range rState {
   582  				for xcc := range state.xdsChannelConfigs {
   583  					if xcc != cfg {
   584  						continue
   585  					}
   586  					// If the current resource is subscribed to on this channel,
   587  					// unsubscribe, and remove the channel from the list of
   588  					// channels that this resource is subscribed to.
   589  					xcc.channel.unsubscribe(rType, resourceName)
   590  					delete(state.xdsChannelConfigs, xcc)
   591  				}
   592  			}
   593  		}
   594  
   595  		// Release the reference to the channel.
   596  		if cfg.cleanup != nil {
   597  			if a.logger.V(2) {
   598  				a.logger.Infof("Closing lower priority server %q", cfg.serverConfig)
   599  			}
   600  			cfg.cleanup()
   601  			cfg.cleanup = nil
   602  		}
   603  		cfg.channel = nil
   604  	}
   605  }
   606  
   607  // watchResource registers a new watcher for the specified resource type and
   608  // name. It returns a function that can be called to cancel the watch.
   609  //
   610  // If this is the first watch for any resource on this authority, an xdsChannel
   611  // to the first management server (from the list of server configurations) will
   612  // be created.
   613  //
   614  // If this is the first watch for the given resource name, it will subscribe to
   615  // the resource with the xdsChannel. If a cached copy of the resource exists, it
   616  // will immediately notify the new watcher. When the last watcher for a resource
   617  // is removed, it will unsubscribe the resource from the xdsChannel.
   618  func (a *authority) watchResource(rType ResourceType, resourceName string, watcher ResourceWatcher) func() {
   619  	cleanup := func() {}
   620  	done := make(chan struct{})
   621  
   622  	a.xdsClientSerializer.ScheduleOr(func(context.Context) {
   623  		defer close(done)
   624  
   625  		if a.logger.V(2) {
   626  			a.logger.Infof("New watch for type %q, resource name %q", rType.TypeName, resourceName)
   627  		}
   628  
   629  		xdsChannel, err := a.xdsChannelToUse()
   630  		if err != nil {
   631  			a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) })
   632  			return
   633  		}
   634  
   635  		// Lookup the entry for the resource type in the top-level map. If there is
   636  		// no entry for this resource type, create one.
   637  		resources := a.resources[rType]
   638  		if resources == nil {
   639  			resources = make(map[string]*resourceState)
   640  			a.resources[rType] = resources
   641  		}
   642  
   643  		// Lookup the resource state for the particular resource name that the watch
   644  		// is being registered for. If this is the first watch for this resource
   645  		// name, request it from the management server.
   646  		state := resources[resourceName]
   647  		if state == nil {
   648  			if a.logger.V(2) {
   649  				a.logger.Infof("First watch for type %q, resource name %q", rType.TypeName, resourceName)
   650  			}
   651  			state = &resourceState{
   652  				watchers:          make(map[ResourceWatcher]bool),
   653  				md:                xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusRequested},
   654  				xdsChannelConfigs: map[*xdsChannelWithConfig]bool{xdsChannel: true},
   655  			}
   656  			resources[resourceName] = state
   657  			xdsChannel.channel.subscribe(rType, resourceName)
   658  		}
   659  		// Always add the new watcher to the set of watchers.
   660  		state.watchers[watcher] = true
   661  
   662  		// If we have a cached copy of the resource, notify the new watcher
   663  		// immediately.
   664  		if state.cache != nil {
   665  			if a.logger.V(2) {
   666  				a.logger.Infof("Resource type %q with resource name %q found in cache: %v", rType.TypeName, resourceName, state.cache)
   667  			}
   668  			// state can only be accessed in the context of an
   669  			// xdsClientSerializer callback. Hence making a copy of the cached
   670  			// resource here for watchCallbackSerializer.
   671  			resource := state.cache
   672  			a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceChanged(resource, func() {}) })
   673  		}
   674  		// If last update was NACK'd, notify the new watcher of error
   675  		// immediately as well.
   676  		if state.md.Status == xdsresource.ServiceStatusNACKed {
   677  			if a.logger.V(2) {
   678  				a.logger.Infof("Resource type %q with resource name %q was NACKed", rType.TypeName, resourceName)
   679  			}
   680  			// state can only be accessed in the context of an
   681  			// xdsClientSerializer callback. Hence making a copy of the error
   682  			// here for watchCallbackSerializer.
   683  			err := state.md.ErrState.Err
   684  			if state.cache == nil {
   685  				a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) })
   686  			} else {
   687  				a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.AmbientError(err, func() {}) })
   688  			}
   689  		}
   690  		// If the metadata field is updated to indicate that the management
   691  		// server does not have this resource, notify the new watcher.
   692  		if state.md.Status == xdsresource.ServiceStatusNotExist {
   693  			a.watcherCallbackSerializer.TrySchedule(func(context.Context) {
   694  				watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", resourceName, rType.TypeName), func() {})
   695  			})
   696  		}
   697  		cleanup = a.unwatchResource(rType, resourceName, watcher)
   698  	}, func() {
   699  		if a.logger.V(2) {
   700  			a.logger.Infof("Failed to schedule a watch for type %q, resource name %q, because the xDS client is closed", rType.TypeName, resourceName)
   701  		}
   702  		close(done)
   703  	})
   704  	<-done
   705  	return cleanup
   706  }
   707  
   708  func (a *authority) unwatchResource(rType ResourceType, resourceName string, watcher ResourceWatcher) func() {
   709  	return sync.OnceFunc(func() {
   710  		done := make(chan struct{})
   711  		a.xdsClientSerializer.ScheduleOr(func(context.Context) {
   712  			defer close(done)
   713  
   714  			if a.logger.V(2) {
   715  				a.logger.Infof("Canceling a watch for type %q, resource name %q", rType.TypeName, resourceName)
   716  			}
   717  
   718  			// Lookup the resource type from the resource cache. The entry is
   719  			// guaranteed to be present, since *we* were the ones who added it in
   720  			// there when the watch was registered.
   721  			resources := a.resources[rType]
   722  			state := resources[resourceName]
   723  
   724  			// Delete this particular watcher from the list of watchers, so that its
   725  			// callback will not be invoked in the future.
   726  			delete(state.watchers, watcher)
   727  			if len(state.watchers) > 0 {
   728  				if a.logger.V(2) {
   729  					a.logger.Infof("Other watchers exist for type %q, resource name %q", rType.TypeName, resourceName)
   730  				}
   731  				return
   732  			}
   733  
   734  			// There are no more watchers for this resource. Unsubscribe this
   735  			// resource from all channels where it was subscribed to and delete
   736  			// the state associated with it.
   737  			if a.logger.V(2) {
   738  				a.logger.Infof("Removing last watch for resource name %q", resourceName)
   739  			}
   740  			for xcc := range state.xdsChannelConfigs {
   741  				xcc.channel.unsubscribe(rType, resourceName)
   742  			}
   743  			delete(resources, resourceName)
   744  
   745  			// If there are no more watchers for this resource type, delete the
   746  			// resource type from the top-level map.
   747  			if len(resources) == 0 {
   748  				if a.logger.V(2) {
   749  					a.logger.Infof("Removing last watch for resource type %q", rType.TypeName)
   750  				}
   751  				delete(a.resources, rType)
   752  			}
   753  			// If there are no more watchers for any resource type, release the
   754  			// reference to the xdsChannels.
   755  			if len(a.resources) == 0 {
   756  				if a.logger.V(2) {
   757  					a.logger.Infof("Removing last watch for for any resource type, releasing reference to the xdsChannel")
   758  				}
   759  				a.closeXDSChannels()
   760  			}
   761  		}, func() { close(done) })
   762  		<-done
   763  	})
   764  }
   765  
   766  // xdsChannelToUse returns the xdsChannel to use for communicating with the
   767  // management server. If an active channel is available, it returns that.
   768  // Otherwise, it creates a new channel using the first server configuration in
   769  // the list of configurations, and returns that.
   770  //
   771  // A non-nil error is returned if the channel creation fails.
   772  //
   773  // Only executed in the context of a serializer callback.
   774  func (a *authority) xdsChannelToUse() (*xdsChannelWithConfig, error) {
   775  	if a.activeXDSChannel != nil {
   776  		return a.activeXDSChannel, nil
   777  	}
   778  
   779  	sc := a.xdsChannelConfigs[0].serverConfig
   780  	xc, cleanup, err := a.getChannelForADS(sc, a)
   781  	if err != nil {
   782  		return nil, err
   783  	}
   784  	a.xdsChannelConfigs[0].channel = xc
   785  	a.xdsChannelConfigs[0].cleanup = cleanup
   786  	a.activeXDSChannel = a.xdsChannelConfigs[0]
   787  	return a.activeXDSChannel, nil
   788  }
   789  
   790  // closeXDSChannels closes all the xDS channels associated with this authority,
   791  // when there are no more watchers for any resource type.
   792  //
   793  // Only executed in the context of a serializer callback.
   794  func (a *authority) closeXDSChannels() {
   795  	for _, xcc := range a.xdsChannelConfigs {
   796  		if xcc.cleanup != nil {
   797  			xcc.cleanup()
   798  			xcc.cleanup = nil
   799  		}
   800  		xcc.channel = nil
   801  	}
   802  	a.activeXDSChannel = nil
   803  }
   804  
   805  // watcherExistsForUncachedResource returns true if there is at least one
   806  // watcher for a resource that has not yet been cached.
   807  //
   808  // Only executed in the context of a serializer callback.
   809  func (a *authority) watcherExistsForUncachedResource() bool {
   810  	for _, resourceStates := range a.resources {
   811  		for _, state := range resourceStates {
   812  			if state.md.Status == xdsresource.ServiceStatusRequested {
   813  				return true
   814  			}
   815  		}
   816  	}
   817  	return false
   818  }
   819  
   820  // dumpResources returns a dump of the resource configuration cached by this
   821  // authority, for CSDS purposes.
   822  func (a *authority) dumpResources() []*v3statuspb.ClientConfig_GenericXdsConfig {
   823  	var ret []*v3statuspb.ClientConfig_GenericXdsConfig
   824  	done := make(chan struct{})
   825  
   826  	a.xdsClientSerializer.ScheduleOr(func(context.Context) {
   827  		defer close(done)
   828  		ret = a.resourceConfig()
   829  	}, func() { close(done) })
   830  	<-done
   831  	return ret
   832  }
   833  
   834  // resourceConfig returns a slice of GenericXdsConfig objects representing the
   835  // current state of all resources managed by this authority. This is used for
   836  // reporting the current state of the xDS client.
   837  //
   838  // Only executed in the context of a serializer callback.
   839  func (a *authority) resourceConfig() []*v3statuspb.ClientConfig_GenericXdsConfig {
   840  	var ret []*v3statuspb.ClientConfig_GenericXdsConfig
   841  	for rType, resourceStates := range a.resources {
   842  		typeURL := rType.TypeURL
   843  		for name, state := range resourceStates {
   844  			var raw *anypb.Any
   845  			if state.cache != nil {
   846  				raw = &anypb.Any{TypeUrl: typeURL, Value: state.cache.Bytes()}
   847  			}
   848  			config := &v3statuspb.ClientConfig_GenericXdsConfig{
   849  				TypeUrl:      typeURL,
   850  				Name:         name,
   851  				VersionInfo:  state.md.Version,
   852  				XdsConfig:    raw,
   853  				LastUpdated:  timestamppb.New(state.md.Timestamp),
   854  				ClientStatus: serviceStatusToProto(state.md.Status),
   855  			}
   856  			if errState := state.md.ErrState; errState != nil {
   857  				config.ErrorState = &v3adminpb.UpdateFailureState{
   858  					LastUpdateAttempt: timestamppb.New(errState.Timestamp),
   859  					Details:           errState.Err.Error(),
   860  					VersionInfo:       errState.Version,
   861  				}
   862  			}
   863  			ret = append(ret, config)
   864  		}
   865  	}
   866  	return ret
   867  }
   868  
   869  func (a *authority) close() {
   870  	a.xdsClientSerializerClose()
   871  	<-a.xdsClientSerializer.Done()
   872  	if a.logger.V(2) {
   873  		a.logger.Infof("Closed")
   874  	}
   875  }
   876  
   877  func serviceStatusToProto(serviceStatus xdsresource.ServiceStatus) v3adminpb.ClientResourceStatus {
   878  	switch serviceStatus {
   879  	case xdsresource.ServiceStatusUnknown:
   880  		return v3adminpb.ClientResourceStatus_UNKNOWN
   881  	case xdsresource.ServiceStatusRequested:
   882  		return v3adminpb.ClientResourceStatus_REQUESTED
   883  	case xdsresource.ServiceStatusNotExist:
   884  		return v3adminpb.ClientResourceStatus_DOES_NOT_EXIST
   885  	case xdsresource.ServiceStatusACKed:
   886  		return v3adminpb.ClientResourceStatus_ACKED
   887  	case xdsresource.ServiceStatusNACKed:
   888  		return v3adminpb.ClientResourceStatus_NACKED
   889  	default:
   890  		return v3adminpb.ClientResourceStatus_UNKNOWN
   891  	}
   892  }