google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/xdsclient.go (about)

     1  /*
     2   *
     3   * Copyright 2025 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  // Package xdsclient provides an xDS (* Discovery Service) client.
    20  //
    21  // It allows applications to:
    22  //   - Create xDS client instances with in-memory configurations.
//   - Register watches for named resources (e.g. listeners, routes, or
//     clusters).
//   - Receive resources via an ADS (Aggregated Discovery Service) stream.
    27  //
    28  // This enables applications to dynamically discover and configure resources
    29  // such as listeners, routes, clusters, and endpoints from an xDS management
    30  // server.
    31  package xdsclient
    32  
    33  import (
    34  	"context"
    35  	"errors"
    36  	"fmt"
    37  	"sync"
    38  	"sync/atomic"
    39  	"time"
    40  
    41  	"google.golang.org/grpc/internal/grpclog"
    42  	"google.golang.org/grpc/xds/internal/clients"
    43  	clientsinternal "google.golang.org/grpc/xds/internal/clients/internal"
    44  	"google.golang.org/grpc/xds/internal/clients/internal/backoff"
    45  	"google.golang.org/grpc/xds/internal/clients/internal/syncutil"
    46  	xdsclientinternal "google.golang.org/grpc/xds/internal/clients/xdsclient/internal"
    47  	"google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
    48  	"google.golang.org/grpc/xds/internal/clients/xdsclient/metrics"
    49  	"google.golang.org/protobuf/proto"
    50  
    51  	v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3"
    52  )
    53  
    54  const (
    55  	defaultWatchExpiryTimeout = 15 * time.Second
    56  	name                      = "xds-client"
    57  )
    58  
    59  var (
    60  	defaultExponentialBackoff = backoff.DefaultExponential.Backoff
    61  )
    62  
    63  func init() {
    64  	xdsclientinternal.WatchExpiryTimeout = defaultWatchExpiryTimeout
    65  	xdsclientinternal.StreamBackoff = defaultExponentialBackoff
    66  	xdsclientinternal.ResourceWatchStateForTesting = resourceWatchStateForTesting
    67  }
    68  
// XDSClient is a client which queries a set of discovery APIs (collectively
// termed as xDS) on a remote management server, to discover
// various dynamic resources.
//
// Instances are created with New and must be released with Close.
type XDSClient struct {
	// The following fields are initialized at creation time and are read-only
	// after that, and therefore can be accessed without a mutex. The one
	// exception is watchExpiryTimeout, which tests may overwrite through
	// SetWatchExpiryTimeoutForTesting.
	done               *syncutil.Event              // Fired when the client is closed.
	topLevelAuthority  *authority                   // The top-level authority, used only for old-style names without an authority.
	authorities        map[string]*authority        // Map from authority names in config to authority struct.
	config             *Config                      // Complete xDS client configuration.
	watchExpiryTimeout time.Duration                // Expiry timeout for ADS watch.
	backoff            func(int) time.Duration      // Backoff for ADS and LRS stream failures.
	transportBuilder   clients.TransportBuilder     // Builder to create transports to xDS server.
	resourceTypes      map[string]ResourceType      // Registry of resource types, for parsing incoming ADS responses.
	serializer         *syncutil.CallbackSerializer // Serializer for invoking resource watcher callbacks.
	serializerClose    func()                       // Function to close the serializer.
	logger             *grpclog.PrefixLogger        // Logger obtained from prefixLogger at the end of newClient.
	target             string                       // Target string given to newClient; forwarded to every authority.
	metricsReporter    clients.MetricsReporter      // Copied from the config; nil-checked before use, so it may be nil.

	// The XDSClient owns a bunch of channels to individual xDS servers
	// specified in the xDS client configuration. Authorities acquire references
	// to these channels based on server configs within the authority config.
	// The XDSClient maintains a list of interested authorities for each of
	// these channels, and forwards updates from the channels to each of these
	// authorities.
	//
	// Once all references to a channel are dropped, the channel is closed.
	channelsMu        sync.Mutex
	xdsActiveChannels map[ServerConfig]*channelState // Map from server config to in-use xdsChannels. Set to nil by Close.
}
   100  
   101  // New returns a new xDS Client configured with the provided config.
   102  func New(config Config) (*XDSClient, error) {
   103  	switch {
   104  	case config.Node.ID == "":
   105  		return nil, errors.New("xdsclient: node ID is empty")
   106  	case config.ResourceTypes == nil:
   107  		return nil, errors.New("xdsclient: resource types map is nil")
   108  	case config.TransportBuilder == nil:
   109  		return nil, errors.New("xdsclient: transport builder is nil")
   110  	case config.Authorities == nil && config.Servers == nil:
   111  		return nil, errors.New("xdsclient: no servers or authorities specified")
   112  	}
   113  
   114  	client, err := newClient(&config, name)
   115  	if err != nil {
   116  		return nil, err
   117  	}
   118  	return client, nil
   119  }
   120  
// SetWatchExpiryTimeoutForTesting overrides the default watch expiry timeout
// with the provided timeout value.
//
// The field is written without any synchronization, and it is read when a new
// xdsChannel is created in getOrCreateChannel, so only channels created after
// this call pick up the new value.
func (c *XDSClient) SetWatchExpiryTimeoutForTesting(watchExpiryTimeout time.Duration) {
	c.watchExpiryTimeout = watchExpiryTimeout
}
   126  
   127  // newClient returns a new XDSClient with the given config.
   128  func newClient(config *Config, target string) (*XDSClient, error) {
   129  	ctx, cancel := context.WithCancel(context.Background())
   130  	c := &XDSClient{
   131  		target:             target,
   132  		done:               syncutil.NewEvent(),
   133  		authorities:        make(map[string]*authority),
   134  		config:             config,
   135  		watchExpiryTimeout: xdsclientinternal.WatchExpiryTimeout,
   136  		backoff:            xdsclientinternal.StreamBackoff,
   137  		serializer:         syncutil.NewCallbackSerializer(ctx),
   138  		serializerClose:    cancel,
   139  		transportBuilder:   config.TransportBuilder,
   140  		resourceTypes:      config.ResourceTypes,
   141  		xdsActiveChannels:  make(map[ServerConfig]*channelState),
   142  		metricsReporter:    config.MetricsReporter,
   143  	}
   144  
   145  	for name, cfg := range config.Authorities {
   146  		// If server configs are specified in the authorities map, use that.
   147  		// Else, use the top-level server configs.
   148  		serverCfg := config.Servers
   149  		if len(cfg.XDSServers) >= 1 {
   150  			serverCfg = cfg.XDSServers
   151  		}
   152  		c.authorities[name] = newAuthority(authorityBuildOptions{
   153  			serverConfigs:    serverCfg,
   154  			name:             name,
   155  			serializer:       c.serializer,
   156  			getChannelForADS: c.getChannelForADS,
   157  			logPrefix:        clientPrefix(c),
   158  			target:           target,
   159  			metricsReporter:  c.metricsReporter,
   160  		})
   161  	}
   162  	c.topLevelAuthority = newAuthority(authorityBuildOptions{
   163  		serverConfigs:    config.Servers,
   164  		name:             "",
   165  		serializer:       c.serializer,
   166  		getChannelForADS: c.getChannelForADS,
   167  		logPrefix:        clientPrefix(c),
   168  		target:           target,
   169  		metricsReporter:  c.metricsReporter,
   170  	})
   171  	c.logger = prefixLogger(c)
   172  
   173  	return c, nil
   174  }
   175  
   176  // Close closes the xDS client and releases all resources.
   177  func (c *XDSClient) Close() {
   178  	if c.done.HasFired() {
   179  		return
   180  	}
   181  	c.done.Fire()
   182  
   183  	c.topLevelAuthority.close()
   184  	for _, a := range c.authorities {
   185  		a.close()
   186  	}
   187  
   188  	// Channel close cannot be invoked with the lock held, because it can race
   189  	// with stream failure happening at the same time. The latter will callback
   190  	// into the XDSClient and will attempt to grab the lock. This will result
   191  	// in a deadlock. So instead, we release the lock and wait for all active
   192  	// channels to be closed.
   193  	var channelsToClose []*xdsChannel
   194  	c.channelsMu.Lock()
   195  	for _, cs := range c.xdsActiveChannels {
   196  		channelsToClose = append(channelsToClose, cs.channel)
   197  	}
   198  	c.xdsActiveChannels = nil
   199  	c.channelsMu.Unlock()
   200  	for _, c := range channelsToClose {
   201  		c.close()
   202  	}
   203  
   204  	c.serializerClose()
   205  	<-c.serializer.Done()
   206  
   207  	c.logger.Infof("Shutdown")
   208  }
   209  
   210  // getChannelForADS returns an xdsChannel for the given server configuration.
   211  //
   212  // If an xdsChannel exists for the given server configuration, it is returned.
   213  // Else a new one is created. It also ensures that the calling authority is
   214  // added to the set of interested authorities for the returned channel.
   215  //
   216  // It returns the xdsChannel and a function to release the calling authority's
   217  // reference on the channel. The caller must call the cancel function when it is
   218  // no longer interested in this channel.
   219  //
   220  // A non-nil error is returned if an xdsChannel was not created.
   221  func (c *XDSClient) getChannelForADS(serverConfig *ServerConfig, callingAuthority *authority) (*xdsChannel, func(), error) {
   222  	if c.done.HasFired() {
   223  		return nil, nil, errors.New("xds: the xDS client is closed")
   224  	}
   225  
   226  	initLocked := func(s *channelState) {
   227  		if c.logger.V(2) {
   228  			c.logger.Infof("Adding authority %q to the set of interested authorities for channel [%p]", callingAuthority.name, s.channel)
   229  		}
   230  		s.interestedAuthorities[callingAuthority] = true
   231  	}
   232  	deInitLocked := func(s *channelState) {
   233  		if c.logger.V(2) {
   234  			c.logger.Infof("Removing authority %q from the set of interested authorities for channel [%p]", callingAuthority.name, s.channel)
   235  		}
   236  		delete(s.interestedAuthorities, callingAuthority)
   237  	}
   238  
   239  	return c.getOrCreateChannel(serverConfig, initLocked, deInitLocked)
   240  }
   241  
   242  // getOrCreateChannel returns an xdsChannel for the given server configuration.
   243  //
   244  // If an active xdsChannel exists for the given server configuration, it is
   245  // returned. If an idle xdsChannel exists for the given server configuration, it
   246  // is revived from the idle cache and returned. Else a new one is created.
   247  //
   248  // The initLocked function runs some initialization logic before the channel is
   249  // returned. This includes adding the calling authority to the set of interested
   250  // authorities for the channel or incrementing the count of the number of LRS
   251  // calls on the channel.
   252  //
   253  // The deInitLocked function runs some cleanup logic when the returned cleanup
   254  // function is called. This involves removing the calling authority from the set
   255  // of interested authorities for the channel or decrementing the count of the
   256  // number of LRS calls on the channel.
   257  //
   258  // Both initLocked and deInitLocked are called with the c.channelsMu held.
   259  //
   260  // Returns the xdsChannel and a cleanup function to be invoked when the channel
   261  // is no longer required. A non-nil error is returned if an xdsChannel was not
   262  // created.
   263  func (c *XDSClient) getOrCreateChannel(serverConfig *ServerConfig, initLocked, deInitLocked func(*channelState)) (*xdsChannel, func(), error) {
   264  	c.channelsMu.Lock()
   265  	defer c.channelsMu.Unlock()
   266  
   267  	if c.logger.V(2) {
   268  		c.logger.Infof("Received request for a reference to an xdsChannel for server config %q", serverConfig)
   269  	}
   270  
   271  	// Use an existing channel, if one exists for this server config.
   272  	if st, ok := c.xdsActiveChannels[*serverConfig]; ok {
   273  		if c.logger.V(2) {
   274  			c.logger.Infof("Reusing an existing xdsChannel for server config %q", serverConfig)
   275  		}
   276  		initLocked(st)
   277  		return st.channel, c.releaseChannel(serverConfig, st, deInitLocked), nil
   278  	}
   279  
   280  	if c.logger.V(2) {
   281  		c.logger.Infof("Creating a new xdsChannel for server config %q", serverConfig)
   282  	}
   283  
   284  	// Create a new transport and create a new xdsChannel, and add it to the
   285  	// map of xdsChannels.
   286  	tr, err := c.transportBuilder.Build(serverConfig.ServerIdentifier)
   287  	if err != nil {
   288  		return nil, func() {}, fmt.Errorf("xds: failed to create transport for server config %v: %v", serverConfig, err)
   289  	}
   290  	state := &channelState{
   291  		parent:                c,
   292  		serverConfig:          serverConfig,
   293  		interestedAuthorities: make(map[*authority]bool),
   294  	}
   295  	channel, err := newXDSChannel(xdsChannelOpts{
   296  		transport:          tr,
   297  		serverConfig:       serverConfig,
   298  		clientConfig:       c.config,
   299  		eventHandler:       state,
   300  		backoff:            c.backoff,
   301  		watchExpiryTimeout: c.watchExpiryTimeout,
   302  		logPrefix:          clientPrefix(c),
   303  	})
   304  	if err != nil {
   305  		return nil, func() {}, fmt.Errorf("xds: failed to create a new channel for server config %v: %v", serverConfig, err)
   306  	}
   307  	state.channel = channel
   308  	c.xdsActiveChannels[*serverConfig] = state
   309  	initLocked(state)
   310  	return state.channel, c.releaseChannel(serverConfig, state, deInitLocked), nil
   311  }
   312  
   313  // releaseChannel is a function that is called when a reference to an xdsChannel
   314  // needs to be released. It handles closing channels with no active references.
   315  //
   316  // The function takes the following parameters:
   317  // - serverConfig: the server configuration for the xdsChannel
   318  // - state: the state of the xdsChannel
   319  // - deInitLocked: a function that performs any necessary cleanup for the xdsChannel
   320  //
   321  // The function returns another function that can be called to release the
   322  // reference to the xdsChannel. This returned function is idempotent, meaning
   323  // it can be called multiple times without any additional effect.
   324  func (c *XDSClient) releaseChannel(serverConfig *ServerConfig, state *channelState, deInitLocked func(*channelState)) func() {
   325  	return sync.OnceFunc(func() {
   326  		c.channelsMu.Lock()
   327  
   328  		if c.logger.V(2) {
   329  			c.logger.Infof("Received request to release a reference to an xdsChannel for server config %q", serverConfig)
   330  		}
   331  		deInitLocked(state)
   332  
   333  		// The channel has active users. Do nothing and return.
   334  		if len(state.interestedAuthorities) != 0 {
   335  			if c.logger.V(2) {
   336  				c.logger.Infof("xdsChannel %p has other active references", state.channel)
   337  			}
   338  			c.channelsMu.Unlock()
   339  			return
   340  		}
   341  
   342  		delete(c.xdsActiveChannels, *serverConfig)
   343  		if c.logger.V(2) {
   344  			c.logger.Infof("Closing xdsChannel [%p] for server config %s", state.channel, serverConfig)
   345  		}
   346  		channelToClose := state.channel
   347  		c.channelsMu.Unlock()
   348  
   349  		channelToClose.close()
   350  	})
   351  }
   352  
   353  // DumpResources returns the status and contents of all xDS resources being
   354  // watched by the xDS client.
   355  func (c *XDSClient) DumpResources() ([]byte, error) {
   356  	retCfg := c.topLevelAuthority.dumpResources()
   357  	for _, a := range c.authorities {
   358  		retCfg = append(retCfg, a.dumpResources()...)
   359  	}
   360  
   361  	nodeProto := clientsinternal.NodeProto(c.config.Node)
   362  	nodeProto.ClientFeatures = []string{clientFeatureNoOverprovisioning, clientFeatureResourceWrapper}
   363  	resp := &v3statuspb.ClientStatusResponse{}
   364  	resp.Config = append(resp.Config, &v3statuspb.ClientConfig{
   365  		Node:              nodeProto,
   366  		GenericXdsConfigs: retCfg,
   367  	})
   368  	return proto.Marshal(resp)
   369  }
   370  
// channelState represents the state of an xDS channel. It tracks the
// authorities interested in the channel and the server configuration used for
// the channel.
//
// It receives callbacks for events on the underlying ADS stream and invokes
// corresponding callbacks on interested authorities.
type channelState struct {
	parent       *XDSClient    // Client that owns this channel; provides channelsMu, done and metricsReporter.
	serverConfig *ServerConfig // Server configuration the channel was created for.

	// Access to the following fields should be protected by the parent's
	// channelsMu.
	channel               *xdsChannel         // Assigned in getOrCreateChannel once the channel has been created.
	interestedAuthorities map[*authority]bool // Used as a set: authorities currently holding a reference to the channel.
}
   386  
   387  func (cs *channelState) adsStreamFailure(err error) {
   388  	if cs.parent.done.HasFired() {
   389  		return
   390  	}
   391  
   392  	if xdsresource.ErrType(err) != xdsresource.ErrTypeStreamFailedAfterRecv && cs.parent.metricsReporter != nil {
   393  		cs.parent.metricsReporter.ReportMetric(&metrics.ServerFailure{
   394  			ServerURI: cs.serverConfig.ServerIdentifier.ServerURI,
   395  		})
   396  	}
   397  
   398  	cs.parent.channelsMu.Lock()
   399  	defer cs.parent.channelsMu.Unlock()
   400  	for authority := range cs.interestedAuthorities {
   401  		authority.adsStreamFailure(cs.serverConfig, err)
   402  	}
   403  }
   404  
   405  func (cs *channelState) adsResourceUpdate(typ ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) {
   406  	if cs.parent.done.HasFired() {
   407  		return
   408  	}
   409  
   410  	cs.parent.channelsMu.Lock()
   411  	defer cs.parent.channelsMu.Unlock()
   412  
   413  	if len(cs.interestedAuthorities) == 0 {
   414  		onDone()
   415  		return
   416  	}
   417  
   418  	authorityCnt := new(atomic.Int64)
   419  	authorityCnt.Add(int64(len(cs.interestedAuthorities)))
   420  	done := func() {
   421  		if authorityCnt.Add(-1) == 0 {
   422  			onDone()
   423  		}
   424  	}
   425  	for authority := range cs.interestedAuthorities {
   426  		authority.adsResourceUpdate(cs.serverConfig, typ, updates, md, done)
   427  	}
   428  }
   429  
   430  func (cs *channelState) adsResourceDoesNotExist(typ ResourceType, resourceName string) {
   431  	if cs.parent.done.HasFired() {
   432  		return
   433  	}
   434  
   435  	cs.parent.channelsMu.Lock()
   436  	defer cs.parent.channelsMu.Unlock()
   437  	for authority := range cs.interestedAuthorities {
   438  		authority.adsResourceDoesNotExist(typ, resourceName)
   439  	}
   440  }
   441  
   442  func resourceWatchStateForTesting(c *XDSClient, rType ResourceType, resourceName string) (xdsresource.ResourceWatchState, error) {
   443  	c.channelsMu.Lock()
   444  	defer c.channelsMu.Unlock()
   445  
   446  	for _, state := range c.xdsActiveChannels {
   447  		if st, err := state.channel.ads.adsResourceWatchStateForTesting(rType, resourceName); err == nil {
   448  			return st, nil
   449  		}
   450  	}
   451  	return xdsresource.ResourceWatchState{}, fmt.Errorf("unable to find watch state for resource type %q and name %q", rType.TypeName, resourceName)
   452  }