google.golang.org/grpc@v1.72.2/xds/internal/balancer/cdsbalancer/cdsbalancer.go (about)

     1  /*
     2   * Copyright 2019 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package cdsbalancer implements a balancer to handle CDS responses.
    18  package cdsbalancer
    19  
    20  import (
    21  	"context"
    22  	"crypto/x509"
    23  	"encoding/json"
    24  	"fmt"
    25  	"sync/atomic"
    26  	"unsafe"
    27  
    28  	"google.golang.org/grpc/balancer"
    29  	"google.golang.org/grpc/balancer/base"
    30  	"google.golang.org/grpc/connectivity"
    31  	"google.golang.org/grpc/credentials"
    32  	"google.golang.org/grpc/credentials/tls/certprovider"
    33  	"google.golang.org/grpc/internal/balancer/nop"
    34  	xdsinternal "google.golang.org/grpc/internal/credentials/xds"
    35  	"google.golang.org/grpc/internal/grpclog"
    36  	"google.golang.org/grpc/internal/grpcsync"
    37  	"google.golang.org/grpc/internal/pretty"
    38  	"google.golang.org/grpc/resolver"
    39  	"google.golang.org/grpc/serviceconfig"
    40  	"google.golang.org/grpc/xds/internal/balancer/clusterresolver"
    41  	"google.golang.org/grpc/xds/internal/xdsclient"
    42  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
    43  )
    44  
const (
	// cdsName is the name reported by the builder's Name method for this LB
	// policy.
	cdsName                  = "cds_experimental"
	// aggregateClusterMaxDepth is the recursion depth at which traversal of
	// an aggregate cluster graph is aborted with errExceedsMaxDepth.
	aggregateClusterMaxDepth = 16
)
    49  
    50  var (
    51  	errBalancerClosed  = fmt.Errorf("cds_experimental LB policy is closed")
    52  	errExceedsMaxDepth = fmt.Errorf("aggregate cluster graph exceeds max depth (%d)", aggregateClusterMaxDepth)
    53  
    54  	// newChildBalancer is a helper function to build a new cluster_resolver
    55  	// balancer and will be overridden in unittests.
    56  	newChildBalancer = func(cc balancer.ClientConn, opts balancer.BuildOptions) (balancer.Balancer, error) {
    57  		builder := balancer.Get(clusterresolver.Name)
    58  		if builder == nil {
    59  			return nil, fmt.Errorf("xds: no balancer builder with name %v", clusterresolver.Name)
    60  		}
    61  		// We directly pass the parent clientConn to the underlying
    62  		// cluster_resolver balancer because the cdsBalancer does not deal with
    63  		// subConns.
    64  		return builder.Build(cc, opts), nil
    65  	}
    66  	buildProvider = buildProviderFunc
    67  
    68  	// x509SystemCertPoolFunc is used for mocking the system cert pool for
    69  	// tests.
    70  	x509SystemCertPoolFunc = x509.SystemCertPool
    71  )
    72  
// init registers the CDS balancer builder via balancer.Register, making the
// policy available under the name returned by bb.Name.
func init() {
	balancer.Register(bb{})
}
    76  
// bb implements the balancer.Builder interface to help build a cdsBalancer.
// It also implements the balancer.ConfigParser interface to help parse the
// JSON service config, to be passed to the cdsBalancer. It carries no state.
type bb struct{}
    81  
    82  // Build creates a new CDS balancer with the ClientConn.
    83  func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
    84  	builder := balancer.Get(clusterresolver.Name)
    85  	if builder == nil {
    86  		// Shouldn't happen, registered through imported Cluster Resolver,
    87  		// defensive programming.
    88  		logger.Errorf("%q LB policy is needed but not registered", clusterresolver.Name)
    89  		return nop.NewBalancer(cc, fmt.Errorf("%q LB policy is needed but not registered", clusterresolver.Name))
    90  	}
    91  	parser, ok := builder.(balancer.ConfigParser)
    92  	if !ok {
    93  		// Shouldn't happen, imported Cluster Resolver builder has this method.
    94  		logger.Errorf("%q LB policy does not implement a config parser", clusterresolver.Name)
    95  		return nop.NewBalancer(cc, fmt.Errorf("%q LB policy does not implement a config parser", clusterresolver.Name))
    96  	}
    97  
    98  	ctx, cancel := context.WithCancel(context.Background())
    99  	hi := xdsinternal.NewHandshakeInfo(nil, nil, nil, false)
   100  	xdsHIPtr := unsafe.Pointer(hi)
   101  	b := &cdsBalancer{
   102  		bOpts:             opts,
   103  		childConfigParser: parser,
   104  		serializer:        grpcsync.NewCallbackSerializer(ctx),
   105  		serializerCancel:  cancel,
   106  		xdsHIPtr:          &xdsHIPtr,
   107  		watchers:          make(map[string]*watcherState),
   108  	}
   109  	b.ccw = &ccWrapper{
   110  		ClientConn: cc,
   111  		xdsHIPtr:   b.xdsHIPtr,
   112  	}
   113  	b.logger = prefixLogger(b)
   114  	b.logger.Infof("Created")
   115  
   116  	var creds credentials.TransportCredentials
   117  	switch {
   118  	case opts.DialCreds != nil:
   119  		creds = opts.DialCreds
   120  	case opts.CredsBundle != nil:
   121  		creds = opts.CredsBundle.TransportCredentials()
   122  	}
   123  	if xc, ok := creds.(interface{ UsesXDS() bool }); ok && xc.UsesXDS() {
   124  		b.xdsCredsInUse = true
   125  	}
   126  	b.logger.Infof("xDS credentials in use: %v", b.xdsCredsInUse)
   127  	return b
   128  }
   129  
// Name returns the name of balancers built by this builder, which is the
// cdsName constant.
func (bb) Name() string {
	return cdsName
}
   134  
// lbConfig represents the loadBalancingConfig section of the service config
// for the cdsBalancer.
type lbConfig struct {
	serviceconfig.LoadBalancingConfig
	// ClusterName is the name of the top-level cluster to watch. It is
	// decoded from the "Cluster" key of the JSON config.
	ClusterName string `json:"Cluster"`
}
   141  
   142  // ParseConfig parses the JSON load balancer config provided into an
   143  // internal form or returns an error if the config is invalid.
   144  func (bb) ParseConfig(c json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
   145  	var cfg lbConfig
   146  	if err := json.Unmarshal(c, &cfg); err != nil {
   147  		return nil, fmt.Errorf("xds: unable to unmarshal lbconfig: %s, error: %v", string(c), err)
   148  	}
   149  	return &cfg, nil
   150  }
   151  
// cdsBalancer implements a CDS based LB policy. It instantiates a
// cluster_resolver balancer to further resolve the serviceName received from
// CDS, into localities and endpoints. Implements the balancer.Balancer
// interface which is exposed to gRPC and implements the balancer.ClientConn
// interface which is exposed to the cluster_resolver balancer.
type cdsBalancer struct {
	// The following fields are initialized at build time and are either
	// read-only after that or provide their own synchronization, and therefore
	// do not need to be guarded by a mutex.
	ccw               *ccWrapper            // ClientConn interface passed to child LB.
	bOpts             balancer.BuildOptions // BuildOptions passed to child LB.
	childConfigParser balancer.ConfigParser // Config parser for cluster_resolver LB policy.
	logger            *grpclog.PrefixLogger // Prefix logger for all logging.
	xdsCredsInUse     bool                  // True if the configured transport credentials report UsesXDS().

	// xdsHIPtr points to the current HandshakeInfo. The same pointer is shared
	// with the ccWrapper.
	xdsHIPtr *unsafe.Pointer // Accessed atomically.

	// The serializer and its cancel func are initialized at build time, and the
	// rest of the fields here are only accessed from serializer callbacks (or
	// from balancer.Balancer methods, which themselves are guaranteed to be
	// mutually exclusive) and hence do not need to be guarded by a mutex.
	serializer       *grpcsync.CallbackSerializer // Serializes updates from gRPC and xDS client.
	serializerCancel context.CancelFunc           // Stops the above serializer.
	childLB          balancer.Balancer            // Child policy, built upon resolution of the cluster graph.
	xdsClient        xdsclient.XDSClient          // xDS client to watch Cluster resources.
	watchers         map[string]*watcherState     // Set of watchers and associated state, keyed by cluster name.
	lbCfg            *lbConfig                    // Current load balancing configuration.

	// The certificate providers are cached here so that they can be closed
	// when a new provider is to be created, or when the balancer is closed.
	cachedRoot     certprovider.Provider
	cachedIdentity certprovider.Provider
}
   185  
   186  // handleSecurityConfig processes the security configuration received from the
   187  // management server, creates appropriate certificate provider plugins, and
   188  // updates the HandshakeInfo which is added as an address attribute in
   189  // NewSubConn() calls.
   190  //
   191  // Only executed in the context of a serializer callback.
   192  func (b *cdsBalancer) handleSecurityConfig(config *xdsresource.SecurityConfig) error {
   193  	// If xdsCredentials are not in use, i.e, the user did not want to get
   194  	// security configuration from an xDS server, we should not be acting on the
   195  	// received security config here. Doing so poses a security threat.
   196  	if !b.xdsCredsInUse {
   197  		return nil
   198  	}
   199  	var xdsHI *xdsinternal.HandshakeInfo
   200  
   201  	// Security config being nil is a valid case where the management server has
   202  	// not sent any security configuration. The xdsCredentials implementation
   203  	// handles this by delegating to its fallback credentials.
   204  	if config == nil {
   205  		// We need to explicitly set the fields to nil here since this might be
   206  		// a case of switching from a good security configuration to an empty
   207  		// one where fallback credentials are to be used.
   208  		xdsHI = xdsinternal.NewHandshakeInfo(nil, nil, nil, false)
   209  		atomic.StorePointer(b.xdsHIPtr, unsafe.Pointer(xdsHI))
   210  		return nil
   211  
   212  	}
   213  
   214  	// A root provider is required whether we are using TLS or mTLS.
   215  	cpc := b.xdsClient.BootstrapConfig().CertProviderConfigs()
   216  	var rootProvider certprovider.Provider
   217  	if config.UseSystemRootCerts {
   218  		rootProvider = systemRootCertsProvider{}
   219  	} else {
   220  		rp, err := buildProvider(cpc, config.RootInstanceName, config.RootCertName, false, true)
   221  		if err != nil {
   222  			return err
   223  		}
   224  		rootProvider = rp
   225  	}
   226  
   227  	// The identity provider is only present when using mTLS.
   228  	var identityProvider certprovider.Provider
   229  	if name, cert := config.IdentityInstanceName, config.IdentityCertName; name != "" {
   230  		var err error
   231  		identityProvider, err = buildProvider(cpc, name, cert, true, false)
   232  		if err != nil {
   233  			return err
   234  		}
   235  	}
   236  
   237  	// Close the old providers and cache the new ones.
   238  	if b.cachedRoot != nil {
   239  		b.cachedRoot.Close()
   240  	}
   241  	if b.cachedIdentity != nil {
   242  		b.cachedIdentity.Close()
   243  	}
   244  	b.cachedRoot = rootProvider
   245  	b.cachedIdentity = identityProvider
   246  	xdsHI = xdsinternal.NewHandshakeInfo(rootProvider, identityProvider, config.SubjectAltNameMatchers, false)
   247  	atomic.StorePointer(b.xdsHIPtr, unsafe.Pointer(xdsHI))
   248  	return nil
   249  }
   250  
   251  func buildProviderFunc(configs map[string]*certprovider.BuildableConfig, instanceName, certName string, wantIdentity, wantRoot bool) (certprovider.Provider, error) {
   252  	cfg, ok := configs[instanceName]
   253  	if !ok {
   254  		// Defensive programming. If a resource received from the management
   255  		// server contains a certificate provider instance name that is not
   256  		// found in the bootstrap, the resource is NACKed by the xDS client.
   257  		return nil, fmt.Errorf("certificate provider instance %q not found in bootstrap file", instanceName)
   258  	}
   259  	provider, err := cfg.Build(certprovider.BuildOptions{
   260  		CertName:     certName,
   261  		WantIdentity: wantIdentity,
   262  		WantRoot:     wantRoot,
   263  	})
   264  	if err != nil {
   265  		// This error is not expected since the bootstrap process parses the
   266  		// config and makes sure that it is acceptable to the plugin. Still, it
   267  		// is possible that the plugin parses the config successfully, but its
   268  		// Build() method errors out.
   269  		return nil, fmt.Errorf("xds: failed to get security plugin instance (%+v): %v", cfg, err)
   270  	}
   271  	return provider, nil
   272  }
   273  
   274  // A convenience method to create a watcher for cluster `name`. It also
   275  // registers the watch with the xDS client, and adds the newly created watcher
   276  // to the list of watchers maintained by the LB policy.
   277  func (b *cdsBalancer) createAndAddWatcherForCluster(name string) {
   278  	w := &clusterWatcher{
   279  		name:   name,
   280  		parent: b,
   281  	}
   282  	ws := &watcherState{
   283  		watcher:     w,
   284  		cancelWatch: xdsresource.WatchCluster(b.xdsClient, name, w),
   285  	}
   286  	b.watchers[name] = ws
   287  }
   288  
   289  // UpdateClientConnState receives the serviceConfig (which contains the
   290  // clusterName to watch for in CDS) and the xdsClient object from the
   291  // xdsResolver.
   292  func (b *cdsBalancer) UpdateClientConnState(state balancer.ClientConnState) error {
   293  	if b.xdsClient == nil {
   294  		c := xdsclient.FromResolverState(state.ResolverState)
   295  		if c == nil {
   296  			b.logger.Warningf("Received balancer config with no xDS client")
   297  			return balancer.ErrBadResolverState
   298  		}
   299  		b.xdsClient = c
   300  	}
   301  	b.logger.Infof("Received balancer config update: %s", pretty.ToJSON(state.BalancerConfig))
   302  
   303  	// The errors checked here should ideally never happen because the
   304  	// ServiceConfig in this case is prepared by the xdsResolver and is not
   305  	// something that is received on the wire.
   306  	lbCfg, ok := state.BalancerConfig.(*lbConfig)
   307  	if !ok {
   308  		b.logger.Warningf("Received unexpected balancer config type: %T", state.BalancerConfig)
   309  		return balancer.ErrBadResolverState
   310  	}
   311  	if lbCfg.ClusterName == "" {
   312  		b.logger.Warningf("Received balancer config with no cluster name")
   313  		return balancer.ErrBadResolverState
   314  	}
   315  
   316  	// Do nothing and return early if configuration has not changed.
   317  	if b.lbCfg != nil && b.lbCfg.ClusterName == lbCfg.ClusterName {
   318  		return nil
   319  	}
   320  	b.lbCfg = lbCfg
   321  
   322  	// Handle the update in a blocking fashion.
   323  	errCh := make(chan error, 1)
   324  	callback := func(context.Context) {
   325  		// A config update with a changed top-level cluster name means that none
   326  		// of our old watchers make any sense any more.
   327  		b.closeAllWatchers()
   328  
   329  		// Create a new watcher for the top-level cluster. Upon resolution, it
   330  		// could end up creating more watchers if turns out to be an aggregate
   331  		// cluster.
   332  		b.createAndAddWatcherForCluster(lbCfg.ClusterName)
   333  		errCh <- nil
   334  	}
   335  	onFailure := func() {
   336  		// The call to Schedule returns false *only* if the serializer has been
   337  		// closed, which happens only when we receive an update after close.
   338  		errCh <- errBalancerClosed
   339  	}
   340  	b.serializer.ScheduleOr(callback, onFailure)
   341  	return <-errCh
   342  }
   343  
   344  // ResolverError handles errors reported by the xdsResolver.
   345  func (b *cdsBalancer) ResolverError(err error) {
   346  	b.serializer.TrySchedule(func(context.Context) {
   347  		// Missing Listener or RouteConfiguration on the management server
   348  		// results in a 'resource not found' error from the xDS resolver. In
   349  		// these cases, we should stap watching all of the current clusters
   350  		// being watched.
   351  		if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound {
   352  			b.closeAllWatchers()
   353  			b.closeChildPolicyAndReportTF(err)
   354  			return
   355  		}
   356  		var root string
   357  		if b.lbCfg != nil {
   358  			root = b.lbCfg.ClusterName
   359  		}
   360  		b.onClusterError(root, err)
   361  	})
   362  }
   363  
// UpdateSubConnState handles subConn updates from gRPC. This balancer does not
// expect to receive them, so the call is only logged as an error.
func (b *cdsBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}
   368  
   369  // Closes all registered cluster watchers and removes them from the internal map.
   370  //
   371  // Only executed in the context of a serializer callback.
   372  func (b *cdsBalancer) closeAllWatchers() {
   373  	for name, state := range b.watchers {
   374  		state.cancelWatch()
   375  		delete(b.watchers, name)
   376  	}
   377  }
   378  
   379  // closeChildPolicyAndReportTF closes the child policy, if it exists, and
   380  // updates the connectivity state of the channel to TransientFailure with an
   381  // error picker.
   382  //
   383  // Only executed in the context of a serializer callback.
   384  func (b *cdsBalancer) closeChildPolicyAndReportTF(err error) {
   385  	if b.childLB != nil {
   386  		b.childLB.Close()
   387  		b.childLB = nil
   388  	}
   389  	b.ccw.UpdateState(balancer.State{
   390  		ConnectivityState: connectivity.TransientFailure,
   391  		Picker:            base.NewErrPicker(err),
   392  	})
   393  }
   394  
   395  // Close cancels the CDS watch, closes the child policy and closes the
   396  // cdsBalancer.
   397  func (b *cdsBalancer) Close() {
   398  	b.serializer.TrySchedule(func(context.Context) {
   399  		b.closeAllWatchers()
   400  
   401  		if b.childLB != nil {
   402  			b.childLB.Close()
   403  			b.childLB = nil
   404  		}
   405  		if b.cachedRoot != nil {
   406  			b.cachedRoot.Close()
   407  		}
   408  		if b.cachedIdentity != nil {
   409  			b.cachedIdentity.Close()
   410  		}
   411  		b.logger.Infof("Shutdown")
   412  	})
   413  	b.serializerCancel()
   414  	<-b.serializer.Done()
   415  }
   416  
   417  func (b *cdsBalancer) ExitIdle() {
   418  	b.serializer.TrySchedule(func(context.Context) {
   419  		if b.childLB == nil {
   420  			b.logger.Warningf("Received ExitIdle with no child policy")
   421  			return
   422  		}
   423  		// This implementation assumes the child balancer supports
   424  		// ExitIdle (but still checks for the interface's existence to
   425  		// avoid a panic if not).  If the child does not, no subconns
   426  		// will be connected.
   427  		if ei, ok := b.childLB.(balancer.ExitIdler); ok {
   428  			ei.ExitIdle()
   429  		}
   430  	})
   431  }
   432  
   433  // Node ID needs to be manually added to errors generated in the following
   434  // scenarios:
   435  //   - resource-does-not-exist: since the xDS watch API uses a separate callback
   436  //     instead of returning an error value. TODO(gRFC A88): Once A88 is
   437  //     implemented, the xDS client will be able to add the node ID to
   438  //     resource-does-not-exist errors as well, and we can get rid of this
   439  //     special handling.
   440  //   - received a good update from the xDS client, but the update either contains
   441  //     an invalid security configuration or contains invalid aggragate cluster
   442  //     config.
   443  func (b *cdsBalancer) annotateErrorWithNodeID(err error) error {
   444  	nodeID := b.xdsClient.BootstrapConfig().Node().GetId()
   445  	return fmt.Errorf("[xDS node id: %v]: %w", nodeID, err)
   446  }
   447  
   448  // Handles a good Cluster update from the xDS client. Kicks off the discovery
   449  // mechanism generation process from the top-level cluster and if the cluster
   450  // graph is resolved, generates child policy config and pushes it down.
   451  //
   452  // Only executed in the context of a serializer callback.
   453  func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpdate) {
   454  	state := b.watchers[name]
   455  	if state == nil {
   456  		// We are currently not watching this cluster anymore. Return early.
   457  		return
   458  	}
   459  
   460  	b.logger.Infof("Received Cluster resource: %s", pretty.ToJSON(update))
   461  
   462  	// Update the watchers map with the update for the cluster.
   463  	state.lastUpdate = &update
   464  
   465  	// For an aggregate cluster, always use the security configuration on the
   466  	// root cluster.
   467  	if name == b.lbCfg.ClusterName {
   468  		// Process the security config from the received update before building the
   469  		// child policy or forwarding the update to it. We do this because the child
   470  		// policy may try to create a new subConn inline. Processing the security
   471  		// configuration here and setting up the handshakeInfo will make sure that
   472  		// such attempts are handled properly.
   473  		if err := b.handleSecurityConfig(update.SecurityCfg); err != nil {
   474  			// If the security config is invalid, for example, if the provider
   475  			// instance is not found in the bootstrap config, we need to put the
   476  			// channel in transient failure.
   477  			b.onClusterError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err)))
   478  			return
   479  		}
   480  	}
   481  
   482  	clustersSeen := make(map[string]bool)
   483  	dms, ok, err := b.generateDMsForCluster(b.lbCfg.ClusterName, 0, nil, clustersSeen)
   484  	if err != nil {
   485  		b.onClusterError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("failed to generate discovery mechanisms: %v", err)))
   486  		return
   487  	}
   488  	if ok {
   489  		if len(dms) == 0 {
   490  			b.onClusterError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("aggregate cluster graph has no leaf clusters")))
   491  			return
   492  		}
   493  		// Child policy is built the first time we resolve the cluster graph.
   494  		if b.childLB == nil {
   495  			childLB, err := newChildBalancer(b.ccw, b.bOpts)
   496  			if err != nil {
   497  				b.logger.Errorf("Failed to create child policy of type %s: %v", clusterresolver.Name, err)
   498  				return
   499  			}
   500  			b.childLB = childLB
   501  			b.logger.Infof("Created child policy %p of type %s", b.childLB, clusterresolver.Name)
   502  		}
   503  
   504  		// Prepare the child policy configuration, convert it to JSON, have it
   505  		// parsed by the child policy to convert it into service config and push
   506  		// an update to it.
   507  		childCfg := &clusterresolver.LBConfig{
   508  			DiscoveryMechanisms: dms,
   509  			// The LB policy is configured by the root cluster.
   510  			XDSLBPolicy: b.watchers[b.lbCfg.ClusterName].lastUpdate.LBPolicy,
   511  		}
   512  		cfgJSON, err := json.Marshal(childCfg)
   513  		if err != nil {
   514  			// Shouldn't happen, since we just prepared struct.
   515  			b.logger.Errorf("cds_balancer: error marshalling prepared config: %v", childCfg)
   516  			return
   517  		}
   518  
   519  		var sc serviceconfig.LoadBalancingConfig
   520  		if sc, err = b.childConfigParser.ParseConfig(cfgJSON); err != nil {
   521  			b.logger.Errorf("cds_balancer: cluster_resolver config generated %v is invalid: %v", string(cfgJSON), err)
   522  			return
   523  		}
   524  
   525  		ccState := balancer.ClientConnState{
   526  			ResolverState:  xdsclient.SetClient(resolver.State{}, b.xdsClient),
   527  			BalancerConfig: sc,
   528  		}
   529  		if err := b.childLB.UpdateClientConnState(ccState); err != nil {
   530  			b.logger.Errorf("Encountered error when sending config {%+v} to child policy: %v", ccState, err)
   531  		}
   532  	}
   533  	// We no longer need the clusters that we did not see in this iteration of
   534  	// generateDMsForCluster().
   535  	for cluster := range clustersSeen {
   536  		state, ok := b.watchers[cluster]
   537  		if ok {
   538  			continue
   539  		}
   540  		state.cancelWatch()
   541  		delete(b.watchers, cluster)
   542  	}
   543  }
   544  
   545  // Handles an error Cluster update from the xDS client. Propagates the error
   546  // down to the child policy if one exists, or puts the channel in
   547  // TRANSIENT_FAILURE.
   548  //
   549  // Only executed in the context of a serializer callback.
   550  func (b *cdsBalancer) onClusterError(name string, err error) {
   551  	b.logger.Warningf("Cluster resource %q received error update: %v", name, err)
   552  
   553  	if b.childLB != nil {
   554  		if xdsresource.ErrType(err) != xdsresource.ErrorTypeConnection {
   555  			// Connection errors will be sent to the child balancers directly.
   556  			// There's no need to forward them.
   557  			b.childLB.ResolverError(err)
   558  		}
   559  	} else {
   560  		// If child balancer was never created, fail the RPCs with
   561  		// errors.
   562  		b.ccw.UpdateState(balancer.State{
   563  			ConnectivityState: connectivity.TransientFailure,
   564  			Picker:            base.NewErrPicker(fmt.Errorf("%q: %v", name, err)),
   565  		})
   566  	}
   567  }
   568  
   569  // Handles a resource-not-found error from the xDS client. Propagates the error
   570  // down to the child policy if one exists, or puts the channel in
   571  // TRANSIENT_FAILURE.
   572  //
   573  // Only executed in the context of a serializer callback.
   574  func (b *cdsBalancer) onClusterResourceNotFound(name string) {
   575  	b.logger.Warningf("CDS watch for resource %q reported resource-does-not-exist error", name)
   576  	err := b.annotateErrorWithNodeID(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "cluster %q not found", name))
   577  	b.closeChildPolicyAndReportTF(err)
   578  }
   579  
   580  // Generates discovery mechanisms for the cluster graph rooted at `name`. This
   581  // method is called recursively if `name` corresponds to an aggregate cluster,
   582  // with the base case for recursion being a leaf cluster. If a new cluster is
   583  // encountered when traversing the graph, a watcher is created for it.
   584  //
   585  // Inputs:
   586  // - name: name of the cluster to start from
   587  // - depth: recursion depth of the current cluster, starting from root
   588  // - dms: prioritized list of current discovery mechanisms
   589  // - clustersSeen: cluster names seen so far in the graph traversal
   590  //
   591  // Outputs:
   592  //   - new prioritized list of discovery mechanisms
   593  //   - boolean indicating if traversal of the aggregate cluster graph is
   594  //     complete. If false, the above list of discovery mechanisms is ignored.
   595  //   - error indicating if any error was encountered as part of the graph
   596  //     traversal. If error is non-nil, the other return values are ignored.
   597  //
   598  // Only executed in the context of a serializer callback.
   599  func (b *cdsBalancer) generateDMsForCluster(name string, depth int, dms []clusterresolver.DiscoveryMechanism, clustersSeen map[string]bool) ([]clusterresolver.DiscoveryMechanism, bool, error) {
   600  	if depth >= aggregateClusterMaxDepth {
   601  		return dms, false, errExceedsMaxDepth
   602  	}
   603  
   604  	if clustersSeen[name] {
   605  		// Discovery mechanism already seen through a different branch.
   606  		return dms, true, nil
   607  	}
   608  	clustersSeen[name] = true
   609  
   610  	state, ok := b.watchers[name]
   611  	if !ok {
   612  		// If we have not seen this cluster so far, create a watcher for it, add
   613  		// it to the map, start the watch and return.
   614  		b.createAndAddWatcherForCluster(name)
   615  
   616  		// And since we just created the watcher, we know that we haven't
   617  		// resolved the cluster graph yet.
   618  		return dms, false, nil
   619  	}
   620  
   621  	// A watcher exists, but no update has been received yet.
   622  	if state.lastUpdate == nil {
   623  		return dms, false, nil
   624  	}
   625  
   626  	var dm clusterresolver.DiscoveryMechanism
   627  	cluster := state.lastUpdate
   628  	switch cluster.ClusterType {
   629  	case xdsresource.ClusterTypeAggregate:
   630  		// This boolean is used to track if any of the clusters in the graph is
   631  		// not yet completely resolved or returns errors, thereby allowing us to
   632  		// traverse as much of the graph as possible (and start the associated
   633  		// watches where required) to ensure that clustersSeen contains all
   634  		// clusters in the graph that we can traverse to.
   635  		missingCluster := false
   636  		var err error
   637  		for _, child := range cluster.PrioritizedClusterNames {
   638  			var ok bool
   639  			dms, ok, err = b.generateDMsForCluster(child, depth+1, dms, clustersSeen)
   640  			if err != nil || !ok {
   641  				missingCluster = true
   642  			}
   643  		}
   644  		return dms, !missingCluster, err
   645  	case xdsresource.ClusterTypeEDS:
   646  		dm = clusterresolver.DiscoveryMechanism{
   647  			Type:                  clusterresolver.DiscoveryMechanismTypeEDS,
   648  			Cluster:               cluster.ClusterName,
   649  			EDSServiceName:        cluster.EDSServiceName,
   650  			MaxConcurrentRequests: cluster.MaxRequests,
   651  			LoadReportingServer:   cluster.LRSServerConfig,
   652  		}
   653  	case xdsresource.ClusterTypeLogicalDNS:
   654  		dm = clusterresolver.DiscoveryMechanism{
   655  			Type:                  clusterresolver.DiscoveryMechanismTypeLogicalDNS,
   656  			Cluster:               cluster.ClusterName,
   657  			DNSHostname:           cluster.DNSHostName,
   658  			MaxConcurrentRequests: cluster.MaxRequests,
   659  			LoadReportingServer:   cluster.LRSServerConfig,
   660  		}
   661  	}
   662  	odJSON := cluster.OutlierDetection
   663  	// "In the cds LB policy, if the outlier_detection field is not set in
   664  	// the Cluster resource, a "no-op" outlier_detection config will be
   665  	// generated in the corresponding DiscoveryMechanism config, with all
   666  	// fields unset." - A50
   667  	if odJSON == nil {
   668  		// This will pick up top level defaults in Cluster Resolver
   669  		// ParseConfig, but sre and fpe will be nil still so still a
   670  		// "no-op" config.
   671  		odJSON = json.RawMessage(`{}`)
   672  	}
   673  	dm.OutlierDetection = odJSON
   674  
   675  	dm.TelemetryLabels = cluster.TelemetryLabels
   676  
   677  	return append(dms, dm), true, nil
   678  }
   679  
// ccWrapper wraps the balancer.ClientConn passed to the CDS balancer at
// creation and intercepts the NewSubConn() and UpdateAddresses() call from the
// child policy to add security configuration required by xDS credentials.
//
// Other methods of the balancer.ClientConn interface are not overridden and
// hence get the original implementation.
type ccWrapper struct {
	balancer.ClientConn

	// xdsHIPtr is the HandshakeInfo pointer attached to every address passed
	// through NewSubConn() and UpdateAddresses(). It is the same pointer that
	// the cdsBalancer holds and updates atomically.
	xdsHIPtr *unsafe.Pointer
}
   691  
   692  // NewSubConn intercepts NewSubConn() calls from the child policy and adds an
   693  // address attribute which provides all information required by the xdsCreds
   694  // handshaker to perform the TLS handshake.
   695  func (ccw *ccWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
   696  	newAddrs := make([]resolver.Address, len(addrs))
   697  	for i, addr := range addrs {
   698  		newAddrs[i] = xdsinternal.SetHandshakeInfo(addr, ccw.xdsHIPtr)
   699  	}
   700  
   701  	// No need to override opts.StateListener; just forward all calls to the
   702  	// child that created the SubConn.
   703  	return ccw.ClientConn.NewSubConn(newAddrs, opts)
   704  }
   705  
   706  func (ccw *ccWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) {
   707  	newAddrs := make([]resolver.Address, len(addrs))
   708  	for i, addr := range addrs {
   709  		newAddrs[i] = xdsinternal.SetHandshakeInfo(addr, ccw.xdsHIPtr)
   710  	}
   711  	ccw.ClientConn.UpdateAddresses(sc, newAddrs)
   712  }
   713  
// systemRootCertsProvider implements a certprovider.Provider that returns the
// system default root certificates for validation. It holds no state.
type systemRootCertsProvider struct{}
   717  
// Close is a no-op; the provider holds nothing that needs to be released.
func (systemRootCertsProvider) Close() {}
   719  
   720  func (systemRootCertsProvider) KeyMaterial(context.Context) (*certprovider.KeyMaterial, error) {
   721  	rootCAs, err := x509SystemCertPoolFunc()
   722  	if err != nil {
   723  		return nil, err
   724  	}
   725  	return &certprovider.KeyMaterial{
   726  		Roots: rootCAs,
   727  	}, nil
   728  }