google.golang.org/grpc@v1.72.2/xds/internal/balancer/clusterresolver/clusterresolver.go (about)

     1  /*
     2   *
     3   * Copyright 2019 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  // Package clusterresolver contains the implementation of the
    20  // cluster_resolver_experimental LB policy which resolves endpoint addresses
    21  // using a list of one or more discovery mechanisms.
    22  package clusterresolver
    23  
    24  import (
    25  	"encoding/json"
    26  	"errors"
    27  	"fmt"
    28  
    29  	"google.golang.org/grpc/attributes"
    30  	"google.golang.org/grpc/balancer"
    31  	"google.golang.org/grpc/balancer/base"
    32  	"google.golang.org/grpc/connectivity"
    33  	"google.golang.org/grpc/internal/balancer/nop"
    34  	"google.golang.org/grpc/internal/buffer"
    35  	"google.golang.org/grpc/internal/grpclog"
    36  	"google.golang.org/grpc/internal/grpcsync"
    37  	"google.golang.org/grpc/internal/pretty"
    38  	"google.golang.org/grpc/resolver"
    39  	"google.golang.org/grpc/serviceconfig"
    40  	"google.golang.org/grpc/xds/internal/balancer/outlierdetection"
    41  	"google.golang.org/grpc/xds/internal/balancer/priority"
    42  	"google.golang.org/grpc/xds/internal/xdsclient"
    43  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
    44  )
    45  
// Name is the name of the cluster_resolver balancer. It is the value returned
// by the Name method of this package's balancer builder.
const Name = "cluster_resolver_experimental"
    48  
    49  var (
    50  	errBalancerClosed = errors.New("cdsBalancer is closed")
    51  	newChildBalancer  = func(bb balancer.Builder, cc balancer.ClientConn, o balancer.BuildOptions) balancer.Balancer {
    52  		return bb.Build(cc, o)
    53  	}
    54  )
    55  
    56  func init() {
    57  	balancer.Register(bb{})
    58  }
    59  
// bb is the balancer builder (and config parser) for the cluster_resolver LB
// policy. It carries no state.
type bb struct{}
    61  
    62  // Build helps implement the balancer.Builder interface.
    63  func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
    64  	priorityBuilder := balancer.Get(priority.Name)
    65  	if priorityBuilder == nil {
    66  		logger.Errorf("%q LB policy is needed but not registered", priority.Name)
    67  		return nop.NewBalancer(cc, fmt.Errorf("%q LB policy is needed but not registered", priority.Name))
    68  	}
    69  	priorityConfigParser, ok := priorityBuilder.(balancer.ConfigParser)
    70  	if !ok {
    71  		logger.Errorf("%q LB policy does not implement a config parser", priority.Name)
    72  		return nop.NewBalancer(cc, fmt.Errorf("%q LB policy does not implement a config parser", priority.Name))
    73  	}
    74  
    75  	b := &clusterResolverBalancer{
    76  		bOpts:    opts,
    77  		updateCh: buffer.NewUnbounded(),
    78  		closed:   grpcsync.NewEvent(),
    79  		done:     grpcsync.NewEvent(),
    80  
    81  		priorityBuilder:      priorityBuilder,
    82  		priorityConfigParser: priorityConfigParser,
    83  	}
    84  	b.logger = prefixLogger(b)
    85  	b.logger.Infof("Created")
    86  
    87  	b.resourceWatcher = newResourceResolver(b, b.logger)
    88  	b.cc = &ccWrapper{
    89  		ClientConn:      cc,
    90  		b:               b,
    91  		resourceWatcher: b.resourceWatcher,
    92  	}
    93  
    94  	go b.run()
    95  	return b
    96  }
    97  
    98  func (bb) Name() string {
    99  	return Name
   100  }
   101  
   102  func (bb) ParseConfig(j json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
   103  	odBuilder := balancer.Get(outlierdetection.Name)
   104  	if odBuilder == nil {
   105  		// Shouldn't happen, registered through imported Outlier Detection,
   106  		// defensive programming.
   107  		return nil, fmt.Errorf("%q LB policy is needed but not registered", outlierdetection.Name)
   108  	}
   109  	odParser, ok := odBuilder.(balancer.ConfigParser)
   110  	if !ok {
   111  		// Shouldn't happen, imported Outlier Detection builder has this method.
   112  		return nil, fmt.Errorf("%q LB policy does not implement a config parser", outlierdetection.Name)
   113  	}
   114  
   115  	var cfg *LBConfig
   116  	if err := json.Unmarshal(j, &cfg); err != nil {
   117  		return nil, fmt.Errorf("unable to unmarshal balancer config %s into cluster-resolver config, error: %v", string(j), err)
   118  	}
   119  
   120  	for i, dm := range cfg.DiscoveryMechanisms {
   121  		lbCfg, err := odParser.ParseConfig(dm.OutlierDetection)
   122  		if err != nil {
   123  			return nil, fmt.Errorf("error parsing Outlier Detection config %v: %v", dm.OutlierDetection, err)
   124  		}
   125  		odCfg, ok := lbCfg.(*outlierdetection.LBConfig)
   126  		if !ok {
   127  			// Shouldn't happen, Parser built at build time with Outlier Detection
   128  			// builder pulled from gRPC LB Registry.
   129  			return nil, fmt.Errorf("odParser returned config with unexpected type %T: %v", lbCfg, lbCfg)
   130  		}
   131  		cfg.DiscoveryMechanisms[i].outlierDetection = *odCfg
   132  	}
   133  	if err := json.Unmarshal(cfg.XDSLBPolicy, &cfg.xdsLBPolicy); err != nil {
   134  		// This will never occur, valid configuration is emitted from the xDS
   135  		// Client. Validity is already checked in the xDS Client, however, this
   136  		// double validation is present because Unmarshalling and Validating are
   137  		// coupled into one json.Unmarshal operation. We will switch this in
   138  		// the future to two separate operations.
   139  		return nil, fmt.Errorf("error unmarshalling xDS LB Policy: %v", err)
   140  	}
   141  	return cfg, nil
   142  }
   143  
// ccUpdate wraps a clientConn update received from gRPC.
//
// UpdateClientConnState pushes a ccUpdate with only state set, and
// ResolverError pushes one with only err set. handleClientConnUpdate checks
// err first and treats a non-nil err as an error from the parent.
type ccUpdate struct {
	state balancer.ClientConnState
	err   error
}
   149  
// exitIdle is the value ExitIdle pushes onto the update channel. The run
// goroutine handles it by calling ExitIdle on the child balancer, if a child
// exists and implements balancer.ExitIdler.
type exitIdle struct{}
   151  
// clusterResolverBalancer resolves endpoint addresses using a list of one or
// more discovery mechanisms.
//
// Field roles, as used in this file:
//   - cc is the ccWrapper created in Build around the ClientConn given to
//     Build; it is what the child policy is built with.
//   - updateCh receives the ccUpdate and exitIdle values pushed by the
//     balancer-interface methods; run consumes them.
//   - closed is fired by Close; done is fired by run once shutdown completes.
//   - config and configRaw hold the most recent LB config and service config
//     accepted by handleClientConnUpdate.
//   - xdsClient and attrsWithClient are captured from the first resolver
//     state seen by UpdateClientConnState.
//   - child is created lazily by updateChildConfig; priorities holds the
//     latest result from the resource resolver; watchUpdateReceived becomes
//     true once the first resource update has been handled.
type clusterResolverBalancer struct {
	cc              balancer.ClientConn
	bOpts           balancer.BuildOptions
	updateCh        *buffer.Unbounded // Channel for updates from gRPC.
	resourceWatcher *resourceResolver
	logger          *grpclog.PrefixLogger
	closed          *grpcsync.Event
	done            *grpcsync.Event

	priorityBuilder      balancer.Builder
	priorityConfigParser balancer.ConfigParser

	config          *LBConfig
	configRaw       *serviceconfig.ParseResult
	xdsClient       xdsclient.XDSClient    // xDS client to watch EDS resource.
	attrsWithClient *attributes.Attributes // Attributes with xdsClient attached to be passed to the child policies.

	child               balancer.Balancer
	priorities          []priorityConfig
	watchUpdateReceived bool
}
   175  
   176  // handleClientConnUpdate handles a ClientConnUpdate received from gRPC.
   177  //
   178  // A good update results in creation of endpoint resolvers for the configured
   179  // discovery mechanisms. An update with an error results in cancellation of any
   180  // existing endpoint resolution and propagation of the same to the child policy.
   181  func (b *clusterResolverBalancer) handleClientConnUpdate(update *ccUpdate) {
   182  	if err := update.err; err != nil {
   183  		b.handleErrorFromUpdate(err, true)
   184  		return
   185  	}
   186  
   187  	if b.logger.V(2) {
   188  		b.logger.Infof("Received new balancer config: %v", pretty.ToJSON(update.state.BalancerConfig))
   189  	}
   190  
   191  	cfg, _ := update.state.BalancerConfig.(*LBConfig)
   192  	if cfg == nil {
   193  		b.logger.Warningf("Ignoring unsupported balancer configuration of type: %T", update.state.BalancerConfig)
   194  		return
   195  	}
   196  
   197  	b.config = cfg
   198  	b.configRaw = update.state.ResolverState.ServiceConfig
   199  	b.resourceWatcher.updateMechanisms(cfg.DiscoveryMechanisms)
   200  
   201  	// The child policy is created only after all configured discovery
   202  	// mechanisms have been successfully returned endpoints. If that is not the
   203  	// case, we return early.
   204  	if !b.watchUpdateReceived {
   205  		return
   206  	}
   207  	b.updateChildConfig()
   208  }
   209  
   210  // handleResourceUpdate handles a resource update or error from the resource
   211  // resolver by propagating the same to the child LB policy.
   212  func (b *clusterResolverBalancer) handleResourceUpdate(update *resourceUpdate) {
   213  	b.watchUpdateReceived = true
   214  	b.priorities = update.priorities
   215  
   216  	// An update from the resource resolver contains resolved endpoint addresses
   217  	// for all configured discovery mechanisms ordered by priority. This is used
   218  	// to generate configuration for the priority LB policy.
   219  	b.updateChildConfig()
   220  
   221  	if update.onDone != nil {
   222  		update.onDone()
   223  	}
   224  }
   225  
   226  // updateChildConfig builds child policy configuration using endpoint addresses
   227  // returned by the resource resolver and child policy configuration provided by
   228  // parent LB policy.
   229  //
   230  // A child policy is created if one doesn't already exist. The newly built
   231  // configuration is then pushed to the child policy.
   232  func (b *clusterResolverBalancer) updateChildConfig() {
   233  	if b.child == nil {
   234  		b.child = newChildBalancer(b.priorityBuilder, b.cc, b.bOpts)
   235  	}
   236  
   237  	childCfgBytes, endpoints, err := buildPriorityConfigJSON(b.priorities, &b.config.xdsLBPolicy)
   238  	if err != nil {
   239  		b.logger.Warningf("Failed to build child policy config: %v", err)
   240  		return
   241  	}
   242  	childCfg, err := b.priorityConfigParser.ParseConfig(childCfgBytes)
   243  	if err != nil {
   244  		b.logger.Warningf("Failed to parse child policy config. This should never happen because the config was generated: %v", err)
   245  		return
   246  	}
   247  	if b.logger.V(2) {
   248  		b.logger.Infof("Built child policy config: %s", pretty.ToJSON(childCfg))
   249  	}
   250  
   251  	flattenedAddrs := make([]resolver.Address, len(endpoints))
   252  	for i := range endpoints {
   253  		for j := range endpoints[i].Addresses {
   254  			addr := endpoints[i].Addresses[j]
   255  			addr.BalancerAttributes = endpoints[i].Attributes
   256  			// If the endpoint has multiple addresses, only the first is added
   257  			// to the flattened address list. This ensures that LB policies
   258  			// that don't support endpoints create only one subchannel to a
   259  			// backend.
   260  			if j == 0 {
   261  				flattenedAddrs[i] = addr
   262  			}
   263  			// BalancerAttributes need to be present in endpoint addresses. This
   264  			// temporary workaround is required to make load reporting work
   265  			// with the old pickfirst policy which creates SubConns with multiple
   266  			// addresses. Since the addresses can be from different localities,
   267  			// an Address.BalancerAttribute is used to identify the locality of the
   268  			// address used by the transport. This workaround can be removed once
   269  			// the old pickfirst is removed.
   270  			// See https://github.com/grpc/grpc-go/issues/7339
   271  			endpoints[i].Addresses[j] = addr
   272  		}
   273  	}
   274  	if err := b.child.UpdateClientConnState(balancer.ClientConnState{
   275  		ResolverState: resolver.State{
   276  			Endpoints:     endpoints,
   277  			Addresses:     flattenedAddrs,
   278  			ServiceConfig: b.configRaw,
   279  			Attributes:    b.attrsWithClient,
   280  		},
   281  		BalancerConfig: childCfg,
   282  	}); err != nil {
   283  		b.logger.Warningf("Failed to push config to child policy: %v", err)
   284  	}
   285  }
   286  
   287  // handleErrorFromUpdate handles errors from the parent LB policy and endpoint
   288  // resolvers. fromParent is true if error is from the parent LB policy. In both
   289  // cases, the error is propagated to the child policy, if one exists.
   290  func (b *clusterResolverBalancer) handleErrorFromUpdate(err error, fromParent bool) {
   291  	b.logger.Warningf("Received error: %v", err)
   292  
   293  	// A resource-not-found error from the parent LB policy means that the LDS
   294  	// or CDS resource was removed. This should result in endpoint resolvers
   295  	// being stopped here.
   296  	//
   297  	// A resource-not-found error from the EDS endpoint resolver means that the
   298  	// EDS resource was removed. No action needs to be taken for this, and we
   299  	// should continue watching the same EDS resource.
   300  	if fromParent && xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound {
   301  		b.resourceWatcher.stop(false)
   302  	}
   303  
   304  	if b.child != nil {
   305  		b.child.ResolverError(err)
   306  		return
   307  	}
   308  	b.cc.UpdateState(balancer.State{
   309  		ConnectivityState: connectivity.TransientFailure,
   310  		Picker:            base.NewErrPicker(err),
   311  	})
   312  }
   313  
   314  // run is a long-running goroutine that handles updates from gRPC and endpoint
   315  // resolvers. The methods handling the individual updates simply push them onto
   316  // a channel which is read and acted upon from here.
   317  func (b *clusterResolverBalancer) run() {
   318  	for {
   319  		select {
   320  		case u, ok := <-b.updateCh.Get():
   321  			if !ok {
   322  				return
   323  			}
   324  			b.updateCh.Load()
   325  			switch update := u.(type) {
   326  			case *ccUpdate:
   327  				b.handleClientConnUpdate(update)
   328  			case exitIdle:
   329  				if b.child == nil {
   330  					// This is not necessarily an error. The EDS/DNS watch may
   331  					// not have  returned a list of endpoints yet, so the child
   332  					// may not be built.
   333  					if b.logger.V(2) {
   334  						b.logger.Infof("xds: received ExitIdle with no child balancer")
   335  					}
   336  					break
   337  				}
   338  				// This implementation assumes the child balancer supports
   339  				// ExitIdle (but still checks for the interface's existence to
   340  				// avoid a panic if not).  If the child does not, no subconns
   341  				// will be connected.
   342  				if ei, ok := b.child.(balancer.ExitIdler); ok {
   343  					ei.ExitIdle()
   344  				}
   345  			}
   346  		case u := <-b.resourceWatcher.updateChannel:
   347  			b.handleResourceUpdate(u)
   348  
   349  		// Close results in stopping the endpoint resolvers and closing the
   350  		// underlying child policy and is the only way to exit this goroutine.
   351  		case <-b.closed.Done():
   352  			b.resourceWatcher.stop(true)
   353  
   354  			if b.child != nil {
   355  				b.child.Close()
   356  				b.child = nil
   357  			}
   358  			b.updateCh.Close()
   359  			// This is the *ONLY* point of return from this function.
   360  			b.logger.Infof("Shutdown")
   361  			b.done.Fire()
   362  			return
   363  		}
   364  	}
   365  }
   366  
   367  // Following are methods to implement the balancer interface.
   368  
   369  func (b *clusterResolverBalancer) UpdateClientConnState(state balancer.ClientConnState) error {
   370  	if b.closed.HasFired() {
   371  		b.logger.Warningf("Received update from gRPC {%+v} after close", state)
   372  		return errBalancerClosed
   373  	}
   374  
   375  	if b.xdsClient == nil {
   376  		c := xdsclient.FromResolverState(state.ResolverState)
   377  		if c == nil {
   378  			return balancer.ErrBadResolverState
   379  		}
   380  		b.xdsClient = c
   381  		b.attrsWithClient = state.ResolverState.Attributes
   382  	}
   383  
   384  	b.updateCh.Put(&ccUpdate{state: state})
   385  	return nil
   386  }
   387  
   388  // ResolverError handles errors reported by the xdsResolver.
   389  func (b *clusterResolverBalancer) ResolverError(err error) {
   390  	if b.closed.HasFired() {
   391  		b.logger.Warningf("Received resolver error {%v} after close", err)
   392  		return
   393  	}
   394  	b.updateCh.Put(&ccUpdate{err: err})
   395  }
   396  
   397  // UpdateSubConnState handles subConn updates from gRPC.
   398  func (b *clusterResolverBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
   399  	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
   400  }
   401  
   402  // Close closes the cdsBalancer and the underlying child balancer.
   403  func (b *clusterResolverBalancer) Close() {
   404  	b.closed.Fire()
   405  	<-b.done.Done()
   406  }
   407  
   408  func (b *clusterResolverBalancer) ExitIdle() {
   409  	b.updateCh.Put(exitIdle{})
   410  }
   411  
// ccWrapper overrides ResolveNow(), so that re-resolution from the child
// policies will trigger the DNS resolver in cluster_resolver balancer.  It
// also intercepts NewSubConn calls in case children don't set the
// StateListener, to allow redirection to happen via this cluster_resolver
// balancer.
//
// NOTE(review): only the ResolveNow override is visible in this file; no
// NewSubConn method is defined on ccWrapper here. Confirm whether the
// NewSubConn sentence above is still accurate.
//
// All other balancer.ClientConn methods are provided by the embedded
// ClientConn.
type ccWrapper struct {
	balancer.ClientConn
	b               *clusterResolverBalancer
	resourceWatcher *resourceResolver
}
   422  
   423  func (c *ccWrapper) ResolveNow(resolver.ResolveNowOptions) {
   424  	c.resourceWatcher.resolveNow()
   425  }