google.golang.org/grpc@v1.62.1/xds/internal/balancer/clusterresolver/clusterresolver.go (about)

     1  /*
     2   *
     3   * Copyright 2019 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  // Package clusterresolver contains the implementation of the
    20  // cluster_resolver_experimental LB policy which resolves endpoint addresses
    21  // using a list of one or more discovery mechanisms.
    22  package clusterresolver
    23  
    24  import (
    25  	"encoding/json"
    26  	"errors"
    27  	"fmt"
    28  
    29  	"google.golang.org/grpc/attributes"
    30  	"google.golang.org/grpc/balancer"
    31  	"google.golang.org/grpc/balancer/base"
    32  	"google.golang.org/grpc/connectivity"
    33  	"google.golang.org/grpc/internal/balancer/nop"
    34  	"google.golang.org/grpc/internal/buffer"
    35  	"google.golang.org/grpc/internal/grpclog"
    36  	"google.golang.org/grpc/internal/grpcsync"
    37  	"google.golang.org/grpc/internal/pretty"
    38  	"google.golang.org/grpc/resolver"
    39  	"google.golang.org/grpc/serviceconfig"
    40  	"google.golang.org/grpc/xds/internal/balancer/outlierdetection"
    41  	"google.golang.org/grpc/xds/internal/balancer/priority"
    42  	"google.golang.org/grpc/xds/internal/xdsclient"
    43  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
    44  )
    45  
    46  // Name is the name of the cluster_resolver balancer.
    47  const Name = "cluster_resolver_experimental"
    48  
    49  var (
    50  	errBalancerClosed = errors.New("cdsBalancer is closed")
    51  	newChildBalancer  = func(bb balancer.Builder, cc balancer.ClientConn, o balancer.BuildOptions) balancer.Balancer {
    52  		return bb.Build(cc, o)
    53  	}
    54  )
    55  
// init registers the cluster_resolver balancer builder with gRPC's balancer
// registry.
func init() {
	balancer.Register(bb{})
}

// bb is the builder for the cluster_resolver LB policy. It also parses the
// policy's JSON configuration (see ParseConfig).
type bb struct{}
    61  
    62  // Build helps implement the balancer.Builder interface.
    63  func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
    64  	priorityBuilder := balancer.Get(priority.Name)
    65  	if priorityBuilder == nil {
    66  		logger.Errorf("%q LB policy is needed but not registered", priority.Name)
    67  		return nop.NewBalancer(cc, fmt.Errorf("%q LB policy is needed but not registered", priority.Name))
    68  	}
    69  	priorityConfigParser, ok := priorityBuilder.(balancer.ConfigParser)
    70  	if !ok {
    71  		logger.Errorf("%q LB policy does not implement a config parser", priority.Name)
    72  		return nop.NewBalancer(cc, fmt.Errorf("%q LB policy does not implement a config parser", priority.Name))
    73  	}
    74  
    75  	b := &clusterResolverBalancer{
    76  		bOpts:    opts,
    77  		updateCh: buffer.NewUnbounded(),
    78  		closed:   grpcsync.NewEvent(),
    79  		done:     grpcsync.NewEvent(),
    80  
    81  		priorityBuilder:      priorityBuilder,
    82  		priorityConfigParser: priorityConfigParser,
    83  	}
    84  	b.logger = prefixLogger(b)
    85  	b.logger.Infof("Created")
    86  
    87  	b.resourceWatcher = newResourceResolver(b, b.logger)
    88  	b.cc = &ccWrapper{
    89  		ClientConn:      cc,
    90  		b:               b,
    91  		resourceWatcher: b.resourceWatcher,
    92  	}
    93  
    94  	go b.run()
    95  	return b
    96  }
    97  
// Name returns the name of the cluster_resolver LB policy, as given by the
// package-level Name constant.
func (bb) Name() string {
	return Name
}
   101  
   102  func (bb) ParseConfig(j json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
   103  	odBuilder := balancer.Get(outlierdetection.Name)
   104  	if odBuilder == nil {
   105  		// Shouldn't happen, registered through imported Outlier Detection,
   106  		// defensive programming.
   107  		return nil, fmt.Errorf("%q LB policy is needed but not registered", outlierdetection.Name)
   108  	}
   109  	odParser, ok := odBuilder.(balancer.ConfigParser)
   110  	if !ok {
   111  		// Shouldn't happen, imported Outlier Detection builder has this method.
   112  		return nil, fmt.Errorf("%q LB policy does not implement a config parser", outlierdetection.Name)
   113  	}
   114  
   115  	var cfg *LBConfig
   116  	if err := json.Unmarshal(j, &cfg); err != nil {
   117  		return nil, fmt.Errorf("unable to unmarshal balancer config %s into cluster-resolver config, error: %v", string(j), err)
   118  	}
   119  
   120  	for i, dm := range cfg.DiscoveryMechanisms {
   121  		lbCfg, err := odParser.ParseConfig(dm.OutlierDetection)
   122  		if err != nil {
   123  			return nil, fmt.Errorf("error parsing Outlier Detection config %v: %v", dm.OutlierDetection, err)
   124  		}
   125  		odCfg, ok := lbCfg.(*outlierdetection.LBConfig)
   126  		if !ok {
   127  			// Shouldn't happen, Parser built at build time with Outlier Detection
   128  			// builder pulled from gRPC LB Registry.
   129  			return nil, fmt.Errorf("odParser returned config with unexpected type %T: %v", lbCfg, lbCfg)
   130  		}
   131  		cfg.DiscoveryMechanisms[i].outlierDetection = *odCfg
   132  	}
   133  	if err := json.Unmarshal(cfg.XDSLBPolicy, &cfg.xdsLBPolicy); err != nil {
   134  		// This will never occur, valid configuration is emitted from the xDS
   135  		// Client. Validity is already checked in the xDS Client, however, this
   136  		// double validation is present because Unmarshalling and Validating are
   137  		// coupled into one json.Unmarshal operation). We will switch this in
   138  		// the future to two separate operations.
   139  		return nil, fmt.Errorf("error unmarshaling xDS LB Policy: %v", err)
   140  	}
   141  	return cfg, nil
   142  }
   143  
// ccUpdate wraps a clientConn update received from gRPC.
//
// UpdateClientConnState queues a ccUpdate with only state set, while
// ResolverError queues one with only err set. handleClientConnUpdate checks
// err first and treats a non-nil err as an error update.
type ccUpdate struct {
	state balancer.ClientConnState
	err   error
}

// exitIdle is the event queued on updateCh by ExitIdle; the run goroutine
// handles it by forwarding ExitIdle to the child policy.
type exitIdle struct{}
   151  
// clusterResolverBalancer resolves endpoint addresses using a list of one or
// more discovery mechanisms.
//
// Updates from gRPC are queued on updateCh and processed, together with
// updates from the resource resolver, by the run goroutine started in Build.
type clusterResolverBalancer struct {
	// cc is the ClientConn handed to the child policy; Build sets it to a
	// ccWrapper around the ClientConn provided by gRPC.
	cc              balancer.ClientConn
	// bOpts are the build options received in Build and reused when building
	// the child policy.
	bOpts           balancer.BuildOptions
	updateCh        *buffer.Unbounded // Channel for updates from gRPC.
	// resourceWatcher resolves the configured discovery mechanisms; its
	// updates are read from resourceWatcher.updateChannel in run.
	resourceWatcher *resourceResolver
	logger          *grpclog.PrefixLogger
	// closed is fired by Close to ask the run goroutine to shut down.
	closed          *grpcsync.Event
	// done is fired by the run goroutine once shutdown has completed.
	done            *grpcsync.Event

	// priorityBuilder and priorityConfigParser are the builder and config
	// parser of the priority LB policy, which is used as the child policy.
	priorityBuilder      balancer.Builder
	priorityConfigParser balancer.ConfigParser

	// config is the most recent LBConfig accepted by handleClientConnUpdate.
	config          *LBConfig
	// configRaw is the service config from the most recent accepted update;
	// it is forwarded to the child policy.
	configRaw       *serviceconfig.ParseResult
	xdsClient       xdsclient.XDSClient    // xDS client to watch EDS resource.
	attrsWithClient *attributes.Attributes // Attributes with xdsClient attached to be passed to the child policies.

	// child is the child policy, created lazily by updateChildConfig and
	// closed by the run goroutine on shutdown.
	child               balancer.Balancer
	// priorities holds the priorities from the most recent resource update.
	priorities          []priorityConfig
	// watchUpdateReceived is set once the first successful resource update
	// has been handled.
	watchUpdateReceived bool
}
   175  
   176  // handleClientConnUpdate handles a ClientConnUpdate received from gRPC.
   177  //
   178  // A good update results in creation of endpoint resolvers for the configured
   179  // discovery mechanisms. An update with an error results in cancellation of any
   180  // existing endpoint resolution and propagation of the same to the child policy.
   181  func (b *clusterResolverBalancer) handleClientConnUpdate(update *ccUpdate) {
   182  	if err := update.err; err != nil {
   183  		b.handleErrorFromUpdate(err, true)
   184  		return
   185  	}
   186  
   187  	b.logger.Infof("Received new balancer config: %v", pretty.ToJSON(update.state.BalancerConfig))
   188  	cfg, _ := update.state.BalancerConfig.(*LBConfig)
   189  	if cfg == nil {
   190  		b.logger.Warningf("Ignoring unsupported balancer configuration of type: %T", update.state.BalancerConfig)
   191  		return
   192  	}
   193  
   194  	b.config = cfg
   195  	b.configRaw = update.state.ResolverState.ServiceConfig
   196  	b.resourceWatcher.updateMechanisms(cfg.DiscoveryMechanisms)
   197  
   198  	// The child policy is created only after all configured discovery
   199  	// mechanisms have been successfully returned endpoints. If that is not the
   200  	// case, we return early.
   201  	if !b.watchUpdateReceived {
   202  		return
   203  	}
   204  	b.updateChildConfig()
   205  }
   206  
   207  // handleResourceUpdate handles a resource update or error from the resource
   208  // resolver by propagating the same to the child LB policy.
   209  func (b *clusterResolverBalancer) handleResourceUpdate(update *resourceUpdate) {
   210  	if err := update.err; err != nil {
   211  		b.handleErrorFromUpdate(err, false)
   212  		return
   213  	}
   214  
   215  	b.watchUpdateReceived = true
   216  	b.priorities = update.priorities
   217  
   218  	// An update from the resource resolver contains resolved endpoint addresses
   219  	// for all configured discovery mechanisms ordered by priority. This is used
   220  	// to generate configuration for the priority LB policy.
   221  	b.updateChildConfig()
   222  }
   223  
   224  // updateChildConfig builds child policy configuration using endpoint addresses
   225  // returned by the resource resolver and child policy configuration provided by
   226  // parent LB policy.
   227  //
   228  // A child policy is created if one doesn't already exist. The newly built
   229  // configuration is then pushed to the child policy.
   230  func (b *clusterResolverBalancer) updateChildConfig() {
   231  	if b.child == nil {
   232  		b.child = newChildBalancer(b.priorityBuilder, b.cc, b.bOpts)
   233  	}
   234  
   235  	childCfgBytes, addrs, err := buildPriorityConfigJSON(b.priorities, &b.config.xdsLBPolicy)
   236  	if err != nil {
   237  		b.logger.Warningf("Failed to build child policy config: %v", err)
   238  		return
   239  	}
   240  	childCfg, err := b.priorityConfigParser.ParseConfig(childCfgBytes)
   241  	if err != nil {
   242  		b.logger.Warningf("Failed to parse child policy config. This should never happen because the config was generated: %v", err)
   243  		return
   244  	}
   245  	b.logger.Infof("Built child policy config: %v", pretty.ToJSON(childCfg))
   246  
   247  	endpoints := make([]resolver.Endpoint, len(addrs))
   248  	for i, a := range addrs {
   249  		endpoints[i].Attributes = a.BalancerAttributes
   250  		endpoints[i].Addresses = []resolver.Address{a}
   251  		endpoints[i].Addresses[0].BalancerAttributes = nil
   252  	}
   253  	if err := b.child.UpdateClientConnState(balancer.ClientConnState{
   254  		ResolverState: resolver.State{
   255  			Endpoints:     endpoints,
   256  			Addresses:     addrs,
   257  			ServiceConfig: b.configRaw,
   258  			Attributes:    b.attrsWithClient,
   259  		},
   260  		BalancerConfig: childCfg,
   261  	}); err != nil {
   262  		b.logger.Warningf("Failed to push config to child policy: %v", err)
   263  	}
   264  }
   265  
   266  // handleErrorFromUpdate handles errors from the parent LB policy and endpoint
   267  // resolvers. fromParent is true if error is from the parent LB policy. In both
   268  // cases, the error is propagated to the child policy, if one exists.
   269  func (b *clusterResolverBalancer) handleErrorFromUpdate(err error, fromParent bool) {
   270  	b.logger.Warningf("Received error: %v", err)
   271  
   272  	// A resource-not-found error from the parent LB policy means that the LDS
   273  	// or CDS resource was removed. This should result in endpoint resolvers
   274  	// being stopped here.
   275  	//
   276  	// A resource-not-found error from the EDS endpoint resolver means that the
   277  	// EDS resource was removed. No action needs to be taken for this, and we
   278  	// should continue watching the same EDS resource.
   279  	if fromParent && xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound {
   280  		b.resourceWatcher.stop(false)
   281  	}
   282  
   283  	if b.child != nil {
   284  		b.child.ResolverError(err)
   285  		return
   286  	}
   287  	b.cc.UpdateState(balancer.State{
   288  		ConnectivityState: connectivity.TransientFailure,
   289  		Picker:            base.NewErrPicker(err),
   290  	})
   291  }
   292  
   293  // run is a long-running goroutine that handles updates from gRPC and endpoint
   294  // resolvers. The methods handling the individual updates simply push them onto
   295  // a channel which is read and acted upon from here.
   296  func (b *clusterResolverBalancer) run() {
   297  	for {
   298  		select {
   299  		case u, ok := <-b.updateCh.Get():
   300  			if !ok {
   301  				return
   302  			}
   303  			b.updateCh.Load()
   304  			switch update := u.(type) {
   305  			case *ccUpdate:
   306  				b.handleClientConnUpdate(update)
   307  			case exitIdle:
   308  				if b.child == nil {
   309  					b.logger.Errorf("xds: received ExitIdle with no child balancer")
   310  					break
   311  				}
   312  				// This implementation assumes the child balancer supports
   313  				// ExitIdle (but still checks for the interface's existence to
   314  				// avoid a panic if not).  If the child does not, no subconns
   315  				// will be connected.
   316  				if ei, ok := b.child.(balancer.ExitIdler); ok {
   317  					ei.ExitIdle()
   318  				}
   319  			}
   320  		case u := <-b.resourceWatcher.updateChannel:
   321  			b.handleResourceUpdate(u)
   322  
   323  		// Close results in stopping the endpoint resolvers and closing the
   324  		// underlying child policy and is the only way to exit this goroutine.
   325  		case <-b.closed.Done():
   326  			b.resourceWatcher.stop(true)
   327  
   328  			if b.child != nil {
   329  				b.child.Close()
   330  				b.child = nil
   331  			}
   332  			b.updateCh.Close()
   333  			// This is the *ONLY* point of return from this function.
   334  			b.logger.Infof("Shutdown")
   335  			b.done.Fire()
   336  			return
   337  		}
   338  	}
   339  }
   340  
   341  // Following are methods to implement the balancer interface.
   342  
   343  func (b *clusterResolverBalancer) UpdateClientConnState(state balancer.ClientConnState) error {
   344  	if b.closed.HasFired() {
   345  		b.logger.Warningf("Received update from gRPC {%+v} after close", state)
   346  		return errBalancerClosed
   347  	}
   348  
   349  	if b.xdsClient == nil {
   350  		c := xdsclient.FromResolverState(state.ResolverState)
   351  		if c == nil {
   352  			return balancer.ErrBadResolverState
   353  		}
   354  		b.xdsClient = c
   355  		b.attrsWithClient = state.ResolverState.Attributes
   356  	}
   357  
   358  	b.updateCh.Put(&ccUpdate{state: state})
   359  	return nil
   360  }
   361  
   362  // ResolverError handles errors reported by the xdsResolver.
   363  func (b *clusterResolverBalancer) ResolverError(err error) {
   364  	if b.closed.HasFired() {
   365  		b.logger.Warningf("Received resolver error {%v} after close", err)
   366  		return
   367  	}
   368  	b.updateCh.Put(&ccUpdate{err: err})
   369  }
   370  
// UpdateSubConnState handles subConn updates from gRPC.
//
// This balancer does not expect to be called through this method; any call is
// logged as an error and otherwise ignored.
func (b *clusterResolverBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}
   375  
// Close closes the cluster_resolver balancer and the underlying child
// balancer.
//
// It fires the closed event, which makes the run goroutine stop the resource
// resolver and close the child policy, and then blocks until run signals
// completion through the done event.
func (b *clusterResolverBalancer) Close() {
	b.closed.Fire()
	<-b.done.Done()
}
   381  
// ExitIdle queues an exitIdle event on updateCh. The run goroutine handles it
// by calling ExitIdle on the child policy, provided a child exists and
// implements balancer.ExitIdler.
func (b *clusterResolverBalancer) ExitIdle() {
	b.updateCh.Put(exitIdle{})
}
   385  
// ccWrapper overrides ResolveNow(), so that re-resolution from the child
// policies will trigger the DNS resolver in cluster_resolver balancer.  It
// also intercepts NewSubConn calls in case children don't set the
// StateListener, to allow redirection to happen via this cluster_resolver
// balancer.
//
// NOTE(review): no NewSubConn override is visible in this part of the file;
// confirm whether the sentence about intercepting NewSubConn is still
// accurate.
type ccWrapper struct {
	// ClientConn is the wrapped ClientConn; all methods not overridden by
	// ccWrapper are promoted from it.
	balancer.ClientConn
	// b is the balancer that owns this wrapper.
	b               *clusterResolverBalancer
	// resourceWatcher receives the re-resolution requests issued through
	// ResolveNow.
	resourceWatcher *resourceResolver
}
   396  
// ResolveNow overrides the embedded ClientConn's ResolveNow and forwards the
// re-resolution request to the resource resolver. The options argument is
// ignored.
func (c *ccWrapper) ResolveNow(resolver.ResolveNowOptions) {
	c.resourceWatcher.resolveNow()
}