google.golang.org/grpc@v1.72.2/balancer/rls/balancer.go

/*
 *
 * Copyright 2020 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package rls implements the RLS LB policy.
package rls

import (
	"encoding/json"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/connectivity"
	estats "google.golang.org/grpc/experimental/stats"
	"google.golang.org/grpc/grpclog"
	"google.golang.org/grpc/internal"
	"google.golang.org/grpc/internal/backoff"
	"google.golang.org/grpc/internal/balancergroup"
	"google.golang.org/grpc/internal/buffer"
	internalgrpclog "google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/grpcsync"
	"google.golang.org/grpc/internal/pretty"
	"google.golang.org/grpc/resolver"
)
const (
	// Name is the name of the RLS LB policy.
	//
	// It currently has an experimental suffix, which will be removed once
	// end-to-end testing of the policy is completed.
	Name = internal.RLSLoadBalancingPolicyName
	// Default frequency for data cache purging.
	periodicCachePurgeFreq = time.Minute
)

var (
	logger            = grpclog.Component("rls")
	errBalancerClosed = errors.New("rls LB policy is closed")

	// The variables below are defined so that they can be overridden in
	// unit tests.

	// Default exponential backoff strategy for data cache entries.
	defaultBackoffStrategy = backoff.Strategy(backoff.DefaultExponential)
	// Ticker used for periodic data cache purging.
	dataCachePurgeTicker = func() *time.Ticker { return time.NewTicker(periodicCachePurgeFreq) }
	// We want every cache entry to live in the cache for at least this
	// duration. If we encounter a cache entry whose minimum expiration time is
	// in the future, we abort the LRU pass, which may temporarily leave the
	// cache too large. This is necessary to ensure that, in cases where the
	// cache is too small, when we receive an RLS response, we keep the
	// resulting cache entry around long enough for the pending incoming
	// requests to be re-processed through the new picker. If we didn't do
	// this, we'd risk throwing away each RLS response as we receive it, in
	// which case we would fail to actually route any of our incoming requests.
	minEvictDuration = 5 * time.Second

	// The following functions are no-ops in production code, but can be
	// overridden in tests to give them visibility into exactly when certain
	// events happen.
	clientConnUpdateHook = func() {}
	dataCachePurgeHook   = func() {}
	resetBackoffHook     = func() {}
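
	// For example, a test could override clientConnUpdateHook to observe
	// when a client connection update has been fully processed (an
	// illustrative sketch, not an existing test):
	//
	//	updateDone := make(chan struct{})
	//	clientConnUpdateHook = func() { close(updateDone) }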

	cacheEntriesMetric = estats.RegisterInt64Gauge(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.cache_entries",
		Description: "EXPERIMENTAL. Number of entries in the RLS cache.",
		Unit:        "entry",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target", "grpc.lb.rls.instance_uuid"},
		Default:     false,
	})
	cacheSizeMetric = estats.RegisterInt64Gauge(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.cache_size",
		Description: "EXPERIMENTAL. The current size of the RLS cache.",
		Unit:        "By",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target", "grpc.lb.rls.instance_uuid"},
		Default:     false,
	})
	defaultTargetPicksMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.default_target_picks",
		Description: "EXPERIMENTAL. Number of LB picks sent to the default target.",
		Unit:        "pick",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target", "grpc.lb.rls.data_plane_target", "grpc.lb.pick_result"},
		Default:     false,
	})
	targetPicksMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.target_picks",
		Description: "EXPERIMENTAL. Number of LB picks sent to each RLS target. Note that if the default target is also returned by the RLS server, RPCs sent to that target from the cache will be counted in this metric, not in grpc.lb.rls.default_target_picks.",
		Unit:        "pick",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target", "grpc.lb.rls.data_plane_target", "grpc.lb.pick_result"},
		Default:     false,
	})
	failedPicksMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.failed_picks",
		Description: "EXPERIMENTAL. Number of LB picks failed due to either a failed RLS request or the RLS channel being throttled.",
		Unit:        "pick",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target"},
		Default:     false,
	})
)

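// The metrics above are recorded through the channel's MetricsRecorder. A
// minimal sketch of recording the cache-entries gauge (variable names here
// are illustrative, not the exact call sites in this package):
//
//	cacheEntriesMetric.Record(metricsRecorder, int64(numEntries),
//		grpcTarget, rlsServerTarget, instanceUUID)
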
func init() {
	balancer.Register(&rlsBB{})
}

type rlsBB struct{}

func (rlsBB) Name() string {
	return Name
}

func (rlsBB) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
	lb := &rlsBalancer{
		closed:             grpcsync.NewEvent(),
		done:               grpcsync.NewEvent(),
		cc:                 cc,
		bopts:              opts,
		purgeTicker:        dataCachePurgeTicker(),
		dataCachePurgeHook: dataCachePurgeHook,
		lbCfg:              &lbConfig{},
		pendingMap:         make(map[cacheKey]*backoffState),
		childPolicies:      make(map[string]*childPolicyWrapper),
		updateCh:           buffer.NewUnbounded(),
	}
	lb.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf("[rls-experimental-lb %p] ", lb))
	lb.dataCache = newDataCache(maxCacheSize, lb.logger, cc.MetricsRecorder(), opts.Target.String())
	lb.bg = balancergroup.New(balancergroup.Options{
		CC:                      cc,
		BuildOpts:               opts,
		StateAggregator:         lb,
		Logger:                  lb.logger,
		SubBalancerCloseTimeout: time.Duration(0), // Disable caching of removed child policies
	})
	go lb.run()
	return lb
}

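// For reference, a service config selecting this policy might look like the
// following (an illustrative sketch; see the RLS design and config.go for
// the full schema and its validation):
//
//	{
//	  "loadBalancingConfig": [{
//	    "rls_experimental": {
//	      "routeLookupConfig": {
//	        "grpcKeybuilders": [{"names": [{"service": "foo.bar.Service"}]}],
//	        "lookupService": "rls.example.com:443",
//	        "cacheSizeBytes": 1048576,
//	        "defaultTarget": "fallback.example.com"
//	      },
//	      "childPolicy": [{"grpclb": {}}],
//	      "childPolicyConfigTargetFieldName": "serviceName"
//	    }
//	  }]
//	}
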
// rlsBalancer implements the RLS LB policy.
type rlsBalancer struct {
	closed             *grpcsync.Event // Fires when Close() is invoked. Guarded by stateMu.
	done               *grpcsync.Event // Fires when Close() is done.
	cc                 balancer.ClientConn
	bopts              balancer.BuildOptions
	purgeTicker        *time.Ticker
	dataCachePurgeHook func()
	logger             *internalgrpclog.PrefixLogger

	// If both cacheMu and stateMu need to be acquired, the former must be
	// acquired first to prevent a deadlock. This ordering is required because
	// code paths that need both locks always start off by reading the cache.
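	//
	// An illustrative sketch of the required ordering (not a literal code
	// path in this file):
	//
	//	b.cacheMu.Lock()
	//	entry := b.dataCache.getEntry(key) // read the cache first ...
	//	b.stateMu.Lock()                   // ... then acquire policy state
	//	...
	//	b.stateMu.Unlock()
	//	b.cacheMu.Unlock()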

	// cacheMu guards access to the data cache and pending requests map. We
	// cannot use an RWMutex here since even an operation like
	// dataCache.getEntry() modifies the underlying LRU, which is implemented as
	// a doubly linked list.
	cacheMu    sync.Mutex
	dataCache  *dataCache                 // Cache of RLS data.
	pendingMap map[cacheKey]*backoffState // Map of pending RLS requests.

	// stateMu guards access to all LB policy state.
	stateMu            sync.Mutex
	lbCfg              *lbConfig        // Most recently received service config.
	childPolicyBuilder balancer.Builder // Cached child policy builder.
	resolverState      resolver.State   // Cached resolver state.
	ctrlCh             *controlChannel  // Control channel to the RLS server.
	bg                 *balancergroup.BalancerGroup
	childPolicies      map[string]*childPolicyWrapper
	defaultPolicy      *childPolicyWrapper
	// A reference to the most recent picker sent to gRPC as part of a state
	// update is cached in this field so that we can release the reference to the
	// default child policy wrapper when a new picker is created. See
	// sendNewPickerLocked() for details.
	lastPicker *rlsPicker
	// Set during UpdateClientConnState when pushing updates to child policies.
	// Prevents state updates from child policies from causing new pickers to be
	// sent up the channel. Cleared after all child policies have processed the
	// updates sent to them, after which a new picker is sent up the channel.
	inhibitPickerUpdates bool

	// Channel on which all updates are pushed. Processed in run().
	updateCh *buffer.Unbounded
}

type resumePickerUpdates struct {
	done chan struct{}
}

// childPolicyIDAndState wraps a child policy id and its state update.
type childPolicyIDAndState struct {
	id    string
	state balancer.State
}

type controlChannelReady struct{}

// run is a long-running goroutine which handles all updates for the balancer.
// The appropriate update handler pushes the update onto a channel that this
// goroutine selects on, so each update is handled asynchronously.
func (b *rlsBalancer) run() {
	// We exit out of the for loop below only after `Close()` has been invoked.
	// Firing the done event here will ensure that Close() returns only after
	// all goroutines are done.
	defer func() { b.done.Fire() }()

	// Wait for purgeDataCache() goroutine to exit before returning from here.
	doneCh := make(chan struct{})
	defer func() {
		<-doneCh
	}()
	go b.purgeDataCache(doneCh)

	for {
		select {
		case u, ok := <-b.updateCh.Get():
			if !ok {
				return
			}
			b.updateCh.Load()
			switch update := u.(type) {
			case childPolicyIDAndState:
				b.handleChildPolicyStateUpdate(update.id, update.state)
			case controlChannelReady:
				b.logger.Infof("Resetting backoff state after control channel is back to READY")
				b.cacheMu.Lock()
				updatePicker := b.dataCache.resetBackoffState(&backoffState{bs: defaultBackoffStrategy})
				b.cacheMu.Unlock()
				if updatePicker {
					b.sendNewPicker()
				}
				resetBackoffHook()
			case resumePickerUpdates:
				b.stateMu.Lock()
				b.logger.Infof("Resuming picker updates after config propagation to child policies")
				b.inhibitPickerUpdates = false
				b.sendNewPickerLocked()
				close(update.done)
				b.stateMu.Unlock()
			default:
				b.logger.Errorf("Unsupported update type %T", update)
			}
		case <-b.closed.Done():
			return
		}
	}
}

// purgeDataCache is a long-running goroutine which periodically deletes expired
// entries. An expired entry is one for which both the expiryTime and
// backoffExpiryTime are in the past.
func (b *rlsBalancer) purgeDataCache(doneCh chan struct{}) {
	defer close(doneCh)

	for {
		select {
		case <-b.closed.Done():
			return
		case <-b.purgeTicker.C:
			b.cacheMu.Lock()
			updatePicker := b.dataCache.evictExpiredEntries()
			b.cacheMu.Unlock()
			if updatePicker {
				b.sendNewPicker()
			}
			b.dataCachePurgeHook()
		}
	}
}
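
// Tests can shorten the purge interval by overriding the ticker factory
// before the balancer is built (an illustrative sketch):
//
//	dataCachePurgeTicker = func() *time.Ticker {
//		return time.NewTicker(10 * time.Millisecond)
//	}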

func (b *rlsBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error {
	defer clientConnUpdateHook()

	b.stateMu.Lock()
	if b.closed.HasFired() {
		b.stateMu.Unlock()
		b.logger.Warningf("Received service config after balancer close: %s", pretty.ToJSON(ccs.BalancerConfig))
		return errBalancerClosed
	}

	newCfg := ccs.BalancerConfig.(*lbConfig)
	if b.lbCfg.Equal(newCfg) {
		b.stateMu.Unlock()
		b.logger.Infof("New service config matches existing config")
		return nil
	}

	b.logger.Infof("Delaying picker updates until config is propagated to and processed by child policies")
	b.inhibitPickerUpdates = true

	// When the RLS server name changes, the old control channel needs to be
	// swapped out for a new one. All state associated with the throttling
	// algorithm is stored on a per-control-channel basis; when we swap out
	// channels, we also swap out the throttling state.
	b.handleControlChannelUpdate(newCfg)

	// Any changes to child policy name or configuration need to be handled by
	// either creating new child policies or pushing updates to existing ones.
	b.resolverState = ccs.ResolverState
	b.handleChildPolicyConfigUpdate(newCfg, &ccs)

	// Resize the cache if the size in the config has changed.
	resizeCache := newCfg.cacheSizeBytes != b.lbCfg.cacheSizeBytes

	// Update the copy of the config in the LB policy before releasing the lock.
	b.lbCfg = newCfg
	b.stateMu.Unlock()

	// We cannot do cache operations above because `cacheMu` needs to be grabbed
	// before `stateMu` if we are to hold both locks at the same time.
	b.cacheMu.Lock()
	b.dataCache.updateRLSServerTarget(newCfg.lookupService)
	if resizeCache {
		// If the new config reduces the size of the data cache, we might have
		// to evict entries to get the cache size down to the newly specified
		// size. If we do evict an entry with a valid backoff timer, a new
		// picker needs to be sent up the channel to re-process any RPCs queued
		// as a result of this backoff timer.
		b.dataCache.resize(newCfg.cacheSizeBytes)
	}
	b.cacheMu.Unlock()
	// Enqueue an event which will notify us when the above update has been
	// propagated to all child policies, and the child policies have all
	// processed their updates, and we have sent a picker update.
	done := make(chan struct{})
	b.updateCh.Put(resumePickerUpdates{done: done})
	<-done
	return nil
}

// handleControlChannelUpdate handles updates to service config fields which
// influence the control channel to the RLS server.
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) handleControlChannelUpdate(newCfg *lbConfig) {
	if newCfg.lookupService == b.lbCfg.lookupService && newCfg.lookupServiceTimeout == b.lbCfg.lookupServiceTimeout {
		return
	}

	// Create a new control channel and close the existing one.
	b.logger.Infof("Creating control channel to RLS server at: %v", newCfg.lookupService)
	backToReadyFn := func() {
		b.updateCh.Put(controlChannelReady{})
	}
	ctrlCh, err := newControlChannel(newCfg.lookupService, newCfg.controlChannelServiceConfig, newCfg.lookupServiceTimeout, b.bopts, backToReadyFn)
	if err != nil {
		// This is very uncommon and usually represents a non-transient error.
		// There is not much we can do here other than wait for another update
		// which might fix things.
		b.logger.Errorf("Failed to create control channel to %q: %v", newCfg.lookupService, err)
		return
	}
	if b.ctrlCh != nil {
		b.ctrlCh.close()
	}
	b.ctrlCh = ctrlCh
}

// handleChildPolicyConfigUpdate handles updates to service config fields which
// influence child policy configuration.
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) handleChildPolicyConfigUpdate(newCfg *lbConfig, ccs *balancer.ClientConnState) {
	// Update child policy builder first since other steps are dependent on this.
	if b.childPolicyBuilder == nil || b.childPolicyBuilder.Name() != newCfg.childPolicyName {
		b.logger.Infof("Child policy changed to %q", newCfg.childPolicyName)
		b.childPolicyBuilder = balancer.Get(newCfg.childPolicyName)
		for _, cpw := range b.childPolicies {
			// If the child policy has changed, we need to remove the old policy
			// from the BalancerGroup and add a new one. The BalancerGroup takes
			// care of closing the old one in this case.
			b.bg.Remove(cpw.target)
			b.bg.Add(cpw.target, b.childPolicyBuilder)
		}
	}

	configSentToDefault := false
	if b.lbCfg.defaultTarget != newCfg.defaultTarget {
		// If the default target has changed, create a new childPolicyWrapper for
		// the new target if required. If a new wrapper is created, add it to the
		// childPolicies map and the BalancerGroup.
		b.logger.Infof("Default target in LB config changing from %q to %q", b.lbCfg.defaultTarget, newCfg.defaultTarget)
		cpw := b.childPolicies[newCfg.defaultTarget]
		if cpw == nil {
			cpw = newChildPolicyWrapper(newCfg.defaultTarget)
			b.childPolicies[newCfg.defaultTarget] = cpw
			b.bg.Add(newCfg.defaultTarget, b.childPolicyBuilder)
			b.logger.Infof("Child policy %q added to BalancerGroup", newCfg.defaultTarget)
		}
		if err := b.buildAndPushChildPolicyConfigs(newCfg.defaultTarget, newCfg, ccs); err != nil {
			cpw.lamify(err)
		}

		// If an old default exists, release its reference. If this was the last
		// reference, remove the child policy from the BalancerGroup and remove
		// the corresponding entry from the childPolicies map.
		if b.defaultPolicy != nil {
			if b.defaultPolicy.releaseRef() {
				delete(b.childPolicies, b.lbCfg.defaultTarget)
				b.bg.Remove(b.defaultPolicy.target)
			}
		}
		b.defaultPolicy = cpw
		configSentToDefault = true
	}

	// No change in configuration affecting child policies. Return early.
	if b.lbCfg.childPolicyName == newCfg.childPolicyName && b.lbCfg.childPolicyTargetField == newCfg.childPolicyTargetField && childPolicyConfigEqual(b.lbCfg.childPolicyConfig, newCfg.childPolicyConfig) {
		return
	}

	// If fields affecting child policy configuration have changed, the changes
	// are pushed to the childPolicyWrapper which handles them appropriately.
	for _, cpw := range b.childPolicies {
		if configSentToDefault && cpw.target == newCfg.defaultTarget {
			// Default target has already been taken care of.
			continue
		}
		if err := b.buildAndPushChildPolicyConfigs(cpw.target, newCfg, ccs); err != nil {
			cpw.lamify(err)
		}
	}
}

// buildAndPushChildPolicyConfigs builds the final child policy configuration
// by adding the `targetField` to the base child policy configuration received
// in the RLS LB policy configuration. The `targetField` is set to the given
// target, and the resulting configuration is pushed to the child policy
// through the BalancerGroup.
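//
// For example (illustrative values): with childPolicyTargetField
// "serviceName" and target "foo.svc", a base childPolicyConfig of
// {"timeout": "1s"} is pushed to the child as
// {"timeout": "1s", "serviceName": "foo.svc"}.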
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) buildAndPushChildPolicyConfigs(target string, newCfg *lbConfig, ccs *balancer.ClientConnState) error {
	jsonTarget, err := json.Marshal(target)
	if err != nil {
		return fmt.Errorf("failed to marshal child policy target %q: %v", target, err)
	}

	config := newCfg.childPolicyConfig
	targetField := newCfg.childPolicyTargetField
	config[targetField] = jsonTarget
	jsonCfg, err := json.Marshal(config)
	if err != nil {
		return fmt.Errorf("failed to marshal child policy config %+v: %v", config, err)
	}

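	// Config validation at parse time ensures the child policy builder
	// implements balancer.ConfigParser, so the assertion below cannot leave
	// parser nil.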
	parser, _ := b.childPolicyBuilder.(balancer.ConfigParser)
	parsedCfg, err := parser.ParseConfig(jsonCfg)
	if err != nil {
		return fmt.Errorf("childPolicy config parsing failed: %v", err)
	}

	state := balancer.ClientConnState{ResolverState: ccs.ResolverState, BalancerConfig: parsedCfg}
	b.logger.Infof("Pushing new state to child policy %q: %+v", target, state)
	if err := b.bg.UpdateClientConnState(target, state); err != nil {
		b.logger.Warningf("UpdateClientConnState(%q, %+v) failed: %v", target, ccs, err)
	}
	return nil
}

func (b *rlsBalancer) ResolverError(err error) {
	b.bg.ResolverError(err)
}

func (b *rlsBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}

func (b *rlsBalancer) Close() {
	b.stateMu.Lock()
	b.closed.Fire()
	b.purgeTicker.Stop()
	if b.ctrlCh != nil {
		b.ctrlCh.close()
	}
	b.bg.Close()
	b.stateMu.Unlock()

	b.cacheMu.Lock()
	b.dataCache.stop()
	b.cacheMu.Unlock()

	b.updateCh.Close()

	<-b.done.Done()
}

func (b *rlsBalancer) ExitIdle() {
	b.bg.ExitIdle()
}

// sendNewPickerLocked pushes a new picker onto the channel.
//
// Note that regardless of what connectivity state is reported, the policy will
// return its own picker, and not a picker that unconditionally queues
// (typically used for IDLE or CONNECTING) or a picker that unconditionally
// fails (typically used for TRANSIENT_FAILURE). This is required because,
// irrespective of the connectivity state, we need to be able to perform RLS
// lookups for incoming RPCs and affect the status of queued RPCs based on the
// receipt of RLS responses.
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) sendNewPickerLocked() {
	aggregatedState := b.aggregatedConnectivityState()

	// Acquire a separate reference for the picker. This is required to ensure
	// that the wrapper held by the old picker is not closed when the default
	// target changes in the config, and a new wrapper is created for the new
	// default target. See handleChildPolicyConfigUpdate() for how config changes
	// affecting the default target are handled.
	if b.defaultPolicy != nil {
		b.defaultPolicy.acquireRef()
	}

	picker := &rlsPicker{
		kbm:             b.lbCfg.kbMap,
		origEndpoint:    b.bopts.Target.Endpoint(),
		lb:              b,
		defaultPolicy:   b.defaultPolicy,
		ctrlCh:          b.ctrlCh,
		maxAge:          b.lbCfg.maxAge,
		staleAge:        b.lbCfg.staleAge,
		bg:              b.bg,
		rlsServerTarget: b.lbCfg.lookupService,
		grpcTarget:      b.bopts.Target.String(),
		metricsRecorder: b.cc.MetricsRecorder(),
	}
	picker.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf("[rls-picker %p] ", picker))
	state := balancer.State{
		ConnectivityState: aggregatedState,
		Picker:            picker,
	}

	if !b.inhibitPickerUpdates {
		b.logger.Infof("New balancer.State: %+v", state)
		b.cc.UpdateState(state)
	} else {
		b.logger.Infof("Delaying picker update: %+v", state)
	}

	if b.lastPicker != nil {
		if b.defaultPolicy != nil {
			b.defaultPolicy.releaseRef()
		}
	}
	b.lastPicker = picker
}

func (b *rlsBalancer) sendNewPicker() {
	b.stateMu.Lock()
	defer b.stateMu.Unlock()
	if b.closed.HasFired() {
		return
	}
	b.sendNewPickerLocked()
}

// The aggregated connectivity state reported is determined as follows:
//   - If there is at least one child policy in state READY, the connectivity
//     state is READY.
//   - Otherwise, if there is at least one child policy in state CONNECTING, the
//     connectivity state is CONNECTING.
//   - Otherwise, if there is at least one child policy in state IDLE, the
//     connectivity state is IDLE.
//   - Otherwise, all child policies are in TRANSIENT_FAILURE, and the
//     connectivity state is TRANSIENT_FAILURE.
//
// If the RLS policy has no child policies and no configured default target,
// then we will report connectivity state IDLE.
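//
// For example (illustrative): child policies in states {TRANSIENT_FAILURE,
// CONNECTING} aggregate to CONNECTING, while {READY, TRANSIENT_FAILURE}
// aggregate to READY.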
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) aggregatedConnectivityState() connectivity.State {
	if len(b.childPolicies) == 0 && b.lbCfg.defaultTarget == "" {
		return connectivity.Idle
	}

	var readyN, connectingN, idleN int
	for _, cpw := range b.childPolicies {
		state := (*balancer.State)(atomic.LoadPointer(&cpw.state))
		switch state.ConnectivityState {
		case connectivity.Ready:
			readyN++
		case connectivity.Connecting:
			connectingN++
		case connectivity.Idle:
			idleN++
		}
	}

	switch {
	case readyN > 0:
		return connectivity.Ready
	case connectingN > 0:
		return connectivity.Connecting
	case idleN > 0:
		return connectivity.Idle
	default:
		return connectivity.TransientFailure
	}
}

// UpdateState is an implementation of the balancergroup.BalancerStateAggregator
// interface. The actual state aggregation functionality is handled
// asynchronously. This method only pushes the state update onto a channel that
// is read by the run() goroutine, which dispatches it.
func (b *rlsBalancer) UpdateState(id string, state balancer.State) {
	b.updateCh.Put(childPolicyIDAndState{id: id, state: state})
}

// handleChildPolicyStateUpdate provides the state aggregator functionality for
// the BalancerGroup.
//
// This method is invoked by the BalancerGroup whenever a child policy sends a
// state update. We cache the child policy's connectivity state and picker for
// two reasons:
//   - to suppress connectivity state transitions from TRANSIENT_FAILURE to
//     states other than READY
//   - to delegate picks to child policies
func (b *rlsBalancer) handleChildPolicyStateUpdate(id string, newState balancer.State) {
	b.stateMu.Lock()
	defer b.stateMu.Unlock()

	cpw := b.childPolicies[id]
	if cpw == nil {
		// All child policies start with an entry in the map. If the id is not
		// in the map, the policy has either been removed or never existed.
		b.logger.Warningf("Received state update %+v for missing child policy %q", newState, id)
		return
	}

	oldState := (*balancer.State)(atomic.LoadPointer(&cpw.state))
	if oldState.ConnectivityState == connectivity.TransientFailure && newState.ConnectivityState == connectivity.Connecting {
		// Ignore state transitions from TRANSIENT_FAILURE to CONNECTING, and
		// thus fail pending RPCs instead of queuing them indefinitely when all
		// subChannels are failing, even if the subChannels are bouncing back
		// and forth between CONNECTING and TRANSIENT_FAILURE.
		return
	}
	atomic.StorePointer(&cpw.state, unsafe.Pointer(&newState))
	b.logger.Infof("Child policy %q has new state %+v", id, newState)
	b.sendNewPickerLocked()
}

// acquireChildPolicyReferences attempts to acquire references to
// childPolicyWrappers corresponding to the passed in targets. If there is no
// childPolicyWrapper corresponding to one of the targets, a new one is created
// and added to the BalancerGroup.
func (b *rlsBalancer) acquireChildPolicyReferences(targets []string) []*childPolicyWrapper {
	b.stateMu.Lock()
	var newChildPolicies []*childPolicyWrapper
	for _, target := range targets {
		// If the target exists in the LB policy's childPolicies map, a new
		// reference is taken here and added to the new list.
		if cpw := b.childPolicies[target]; cpw != nil {
			cpw.acquireRef()
			newChildPolicies = append(newChildPolicies, cpw)
			continue
		}

		// If the target does not exist in the child policy map, then a new
		// child policy wrapper is created and added to the new list.
		cpw := newChildPolicyWrapper(target)
		b.childPolicies[target] = cpw
		b.bg.Add(target, b.childPolicyBuilder)
		b.logger.Infof("Child policy %q added to BalancerGroup", target)
		newChildPolicies = append(newChildPolicies, cpw)
		if err := b.buildAndPushChildPolicyConfigs(target, b.lbCfg, &balancer.ClientConnState{
			ResolverState: b.resolverState,
		}); err != nil {
			cpw.lamify(err)
		}
	}
	b.stateMu.Unlock()
	return newChildPolicies
}

// releaseChildPolicyReferences releases references to childPolicyWrappers
// corresponding to the passed in targets. If the released reference was the
// last one, the child policy is removed from the BalancerGroup.
func (b *rlsBalancer) releaseChildPolicyReferences(targets []string) {
	b.stateMu.Lock()
	for _, target := range targets {
		if cpw := b.childPolicies[target]; cpw.releaseRef() {
			delete(b.childPolicies, cpw.target)
			b.bg.Remove(cpw.target)
		}
	}
	b.stateMu.Unlock()
}