github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/grpc/internal/balancergroup/balancergroup.go

     1  /*
     2   * Copyright 2019 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package balancergroup implements a utility struct to bind multiple balancers
    18  // into one balancer.
    19  package balancergroup
    20  
    21  import (
    22  	"fmt"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/hxx258456/ccgo/grpc/balancer"
    27  	"github.com/hxx258456/ccgo/grpc/connectivity"
    28  	"github.com/hxx258456/ccgo/grpc/internal/cache"
    29  	"github.com/hxx258456/ccgo/grpc/internal/grpclog"
    30  	"github.com/hxx258456/ccgo/grpc/resolver"
    31  )
    32  
    33  // subBalancerWrapper keeps the configuration that will be used to start the
    34  // underlying balancer. It can be used to start/stop the underlying
    35  // balancer.
    36  //
    37  // When the config changes, it will pass the update to the underlying balancer
    38  // if it exists.
    39  //
    40  // TODO: move to a separate file?
    41  type subBalancerWrapper struct {
    42  	// subBalancerWrapper is passed to the sub-balancer as a ClientConn
    43  	// wrapper, only to keep the state and picker. When the sub-balancer is
    44  	// restarted while in cache, the picker needs to be resent.
    45  	//
    46  	// It also contains the sub-balancer ID, so the parent balancer group can
    47  	// keep track of SubConn/pickers and the sub-balancers they belong to. Some
    48  	// of the actions are forwarded to the parent ClientConn with no change.
    49  	// Some are forwarded to the balancer group with the sub-balancer ID.
    50  	balancer.ClientConn
    51  	id    string
    52  	group *BalancerGroup
    53  
    54  	mu    sync.Mutex
    55  	state balancer.State
    56  
    57  	// The static part of the sub-balancer. Keeps the balancer builder and
    58  	// addresses. To be used when restarting the sub-balancer.
    59  	builder balancer.Builder
    60  	// Options to be passed to the sub-balancer at the time of creation.
    61  	buildOpts balancer.BuildOptions
    62  	// ccState is a cache of the addresses/balancer config, so when the balancer
    63  	// is restarted after close, it will get the previous update. It's a pointer
    64  	// and is set to nil at init, so when the balancer is built for the first
    65  	// time (not a restart), it won't receive an empty update. Note that this
    66  	// isn't reset to nil when the underlying balancer is closed.
    67  	ccState *balancer.ClientConnState
    68  	// The dynamic part of sub-balancer. Only used when balancer group is
    69  	// started. Gets cleared when sub-balancer is closed.
    70  	balancer balancer.Balancer
    71  }
    72  
    73  // UpdateState overrides balancer.ClientConn, to keep state and picker.
    74  func (sbc *subBalancerWrapper) UpdateState(state balancer.State) {
    75  	sbc.mu.Lock()
    76  	sbc.state = state
    77  	sbc.group.updateBalancerState(sbc.id, state)
    78  	sbc.mu.Unlock()
    79  }
    80  
    81  // NewSubConn overrides balancer.ClientConn, so balancer group can keep track of
    82  // the relation between subconns and sub-balancers.
    83  func (sbc *subBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
    84  	return sbc.group.newSubConn(sbc, addrs, opts)
    85  }
    86  
    87  func (sbc *subBalancerWrapper) updateBalancerStateWithCachedPicker() {
    88  	sbc.mu.Lock()
    89  	if sbc.state.Picker != nil {
    90  		sbc.group.updateBalancerState(sbc.id, sbc.state)
    91  	}
    92  	sbc.mu.Unlock()
    93  }
    94  
    95  func (sbc *subBalancerWrapper) startBalancer() {
    96  	b := sbc.builder.Build(sbc, sbc.buildOpts)
    97  	sbc.group.logger.Infof("Created child policy %p of type %v", b, sbc.builder.Name())
    98  	sbc.balancer = b
    99  	if sbc.ccState != nil {
   100  		b.UpdateClientConnState(*sbc.ccState)
   101  	}
   102  }
   103  
   104  // exitIdle invokes the sub-balancer's ExitIdle method. Returns a boolean
   105  // indicating whether or not the operation was completed.
   106  func (sbc *subBalancerWrapper) exitIdle() (complete bool) {
   107  	b := sbc.balancer
   108  	if b == nil {
   109  		return true
   110  	}
   111  	if ei, ok := b.(balancer.ExitIdler); ok {
   112  		ei.ExitIdle()
   113  		return true
   114  	}
   115  	return false
   116  }
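
        // For illustration, a minimal sketch of a child policy that satisfies
        // balancer.ExitIdler, so exitIdle above can forward to it and return true
        // (the childPolicy type here is hypothetical, not part of this package):
        //
        //	type childPolicy struct {
        //		balancer.Balancer
        //	}
        //
        //	func (c *childPolicy) ExitIdle() {
        //		// Ask idle SubConns to reconnect.
        //	}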
   117  
   118  func (sbc *subBalancerWrapper) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
   119  	b := sbc.balancer
   120  	if b == nil {
   121  		// This sub-balancer was closed. This can happen when EDS removes a
   122  		// locality. The balancer for this locality was already closed, and the
   123  		// SubConns are being deleted. But SubConn state changes can still
   124  		// happen.
   125  		return
   126  	}
   127  	b.UpdateSubConnState(sc, state)
   128  }
   129  
   130  func (sbc *subBalancerWrapper) updateClientConnState(s balancer.ClientConnState) error {
   131  	sbc.ccState = &s
   132  	b := sbc.balancer
   133  	if b == nil {
   134  		// This sub-balancer was closed. This should never happen because
   135  		// sub-balancers are closed when the locality is removed from EDS, or
   136  		// the balancer group is closed. There should be no further address
   137  		// updates when either of these happens.
   138  		//
   139  		// This will be a common case with priority support, because a
   140  		// sub-balancer (and the whole balancer group) could be closed because
   141  		// it has a lower priority, but it can still get address updates.
   142  		return nil
   143  	}
   144  	return b.UpdateClientConnState(s)
   145  }
   146  
   147  func (sbc *subBalancerWrapper) resolverError(err error) {
   148  	b := sbc.balancer
   149  	if b == nil {
   150  		// This sub-balancer was closed. This should never happen because
   151  		// sub-balancers are closed when the locality is removed from EDS, or
   152  		// the balancer group is closed. There should be no further address
   153  		// updates when either of these happens.
   154  		//
   155  		// This will be a common case with priority support, because a
   156  		// sub-balancer (and the whole balancer group) could be closed because
   157  		// it has a lower priority, but it can still get address updates.
   158  		return
   159  	}
   160  	b.ResolverError(err)
   161  }
   162  
   163  func (sbc *subBalancerWrapper) stopBalancer() {
   164  	sbc.balancer.Close()
   165  	sbc.balancer = nil
   166  }
   167  
   168  // BalancerGroup takes a list of balancers, and makes them into one balancer.
   169  //
   170  // Note that this struct doesn't implement balancer.Balancer, because it's not
   171  // intended to be used directly as a balancer. It's expected to be used as a
   172  // sub-balancer manager by a high level balancer.
   173  // sub-balancer manager by a high-level balancer.
   174  // Updates from ClientConn are forwarded to sub-balancers
   175  //  - service config update
   176  //  - address update
   177  //  - subConn state change
   178  //     - find the corresponding balancer and forward
   179  //
   180  // Actions from sub-balancers are forwarded to the parent ClientConn
   181  //  - new/remove SubConn
   182  //  - picker updates and health state changes
   183  //     - sub-pickers are sent to an aggregator provided by the parent, which
   184  //     will group them into a group-picker. The aggregated connectivity state is
   185  //     also handled by the aggregator.
   186  //  - resolveNow
   187  //
   188  // Sub-balancers are only built when the balancer group is started. If the
   189  // balancer group is closed, the sub-balancers are also closed, and it's
   190  // guaranteed that no updates will be sent to the parent ClientConn from a closed
   191  // balancer group.
   192  type BalancerGroup struct {
   193  	cc        balancer.ClientConn
   194  	buildOpts balancer.BuildOptions
   195  	logger    *grpclog.PrefixLogger
   196  
   197  	// stateAggregator is where the state/picker updates are sent. It's
   198  	// provided by the parent balancer, to build a picker with all the
   199  	// sub-pickers.
   200  	stateAggregator BalancerStateAggregator
   201  
   202  	// outgoingMu guards all operations in the direction:
   203  	// ClientConn-->Sub-balancer. Including start, stop, resolver updates and
   204  	// SubConn state changes.
   205  	//
   206  	// The corresponding boolean outgoingStarted is used to stop further updates
   207  	// to sub-balancers after they are closed.
   208  	outgoingMu         sync.Mutex
   209  	outgoingStarted    bool
   210  	idToBalancerConfig map[string]*subBalancerWrapper
   211  	// Cache for sub-balancers when they are removed.
   212  	balancerCache *cache.TimeoutCache
   213  
   214  	// incomingMu is to make sure this balancer group doesn't send updates to cc
   215  	// after it's closed.
   216  	//
   217  	// We don't share the mutex to avoid deadlocks (e.g. a call to a sub-balancer
   218  	// may call back into the balancer group inline; it causes a deadlock if they
   219  	// require the same mutex).
   220  	//
   221  	// We should never need to hold multiple locks at the same time in this
   222  	// struct. The case where two locks are held can only happen when the
   223  	// underlying balancer calls back into balancer group inline. So there's an
   224  	// implicit lock acquisition order that outgoingMu is locked before
   225  	// incomingMu.
   226  
   227  	// incomingMu guards all operations in the direction:
   228  	// Sub-balancer-->ClientConn. Including NewSubConn, RemoveSubConn. It also
   229  	// guards the map from SubConn to balancer ID, so updateSubConnState needs
   230  	// to hold it shortly to find the sub-balancer to forward the update.
   231  	//
   232  	// UpdateState is called by the balancer state aggregator, which decides
   233  	// when and whether to call it.
   234  	//
   235  	// The corresponding boolean incomingStarted is used to stop further updates
   236  	// from sub-balancers after they are closed.
   237  	incomingMu      sync.Mutex
   238  	incomingStarted bool // This boolean only guards calls back to ClientConn.
   239  	scToSubBalancer map[balancer.SubConn]*subBalancerWrapper
   240  }
   241  
   242  // DefaultSubBalancerCloseTimeout is defined as a variable instead of const for
   243  // testing.
   244  //
   245  // TODO: make it a parameter for New().
   246  var DefaultSubBalancerCloseTimeout = 15 * time.Minute
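
        // For example, a test that doesn't want to wait 15 minutes for cached
        // sub-balancers to be cleaned up could shorten the timeout before creating
        // the group (a sketch; the save/restore pattern is up to the test):
        //
        //	old := DefaultSubBalancerCloseTimeout
        //	DefaultSubBalancerCloseTimeout = 100 * time.Millisecond
        //	defer func() { DefaultSubBalancerCloseTimeout = old }()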
   247  
   248  // New creates a new BalancerGroup. Note that the BalancerGroup
   249  // needs to be started to work.
   250  func New(cc balancer.ClientConn, bOpts balancer.BuildOptions, stateAggregator BalancerStateAggregator, logger *grpclog.PrefixLogger) *BalancerGroup {
   251  	return &BalancerGroup{
   252  		cc:              cc,
   253  		buildOpts:       bOpts,
   254  		logger:          logger,
   255  		stateAggregator: stateAggregator,
   256  
   257  		idToBalancerConfig: make(map[string]*subBalancerWrapper),
   258  		balancerCache:      cache.NewTimeoutCache(DefaultSubBalancerCloseTimeout),
   259  		scToSubBalancer:    make(map[balancer.SubConn]*subBalancerWrapper),
   260  	}
   261  }
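
        // A minimal usage sketch of the group's lifecycle, assuming a parent balancer
        // that already has a balancer.ClientConn cc, a BalancerStateAggregator agg, a
        // *grpclog.PrefixLogger lg and a child balancer.Builder childBuilder (these
        // names are hypothetical):
        //
        //	bg := New(cc, balancer.BuildOptions{}, agg, lg)
        //	bg.Add("child-1", childBuilder)
        //	bg.Start() // sub-balancers are only built once the group is started
        //	_ = bg.UpdateClientConnState("child-1", balancer.ClientConnState{})
        //	// ...
        //	bg.Close() // closes sub-balancers; the group can be restarted later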
   262  
   263  // Start starts the balancer group, including building all the sub-balancers,
   264  // and sending the existing addresses to them.
   265  //
   266  // A BalancerGroup can be closed and started later. When a BalancerGroup is
   267  // closed, it can still receive address updates, which will be applied when
   268  // restarted.
   269  func (bg *BalancerGroup) Start() {
   270  	bg.incomingMu.Lock()
   271  	bg.incomingStarted = true
   272  	bg.incomingMu.Unlock()
   273  
   274  	bg.outgoingMu.Lock()
   275  	if bg.outgoingStarted {
   276  		bg.outgoingMu.Unlock()
   277  		return
   278  	}
   279  
   280  	for _, config := range bg.idToBalancerConfig {
   281  		config.startBalancer()
   282  	}
   283  	bg.outgoingStarted = true
   284  	bg.outgoingMu.Unlock()
   285  }
   286  
   287  // Add adds a balancer built by builder to the group, with the given id.
   288  func (bg *BalancerGroup) Add(id string, builder balancer.Builder) {
   289  	// Store data in static map, and then check to see if bg is started.
   290  	bg.outgoingMu.Lock()
   291  	var sbc *subBalancerWrapper
   292  	// If outgoingStarted is true, search in the cache. Otherwise, the cache is
   293  	// guaranteed to be empty, so searching is unnecessary.
   294  	if bg.outgoingStarted {
   295  		if old, ok := bg.balancerCache.Remove(id); ok {
   296  			sbc, _ = old.(*subBalancerWrapper)
   297  			if sbc != nil && sbc.builder != builder {
   298  				// If the sub-balancer in the cache was built with a different
   299  				// balancer builder, don't use it; clean up the old balancer,
   300  				// and behave as if the sub-balancer was not found in the cache.
   301  				//
   302  				// NOTE that this will also drop the cached addresses for this
   303  				// sub-balancer, which seems to be reasonable.
   304  				sbc.stopBalancer()
   305  				// cleanupSubConns must be done before the new balancer starts,
   306  				// otherwise new SubConns created by the new balancer might be
   307  				// removed by mistake.
   308  				bg.cleanupSubConns(sbc)
   309  				sbc = nil
   310  			}
   311  		}
   312  	}
   313  	if sbc == nil {
   314  		sbc = &subBalancerWrapper{
   315  			ClientConn: bg.cc,
   316  			id:         id,
   317  			group:      bg,
   318  			builder:    builder,
   319  			buildOpts:  bg.buildOpts,
   320  		}
   321  		if bg.outgoingStarted {
   322  			// Only start the balancer if bg is started. Otherwise, we only keep the
   323  			// static data.
   324  			sbc.startBalancer()
   325  		}
   326  	} else {
   327  		// When bringing back a sub-balancer from the cache, re-send the cached
   328  		// picker and state.
   329  		sbc.updateBalancerStateWithCachedPicker()
   330  	}
   331  	bg.idToBalancerConfig[id] = sbc
   332  	bg.outgoingMu.Unlock()
   333  }
   334  
   335  // Remove removes the balancer with id from the group.
   336  //
   337  // It doesn't close the balancer. The balancer is kept in a cache, and will be
   338  // closed after a timeout. Cleanup work (closing the sub-balancer and removing
   339  // its subconns) is done when the timeout fires.
   340  func (bg *BalancerGroup) Remove(id string) {
   341  	bg.outgoingMu.Lock()
   342  	if sbToRemove, ok := bg.idToBalancerConfig[id]; ok {
   343  		if bg.outgoingStarted {
   344  			bg.balancerCache.Add(id, sbToRemove, func() {
   345  				// After the timeout, when the sub-balancer is removed from the
   346  				// cache, we need to close the underlying sub-balancer and
   347  				// remove all its subconns.
   348  				bg.outgoingMu.Lock()
   349  				if bg.outgoingStarted {
   350  					sbToRemove.stopBalancer()
   351  				}
   352  				bg.outgoingMu.Unlock()
   353  				bg.cleanupSubConns(sbToRemove)
   354  			})
   355  		}
   356  		delete(bg.idToBalancerConfig, id)
   357  	} else {
   358  		bg.logger.Infof("balancer group: trying to remove a non-existing locality from balancer group: %v", id)
   359  	}
   360  	bg.outgoingMu.Unlock()
   361  }
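
        // Taken together with Add, this gives the following caching behavior (a
        // sketch; sameBuilder is hypothetical): removing a child and re-adding it
        // with the same builder before the cache timeout fires revives the cached
        // sub-balancer and resends its cached picker, keeping its SubConns alive:
        //
        //	bg.Remove("child-1")           // kept in the cache, not closed yet
        //	bg.Add("child-1", sameBuilder) // cache hit: revived, picker resent
        //
        // Re-adding with a different builder instead closes the cached sub-balancer,
        // removes its SubConns, and builds a fresh one.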
   362  
   363  // bg.Remove(id) doesn't do cleanup for the sub-balancer. This function does
   364  // cleanup after the timeout.
   365  func (bg *BalancerGroup) cleanupSubConns(config *subBalancerWrapper) {
   366  	bg.incomingMu.Lock()
   367  	// Remove SubConns. This is only done after the balancer is
   368  	// actually closed.
   369  	//
   370  	// NOTE: if NewSubConn is called by this (closed) balancer later, the
   371  	// SubConn will be leaked. This shouldn't happen if the balancer
   372  	// implementation is correct. To make sure this never happens, we need to
   373  	// add another layer (balancer manager) between balancer group and the
   374  	// sub-balancers.
   375  	for sc, b := range bg.scToSubBalancer {
   376  		if b == config {
   377  			bg.cc.RemoveSubConn(sc)
   378  			delete(bg.scToSubBalancer, sc)
   379  		}
   380  	}
   381  	bg.incomingMu.Unlock()
   382  }
   383  
   384  // connect attempts to connect to all subConns belonging to sb.
   385  func (bg *BalancerGroup) connect(sb *subBalancerWrapper) {
   386  	bg.incomingMu.Lock()
   387  	for sc, b := range bg.scToSubBalancer {
   388  		if b == sb {
   389  			sc.Connect()
   390  		}
   391  	}
   392  	bg.incomingMu.Unlock()
   393  }
   394  
   395  // Following are actions from the parent grpc.ClientConn, forwarded to sub-balancers.
   396  
   397  // UpdateSubConnState handles the state for the subconn. It finds the
   398  // corresponding balancer and forwards the update.
   399  func (bg *BalancerGroup) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
   400  	bg.incomingMu.Lock()
   401  	config, ok := bg.scToSubBalancer[sc]
   402  	if !ok {
   403  		bg.incomingMu.Unlock()
   404  		return
   405  	}
   406  	if state.ConnectivityState == connectivity.Shutdown {
   407  		// Only delete sc from the map when state changed to Shutdown.
   408  		delete(bg.scToSubBalancer, sc)
   409  	}
   410  	bg.incomingMu.Unlock()
   411  
   412  	bg.outgoingMu.Lock()
   413  	config.updateSubConnState(sc, state)
   414  	bg.outgoingMu.Unlock()
   415  }
   416  
   417  // UpdateClientConnState handles the ClientConnState (including balancer config
   418  // and addresses) from the resolver. It finds the balancer and forwards the update.
   419  func (bg *BalancerGroup) UpdateClientConnState(id string, s balancer.ClientConnState) error {
   420  	bg.outgoingMu.Lock()
   421  	defer bg.outgoingMu.Unlock()
   422  	if config, ok := bg.idToBalancerConfig[id]; ok {
   423  		return config.updateClientConnState(s)
   424  	}
   425  	return nil
   426  }
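
        // A parent balancer would typically split its own ClientConnState into
        // per-child states and route each one by child id, e.g. (a sketch;
        // childStates and its contents are hypothetical):
        //
        //	for id, cs := range childStates {
        //		if err := bg.UpdateClientConnState(id, cs); err != nil {
        //			// collect or log the per-child error
        //		}
        //	}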
   427  
   428  // ResolverError forwards resolver errors to all sub-balancers.
   429  func (bg *BalancerGroup) ResolverError(err error) {
   430  	bg.outgoingMu.Lock()
   431  	for _, config := range bg.idToBalancerConfig {
   432  		config.resolverError(err)
   433  	}
   434  	bg.outgoingMu.Unlock()
   435  }
   436  
   437  // Following are actions from sub-balancers, forwarded to ClientConn.
   438  
   439  // newSubConn forwards to the ClientConn, and also records the mapping from sc
   440  // to sub-balancer, so state updates will find the right balancer.
   441  //
   442  // One note about removing a SubConn: it is only forwarded to the ClientConn and
   443  // not deleted from the map; sc is deleted from the map only when its state
   444  // changes to Shutdown. Since removal just forwards the action, there's no need
   445  // for a removeSubConn() wrapper function.
   446  func (bg *BalancerGroup) newSubConn(config *subBalancerWrapper, addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
   447  	// NOTE: if the balancer with this id was already removed, this should also
   448  	// return an error. But since we call stopBalancer when removing the balancer,
   449  	// this shouldn't happen.
   450  	bg.incomingMu.Lock()
   451  	if !bg.incomingStarted {
   452  		bg.incomingMu.Unlock()
   453  		return nil, fmt.Errorf("NewSubConn is called after balancer group is closed")
   454  	}
   455  	sc, err := bg.cc.NewSubConn(addrs, opts)
   456  	if err != nil {
   457  		bg.incomingMu.Unlock()
   458  		return nil, err
   459  	}
   460  	bg.scToSubBalancer[sc] = config
   461  	bg.incomingMu.Unlock()
   462  	return sc, nil
   463  }
   464  
   465  // updateBalancerState forwards the new state to the balancer state aggregator.
   466  // The aggregator will create an aggregated picker and an aggregated connectivity
   467  // state, then forward them to the ClientConn.
   468  func (bg *BalancerGroup) updateBalancerState(id string, state balancer.State) {
   469  	bg.logger.Infof("Balancer state update from locality %v, new state: %+v", id, state)
   470  
   471  	// Send the new state to the aggregator without holding incomingMu.
   472  	// incomingMu protects all calls to the parent ClientConn; this update
   473  	// doesn't necessarily trigger a call to the ClientConn, and should already be
   474  	// protected by the aggregator's mutex if necessary.
   475  	if bg.stateAggregator != nil {
   476  		bg.stateAggregator.UpdateState(id, state)
   477  	}
   478  }
   479  
   480  // Close closes the balancer. It stops sub-balancers, and removes the subconns.
   481  // The BalancerGroup can be restarted later.
   482  func (bg *BalancerGroup) Close() {
   483  	bg.incomingMu.Lock()
   484  	if bg.incomingStarted {
   485  		bg.incomingStarted = false
   486  		// Also remove all SubConns.
   487  		for sc := range bg.scToSubBalancer {
   488  			bg.cc.RemoveSubConn(sc)
   489  			delete(bg.scToSubBalancer, sc)
   490  		}
   491  	}
   492  	bg.incomingMu.Unlock()
   493  
   494  	// Clear(true) runs the clear callback to close sub-balancers in the cache.
   495  	// It must be called outside of the outgoing mutex.
   496  	bg.balancerCache.Clear(true)
   497  
   498  	bg.outgoingMu.Lock()
   499  	if bg.outgoingStarted {
   500  		bg.outgoingStarted = false
   501  		for _, config := range bg.idToBalancerConfig {
   502  			config.stopBalancer()
   503  		}
   504  	}
   505  	bg.outgoingMu.Unlock()
   506  }
   507  
   508  // ExitIdle should be invoked when the parent LB policy's ExitIdle is invoked.
   509  // It triggers ExitIdle on all sub-balancers, or reconnects their subconns if
   510  // ExitIdle is not supported.
   511  func (bg *BalancerGroup) ExitIdle() {
   512  	bg.outgoingMu.Lock()
   513  	for _, config := range bg.idToBalancerConfig {
   514  		if !config.exitIdle() {
   515  			bg.connect(config)
   516  		}
   517  	}
   518  	bg.outgoingMu.Unlock()
   519  }
   520  
   521  // ExitIdleOne instructs the sub-balancer `id` to exit IDLE state, if
   522  // appropriate and possible.
   523  func (bg *BalancerGroup) ExitIdleOne(id string) {
   524  	bg.outgoingMu.Lock()
   525  	if config := bg.idToBalancerConfig[id]; config != nil {
   526  		if !config.exitIdle() {
   527  			bg.connect(config)
   528  		}
   529  	}
   530  	bg.outgoingMu.Unlock()
   531  }