dubbo.apache.org/dubbo-go/v3@v3.1.1/xds/utils/balancergroup/balancergroup.go

     1  /*
     2   * Licensed to the Apache Software Foundation (ASF) under one or more
     3   * contributor license agreements.  See the NOTICE file distributed with
     4   * this work for additional information regarding copyright ownership.
     5   * The ASF licenses this file to You under the Apache License, Version 2.0
     6   * (the "License"); you may not use this file except in compliance with
     7   * the License.  You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  /*
    19   *
    20   * Copyright 2019 gRPC authors.
    21   *
    22   */
    23  
    24  // Package balancergroup implements a utility struct to bind multiple balancers
    25  // into one balancer.
    26  package balancergroup
    27  
    28  import (
    29  	"fmt"
    30  	"sync"
    31  	"time"
    32  )
    33  
    34  import (
    35  	dubbogoLogger "github.com/dubbogo/gost/log/logger"
    36  
    37  	"google.golang.org/grpc/balancer"
    38  
    39  	"google.golang.org/grpc/connectivity"
    40  
    41  	"google.golang.org/grpc/resolver"
    42  )
    43  
    44  import (
    45  	cache "dubbo.apache.org/dubbo-go/v3/xds/utils/xds_cache"
    46  )
    47  
    48  // subBalancerWrapper is used to keep the configurations that will be used to start
    49  // the underlying balancer. It can be used to start/stop the underlying
    50  // balancer.
    51  //
    52  // When the config changes, it will pass the update to the underlying balancer
    53  // if it exists.
    54  //
    55  // TODO: move to a separate file?
    56  type subBalancerWrapper struct {
    57  	// subBalancerWrapper is passed to the sub-balancer as a ClientConn
    58  	// wrapper, only to keep the state and picker.  When sub-balancer is
    59  	// restarted while in cache, the picker needs to be resent.
    60  	//
    61  	// It also contains the sub-balancer ID, so the parent balancer group can
    62  	// keep track of SubConn/pickers and the sub-balancers they belong to. Some
    63  	// of the actions are forwarded to the parent ClientConn with no change.
    64  	// Some are forwarded to the balancer group with the sub-balancer ID.
    65  	balancer.ClientConn
    66  	id    string
    67  	group *BalancerGroup
    68  
    69  	mu    sync.Mutex
    70  	state balancer.State
    71  
    72  	// The static part of the sub-balancer. Keeps the balancer builder and
    73  	// addresses, to be used when restarting the sub-balancer.
    74  	builder balancer.Builder
    75  	// Options to be passed to sub-balancer at the time of creation.
    76  	buildOpts balancer.BuildOptions
    77  	// ccState is a cache of the addresses/balancer config, so when the balancer
    78  	// is restarted after close, it will get the previous update. It's a pointer
    79  	// and is set to nil at init, so when the balancer is built for the first
    80  	// time (not a restart), it won't receive an empty update. Note that this
    81  	// isn't reset to nil when the underlying balancer is closed.
    82  	ccState *balancer.ClientConnState
    83  	// The dynamic part of sub-balancer. Only used when balancer group is
    84  	// started. Gets cleared when sub-balancer is closed.
    85  	balancer balancer.Balancer
    86  }
    87  
    88  // UpdateState overrides balancer.ClientConn, to keep state and picker.
    89  func (sbc *subBalancerWrapper) UpdateState(state balancer.State) {
    90  	sbc.mu.Lock()
    91  	sbc.state = state
    92  	sbc.group.updateBalancerState(sbc.id, state)
    93  	sbc.mu.Unlock()
    94  }
    95  
    96  // NewSubConn overrides balancer.ClientConn, so balancer group can keep track of
    97  // the relation between subconns and sub-balancers.
    98  func (sbc *subBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
    99  	return sbc.group.newSubConn(sbc, addrs, opts)
   100  }
   101  
   102  func (sbc *subBalancerWrapper) updateBalancerStateWithCachedPicker() {
   103  	sbc.mu.Lock()
   104  	if sbc.state.Picker != nil {
   105  		sbc.group.updateBalancerState(sbc.id, sbc.state)
   106  	}
   107  	sbc.mu.Unlock()
   108  }
   109  
   110  func (sbc *subBalancerWrapper) startBalancer() {
   111  	b := sbc.builder.Build(sbc, sbc.buildOpts)
   112  	sbc.group.logger.Infof("Created child policy %p of type %v", b, sbc.builder.Name())
   113  	sbc.balancer = b
   114  	if sbc.ccState != nil {
   115  		b.UpdateClientConnState(*sbc.ccState)
   116  	}
   117  }
   118  
   119  // exitIdle invokes the sub-balancer's ExitIdle method. Returns a boolean
   120  // indicating whether or not the operation was completed.
   121  func (sbc *subBalancerWrapper) exitIdle() (complete bool) {
   122  	b := sbc.balancer
   123  	if b == nil {
   124  		return true
   125  	}
   126  	if ei, ok := b.(balancer.ExitIdler); ok {
   127  		ei.ExitIdle()
   128  		return true
   129  	}
   130  	return false
   131  }
   132  
   133  func (sbc *subBalancerWrapper) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
   134  	b := sbc.balancer
   135  	if b == nil {
   136  		// This sub-balancer was closed. This can happen when EDS removes a
   137  		// locality. The balancer for this locality was already closed, and the
   138  		// SubConns are being deleted. But SubConn state change can still
   139  		// happen.
   140  		return
   141  	}
   142  	b.UpdateSubConnState(sc, state)
   143  }
   144  
   145  func (sbc *subBalancerWrapper) updateClientConnState(s balancer.ClientConnState) error {
   146  	sbc.ccState = &s
   147  	b := sbc.balancer
   148  	if b == nil {
   149  		// This sub-balancer was closed. This should never happen because
   150  		// sub-balancers are closed when the locality is removed from EDS, or
   151  		// the balancer group is closed. There should be no further address
   152  	// updates when either of these happens.
   153  		//
   154  		// This will be a common case with priority support, because a
   155  		// sub-balancer (and the whole balancer group) could be closed because
   156  		// it's the lower priority, but it can still get address updates.
   157  		return nil
   158  	}
   159  	return b.UpdateClientConnState(s)
   160  }
   161  
   162  func (sbc *subBalancerWrapper) resolverError(err error) {
   163  	b := sbc.balancer
   164  	if b == nil {
   165  		// This sub-balancer was closed. This should never happen because
   166  		// sub-balancers are closed when the locality is removed from EDS, or
   167  		// the balancer group is closed. There should be no further address
   168  	// updates when either of these happens.
   169  		//
   170  		// This will be a common case with priority support, because a
   171  		// sub-balancer (and the whole balancer group) could be closed because
   172  		// it's the lower priority, but it can still get address updates.
   173  		return
   174  	}
   175  	b.ResolverError(err)
   176  }
   177  
   178  func (sbc *subBalancerWrapper) stopBalancer() {
   179  	sbc.balancer.Close()
   180  	sbc.balancer = nil
   181  }
   182  
   183  // BalancerGroup takes a list of balancers, and makes them into one balancer.
   184  //
   185  // Note that this struct doesn't implement balancer.Balancer, because it's not
   186  // intended to be used directly as a balancer. It's expected to be used as a
   187  // sub-balancer manager by a high level balancer.
   188  //
   189  // Updates from ClientConn are forwarded to sub-balancers
   190  //   - service config update
   191  //   - address update
   192  //   - subConn state change
   193  //   - find the corresponding balancer and forward
   194  //
   195  // Actions from sub-balancers are forwarded to the parent ClientConn
   196  //   - new/remove SubConn
   197  //   - picker updates and health state changes
   198  //   - sub-pickers are sent to an aggregator provided by the parent, which
   199  //     will group them into a group-picker. The aggregated connectivity state is
   200  //     also handled by the aggregator.
   201  //   - resolveNow
   202  //
   203  // Sub-balancers are only built when the balancer group is started. If the
   204  // balancer group is closed, the sub-balancers are also closed. And it's
   205  // guaranteed that no updates will be sent to parent ClientConn from a closed
   206  // balancer group.
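        //
        // The following is a minimal, illustrative sketch of how a parent policy is
        // expected to drive a BalancerGroup. The child id, resolver state and SubConn
        // values are hypothetical, and the "round_robin" builder is assumed to be
        // registered (e.g. by importing google.golang.org/grpc/balancer/roundrobin):
        //
        //	bg := New(cc, buildOpts, stateAggregator, logger)
        //	bg.Add("child-a", balancer.Get("round_robin"))
        //	bg.Start()
        //
        //	// Forward resolver/config updates to a specific child.
        //	_ = bg.UpdateClientConnState("child-a", balancer.ClientConnState{ResolverState: resolverState})
        //
        //	// Forward SubConn state changes from the parent ClientConn.
        //	bg.UpdateSubConnState(sc, balancer.SubConnState{ConnectivityState: connectivity.Ready})
        //
        //	// Remove a child (it stays cached for DefaultSubBalancerCloseTimeout),
        //	// then shut everything down.
        //	bg.Remove("child-a")
        //	bg.Close()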
   207  type BalancerGroup struct {
   208  	cc        balancer.ClientConn
   209  	buildOpts balancer.BuildOptions
   210  	logger    dubbogoLogger.Logger
   211  
   212  	// stateAggregator is where the state/picker updates will be sent to. It's
   213  	// provided by the parent balancer, to build a picker with all the
   214  	// sub-pickers.
   215  	stateAggregator BalancerStateAggregator
   216  
   217  	// outgoingMu guards all operations in the direction:
   218  	// ClientConn-->Sub-balancer. Including start, stop, resolver updates and
   219  	// SubConn state changes.
   220  	//
   221  	// The corresponding boolean outgoingStarted is used to stop further updates
   222  	// to sub-balancers after they are closed.
   223  	outgoingMu         sync.Mutex
   224  	outgoingStarted    bool
   225  	idToBalancerConfig map[string]*subBalancerWrapper
   226  	// Cache for sub-balancers when they are removed.
   227  	balancerCache *cache.TimeoutCache
   228  
   229  	// incomingMu is to make sure this balancer group doesn't send updates to cc
   230  	// after it's closed.
   231  	//
   232  	// We don't share the mutex to avoid deadlocks (e.g. a call to a sub-balancer
   233  	// may call back into the balancer group inline, which causes a deadlock if
   234  	// they require the same mutex).
   235  	//
   236  	// We should never need to hold multiple locks at the same time in this
   237  	// struct. The case where two locks are held can only happen when the
   238  	// underlying balancer calls back into balancer group inline. So there's an
   239  	// implicit lock acquisition order that outgoingMu is locked before
   240  	// incomingMu.
   241  
   242  	// incomingMu guards all operations in the direction:
   243  	// Sub-balancer-->ClientConn. Including NewSubConn, RemoveSubConn. It also
   244  	// guards the map from SubConn to balancer ID, so updateSubConnState needs
   245  	// to hold it shortly to find the sub-balancer to forward the update.
   246  	//
   247  	// UpdateState is called by the balancer state aggregator, and it will
   248  	// decide when and whether to call the parent ClientConn.
   249  	//
   250  	// The corresponding boolean incomingStarted is used to stop further updates
   251  	// from sub-balancers after they are closed.
   252  	incomingMu      sync.Mutex
   253  	incomingStarted bool // This boolean only guards calls back to ClientConn.
   254  	scToSubBalancer map[balancer.SubConn]*subBalancerWrapper
   255  }
   256  
   257  // DefaultSubBalancerCloseTimeout is defined as a variable instead of const for
   258  // testing.
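        //
        // For example, a test that should not wait for the full cache timeout might
        // temporarily shorten it (hypothetical test code):
        //
        //	old := DefaultSubBalancerCloseTimeout
        //	DefaultSubBalancerCloseTimeout = 100 * time.Millisecond
        //	defer func() { DefaultSubBalancerCloseTimeout = old }()
        //
        // Note that the value is read when New() builds the internal TimeoutCache, so
        // it must be set before the BalancerGroup is created.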
   259  //
   260  // TODO: make it a parameter for New().
   261  var DefaultSubBalancerCloseTimeout = 15 * time.Minute
   262  
   263  // New creates a new BalancerGroup. Note that the BalancerGroup
   264  // needs to be started to work.
   265  func New(cc balancer.ClientConn, bOpts balancer.BuildOptions, stateAggregator BalancerStateAggregator, logger dubbogoLogger.Logger) *BalancerGroup {
   266  	return &BalancerGroup{
   267  		cc:              cc,
   268  		buildOpts:       bOpts,
   269  		logger:          logger,
   270  		stateAggregator: stateAggregator,
   271  
   272  		idToBalancerConfig: make(map[string]*subBalancerWrapper),
   273  		balancerCache:      cache.NewTimeoutCache(DefaultSubBalancerCloseTimeout),
   274  		scToSubBalancer:    make(map[balancer.SubConn]*subBalancerWrapper),
   275  	}
   276  }
   277  
   278  // Start starts the balancer group, including building all the sub-balancers,
   279  // and sending the existing addresses to them.
   280  //
   281  // A BalancerGroup can be closed and started later. When a BalancerGroup is
   282  // closed, it can still receive address updates, which will be applied when
   283  // restarted.
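        //
        // For instance (illustrative only, "child-a" and ccs are hypothetical), a
        // parent policy may close the group, keep feeding it updates, and restart it:
        //
        //	bg.Close()
        //	// Updates for existing children are still accepted and cached while closed.
        //	_ = bg.UpdateClientConnState("child-a", ccs)
        //	bg.Start() // children are rebuilt and receive the cached update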
   284  func (bg *BalancerGroup) Start() {
   285  	bg.incomingMu.Lock()
   286  	bg.incomingStarted = true
   287  	bg.incomingMu.Unlock()
   288  
   289  	bg.outgoingMu.Lock()
   290  	if bg.outgoingStarted {
   291  		bg.outgoingMu.Unlock()
   292  		return
   293  	}
   294  
   295  	for _, config := range bg.idToBalancerConfig {
   296  		config.startBalancer()
   297  	}
   298  	bg.outgoingStarted = true
   299  	bg.outgoingMu.Unlock()
   300  }
   301  
   302  // Add adds a balancer built by builder to the group, with the given id.
   303  func (bg *BalancerGroup) Add(id string, builder balancer.Builder) {
   304  	// Store data in static map, and then check to see if bg is started.
   305  	bg.outgoingMu.Lock()
   306  	var sbc *subBalancerWrapper
   307  	// If outgoingStarted is true, search in the cache. Otherwise, the cache is
   308  	// guaranteed to be empty, so searching is unnecessary.
   309  	if bg.outgoingStarted {
   310  		if old, ok := bg.balancerCache.Remove(id); ok {
   311  			sbc, _ = old.(*subBalancerWrapper)
   312  			if sbc != nil && sbc.builder != builder {
   313  			// If the sub-balancer in the cache was built with a different
   314  			// balancer builder, don't use it; clean up this old balancer
   315  			// and behave as if the sub-balancer was not found in the cache.
   316  				//
   317  				// NOTE that this will also drop the cached addresses for this
   318  				// sub-balancer, which seems to be reasonable.
   319  				sbc.stopBalancer()
   320  				// cleanupSubConns must be done before the new balancer starts,
   321  				// otherwise new SubConns created by the new balancer might be
   322  				// removed by mistake.
   323  				bg.cleanupSubConns(sbc)
   324  				sbc = nil
   325  			}
   326  		}
   327  	}
   328  	if sbc == nil {
   329  		sbc = &subBalancerWrapper{
   330  			ClientConn: bg.cc,
   331  			id:         id,
   332  			group:      bg,
   333  			builder:    builder,
   334  			buildOpts:  bg.buildOpts,
   335  		}
   336  		if bg.outgoingStarted {
   337  			// Only start the balancer if bg is started. Otherwise, we only keep the
   338  			// static data.
   339  			sbc.startBalancer()
   340  		}
   341  	} else {
   342  		// When bringing back a sub-balancer from cache, re-send the cached
   343  		// picker and state.
   344  		sbc.updateBalancerStateWithCachedPicker()
   345  	}
   346  	bg.idToBalancerConfig[id] = sbc
   347  	bg.outgoingMu.Unlock()
   348  }
   349  
   350  // Remove removes the balancer with id from the group.
   351  //
   352  // It doesn't close the balancer, though. The balancer is kept in a cache and
   353  // will be closed after a timeout. Cleanup work (closing the sub-balancer and
   354  // removing its subconns) is done after that timeout.
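        //
        // For example (hypothetical id and builder variable), removing and re-adding
        // the same child with the same builder within DefaultSubBalancerCloseTimeout
        // revives the cached sub-balancer and its SubConns instead of rebuilding them:
        //
        //	bg.Remove("locality-1")
        //	// ...less than DefaultSubBalancerCloseTimeout later...
        //	bg.Add("locality-1", sameBuilder) // reuses the cached sub-balancer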
   355  func (bg *BalancerGroup) Remove(id string) {
   356  	bg.outgoingMu.Lock()
   357  	if sbToRemove, ok := bg.idToBalancerConfig[id]; ok {
   358  		if bg.outgoingStarted {
   359  			bg.balancerCache.Add(id, sbToRemove, func() {
   360  				// After the timeout, when the sub-balancer is removed from the
   361  				// cache, we need to close the underlying sub-balancer and remove
   362  				// all of its subconns.
   363  				bg.outgoingMu.Lock()
   364  				if bg.outgoingStarted {
   365  					sbToRemove.stopBalancer()
   366  				}
   367  				bg.outgoingMu.Unlock()
   368  				bg.cleanupSubConns(sbToRemove)
   369  			})
   370  		}
   371  		delete(bg.idToBalancerConfig, id)
   372  	} else {
   373  		bg.logger.Infof("balancer group: trying to remove a non-existing locality from balancer group: %v", id)
   374  	}
   375  	bg.outgoingMu.Unlock()
   376  }
   377  
   378  // bg.Remove(id) doesn't do cleanup for the sub-balancer. This function does
   379  // cleanup after the timeout.
   380  func (bg *BalancerGroup) cleanupSubConns(config *subBalancerWrapper) {
   381  	bg.incomingMu.Lock()
   382  	// Remove SubConns. This is only done after the balancer is
   383  	// actually closed.
   384  	//
   385  	// NOTE: if NewSubConn is called by this (closed) balancer later, the
   386  	// SubConn will be leaked. This shouldn't happen if the balancer
   387  	// implementation is correct. To make sure this never happens, we need to
   388  	// add another layer (balancer manager) between balancer group and the
   389  	// sub-balancers.
   390  	for sc, b := range bg.scToSubBalancer {
   391  		if b == config {
   392  			bg.cc.RemoveSubConn(sc)
   393  			delete(bg.scToSubBalancer, sc)
   394  		}
   395  	}
   396  	bg.incomingMu.Unlock()
   397  }
   398  
   399  // connect attempts to connect to all subConns belonging to sb.
   400  func (bg *BalancerGroup) connect(sb *subBalancerWrapper) {
   401  	bg.incomingMu.Lock()
   402  	for sc, b := range bg.scToSubBalancer {
   403  		if b == sb {
   404  			sc.Connect()
   405  		}
   406  	}
   407  	bg.incomingMu.Unlock()
   408  }
   409  
   410  // Following are actions from the parent grpc.ClientConn, forwarded to sub-balancers.
   411  
   412  // UpdateSubConnState handles the state for the subconn. It finds the
   413  // corresponding balancer and forwards the update.
   414  func (bg *BalancerGroup) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
   415  	bg.incomingMu.Lock()
   416  	config, ok := bg.scToSubBalancer[sc]
   417  	if !ok {
   418  		bg.incomingMu.Unlock()
   419  		return
   420  	}
   421  	if state.ConnectivityState == connectivity.Shutdown {
   422  		// Only delete sc from the map when state changed to Shutdown.
   423  		delete(bg.scToSubBalancer, sc)
   424  	}
   425  	bg.incomingMu.Unlock()
   426  
   427  	bg.outgoingMu.Lock()
   428  	config.updateSubConnState(sc, state)
   429  	bg.outgoingMu.Unlock()
   430  }
   431  
   432  // UpdateClientConnState handles ClientConnState (including balancer config and
   433  // addresses) from the resolver. It finds the balancer and forwards the update.
   434  func (bg *BalancerGroup) UpdateClientConnState(id string, s balancer.ClientConnState) error {
   435  	bg.outgoingMu.Lock()
   436  	defer bg.outgoingMu.Unlock()
   437  	if config, ok := bg.idToBalancerConfig[id]; ok {
   438  		return config.updateClientConnState(s)
   439  	}
   440  	return nil
   441  }
   442  
   443  // ResolverError forwards resolver errors to all sub-balancers.
   444  func (bg *BalancerGroup) ResolverError(err error) {
   445  	bg.outgoingMu.Lock()
   446  	for _, config := range bg.idToBalancerConfig {
   447  		config.resolverError(err)
   448  	}
   449  	bg.outgoingMu.Unlock()
   450  }
   451  
   452  // Following are actions from sub-balancers, forwarded to ClientConn.
   453  
   454  // newSubConn: forward to ClientConn, and also create a map entry from sc to
   455  // balancer, so state updates will find the right balancer.
   456  //
   457  // One note about removing SubConn: only forward to ClientConn, but don't delete
   458  // it from the map. Delete sc from the map only when its state changes to Shutdown. Since
   459  // it's just forwarding the action, there's no need for a removeSubConn()
   460  // wrapper function.
   461  func (bg *BalancerGroup) newSubConn(config *subBalancerWrapper, addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
   462  	// NOTE: if the balancer with this id was already removed, this should also
   463  	// return an error. But since we call stopBalancer when removing the
   464  	// balancer, this shouldn't happen.
   465  	bg.incomingMu.Lock()
   466  	if !bg.incomingStarted {
   467  		bg.incomingMu.Unlock()
   468  		return nil, fmt.Errorf("NewSubConn is called after balancer group is closed")
   469  	}
   470  	sc, err := bg.cc.NewSubConn(addrs, opts)
   471  	if err != nil {
   472  		bg.incomingMu.Unlock()
   473  		return nil, err
   474  	}
   475  	bg.scToSubBalancer[sc] = config
   476  	bg.incomingMu.Unlock()
   477  	return sc, nil
   478  }
   479  
   480  // updateBalancerState: forward the new state to balancer state aggregator. The
   481  // aggregator will create an aggregated picker and an aggregated connectivity
   482  // state, then forward to ClientConn.
   483  func (bg *BalancerGroup) updateBalancerState(id string, state balancer.State) {
   484  	bg.logger.Infof("Balancer state update from locality %v, new state: %+v", id, state)
   485  
   486  	// Send new state to the aggregator, without holding the incomingMu.
   487  	// incomingMu protects all calls to the parent ClientConn; this update
   488  	// doesn't necessarily trigger a call to ClientConn, and should already be
   489  	// protected by the aggregator's mutex if necessary.
   490  	if bg.stateAggregator != nil {
   491  		bg.stateAggregator.UpdateState(id, state)
   492  	}
   493  }
   494  
   495  // Close closes the balancer. It stops sub-balancers, and removes the subconns.
   496  // The BalancerGroup can be restarted later.
   497  func (bg *BalancerGroup) Close() {
   498  	bg.incomingMu.Lock()
   499  	if bg.incomingStarted {
   500  		bg.incomingStarted = false
   501  		// Also remove all SubConns.
   502  		for sc := range bg.scToSubBalancer {
   503  			bg.cc.RemoveSubConn(sc)
   504  			delete(bg.scToSubBalancer, sc)
   505  		}
   506  	}
   507  	bg.incomingMu.Unlock()
   508  
   509  	// Clear(true) runs the clear function to close the sub-balancers in the
   510  	// cache. It must be called outside of the outgoing mutex.
   511  	bg.balancerCache.Clear(true)
   512  
   513  	bg.outgoingMu.Lock()
   514  	if bg.outgoingStarted {
   515  		bg.outgoingStarted = false
   516  		for _, config := range bg.idToBalancerConfig {
   517  			config.stopBalancer()
   518  		}
   519  	}
   520  	bg.outgoingMu.Unlock()
   521  }
   522  
   523  // ExitIdle should be invoked when the parent LB policy's ExitIdle is invoked.
   524  // It will trigger ExitIdle on all sub-balancers, or connect their subconns if
   525  // ExitIdle is not supported.
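        //
        // A parent LB policy that implements balancer.ExitIdler would typically just
        // forward the call (sketch, parentBalancer is hypothetical):
        //
        //	func (b *parentBalancer) ExitIdle() { b.bg.ExitIdle() }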
   526  func (bg *BalancerGroup) ExitIdle() {
   527  	bg.outgoingMu.Lock()
   528  	for _, config := range bg.idToBalancerConfig {
   529  		if !config.exitIdle() {
   530  			bg.connect(config)
   531  		}
   532  	}
   533  	bg.outgoingMu.Unlock()
   534  }
   535  
   536  // ExitIdleOne instructs the sub-balancer `id` to exit IDLE state, if
   537  // appropriate and possible.
   538  func (bg *BalancerGroup) ExitIdleOne(id string) {
   539  	bg.outgoingMu.Lock()
   540  	if config := bg.idToBalancerConfig[id]; config != nil {
   541  		if !config.exitIdle() {
   542  			bg.connect(config)
   543  		}
   544  	}
   545  	bg.outgoingMu.Unlock()
   546  }