github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/grpc/xds/internal/balancer/ringhash/ringhash.go (about)

     1  /*
     2   *
     3   * Copyright 2021 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  // Package ringhash implements the ringhash balancer.
    20  package ringhash
    21  
    22  import (
    23  	"encoding/json"
    24  	"errors"
    25  	"fmt"
    26  	"sync"
    27  
    28  	"github.com/hxx258456/ccgo/grpc/balancer"
    29  	"github.com/hxx258456/ccgo/grpc/balancer/base"
    30  	"github.com/hxx258456/ccgo/grpc/balancer/weightedroundrobin"
    31  	"github.com/hxx258456/ccgo/grpc/connectivity"
    32  	"github.com/hxx258456/ccgo/grpc/internal/grpclog"
    33  	"github.com/hxx258456/ccgo/grpc/internal/pretty"
    34  	"github.com/hxx258456/ccgo/grpc/resolver"
    35  	"github.com/hxx258456/ccgo/grpc/serviceconfig"
    36  )
    37  
    38  // Name is the name of the ring_hash balancer.
    39  const Name = "ring_hash_experimental"
    40  
    41  func init() {
    42  	balancer.Register(bb{})
    43  }
    44  
// bb is the stateless builder type for the ring hash balancer; a bb value is
// what init passes to balancer.Register.
type bb struct{}
    46  
    47  func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer {
    48  	b := &ringhashBalancer{
    49  		cc:       cc,
    50  		subConns: make(map[resolver.Address]*subConn),
    51  		scStates: make(map[balancer.SubConn]*subConn),
    52  		csEvltr:  &connectivityStateEvaluator{},
    53  	}
    54  	b.logger = prefixLogger(b)
    55  	b.logger.Infof("Created")
    56  	return b
    57  }
    58  
// Name returns the package-level Name constant, i.e. the name under which this
// balancer builder is registered.
func (bb) Name() string {
	return Name
}
    62  
    63  func (bb) ParseConfig(c json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
    64  	return parseConfig(c)
    65  }
    66  
// subConn bundles a balancer.SubConn with the bookkeeping this balancer keeps
// for it: the address string it was created for, its last reported
// connectivity state, and the failing / connectQueued flags.
type subConn struct {
	addr string
	sc   balancer.SubConn

	// mu guards state, failing and connectQueued.
	mu sync.RWMutex
	// This is the actual state of this SubConn (as updated by the ClientConn).
	// The effective state can be different, see the comment on failing.
	state connectivity.State
	// failing is whether this SubConn is in a failing state. A subConn is
	// considered to be in a failing state if it was previously in
	// TransientFailure.
	//
	// This affects the effective connectivity state of this SubConn, e.g.
	// - if the actual state is Idle or Connecting, but this SubConn is failing,
	// the effective state is TransientFailure.
	//
	// This is used in pick() (not defined in this part of the file). E.g. if a
	// subConn is Idle, but has failing as true, pick() will
	// - consider this SubConn as TransientFailure, and check the state of the
	// next SubConn.
	// - trigger Connect() (note that normally a SubConn in real
	// TransientFailure cannot Connect())
	//
	// A subConn starts in non-failing (failing is false). A transition to
	// TransientFailure sets failing to true (and it stays true). A transition
	// to Ready sets failing to false.
	failing bool
	// connectQueued is true if a Connect() was queued for this SubConn while
	// it's not in Idle (most likely was in TransientFailure). A Connect() will
	// be triggered on this SubConn when it turns Idle.
	//
	// When connectivity state is updated to Idle for this SubConn, if
	// connectQueued is true, Connect() will be called on the SubConn and the
	// flag is reset. Transitions to Connecting or Ready also reset the flag.
	connectQueued bool
}
   102  
   103  // setState updates the state of this SubConn.
   104  //
   105  // It also handles the queued Connect(). If the new state is Idle, and a
   106  // Connect() was queued, this SubConn will be triggered to Connect().
   107  func (sc *subConn) setState(s connectivity.State) {
   108  	sc.mu.Lock()
   109  	defer sc.mu.Unlock()
   110  	switch s {
   111  	case connectivity.Idle:
   112  		// Trigger Connect() if new state is Idle, and there is a queued connect.
   113  		if sc.connectQueued {
   114  			sc.connectQueued = false
   115  			sc.sc.Connect()
   116  		}
   117  	case connectivity.Connecting:
   118  		// Clear connectQueued if the SubConn isn't failing. This state
   119  		// transition is unlikely to happen, but handle this just in case.
   120  		sc.connectQueued = false
   121  	case connectivity.Ready:
   122  		// Clear connectQueued if the SubConn isn't failing. This state
   123  		// transition is unlikely to happen, but handle this just in case.
   124  		sc.connectQueued = false
   125  		// Set to a non-failing state.
   126  		sc.failing = false
   127  	case connectivity.TransientFailure:
   128  		// Set to a failing state.
   129  		sc.failing = true
   130  	}
   131  	sc.state = s
   132  }
   133  
   134  // effectiveState returns the effective state of this SubConn. It can be
   135  // different from the actual state, e.g. Idle while the subConn is failing is
   136  // considered TransientFailure. Read comment of field failing for other cases.
   137  func (sc *subConn) effectiveState() connectivity.State {
   138  	sc.mu.RLock()
   139  	defer sc.mu.RUnlock()
   140  	if sc.failing && (sc.state == connectivity.Idle || sc.state == connectivity.Connecting) {
   141  		return connectivity.TransientFailure
   142  	}
   143  	return sc.state
   144  }
   145  
   146  // queueConnect sets a boolean so that when the SubConn state changes to Idle,
   147  // it's Connect() will be triggered. If the SubConn state is already Idle, it
   148  // will just call Connect().
   149  func (sc *subConn) queueConnect() {
   150  	sc.mu.Lock()
   151  	defer sc.mu.Unlock()
   152  	if sc.state == connectivity.Idle {
   153  		sc.sc.Connect()
   154  		return
   155  	}
   156  	// Queue this connect, and when this SubConn switches back to Idle (happens
   157  	// after backoff in TransientFailure), it will Connect().
   158  	sc.connectQueued = true
   159  }
   160  
// ringhashBalancer is the balancer.Balancer returned by bb.Build. It keeps one
// subConn per resolved address, the ring and picker built from them, and the
// aggregated connectivity state it reports to the ClientConn.
type ringhashBalancer struct {
	cc     balancer.ClientConn
	logger *grpclog.PrefixLogger

	// config is the LB config taken from a ClientConn state update; its
	// MinRingSize and MaxRingSize are passed to newRing.
	config *LBConfig

	subConns map[resolver.Address]*subConn // `attributes` is stripped from the keys of this map (the addresses)
	// scStates maps each SubConn to its bookkeeping entry. An entry outlives
	// its subConns entry: it is only removed once the SubConn reports Shutdown.
	scStates map[balancer.SubConn]*subConn

	// ring is always in sync with subConns. When subConns change, a new ring is
	// generated. Note that address weights updates (they are keys in the
	// subConns map) also regenerates the ring.
	ring    *ring
	picker  balancer.Picker
	csEvltr *connectivityStateEvaluator
	// state is the aggregated connectivity state last computed for this
	// balancer.
	state   connectivity.State

	resolverErr error // the last error reported by the resolver; cleared on successful resolution
	// NOTE(review): none of the methods visible in this file reset connErr,
	// despite the "cleared" wording below - confirm.
	connErr     error // the last connection error; cleared upon leaving TransientFailure
}
   181  
   182  // updateAddresses creates new SubConns and removes SubConns, based on the
   183  // address update.
   184  //
   185  // The return value is whether the new address list is different from the
   186  // previous. True if
   187  // - an address was added
   188  // - an address was removed
   189  // - an address's weight was updated
   190  //
   191  // Note that this function doesn't trigger SubConn connecting, so all the new
   192  // SubConn states are Idle.
   193  func (b *ringhashBalancer) updateAddresses(addrs []resolver.Address) bool {
   194  	var addrsUpdated bool
   195  	// addrsSet is the set converted from addrs, it's used for quick lookup of
   196  	// an address.
   197  	//
   198  	// Addresses in this map all have attributes stripped, but metadata set to
   199  	// the weight. So that weight change can be detected.
   200  	//
   201  	// TODO: this won't be necessary if there are ways to compare address
   202  	// attributes.
   203  	addrsSet := make(map[resolver.Address]struct{})
   204  	for _, a := range addrs {
   205  		aNoAttrs := a
   206  		// Strip attributes but set Metadata to the weight.
   207  		aNoAttrs.Attributes = nil
   208  		w := weightedroundrobin.GetAddrInfo(a).Weight
   209  		if w == 0 {
   210  			// If weight is not set, use 1.
   211  			w = 1
   212  		}
   213  		aNoAttrs.Metadata = w
   214  		addrsSet[aNoAttrs] = struct{}{}
   215  		if scInfo, ok := b.subConns[aNoAttrs]; !ok {
   216  			// When creating SubConn, the original address with attributes is
   217  			// passed through. So that connection configurations in attributes
   218  			// (like creds) will be used.
   219  			sc, err := b.cc.NewSubConn([]resolver.Address{a}, balancer.NewSubConnOptions{HealthCheckEnabled: true})
   220  			if err != nil {
   221  				logger.Warningf("base.baseBalancer: failed to create new SubConn: %v", err)
   222  				continue
   223  			}
   224  			scs := &subConn{addr: a.Addr, sc: sc}
   225  			scs.setState(connectivity.Idle)
   226  			b.state = b.csEvltr.recordTransition(connectivity.Shutdown, connectivity.Idle)
   227  			b.subConns[aNoAttrs] = scs
   228  			b.scStates[sc] = scs
   229  			addrsUpdated = true
   230  		} else {
   231  			// Always update the subconn's address in case the attributes
   232  			// changed. The SubConn does a reflect.DeepEqual of the new and old
   233  			// addresses. So this is a noop if the current address is the same
   234  			// as the old one (including attributes).
   235  			b.subConns[aNoAttrs] = scInfo
   236  			b.cc.UpdateAddresses(scInfo.sc, []resolver.Address{a})
   237  		}
   238  	}
   239  	for a, scInfo := range b.subConns {
   240  		// a was removed by resolver.
   241  		if _, ok := addrsSet[a]; !ok {
   242  			b.cc.RemoveSubConn(scInfo.sc)
   243  			delete(b.subConns, a)
   244  			addrsUpdated = true
   245  			// Keep the state of this sc in b.scStates until sc's state becomes Shutdown.
   246  			// The entry will be deleted in UpdateSubConnState.
   247  		}
   248  	}
   249  	return addrsUpdated
   250  }
   251  
   252  func (b *ringhashBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
   253  	b.logger.Infof("Received update from resolver, balancer config: %+v", pretty.ToJSON(s.BalancerConfig))
   254  	if b.config == nil {
   255  		newConfig, ok := s.BalancerConfig.(*LBConfig)
   256  		if !ok {
   257  			return fmt.Errorf("unexpected balancer config with type: %T", s.BalancerConfig)
   258  		}
   259  		b.config = newConfig
   260  	}
   261  
   262  	// Successful resolution; clear resolver error and ensure we return nil.
   263  	b.resolverErr = nil
   264  	if b.updateAddresses(s.ResolverState.Addresses) {
   265  		// If addresses were updated, no matter whether it resulted in SubConn
   266  		// creation/deletion, or just weight update, we will need to regenerate
   267  		// the ring.
   268  		var err error
   269  		b.ring, err = newRing(b.subConns, b.config.MinRingSize, b.config.MaxRingSize)
   270  		if err != nil {
   271  			panic(err)
   272  		}
   273  		b.regeneratePicker()
   274  		b.cc.UpdateState(balancer.State{ConnectivityState: b.state, Picker: b.picker})
   275  	}
   276  
   277  	// If resolver state contains no addresses, return an error so ClientConn
   278  	// will trigger re-resolve. Also records this as an resolver error, so when
   279  	// the overall state turns transient failure, the error message will have
   280  	// the zero address information.
   281  	if len(s.ResolverState.Addresses) == 0 {
   282  		b.ResolverError(errors.New("produced zero addresses"))
   283  		return balancer.ErrBadResolverState
   284  	}
   285  	return nil
   286  }
   287  
   288  func (b *ringhashBalancer) ResolverError(err error) {
   289  	b.resolverErr = err
   290  	if len(b.subConns) == 0 {
   291  		b.state = connectivity.TransientFailure
   292  	}
   293  
   294  	if b.state != connectivity.TransientFailure {
   295  		// The picker will not change since the balancer does not currently
   296  		// report an error.
   297  		return
   298  	}
   299  	b.regeneratePicker()
   300  	b.cc.UpdateState(balancer.State{
   301  		ConnectivityState: b.state,
   302  		Picker:            b.picker,
   303  	})
   304  }
   305  
   306  // UpdateSubConnState updates the per-SubConn state stored in the ring, and also
   307  // the aggregated state.
   308  //
   309  // It triggers an update to cc when:
   310  // - the new state is TransientFailure, to update the error message
   311  //   - it's possible that this is a noop, but sending an extra update is easier
   312  //   than comparing errors
   313  // - the aggregated state is changed
   314  //   - the same picker will be sent again, but this update may trigger a re-pick
   315  //   for some RPCs.
   316  func (b *ringhashBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
   317  	s := state.ConnectivityState
   318  	b.logger.Infof("handle SubConn state change: %p, %v", sc, s)
   319  	scs, ok := b.scStates[sc]
   320  	if !ok {
   321  		b.logger.Infof("got state changes for an unknown SubConn: %p, %v", sc, s)
   322  		return
   323  	}
   324  	oldSCState := scs.effectiveState()
   325  	scs.setState(s)
   326  	newSCState := scs.effectiveState()
   327  
   328  	var sendUpdate bool
   329  	oldBalancerState := b.state
   330  	b.state = b.csEvltr.recordTransition(oldSCState, newSCState)
   331  	if oldBalancerState != b.state {
   332  		sendUpdate = true
   333  	}
   334  
   335  	switch s {
   336  	case connectivity.Idle:
   337  		// When the overall state is TransientFailure, this will never get picks
   338  		// if there's a lower priority. Need to keep the SubConns connecting so
   339  		// there's a chance it will recover.
   340  		if b.state == connectivity.TransientFailure {
   341  			scs.queueConnect()
   342  		}
   343  		// No need to send an update. No queued RPC can be unblocked. If the
   344  		// overall state changed because of this, sendUpdate is already true.
   345  	case connectivity.Connecting:
   346  		// No need to send an update. No queued RPC can be unblocked. If the
   347  		// overall state changed because of this, sendUpdate is already true.
   348  	case connectivity.Ready:
   349  		// Resend the picker, there's no need to regenerate the picker because
   350  		// the ring didn't change.
   351  		sendUpdate = true
   352  	case connectivity.TransientFailure:
   353  		// Save error to be reported via picker.
   354  		b.connErr = state.ConnectionError
   355  		// Regenerate picker to update error message.
   356  		b.regeneratePicker()
   357  		sendUpdate = true
   358  	case connectivity.Shutdown:
   359  		// When an address was removed by resolver, b called RemoveSubConn but
   360  		// kept the sc's state in scStates. Remove state for this sc here.
   361  		delete(b.scStates, sc)
   362  	}
   363  
   364  	if sendUpdate {
   365  		b.cc.UpdateState(balancer.State{ConnectivityState: b.state, Picker: b.picker})
   366  	}
   367  }
   368  
   369  // mergeErrors builds an error from the last connection error and the last
   370  // resolver error.  Must only be called if b.state is TransientFailure.
   371  func (b *ringhashBalancer) mergeErrors() error {
   372  	// connErr must always be non-nil unless there are no SubConns, in which
   373  	// case resolverErr must be non-nil.
   374  	if b.connErr == nil {
   375  		return fmt.Errorf("last resolver error: %v", b.resolverErr)
   376  	}
   377  	if b.resolverErr == nil {
   378  		return fmt.Errorf("last connection error: %v", b.connErr)
   379  	}
   380  	return fmt.Errorf("last connection error: %v; last resolver error: %v", b.connErr, b.resolverErr)
   381  }
   382  
   383  func (b *ringhashBalancer) regeneratePicker() {
   384  	if b.state == connectivity.TransientFailure {
   385  		b.picker = base.NewErrPicker(b.mergeErrors())
   386  		return
   387  	}
   388  	b.picker = newPicker(b.ring, b.logger)
   389  }
   390  
// Close is a no-op; it performs no cleanup of its own.
func (b *ringhashBalancer) Close() {}
   392  
// connectivityStateEvaluator takes the connectivity states of multiple SubConns
// and returns one aggregated connectivity state.
//
// It's not thread safe.
type connectivityStateEvaluator struct {
	// nums counts SubConns per state and is indexed by connectivity.State.
	// Presumably the five slots correspond to the five connectivity.State
	// values - confirm against the connectivity package.
	nums [5]uint64
}
   400  
   401  // recordTransition records state change happening in subConn and based on that
   402  // it evaluates what aggregated state should be.
   403  //
   404  // - If there is at least one subchannel in READY state, report READY.
   405  // - If there are 2 or more subchannels in TRANSIENT_FAILURE state, report TRANSIENT_FAILURE.
   406  // - If there is at least one subchannel in CONNECTING state, report CONNECTING.
   407  // - If there is at least one subchannel in Idle state, report Idle.
   408  // - Otherwise, report TRANSIENT_FAILURE.
   409  //
   410  // Note that if there are 1 connecting, 2 transient failure, the overall state
   411  // is transient failure. This is because the second transient failure is a
   412  // fallback of the first failing SubConn, and we want to report transient
   413  // failure to failover to the lower priority.
   414  func (cse *connectivityStateEvaluator) recordTransition(oldState, newState connectivity.State) connectivity.State {
   415  	// Update counters.
   416  	for idx, state := range []connectivity.State{oldState, newState} {
   417  		updateVal := 2*uint64(idx) - 1 // -1 for oldState and +1 for new.
   418  		cse.nums[state] += updateVal
   419  	}
   420  
   421  	if cse.nums[connectivity.Ready] > 0 {
   422  		return connectivity.Ready
   423  	}
   424  	if cse.nums[connectivity.TransientFailure] > 1 {
   425  		return connectivity.TransientFailure
   426  	}
   427  	if cse.nums[connectivity.Connecting] > 0 {
   428  		return connectivity.Connecting
   429  	}
   430  	if cse.nums[connectivity.Idle] > 0 {
   431  		return connectivity.Idle
   432  	}
   433  	return connectivity.TransientFailure
   434  }