google.golang.org/grpc@v1.74.2/balancer/ringhash/ringhash.go (about)

     1  /*
     2   *
     3   * Copyright 2021 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  // Package ringhash implements the ringhash balancer. See the following
    20  // gRFCs for details:
    21  // - https://github.com/grpc/proposal/blob/master/A42-xds-ring-hash-lb-policy.md
    22  // - https://github.com/grpc/proposal/blob/master/A61-IPv4-IPv6-dualstack-backends.md#ring-hash
    23  // - https://github.com/grpc/proposal/blob/master/A76-ring-hash-improvements.md
    24  //
    25  // # Experimental
    26  //
    27  // Notice: This package is EXPERIMENTAL and may be changed or removed in a
    28  // later release.
    29  package ringhash
    30  
    31  import (
    32  	"encoding/json"
    33  	"errors"
    34  	"fmt"
    35  	"math/rand/v2"
    36  	"sort"
    37  	"sync"
    38  
    39  	"google.golang.org/grpc/balancer"
    40  	"google.golang.org/grpc/balancer/base"
    41  	"google.golang.org/grpc/balancer/endpointsharding"
    42  	"google.golang.org/grpc/balancer/lazy"
    43  	"google.golang.org/grpc/balancer/pickfirst/pickfirstleaf"
    44  	"google.golang.org/grpc/connectivity"
    45  	"google.golang.org/grpc/internal/balancer/weight"
    46  	"google.golang.org/grpc/internal/grpclog"
    47  	"google.golang.org/grpc/internal/pretty"
    48  	iringhash "google.golang.org/grpc/internal/ringhash"
    49  	"google.golang.org/grpc/resolver"
    50  	"google.golang.org/grpc/resolver/ringhash"
    51  	"google.golang.org/grpc/serviceconfig"
    52  )
    53  
// Name is the name of the ring_hash balancer. It is the value returned by the
// Name method of the builder that is registered in init.
const Name = "ring_hash_experimental"
    56  
    57  func lazyPickFirstBuilder(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
    58  	return lazy.NewBalancer(cc, opts, balancer.Get(pickfirstleaf.Name).Build)
    59  }
    60  
// init registers the ring hash balancer builder with the balancer package.
func init() {
	balancer.Register(bb{})
}
    64  
// bb is the builder for the ring hash balancer. It carries no state; its
// behavior lives entirely in its Build, Name and ParseConfig methods.
type bb struct{}
    66  
    67  func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
    68  	b := &ringhashBalancer{
    69  		ClientConn:     cc,
    70  		endpointStates: resolver.NewEndpointMap[*endpointState](),
    71  	}
    72  	esOpts := endpointsharding.Options{DisableAutoReconnect: true}
    73  	b.child = endpointsharding.NewBalancer(b, opts, lazyPickFirstBuilder, esOpts)
    74  	b.logger = prefixLogger(b)
    75  	b.logger.Infof("Created")
    76  	return b
    77  }
    78  
// Name returns the name of the ring hash balancer, i.e. the Name constant.
func (bb) Name() string {
	return Name
}
    82  
// ParseConfig parses the JSON load balancing config c by delegating to
// parseConfig and returns its result unchanged.
func (bb) ParseConfig(c json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
	return parseConfig(c)
}
    86  
// ringhashBalancer is the ring hash balancer. It forwards resolver updates to
// a child endpointsharding balancer, intercepts that child's state updates to
// maintain per-endpoint state and the hash ring, and publishes its own
// aggregated connectivity state and picker to the parent ClientConn.
type ringhashBalancer struct {
	// The following fields are initialized at build time and read-only after
	// that and therefore do not need to be guarded by a mutex.

	// ClientConn is embedded to intercept UpdateState calls from the child
	// endpointsharding balancer.
	balancer.ClientConn
	logger *grpclog.PrefixLogger
	child  balancer.Balancer

	// mu guards all of the fields below.
	mu sync.Mutex
	// config is the most recently accepted LB config. It is nil until the
	// first successful UpdateClientConnState.
	config *iringhash.LBConfig
	// inhibitChildUpdates is true while UpdateClientConnState is forwarding an
	// update to the child. While it is set, updatePickerLocked does not
	// regenerate the ring or send a picker to the parent.
	inhibitChildUpdates bool
	// shouldRegenerateRing records that the endpoint set, an endpoint's weight
	// or hash key, or the ring size config has changed since the ring was last
	// built.
	shouldRegenerateRing bool
	// endpointStates holds the state tracked for every endpoint reported by
	// the child balancer.
	endpointStates *resolver.EndpointMap[*endpointState]

	// ring is always in sync with endpoints. When endpoints change, a new ring
	// is generated. Note that address weight updates also regenerate the
	// ring.
	ring *ring
}
   108  
   109  // hashKey returns the hash key to use for an endpoint. Per gRFC A61, each entry
   110  // in the ring is a hash of the endpoint's hash key concatenated with a
   111  // per-entry unique suffix.
   112  func hashKey(endpoint resolver.Endpoint) string {
   113  	if hk := ringhash.HashKey(endpoint); hk != "" {
   114  		return hk
   115  	}
   116  	// If no hash key is set, use the endpoint's first address as the hash key.
   117  	// This is the default behavior when no hash key is set.
   118  	return endpoint.Addresses[0].Addr
   119  }
   120  
   121  // UpdateState intercepts child balancer state updates. It updates the
   122  // per-endpoint state stored in the ring, and also the aggregated state based on
   123  // the child picker. It also reconciles the endpoint list. It sets
   124  // `b.shouldRegenerateRing` to true if the new endpoint list is different from
   125  // the previous, i.e. any of the following is true:
   126  // - an endpoint was added
   127  // - an endpoint was removed
   128  // - an endpoint's weight was updated
   129  // - the first addresses of the endpoint has changed
   130  func (b *ringhashBalancer) UpdateState(state balancer.State) {
   131  	b.mu.Lock()
   132  	defer b.mu.Unlock()
   133  	childStates := endpointsharding.ChildStatesFromPicker(state.Picker)
   134  	// endpointsSet is the set converted from endpoints, used for quick lookup.
   135  	endpointsSet := resolver.NewEndpointMap[bool]()
   136  
   137  	for _, childState := range childStates {
   138  		endpoint := childState.Endpoint
   139  		endpointsSet.Set(endpoint, true)
   140  		newWeight := getWeightAttribute(endpoint)
   141  		hk := hashKey(endpoint)
   142  		es, ok := b.endpointStates.Get(endpoint)
   143  		if !ok {
   144  			es := &endpointState{
   145  				balancer: childState.Balancer,
   146  				hashKey:  hk,
   147  				weight:   newWeight,
   148  				state:    childState.State,
   149  			}
   150  			b.endpointStates.Set(endpoint, es)
   151  			b.shouldRegenerateRing = true
   152  		} else {
   153  			// We have seen this endpoint before and created a `endpointState`
   154  			// object for it. If the weight or the hash key of the endpoint has
   155  			// changed, update the endpoint state map with the new weight or
   156  			// hash key. This will be used when a new ring is created.
   157  			if oldWeight := es.weight; oldWeight != newWeight {
   158  				b.shouldRegenerateRing = true
   159  				es.weight = newWeight
   160  			}
   161  			if es.hashKey != hk {
   162  				b.shouldRegenerateRing = true
   163  				es.hashKey = hk
   164  			}
   165  			es.state = childState.State
   166  		}
   167  	}
   168  
   169  	for _, endpoint := range b.endpointStates.Keys() {
   170  		if _, ok := endpointsSet.Get(endpoint); ok {
   171  			continue
   172  		}
   173  		// endpoint was removed by resolver.
   174  		b.endpointStates.Delete(endpoint)
   175  		b.shouldRegenerateRing = true
   176  	}
   177  
   178  	b.updatePickerLocked()
   179  }
   180  
   181  func (b *ringhashBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error {
   182  	if b.logger.V(2) {
   183  		b.logger.Infof("Received update from resolver, balancer config: %+v", pretty.ToJSON(ccs.BalancerConfig))
   184  	}
   185  
   186  	newConfig, ok := ccs.BalancerConfig.(*iringhash.LBConfig)
   187  	if !ok {
   188  		return fmt.Errorf("unexpected balancer config with type: %T", ccs.BalancerConfig)
   189  	}
   190  
   191  	b.mu.Lock()
   192  	b.inhibitChildUpdates = true
   193  	b.mu.Unlock()
   194  
   195  	defer func() {
   196  		b.mu.Lock()
   197  		b.inhibitChildUpdates = false
   198  		b.updatePickerLocked()
   199  		b.mu.Unlock()
   200  	}()
   201  
   202  	if err := b.child.UpdateClientConnState(balancer.ClientConnState{
   203  		// Make pickfirst children use health listeners for outlier detection
   204  		// and health checking to work.
   205  		ResolverState: pickfirstleaf.EnableHealthListener(ccs.ResolverState),
   206  	}); err != nil {
   207  		return err
   208  	}
   209  
   210  	b.mu.Lock()
   211  	// Ring updates can happen due to the following:
   212  	// 1. Addition or deletion of endpoints: The synchronous picker update from
   213  	//    the child endpointsharding balancer would contain the list of updated
   214  	//    endpoints.  Updates triggered by the child after handling the
   215  	//    `UpdateClientConnState` call will not change the endpoint list.
   216  	// 2. Change in the `LoadBalancerConfig`: Ring config such as max/min ring
   217  	//    size.
   218  	// To avoid extra ring updates, a boolean is used to track the need for a
   219  	// ring update and the update is done only once at the end.
   220  	//
   221  	// If the ring configuration has changed, we need to regenerate the ring
   222  	// while sending a new picker.
   223  	if b.config == nil || b.config.MinRingSize != newConfig.MinRingSize || b.config.MaxRingSize != newConfig.MaxRingSize {
   224  		b.shouldRegenerateRing = true
   225  	}
   226  	b.config = newConfig
   227  	b.mu.Unlock()
   228  	return nil
   229  }
   230  
// ResolverError forwards the resolver error to the child balancer.
func (b *ringhashBalancer) ResolverError(err error) {
	b.child.ResolverError(err)
}
   234  
// UpdateSubConnState is not expected to be called on this balancer. It only
// logs an error describing the unexpected call and otherwise does nothing.
func (b *ringhashBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}
   238  
   239  func (b *ringhashBalancer) updatePickerLocked() {
   240  	state := b.aggregatedStateLocked()
   241  	// Start connecting to new endpoints if necessary.
   242  	if state == connectivity.Connecting || state == connectivity.TransientFailure {
   243  		// When overall state is TransientFailure, we need to make sure at least
   244  		// one endpoint is attempting to connect, otherwise this balancer may
   245  		// never get picks if the parent is priority.
   246  		//
   247  		// Because we report Connecting as the overall state when only one
   248  		// endpoint is in TransientFailure, we do the same check for Connecting
   249  		// here.
   250  		//
   251  		// Note that this check also covers deleting endpoints. E.g. if the
   252  		// endpoint attempting to connect is deleted, and the overall state is
   253  		// TF. Since there must be at least one endpoint attempting to connect,
   254  		// we need to trigger one.
   255  		//
   256  		// After calling `ExitIdle` on a child balancer, the child will send a
   257  		// picker update asynchronously. A race condition may occur if another
   258  		// picker update from endpointsharding arrives before the child's
   259  		// picker update. The received picker may trigger a re-execution of the
   260  		// loop below to find an idle child. Since map iteration order is
   261  		// non-deterministic, the list of `endpointState`s must be sorted to
   262  		// ensure `ExitIdle` is called on the same child, preventing unnecessary
   263  		// connections.
   264  		var endpointStates = make([]*endpointState, b.endpointStates.Len())
   265  		for i, s := range b.endpointStates.Values() {
   266  			endpointStates[i] = s
   267  		}
   268  		sort.Slice(endpointStates, func(i, j int) bool {
   269  			return endpointStates[i].hashKey < endpointStates[j].hashKey
   270  		})
   271  		var idleBalancer endpointsharding.ExitIdler
   272  		for _, es := range endpointStates {
   273  			connState := es.state.ConnectivityState
   274  			if connState == connectivity.Connecting {
   275  				idleBalancer = nil
   276  				break
   277  			}
   278  			if idleBalancer == nil && connState == connectivity.Idle {
   279  				idleBalancer = es.balancer
   280  			}
   281  		}
   282  		if idleBalancer != nil {
   283  			idleBalancer.ExitIdle()
   284  		}
   285  	}
   286  
   287  	if b.inhibitChildUpdates {
   288  		return
   289  	}
   290  
   291  	// Update the channel.
   292  	if b.endpointStates.Len() > 0 && b.shouldRegenerateRing {
   293  		// with a non-empty list of endpoints.
   294  		b.ring = newRing(b.endpointStates, b.config.MinRingSize, b.config.MaxRingSize, b.logger)
   295  	}
   296  	b.shouldRegenerateRing = false
   297  	var newPicker balancer.Picker
   298  	if b.endpointStates.Len() == 0 {
   299  		newPicker = base.NewErrPicker(errors.New("produced zero addresses"))
   300  	} else {
   301  		newPicker = b.newPickerLocked()
   302  	}
   303  	b.ClientConn.UpdateState(balancer.State{
   304  		ConnectivityState: state,
   305  		Picker:            newPicker,
   306  	})
   307  }
   308  
// Close logs the shutdown and closes the child balancer.
func (b *ringhashBalancer) Close() {
	b.logger.Infof("Shutdown")
	b.child.Close()
}
   313  
// ExitIdle is intentionally a no-op for the ring hash balancer.
func (b *ringhashBalancer) ExitIdle() {
	// ExitIdle implementation is a no-op because connections are triggered
	// either by picks or by child balancer state changes.
}
   318  
   319  // newPickerLocked generates a picker. The picker copies the endpoint states
   320  // over to avoid locking the mutex at RPC time. The picker should be
   321  // re-generated every time an endpoint state is updated.
   322  func (b *ringhashBalancer) newPickerLocked() *picker {
   323  	states := make(map[string]endpointState)
   324  	hasEndpointConnecting := false
   325  	for _, epState := range b.endpointStates.Values() {
   326  		// Copy the endpoint state to avoid races, since ring hash
   327  		// mutates the state, weight and hash key in place.
   328  		states[epState.hashKey] = *epState
   329  		if epState.state.ConnectivityState == connectivity.Connecting {
   330  			hasEndpointConnecting = true
   331  		}
   332  	}
   333  	return &picker{
   334  		ring:                         b.ring,
   335  		endpointStates:               states,
   336  		requestHashHeader:            b.config.RequestHashHeader,
   337  		hasEndpointInConnectingState: hasEndpointConnecting,
   338  		randUint64:                   rand.Uint64,
   339  	}
   340  }
   341  
   342  // aggregatedStateLocked returns the aggregated child balancers state
   343  // based on the following rules.
   344  //   - If there is at least one endpoint in READY state, report READY.
   345  //   - If there are 2 or more endpoints in TRANSIENT_FAILURE state, report
   346  //     TRANSIENT_FAILURE.
   347  //   - If there is at least one endpoint in CONNECTING state, report CONNECTING.
   348  //   - If there is one endpoint in TRANSIENT_FAILURE and there is more than one
   349  //     endpoint, report state CONNECTING.
   350  //   - If there is at least one endpoint in Idle state, report Idle.
   351  //   - Otherwise, report TRANSIENT_FAILURE.
   352  //
   353  // Note that if there are 1 connecting, 2 transient failure, the overall state
   354  // is transient failure. This is because the second transient failure is a
   355  // fallback of the first failing endpoint, and we want to report transient
   356  // failure to failover to the lower priority.
   357  func (b *ringhashBalancer) aggregatedStateLocked() connectivity.State {
   358  	var nums [5]int
   359  	for _, es := range b.endpointStates.Values() {
   360  		nums[es.state.ConnectivityState]++
   361  	}
   362  
   363  	if nums[connectivity.Ready] > 0 {
   364  		return connectivity.Ready
   365  	}
   366  	if nums[connectivity.TransientFailure] > 1 {
   367  		return connectivity.TransientFailure
   368  	}
   369  	if nums[connectivity.Connecting] > 0 {
   370  		return connectivity.Connecting
   371  	}
   372  	if nums[connectivity.TransientFailure] == 1 && b.endpointStates.Len() > 1 {
   373  		return connectivity.Connecting
   374  	}
   375  	if nums[connectivity.Idle] > 0 {
   376  		return connectivity.Idle
   377  	}
   378  	return connectivity.TransientFailure
   379  }
   380  
   381  // getWeightAttribute is a convenience function which returns the value of the
   382  // weight endpoint Attribute.
   383  //
   384  // When used in the xDS context, the weight attribute is guaranteed to be
   385  // non-zero. But, when used in a non-xDS context, the weight attribute could be
   386  // unset. A Default of 1 is used in the latter case.
   387  func getWeightAttribute(e resolver.Endpoint) uint32 {
   388  	w := weight.FromEndpoint(e).Weight
   389  	if w == 0 {
   390  		return 1
   391  	}
   392  	return w
   393  }
   394  
// endpointState is the per-endpoint data tracked by the ring hash balancer.
// Pickers receive value copies of it (see newPickerLocked).
type endpointState struct {
	// hashKey is the hash key of the endpoint. Per gRFC A61, each entry in the
	// ring is an endpoint, positioned based on the hash of the endpoint's first
	// address by default. Per gRFC A76, the hash key of an endpoint may be
	// overridden, for example based on EDS endpoint metadata.
	hashKey string
	// weight is the endpoint's weight as returned by getWeightAttribute, so it
	// is never zero.
	weight uint32
	// balancer is the child balancer for this endpoint. ExitIdle is called on
	// it to trigger a connection attempt.
	balancer endpointsharding.ExitIdler

	// state is updated by the balancer while receiving resolver updates from
	// the channel and picker updates from its children. Access to it is guarded
	// by ringhashBalancer.mu.
	state balancer.State
}