google.golang.org/grpc@v1.72.2/balancer/weightedroundrobin/balancer.go

/*
 *
 * Copyright 2023 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package weightedroundrobin provides an implementation of the weighted round
// robin LB policy, as defined in [gRFC A58].
//
// # Experimental
//
// Notice: This package is EXPERIMENTAL and may be changed or removed in a
// later release.
//
// [gRFC A58]: https://github.com/grpc/proposal/blob/master/A58-client-side-weighted-round-robin-lb-policy.md
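//
// The policy is typically selected via the service config. A minimal,
// hypothetical sketch (field names per gRFC A58; values are illustrative
// only):
//
//	conn, err := grpc.NewClient(target, grpc.WithDefaultServiceConfig(`{
//	  "loadBalancingConfig": [{
//	    "weighted_round_robin": {
//	      "enableOobLoadReport": true,
//	      "blackoutPeriod": "10s",
//	      "weightUpdatePeriod": "1s"
//	    }
//	  }]
//	}`))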
package weightedroundrobin

import (
	"encoding/json"
	"fmt"
	rand "math/rand/v2"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/balancer/endpointsharding"
	"google.golang.org/grpc/balancer/pickfirst/pickfirstleaf"
	"google.golang.org/grpc/balancer/weightedroundrobin/internal"
	"google.golang.org/grpc/balancer/weightedtarget"
	"google.golang.org/grpc/connectivity"
	estats "google.golang.org/grpc/experimental/stats"
	"google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/grpcsync"
	iserviceconfig "google.golang.org/grpc/internal/serviceconfig"
	"google.golang.org/grpc/orca"
	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/serviceconfig"

	v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3"
)

// Name is the name of the weighted round robin balancer.
const Name = "weighted_round_robin"

var (
	rrFallbackMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:           "grpc.lb.wrr.rr_fallback",
		Description:    "EXPERIMENTAL. Number of scheduler updates in which there were not enough endpoints with valid weight, which caused the WRR policy to fall back to RR behavior.",
		Unit:           "update",
		Labels:         []string{"grpc.target"},
		OptionalLabels: []string{"grpc.lb.locality"},
		Default:        false,
	})

	endpointWeightNotYetUsableMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:           "grpc.lb.wrr.endpoint_weight_not_yet_usable",
		Description:    "EXPERIMENTAL. Number of endpoints from each scheduler update that don't yet have usable weight information (i.e., either the load report has not yet been received, or it is within the blackout period).",
		Unit:           "endpoint",
		Labels:         []string{"grpc.target"},
		OptionalLabels: []string{"grpc.lb.locality"},
		Default:        false,
	})

	endpointWeightStaleMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:           "grpc.lb.wrr.endpoint_weight_stale",
		Description:    "EXPERIMENTAL. Number of endpoints from each scheduler update whose latest weight is older than the expiration period.",
		Unit:           "endpoint",
		Labels:         []string{"grpc.target"},
		OptionalLabels: []string{"grpc.lb.locality"},
		Default:        false,
	})

	endpointWeightsMetric = estats.RegisterFloat64Histo(estats.MetricDescriptor{
		Name:           "grpc.lb.wrr.endpoint_weights",
		Description:    "EXPERIMENTAL. Weight of each endpoint, recorded on every scheduler update. Endpoints without usable weights will be recorded as weight 0.",
		Unit:           "endpoint",
		Labels:         []string{"grpc.target"},
		OptionalLabels: []string{"grpc.lb.locality"},
		Default:        false,
	})
)
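
// Note: each metric above is registered with Default: false, i.e. disabled by
// default; a metrics recorder / stats plugin must explicitly enable these
// metrics for anything to be recorded.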

func init() {
	balancer.Register(bb{})
}

type bb struct{}

func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer {
	b := &wrrBalancer{
		ClientConn:       cc,
		target:           bOpts.Target.String(),
		metricsRecorder:  cc.MetricsRecorder(),
		addressWeights:   resolver.NewAddressMapV2[*endpointWeight](),
		endpointToWeight: resolver.NewEndpointMap[*endpointWeight](),
		scToWeight:       make(map[balancer.SubConn]*endpointWeight),
	}

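	// The child is an endpointsharding balancer that creates one pick-first
	// balancer per endpoint; this policy only computes per-endpoint weights
	// and wraps the child's pickers.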
	b.child = endpointsharding.NewBalancer(b, bOpts, balancer.Get(pickfirstleaf.Name).Build, endpointsharding.Options{})
	b.logger = prefixLogger(b)
	b.logger.Infof("Created")
	return b
}

func (bb) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
	lbCfg := &lbConfig{
		// Default values as documented in A58.
		OOBReportingPeriod:      iserviceconfig.Duration(10 * time.Second),
		BlackoutPeriod:          iserviceconfig.Duration(10 * time.Second),
		WeightExpirationPeriod:  iserviceconfig.Duration(3 * time.Minute),
		WeightUpdatePeriod:      iserviceconfig.Duration(time.Second),
		ErrorUtilizationPenalty: 1,
	}
	if err := json.Unmarshal(js, lbCfg); err != nil {
		return nil, fmt.Errorf("wrr: unable to unmarshal LB policy config: %s, error: %v", string(js), err)
	}

	if lbCfg.ErrorUtilizationPenalty < 0 {
		return nil, fmt.Errorf("wrr: errorUtilizationPenalty must be non-negative")
	}

	// For easier comparisons later, ensure the OOB reporting period is unset
	// (0s) when OOB reports are disabled.
	if !lbCfg.EnableOOBLoadReport {
		lbCfg.OOBReportingPeriod = 0
	}

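	// For illustration (hypothetical input): a config containing
	// {"weightUpdatePeriod": "0.05s"} parses successfully, but the period is
	// clamped to 100ms below, unless internal.AllowAnyWeightUpdatePeriod is
	// set (a testing knob).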
	// Impose lower bound of 100ms on weightUpdatePeriod.
	if !internal.AllowAnyWeightUpdatePeriod && lbCfg.WeightUpdatePeriod < iserviceconfig.Duration(100*time.Millisecond) {
		lbCfg.WeightUpdatePeriod = iserviceconfig.Duration(100 * time.Millisecond)
	}

	return lbCfg, nil
}

func (bb) Name() string {
	return Name
}

// updateEndpointsLocked updates the endpoint weight state based on the new
// endpoint list: it creates and configures weights for new endpoints and
// clears out weights for endpoints that are no longer present.
//
// Caller must hold b.mu.
func (b *wrrBalancer) updateEndpointsLocked(endpoints []resolver.Endpoint) {
	endpointSet := resolver.NewEndpointMap[*endpointWeight]()
	addressSet := resolver.NewAddressMapV2[*endpointWeight]()
	for _, endpoint := range endpoints {
		endpointSet.Set(endpoint, nil)
		for _, addr := range endpoint.Addresses {
			addressSet.Set(addr, nil)
		}
		ew, ok := b.endpointToWeight.Get(endpoint)
		if !ok {
			ew = &endpointWeight{
				logger:            b.logger,
				connectivityState: connectivity.Connecting,
				// Initially, load reporting is off, because it is not running
				// when the endpointWeight is first created.
				cfg:             &lbConfig{EnableOOBLoadReport: false},
				metricsRecorder: b.metricsRecorder,
				target:          b.target,
				locality:        b.locality,
			}
			for _, addr := range endpoint.Addresses {
				b.addressWeights.Set(addr, ew)
			}
			b.endpointToWeight.Set(endpoint, ew)
		}
		ew.updateConfig(b.cfg)
	}

	for _, endpoint := range b.endpointToWeight.Keys() {
		if _, ok := endpointSet.Get(endpoint); ok {
			// Existing endpoint also in the new endpoint list; skip.
			continue
		}
		b.endpointToWeight.Delete(endpoint)
		for _, addr := range endpoint.Addresses {
			// Old endpoints being deleted can share addresses with new
			// endpoints, so only delete an address if it is truly gone.
			if _, ok := addressSet.Get(addr); !ok {
				b.addressWeights.Delete(addr)
			}
		}
		// The SubConn map is cleaned up in updateSubConnState when the
		// SubConn receives a SHUTDOWN signal.
	}
}

// wrrBalancer implements the weighted round robin LB policy.
type wrrBalancer struct {
	// The following fields are set at initialization time and read only after that,
	// so they do not need to be protected by a mutex.
	child               balancer.Balancer
	balancer.ClientConn // Embed to intercept NewSubConn operation
	logger              *grpclog.PrefixLogger
	target              string
	metricsRecorder     estats.MetricsRecorder

	mu               sync.Mutex
	cfg              *lbConfig // active config
	locality         string
	stopPicker       *grpcsync.Event
	addressWeights   *resolver.AddressMapV2[*endpointWeight]
	endpointToWeight *resolver.EndpointMap[*endpointWeight]
	scToWeight       map[balancer.SubConn]*endpointWeight
}

func (b *wrrBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error {
	if b.logger.V(2) {
		b.logger.Infof("UpdateCCS: %v", ccs)
	}
	cfg, ok := ccs.BalancerConfig.(*lbConfig)
	if !ok {
		return fmt.Errorf("wrr: received nil or illegal BalancerConfig (type %T): %v", ccs.BalancerConfig, ccs.BalancerConfig)
	}

	// Note: empty endpoints and duplicate addresses across endpoints do not
	// explicitly error, but result in undefined behavior.
	b.mu.Lock()
	b.cfg = cfg
	b.locality = weightedtarget.LocalityFromResolverState(ccs.ResolverState)
	b.updateEndpointsLocked(ccs.ResolverState.Endpoints)
	b.mu.Unlock()

	// This causes the child to update its picker inline, which in turn
	// triggers an inline picker update from this balancer via UpdateState.
	return b.child.UpdateClientConnState(balancer.ClientConnState{
		// Make pickfirst children use health listeners so that outlier
		// detection works.
		ResolverState: pickfirstleaf.EnableHealthListener(ccs.ResolverState),
	})
}

func (b *wrrBalancer) UpdateState(state balancer.State) {
	b.mu.Lock()
	defer b.mu.Unlock()

	if b.stopPicker != nil {
		b.stopPicker.Fire()
		b.stopPicker = nil
	}

	childStates := endpointsharding.ChildStatesFromPicker(state.Picker)

	var readyPickersWeight []pickerWeightedEndpoint

	for _, childState := range childStates {
		if childState.State.ConnectivityState == connectivity.Ready {
			ew, ok := b.endpointToWeight.Get(childState.Endpoint)
			if !ok {
				// Should never happen, simply continue and ignore this endpoint
				// for READY pickers.
				continue
			}
			readyPickersWeight = append(readyPickersWeight, pickerWeightedEndpoint{
				picker:           childState.State.Picker,
				weightedEndpoint: ew,
			})
		}
	}
	// If no ready pickers are present, simply defer to the round robin picker
	// from endpoint sharding, which will round robin across the most relevant
	// pick first children in the highest precedence connectivity state.
	if len(readyPickersWeight) == 0 {
		b.ClientConn.UpdateState(balancer.State{
			ConnectivityState: state.ConnectivityState,
			Picker:            state.Picker,
		})
		return
	}

	p := &picker{
		v:               rand.Uint32(), // start the scheduler at a random point
		cfg:             b.cfg,
		weightedPickers: readyPickersWeight,
		metricsRecorder: b.metricsRecorder,
		locality:        b.locality,
		target:          b.target,
	}

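	// Arm a fresh stop event for the new picker's scheduler-update goroutine;
	// the previous picker's goroutine, if any, was stopped when stopPicker
	// was fired above.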
	b.stopPicker = grpcsync.NewEvent()
	p.start(b.stopPicker)

	b.ClientConn.UpdateState(balancer.State{
		ConnectivityState: state.ConnectivityState,
		Picker:            p,
	})
}

type pickerWeightedEndpoint struct {
	picker           balancer.Picker
	weightedEndpoint *endpointWeight
}

func (b *wrrBalancer) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
	addr := addrs[0] // The new pick first policy for DualStack will only ever create a SubConn with one address.
	var sc balancer.SubConn

	oldListener := opts.StateListener
	opts.StateListener = func(state balancer.SubConnState) {
		b.updateSubConnState(sc, state)
		oldListener(state)
	}
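	// Note the ordering above: this balancer's listener runs first, so the
	// weight state is updated before the child (pick first) observes the new
	// SubConn state.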

	b.mu.Lock()
	defer b.mu.Unlock()
	ewi, ok := b.addressWeights.Get(addr)
	if !ok {
		// The address has no associated endpoint weight, which means it was
		// removed by a newer config update (or the balancer is shutting
		// down); refuse to create a SubConn for it.
		return nil, fmt.Errorf("balancer is being closed; no new SubConns allowed")
	}
	sc, err := b.ClientConn.NewSubConn([]resolver.Address{addr}, opts)
	if err != nil {
		return nil, err
	}
	b.scToWeight[sc] = ewi
	return sc, nil
}

func (b *wrrBalancer) ResolverError(err error) {
	// Will cause inline picker update from endpoint sharding.
	b.child.ResolverError(err)
}

func (b *wrrBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}

func (b *wrrBalancer) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.mu.Lock()
	ew := b.scToWeight[sc]
	// This is an update from a SubConn that is no longer relevant; nothing to
	// do here except forward the state to the state listener, which happens
	// in the wrapped listener. The entry is eventually cleared from
	// scToWeight once the SubConn receives a Shutdown signal.
	if ew == nil {
		b.mu.Unlock()
		return
	}
	if state.ConnectivityState == connectivity.Shutdown {
		delete(b.scToWeight, sc)
	}
	b.mu.Unlock()

	// On the first READY transition for an endpoint, set pickedSC, clear the
	// endpoint's weight-tracking state, and potentially start an OOB watch.
	if state.ConnectivityState == connectivity.Ready && ew.pickedSC == nil {
		ew.pickedSC = sc
		ew.mu.Lock()
		ew.nonEmptySince = time.Time{}
		ew.lastUpdated = time.Time{}
		cfg := ew.cfg
		ew.mu.Unlock()
		ew.updateORCAListener(cfg)
		return
	}

	// If the pickedSC (the one pick first uses for an endpoint) transitions
	// out of READY, stop the OOB listener if needed and clear pickedSC so
	// that the next SubConn created for the endpoint that goes READY becomes
	// the endpoint's active SubConn.
	if state.ConnectivityState != connectivity.Ready && ew.pickedSC == sc {
		// The first SubConn that goes READY for an endpoint is the one pick
		// first will pick. Only once that SubConn leaves READY will pick
		// first restart this cycle of creating SubConns and using the first
		// READY one. Once this occurs, the lower-level endpoint sharding
		// balancer pings the pick first child to ExitIdle, which triggers a
		// connection attempt.
		if ew.stopORCAListener != nil {
			ew.stopORCAListener()
		}
		ew.pickedSC = nil
	}
}

// Close stops the balancer.  It cancels any ongoing scheduler updates and
// stops any ORCA listeners.
func (b *wrrBalancer) Close() {
	b.mu.Lock()
	if b.stopPicker != nil {
		b.stopPicker.Fire()
		b.stopPicker = nil
	}
	b.mu.Unlock()

	// Ensure any lingering OOB watchers are stopped.
	for _, ew := range b.endpointToWeight.Values() {
		if ew.stopORCAListener != nil {
			ew.stopORCAListener()
		}
	}
	b.child.Close()
}

func (b *wrrBalancer) ExitIdle() {
	if ei, ok := b.child.(balancer.ExitIdler); ok { // Should always be ok, as child is endpoint sharding.
		ei.ExitIdle()
	}
}

// picker is the WRR policy's picker.  It uses live-updating backend weights to
// update the scheduler periodically and ensure picks are routed proportional
// to those weights.
type picker struct {
	scheduler unsafe.Pointer // *scheduler; accessed atomically
	v         uint32         // incrementing value used by the scheduler; accessed atomically
	cfg       *lbConfig      // active config when picker created

	weightedPickers []pickerWeightedEndpoint // all READY pickers

	// The following fields are immutable.
	target          string
	locality        string
	metricsRecorder estats.MetricsRecorder
}

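// endpointWeights returns the current weight of each READY child picker, in
// the same order as p.weightedPickers. Entries may be 0 for endpoints whose
// weight is expired or not yet usable; when the scheduler is built, such
// entries are treated as the mean of the other weights (see weight below and
// scheduler.go in this package).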
func (p *picker) endpointWeights(recordMetrics bool) []float64 {
	wp := make([]float64, len(p.weightedPickers))
	now := internal.TimeNow()
	for i, wpi := range p.weightedPickers {
		wp[i] = wpi.weightedEndpoint.weight(now, time.Duration(p.cfg.WeightExpirationPeriod), time.Duration(p.cfg.BlackoutPeriod), recordMetrics)
	}
	return wp
}

func (p *picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
	// Read the scheduler atomically.  All scheduler operations are threadsafe,
	// and if the scheduler is replaced during this usage, we want to use the
	// scheduler that was live when the pick started.
	sched := *(*scheduler)(atomic.LoadPointer(&p.scheduler))

	pickedPicker := p.weightedPickers[sched.nextIndex()]
	pr, err := pickedPicker.picker.Pick(info)
	if err != nil {
		logger.Errorf("ready picker returned error: %v", err)
		return balancer.PickResult{}, err
	}
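	// When OOB load reporting is disabled, endpoint weights are derived from
	// per-request ORCA load reports instead: wrap the Done callback to
	// capture the report delivered with each RPC.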
	if !p.cfg.EnableOOBLoadReport {
		oldDone := pr.Done
		pr.Done = func(info balancer.DoneInfo) {
			if load, ok := info.ServerLoad.(*v3orcapb.OrcaLoadReport); ok && load != nil {
				pickedPicker.weightedEndpoint.OnLoadReport(load)
			}
			if oldDone != nil {
				oldDone(info)
			}
		}
	}
	return pr, nil
}

func (p *picker) inc() uint32 {
	return atomic.AddUint32(&p.v, 1)
}

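// regenerateScheduler rebuilds the scheduler from a fresh snapshot of the
// endpoint weights and atomically swaps it in, so in-flight Picks keep using
// the scheduler they already loaded. newScheduler lives in scheduler.go; its
// argument requests metric recording for this rebuild.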
func (p *picker) regenerateScheduler() {
	s := p.newScheduler(true)
	atomic.StorePointer(&p.scheduler, unsafe.Pointer(&s))
}

func (p *picker) start(stopPicker *grpcsync.Event) {
	p.regenerateScheduler()
	if len(p.weightedPickers) == 1 {
		// No need to regenerate weights with only one backend.
		return
	}

	go func() {
		ticker := time.NewTicker(time.Duration(p.cfg.WeightUpdatePeriod))
		defer ticker.Stop()
		for {
			select {
			case <-stopPicker.Done():
				return
			case <-ticker.C:
				p.regenerateScheduler()
			}
		}
	}()
}

// endpointWeight is the weight for an endpoint. It tracks the SubConn that
// will be picked for the endpoint, and other parameters relevant to computing
// the effective weight. When needed, it also tracks connectivity state,
// listens for metrics updates by implementing the orca.OOBListener interface
// and manages that listener.
type endpointWeight struct {
	// The following fields are immutable.
	balancer.SubConn
	logger          *grpclog.PrefixLogger
	target          string
	metricsRecorder estats.MetricsRecorder
	locality        string

	// The following fields are only accessed on calls into the LB policy, and
	// do not need a mutex.
	connectivityState connectivity.State
	stopORCAListener  func()
	// pickedSC is the first SubConn for the endpoint to go READY while the
	// endpoint has no other READY SubConns; it is cleared when that SubConn
	// disconnects (i.e. leaves READY). It represents what pick first uses as
	// its picked SubConn for this endpoint.
	pickedSC balancer.SubConn

	// The following fields are accessed asynchronously and are protected by
	// mu.  Note that mu may not be held when calling into the stopORCAListener
	// or when registering a new listener, as those calls require the ORCA
	// producer mu which is held when calling the listener, and the listener
	// holds mu.
	mu            sync.Mutex
	weightVal     float64
	nonEmptySince time.Time
	lastUpdated   time.Time
	cfg           *lbConfig
}

func (w *endpointWeight) OnLoadReport(load *v3orcapb.OrcaLoadReport) {
	if w.logger.V(2) {
		w.logger.Infof("Received load report for subchannel %v: %v", w.SubConn, load)
	}
	// Update weights of this endpoint according to the reported load.
	utilization := load.ApplicationUtilization
	if utilization == 0 {
		utilization = load.CpuUtilization
	}
	if utilization == 0 || load.RpsFractional == 0 {
		if w.logger.V(2) {
			w.logger.Infof("Ignoring empty load report for subchannel %v", w.SubConn)
		}
		return
	}

	w.mu.Lock()
	defer w.mu.Unlock()

	errorRate := load.Eps / load.RpsFractional
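	// Weight formula from gRFC A58: qps / (utilization + (eps/qps) * penalty).
	// Hypothetical worked example: qps=100, utilization=0.5, eps=2 and
	// errorUtilizationPenalty=1 give errorRate=0.02 and a weight of
	// 100/0.52 ≈ 192.3.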
	w.weightVal = load.RpsFractional / (utilization + errorRate*w.cfg.ErrorUtilizationPenalty)
	if w.logger.V(2) {
		w.logger.Infof("New weight for subchannel %v: %v", w.SubConn, w.weightVal)
	}

	w.lastUpdated = internal.TimeNow()
	if w.nonEmptySince.Equal(time.Time{}) {
		w.nonEmptySince = w.lastUpdated
	}
}

// updateConfig updates the parameters of the WRR policy and
// stops/starts/restarts the ORCA OOB listener.
func (w *endpointWeight) updateConfig(cfg *lbConfig) {
	w.mu.Lock()
	oldCfg := w.cfg
	w.cfg = cfg
	w.mu.Unlock()

	if cfg.EnableOOBLoadReport == oldCfg.EnableOOBLoadReport &&
		cfg.OOBReportingPeriod == oldCfg.OOBReportingPeriod {
		// Load reporting wasn't enabled before or after, or load reporting was
		// enabled before and after, and had the same period.  (Note that with
		// load reporting disabled, OOBReportingPeriod is always 0.)
		return
	}
	// (Re)start the listener to use the new config's settings for OOB
	// reporting.
	w.updateORCAListener(cfg)
}

func (w *endpointWeight) updateORCAListener(cfg *lbConfig) {
	if w.stopORCAListener != nil {
		w.stopORCAListener()
	}
	if !cfg.EnableOOBLoadReport {
		w.stopORCAListener = nil
		return
	}
	if w.pickedSC == nil { // No picked SC for this endpoint yet, nothing to listen on.
		return
	}
	if w.logger.V(2) {
		w.logger.Infof("Registering ORCA listener for %v with interval %v", w.pickedSC, cfg.OOBReportingPeriod)
	}
	opts := orca.OOBListenerOptions{ReportInterval: time.Duration(cfg.OOBReportingPeriod)}
	w.stopORCAListener = orca.RegisterOOBListener(w.pickedSC, w, opts)
}

// weight returns the current effective weight of the endpoint, taking into
// account the parameters.  Returns 0 for blacked out or expired data, which
// will cause the backend weight to be treated as the mean of the weights of
// the other backends. If recordMetrics is true, this function will emit
// metrics through the metrics registry.
func (w *endpointWeight) weight(now time.Time, weightExpirationPeriod, blackoutPeriod time.Duration, recordMetrics bool) (weight float64) {
	w.mu.Lock()
	defer w.mu.Unlock()

	if recordMetrics {
		defer func() {
			endpointWeightsMetric.Record(w.metricsRecorder, weight, w.target, w.locality)
		}()
	}

	// The endpoint has not received a load report (i.e. it just turned READY
	// with no load report yet).
	if w.lastUpdated.Equal(time.Time{}) {
		if recordMetrics {
			endpointWeightNotYetUsableMetric.Record(w.metricsRecorder, 1, w.target, w.locality)
		}
		return 0
	}

	// If the most recent update was longer ago than the expiration period,
	// reset nonEmptySince so that we apply the blackout period again if we
	// start getting data again in the future, and return 0.
	if now.Sub(w.lastUpdated) >= weightExpirationPeriod {
		if recordMetrics {
			endpointWeightStaleMetric.Record(w.metricsRecorder, 1, w.target, w.locality)
		}
		w.nonEmptySince = time.Time{}
		return 0
	}

	// If we don't have at least blackoutPeriod worth of data, return 0.
	if blackoutPeriod != 0 && (w.nonEmptySince.Equal(time.Time{}) || now.Sub(w.nonEmptySince) < blackoutPeriod) {
		if recordMetrics {
			endpointWeightNotYetUsableMetric.Record(w.metricsRecorder, 1, w.target, w.locality)
		}
		return 0
	}

	return w.weightVal
}