google.golang.org/grpc@v1.72.2/xds/internal/balancer/clustermanager/balancerstateaggregator.go (about)

     1  /*
     2   *
     3   * Copyright 2020 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package clustermanager
    20  
    21  import (
    22  	"fmt"
    23  	"sync"
    24  
    25  	"google.golang.org/grpc/balancer"
    26  	"google.golang.org/grpc/balancer/base"
    27  	"google.golang.org/grpc/connectivity"
    28  	"google.golang.org/grpc/internal/grpclog"
    29  )
    30  
    31  type subBalancerState struct {
    32  	state balancer.State
    33  	// stateToAggregate is the connectivity state used only for state
    34  	// aggregation. It could be different from state.ConnectivityState. For
    35  	// example when a sub-balancer transitions from TransientFailure to
    36  	// connecting, state.ConnectivityState is Connecting, but stateToAggregate
    37  	// is still TransientFailure.
    38  	stateToAggregate connectivity.State
    39  }
    40  
    41  func (s *subBalancerState) String() string {
    42  	return fmt.Sprintf("picker:%p,state:%v,stateToAggregate:%v", s.state.Picker, s.state.ConnectivityState, s.stateToAggregate)
    43  }
    44  
    45  type balancerStateAggregator struct {
    46  	cc     balancer.ClientConn
    47  	logger *grpclog.PrefixLogger
    48  	csEval *balancer.ConnectivityStateEvaluator
    49  
    50  	mu sync.Mutex
    51  	// This field is used to ensure that no updates are forwarded to the parent
    52  	// CC once the aggregator is closed. A closed sub-balancer could still send
    53  	// pickers to this aggregator.
    54  	closed bool
    55  	// Map from child policy name to last reported state.
    56  	idToPickerState map[string]*subBalancerState
    57  	// Set when UpdateState call propagation is paused.
    58  	pauseUpdateState bool
    59  	// Set when UpdateState call propagation is paused and an UpdateState call
    60  	// is suppressed.
    61  	needUpdateStateOnResume bool
    62  }
    63  
    64  func newBalancerStateAggregator(cc balancer.ClientConn, logger *grpclog.PrefixLogger) *balancerStateAggregator {
    65  	return &balancerStateAggregator{
    66  		cc:              cc,
    67  		logger:          logger,
    68  		csEval:          &balancer.ConnectivityStateEvaluator{},
    69  		idToPickerState: make(map[string]*subBalancerState),
    70  	}
    71  }
    72  
    73  func (bsa *balancerStateAggregator) close() {
    74  	bsa.mu.Lock()
    75  	defer bsa.mu.Unlock()
    76  	bsa.closed = true
    77  }
    78  
    79  // add adds a sub-balancer in CONNECTING state.
    80  //
    81  // This is called when there's a new child.
    82  func (bsa *balancerStateAggregator) add(id string) {
    83  	bsa.mu.Lock()
    84  	defer bsa.mu.Unlock()
    85  
    86  	bsa.idToPickerState[id] = &subBalancerState{
    87  		// Start everything in CONNECTING, so if one of the sub-balancers
    88  		// reports TransientFailure, the RPCs will still wait for the other
    89  		// sub-balancers.
    90  		state: balancer.State{
    91  			ConnectivityState: connectivity.Connecting,
    92  			Picker:            base.NewErrPicker(balancer.ErrNoSubConnAvailable),
    93  		},
    94  		stateToAggregate: connectivity.Connecting,
    95  	}
    96  	bsa.csEval.RecordTransition(connectivity.Shutdown, connectivity.Connecting)
    97  	bsa.buildAndUpdateLocked()
    98  }
    99  
   100  // remove removes the sub-balancer state. Future updates from this sub-balancer,
   101  // if any, will be ignored.
   102  //
   103  // This is called when a child is removed.
   104  func (bsa *balancerStateAggregator) remove(id string) {
   105  	bsa.mu.Lock()
   106  	defer bsa.mu.Unlock()
   107  	if _, ok := bsa.idToPickerState[id]; !ok {
   108  		return
   109  	}
   110  	// Setting the state of the deleted sub-balancer to Shutdown will get
   111  	// csEvltr to remove the previous state for any aggregated state
   112  	// evaluations. Transitions to and from connectivity.Shutdown are ignored
   113  	// by csEvltr.
   114  	bsa.csEval.RecordTransition(bsa.idToPickerState[id].stateToAggregate, connectivity.Shutdown)
   115  	// Remove id and picker from picker map. This also results in future updates
   116  	// for this ID to be ignored.
   117  	delete(bsa.idToPickerState, id)
   118  	bsa.buildAndUpdateLocked()
   119  }
   120  
   121  // pauseStateUpdates causes UpdateState calls to not propagate to the parent
   122  // ClientConn.  The last state will be remembered and propagated when
   123  // ResumeStateUpdates is called.
   124  func (bsa *balancerStateAggregator) pauseStateUpdates() {
   125  	bsa.mu.Lock()
   126  	defer bsa.mu.Unlock()
   127  	bsa.pauseUpdateState = true
   128  	bsa.needUpdateStateOnResume = false
   129  }
   130  
   131  // resumeStateUpdates will resume propagating UpdateState calls to the parent,
   132  // and call UpdateState on the parent if any UpdateState call was suppressed.
   133  func (bsa *balancerStateAggregator) resumeStateUpdates() {
   134  	bsa.mu.Lock()
   135  	defer bsa.mu.Unlock()
   136  	bsa.pauseUpdateState = false
   137  	if bsa.needUpdateStateOnResume {
   138  		bsa.cc.UpdateState(bsa.buildLocked())
   139  	}
   140  }
   141  
   142  // UpdateState is called to report a balancer state change from sub-balancer.
   143  // It's usually called by the balancer group.
   144  //
   145  // It calls parent ClientConn's UpdateState with the new aggregated state.
   146  func (bsa *balancerStateAggregator) UpdateState(id string, state balancer.State) {
   147  	bsa.logger.Infof("State update from sub-balancer %q: %+v", id, state)
   148  
   149  	bsa.mu.Lock()
   150  	defer bsa.mu.Unlock()
   151  	pickerSt, ok := bsa.idToPickerState[id]
   152  	if !ok {
   153  		// All state starts with an entry in pickStateMap. If ID is not in map,
   154  		// it's either removed, or never existed.
   155  		return
   156  	}
   157  	if !(pickerSt.state.ConnectivityState == connectivity.TransientFailure && state.ConnectivityState == connectivity.Connecting) {
   158  		// If old state is TransientFailure, and new state is Connecting, don't
   159  		// update the state, to prevent the aggregated state from being always
   160  		// CONNECTING. Otherwise, stateToAggregate is the same as
   161  		// state.ConnectivityState.
   162  		bsa.csEval.RecordTransition(pickerSt.stateToAggregate, state.ConnectivityState)
   163  		pickerSt.stateToAggregate = state.ConnectivityState
   164  	}
   165  	pickerSt.state = state
   166  	bsa.buildAndUpdateLocked()
   167  }
   168  
   169  // buildAndUpdateLocked combines the sub-state from each sub-balancer into one
   170  // state, and sends a picker update to the parent ClientConn.
   171  func (bsa *balancerStateAggregator) buildAndUpdateLocked() {
   172  	if bsa.closed {
   173  		return
   174  	}
   175  	if bsa.pauseUpdateState {
   176  		// If updates are paused, do not call UpdateState, but remember that we
   177  		// need to call it when they are resumed.
   178  		bsa.needUpdateStateOnResume = true
   179  		return
   180  	}
   181  	bsa.cc.UpdateState(bsa.buildLocked())
   182  }
   183  
   184  // buildLocked combines sub-states into one.
   185  func (bsa *balancerStateAggregator) buildLocked() balancer.State {
   186  	// The picker's return error might not be consistent with the
   187  	// aggregatedState. Because for this LB policy, we want to always build
   188  	// picker with all sub-pickers (not only ready sub-pickers), so even if the
   189  	// overall state is Ready, pick for certain RPCs can behave like Connecting
   190  	// or TransientFailure.
   191  	bsa.logger.Infof("Child pickers: %+v", bsa.idToPickerState)
   192  	return balancer.State{
   193  		ConnectivityState: bsa.csEval.CurrentState(),
   194  		Picker:            newPickerGroup(bsa.idToPickerState),
   195  	}
   196  }