google.golang.org/grpc@v1.62.1/xds/internal/balancer/priority/balancer.go (about)

     1  /*
     2   *
     3   * Copyright 2021 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  // Package priority implements the priority balancer.
    20  //
    21  // This balancer will be kept in internal until we use it in the xds balancers,
    22  // and are confident its functionalities are stable. It will then be exported
    23  // for more users.
    24  package priority
    25  
    26  import (
    27  	"encoding/json"
    28  	"fmt"
    29  	"sync"
    30  	"time"
    31  
    32  	"google.golang.org/grpc/balancer"
    33  	"google.golang.org/grpc/balancer/base"
    34  	"google.golang.org/grpc/connectivity"
    35  	"google.golang.org/grpc/internal/balancergroup"
    36  	"google.golang.org/grpc/internal/buffer"
    37  	"google.golang.org/grpc/internal/grpclog"
    38  	"google.golang.org/grpc/internal/grpcsync"
    39  	"google.golang.org/grpc/internal/hierarchy"
    40  	"google.golang.org/grpc/internal/pretty"
    41  	"google.golang.org/grpc/resolver"
    42  	"google.golang.org/grpc/serviceconfig"
    43  )
    44  
// Name is the name of the priority balancer. It is the value returned by the
// builder's Name method.
const Name = "priority_experimental"
    47  
// DefaultSubBalancerCloseTimeout is the SubBalancerCloseTimeout passed to the
// balancer group created in Build. It is defined as a variable instead of a
// const so that tests can override it.
var DefaultSubBalancerCloseTimeout = 15 * time.Minute
    51  
// init registers the priority balancer builder with the balancer package.
func init() {
	balancer.Register(bb{})
}
    55  
// bb is the builder for the priority balancer. It carries no state.
type bb struct{}
    57  
    58  func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer {
    59  	b := &priorityBalancer{
    60  		cc:                       cc,
    61  		done:                     grpcsync.NewEvent(),
    62  		children:                 make(map[string]*childBalancer),
    63  		childBalancerStateUpdate: buffer.NewUnbounded(),
    64  	}
    65  
    66  	b.logger = prefixLogger(b)
    67  	b.bg = balancergroup.New(balancergroup.Options{
    68  		CC:                      cc,
    69  		BuildOpts:               bOpts,
    70  		StateAggregator:         b,
    71  		Logger:                  b.logger,
    72  		SubBalancerCloseTimeout: DefaultSubBalancerCloseTimeout,
    73  	})
    74  	b.bg.Start()
    75  	go b.run()
    76  	b.logger.Infof("Created")
    77  	return b
    78  }
    79  
    80  func (b bb) ParseConfig(s json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
    81  	return parseConfig(s)
    82  }
    83  
// Name returns the name of the priority balancer (the package-level Name
// constant).
func (bb) Name() string {
	return Name
}
    87  
// timerWrapper wraps a timer with a boolean, so that when a race happens
// between AfterFunc and Stop, the func is guaranteed to not execute.
//
// NOTE(review): nothing in this part of the file reads or writes these
// fields; presumably the timer's func checks stopped before doing any work —
// confirm against the code that uses this type.
type timerWrapper struct {
	stopped bool
	timer   *time.Timer
}
    94  
// priorityBalancer is the balancer returned by bb.Build. It keeps a set of
// named child balancers ordered by priority and forwards lifecycle calls to a
// balancer group.
type priorityBalancer struct {
	// logger is the prefixed logger created for this balancer in Build.
	logger                   *grpclog.PrefixLogger
	// cc is the ClientConn passed to Build; aggregated state is reported to it.
	cc                       balancer.ClientConn
	// bg is the balancer group created and started in Build.
	bg                       *balancergroup.BalancerGroup
	// done is fired in Close; run exits once it has fired.
	done                     *grpcsync.Event
	// childBalancerStateUpdate queues childBalancerState and
	// resumePickerUpdates items for the run goroutine.
	childBalancerStateUpdate *buffer.Unbounded

	// mu is held by UpdateClientConnState, Close and run while they access the
	// fields below.
	mu         sync.Mutex
	// childInUse is reset to "" in Close and when an update removes all
	// priorities. NOTE(review): presumably the name of the currently active
	// child — confirm against syncPriority.
	childInUse string
	// priorities is a list of child names from higher to lower priority.
	priorities []string
	// children is a map from child name to sub-balancers.
	children map[string]*childBalancer

	// Set during UpdateClientConnState when calling into sub-balancers.
	// Prevents child updates from recomputing the active priority or sending
	// an update of the aggregated picker to the parent.  Cleared after all
	// sub-balancers have finished UpdateClientConnState, after which
	// syncPriority is called manually.
	inhibitPickerUpdates bool
}
   116  
   117  func (b *priorityBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
   118  	b.logger.Debugf("Received an update with balancer config: %+v", pretty.ToJSON(s.BalancerConfig))
   119  	newConfig, ok := s.BalancerConfig.(*LBConfig)
   120  	if !ok {
   121  		return fmt.Errorf("unexpected balancer config with type: %T", s.BalancerConfig)
   122  	}
   123  	addressesSplit := hierarchy.Group(s.ResolverState.Addresses)
   124  
   125  	b.mu.Lock()
   126  	// Create and remove children, since we know all children from the config
   127  	// are used by some priority.
   128  	for name, newSubConfig := range newConfig.Children {
   129  		bb := balancer.Get(newSubConfig.Config.Name)
   130  		if bb == nil {
   131  			b.logger.Errorf("balancer name %v from config is not registered", newSubConfig.Config.Name)
   132  			continue
   133  		}
   134  
   135  		currentChild, ok := b.children[name]
   136  		if !ok {
   137  			// This is a new child, add it to the children list. But note that
   138  			// the balancer isn't built, because this child can be a low
   139  			// priority. If necessary, it will be built when syncing priorities.
   140  			cb := newChildBalancer(name, b, bb.Name(), b.cc)
   141  			cb.updateConfig(newSubConfig, resolver.State{
   142  				Addresses:     addressesSplit[name],
   143  				ServiceConfig: s.ResolverState.ServiceConfig,
   144  				Attributes:    s.ResolverState.Attributes,
   145  			})
   146  			b.children[name] = cb
   147  			continue
   148  		}
   149  
   150  		// This is not a new child. But the config/addresses could change.
   151  
   152  		// The balancing policy name is changed, close the old child. But don't
   153  		// rebuild, rebuild will happen when syncing priorities.
   154  		if currentChild.balancerName != bb.Name() {
   155  			currentChild.stop()
   156  			currentChild.updateBalancerName(bb.Name())
   157  		}
   158  
   159  		// Update config and address, but note that this doesn't send the
   160  		// updates to non-started child balancers (the child balancer might not
   161  		// be built, if it's a low priority).
   162  		currentChild.updateConfig(newSubConfig, resolver.State{
   163  			Addresses:     addressesSplit[name],
   164  			ServiceConfig: s.ResolverState.ServiceConfig,
   165  			Attributes:    s.ResolverState.Attributes,
   166  		})
   167  	}
   168  	// Cleanup resources used by children removed from the config.
   169  	for name, oldChild := range b.children {
   170  		if _, ok := newConfig.Children[name]; !ok {
   171  			oldChild.stop()
   172  			delete(b.children, name)
   173  		}
   174  	}
   175  
   176  	// Update priorities and handle priority changes.
   177  	b.priorities = newConfig.Priorities
   178  
   179  	// Everything was removed by the update.
   180  	if len(b.priorities) == 0 {
   181  		b.childInUse = ""
   182  		b.cc.UpdateState(balancer.State{
   183  			ConnectivityState: connectivity.TransientFailure,
   184  			Picker:            base.NewErrPicker(ErrAllPrioritiesRemoved),
   185  		})
   186  		b.mu.Unlock()
   187  		return nil
   188  	}
   189  
   190  	// This will sync the states of all children to the new updated
   191  	// priorities. Includes starting/stopping child balancers when necessary.
   192  	// Block picker updates until all children have had a chance to call
   193  	// UpdateState to prevent races where, e.g., the active priority reports
   194  	// transient failure but a higher priority may have reported something that
   195  	// made it active, and if the transient failure update is handled first,
   196  	// RPCs could fail.
   197  	b.inhibitPickerUpdates = true
   198  	// Add an item to queue to notify us when the current items in the queue
   199  	// are done and syncPriority has been called.
   200  	done := make(chan struct{})
   201  	b.childBalancerStateUpdate.Put(resumePickerUpdates{done: done})
   202  	b.mu.Unlock()
   203  	<-done
   204  
   205  	return nil
   206  }
   207  
// ResolverError forwards the resolver error to the balancer group.
func (b *priorityBalancer) ResolverError(err error) {
	b.bg.ResolverError(err)
}
   211  
// UpdateSubConnState is not expected to be called on the priority balancer;
// it only logs an error with the SubConn and state it received.
func (b *priorityBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}
   215  
// Close shuts the balancer down. It closes the balancer group and the child
// state update queue, then, under b.mu, fires the done event, clears
// childInUse and stops every child.
func (b *priorityBalancer) Close() {
	b.bg.Close()
	b.childBalancerStateUpdate.Close()

	b.mu.Lock()
	defer b.mu.Unlock()
	// Firing done makes the run goroutine exit.
	b.done.Fire()
	// Clear states of the current child in use, so if there's a race in picker
	// update, it will be dropped.
	b.childInUse = ""
	// Stop the child policies, this is necessary to stop the init timers in the
	// children.
	for _, child := range b.children {
		child.stop()
	}
}
   232  
// ExitIdle forwards the ExitIdle call to the balancer group.
func (b *priorityBalancer) ExitIdle() {
	b.bg.ExitIdle()
}
   236  
   237  // UpdateState implements balancergroup.BalancerStateAggregator interface. The
   238  // balancer group sends new connectivity state and picker here.
   239  func (b *priorityBalancer) UpdateState(childName string, state balancer.State) {
   240  	b.childBalancerStateUpdate.Put(childBalancerState{
   241  		name: childName,
   242  		s:    state,
   243  	})
   244  }
   245  
// childBalancerState is the queue item created by UpdateState: the state s
// reported for the child called name.
type childBalancerState struct {
	name string
	s    balancer.State
}
   250  
// resumePickerUpdates is the queue item enqueued by UpdateClientConnState.
// When run processes it, picker updates are re-enabled, syncPriority is
// called, and done is closed to unblock the waiting UpdateClientConnState.
type resumePickerUpdates struct {
	done chan struct{}
}
   254  
   255  // run handles child update in a separate goroutine, so if the child sends
   256  // updates inline (when called by parent), it won't cause deadlocks (by trying
   257  // to hold the same mutex).
   258  func (b *priorityBalancer) run() {
   259  	for {
   260  		select {
   261  		case u, ok := <-b.childBalancerStateUpdate.Get():
   262  			if !ok {
   263  				return
   264  			}
   265  			b.childBalancerStateUpdate.Load()
   266  			// Needs to handle state update in a goroutine, because each state
   267  			// update needs to start/close child policy, could result in
   268  			// deadlock.
   269  			b.mu.Lock()
   270  			if b.done.HasFired() {
   271  				return
   272  			}
   273  			switch s := u.(type) {
   274  			case childBalancerState:
   275  				b.handleChildStateUpdate(s.name, s.s)
   276  			case resumePickerUpdates:
   277  				b.inhibitPickerUpdates = false
   278  				b.syncPriority(b.childInUse)
   279  				close(s.done)
   280  			}
   281  			b.mu.Unlock()
   282  		case <-b.done.Done():
   283  			return
   284  		}
   285  	}
   286  }