gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/balancer/grpclb/grpclb.go

/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package grpclb defines a grpclb balancer.
//
// To install the grpclb balancer, import this package as:
//
//	import _ "gitee.com/ks-custle/core-gm/grpc/balancer/grpclb"
package grpclb

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	grpc "gitee.com/ks-custle/core-gm/grpc"
	"gitee.com/ks-custle/core-gm/grpc/balancer"
	grpclbstate "gitee.com/ks-custle/core-gm/grpc/balancer/grpclb/state"
	"gitee.com/ks-custle/core-gm/grpc/connectivity"
	"gitee.com/ks-custle/core-gm/grpc/credentials"
	"gitee.com/ks-custle/core-gm/grpc/grpclog"
	"gitee.com/ks-custle/core-gm/grpc/internal"
	"gitee.com/ks-custle/core-gm/grpc/internal/backoff"
	"gitee.com/ks-custle/core-gm/grpc/internal/resolver/dns"
	"gitee.com/ks-custle/core-gm/grpc/resolver"

	lbpb "gitee.com/ks-custle/core-gm/grpc/balancer/grpclb/grpc_lb_v1"
	durationpb "github.com/golang/protobuf/ptypes/duration"
)

const (
	lbTokenKey             = "lb-token"
	defaultFallbackTimeout = 10 * time.Second
	grpclbName             = "grpclb"
)

var errServerTerminatedConnection = errors.New("grpclb: failed to recv server list: server terminated connection")
var logger = grpclog.Component("grpclb")

// convertDuration converts a durationpb.Duration into a time.Duration,
// returning 0 for a nil input.
func convertDuration(d *durationpb.Duration) time.Duration {
	if d == nil {
		return 0
	}
	return time.Duration(d.Seconds)*time.Second + time.Duration(d.Nanos)*time.Nanosecond
}
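
// For example (illustrative only), a proto duration of 1 second plus
// 500000000 nanoseconds converts to 1.5s:
//
//	d := &durationpb.Duration{Seconds: 1, Nanos: 500000000}
//	fmt.Println(convertDuration(d)) // prints "1.5s"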

// loadBalancerClient is a client API for the LoadBalancer service, mostly
// copied from the generated pb.go file to avoid a circular dependency.
type loadBalancerClient struct {
	cc *grpc.ClientConn
}

func (c *loadBalancerClient) BalanceLoad(ctx context.Context, opts ...grpc.CallOption) (*balanceLoadClientStream, error) {
	desc := &grpc.StreamDesc{
		StreamName:    "BalanceLoad",
		ServerStreams: true,
		ClientStreams: true,
	}
	stream, err := c.cc.NewStream(ctx, desc, "/grpc.lb.v1.LoadBalancer/BalanceLoad", opts...)
	if err != nil {
		return nil, err
	}
	x := &balanceLoadClientStream{stream}
	return x, nil
}

type balanceLoadClientStream struct {
	grpc.ClientStream
}

func (x *balanceLoadClientStream) Send(m *lbpb.LoadBalanceRequest) error {
	return x.ClientStream.SendMsg(m)
}

func (x *balanceLoadClientStream) Recv() (*lbpb.LoadBalanceResponse, error) {
	m := new(lbpb.LoadBalanceResponse)
	if err := x.ClientStream.RecvMsg(m); err != nil {
		return nil, err
	}
	return m, nil
}
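
// A minimal sketch of driving this stream (illustrative only; assumes cc is
// an established *grpc.ClientConn to a remote balancer and ctx is a valid
// context):
//
//	lbClient := &loadBalancerClient{cc: cc}
//	stream, err := lbClient.BalanceLoad(ctx)
//	if err != nil { /* handle stream creation error */ }
//	if err := stream.Send(&lbpb.LoadBalanceRequest{}); err != nil { /* handle */ }
//	res, err := stream.Recv() // first response typically carries the InitialLoadBalanceResponse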

func init() {
	balancer.Register(newLBBuilder())
	dns.EnableSRVLookups = true
}

// newLBBuilder creates a builder for grpclb.
func newLBBuilder() balancer.Builder {
	return newLBBuilderWithFallbackTimeout(defaultFallbackTimeout)
}

// newLBBuilderWithFallbackTimeout creates a grpclb builder with the given
// fallbackTimeout. If no response is received from the remote balancer within
// fallbackTimeout, the backend addresses from the resolved address list will be
// used.
//
// Only call this function when a non-default fallback timeout is needed.
func newLBBuilderWithFallbackTimeout(fallbackTimeout time.Duration) balancer.Builder {
	return &lbBuilder{
		fallbackTimeout: fallbackTimeout,
	}
}
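
// For instance, a caller needing a non-default fallback timeout could register
// the resulting builder itself (hypothetical wiring; init above already
// registers the default builder):
//
//	balancer.Register(newLBBuilderWithFallbackTimeout(5 * time.Second))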

type lbBuilder struct {
	fallbackTimeout time.Duration
}

func (b *lbBuilder) Name() string {
	return grpclbName
}

func (b *lbBuilder) Build(cc balancer.ClientConn, opt balancer.BuildOptions) balancer.Balancer {
	// This generates a manual resolver builder with a fixed scheme. This
	// scheme will be used to dial the remote LB, so we can send filtered
	// address updates to the remote LB ClientConn using this manual resolver.
	r := &lbManualResolver{scheme: "grpclb-internal", ccb: cc}

	lb := &lbBalancer{
		cc: newLBCacheClientConn(cc),
		// Target.Endpoint is deprecated; use GetEndpoint() instead.
		dialTarget:      opt.Target.GetEndpoint(),
		target:          opt.Target.GetEndpoint(),
		opt:             opt,
		fallbackTimeout: b.fallbackTimeout,
		doneCh:          make(chan struct{}),

		manualResolver: r,
		subConns:       make(map[resolver.Address]balancer.SubConn),
		scStates:       make(map[balancer.SubConn]connectivity.State),
		picker:         &errPicker{err: balancer.ErrNoSubConnAvailable},
		clientStats:    newRPCStats(),
		backoff:        backoff.DefaultExponential, // TODO: make backoff configurable.
	}

	var err error
	if opt.CredsBundle != nil {
		lb.grpclbClientConnCreds, err = opt.CredsBundle.NewWithMode(internal.CredsBundleModeBalancer)
		if err != nil {
			logger.Warningf("lbBalancer: client connection creds NewWithMode failed: %v", err)
		}
		lb.grpclbBackendCreds, err = opt.CredsBundle.NewWithMode(internal.CredsBundleModeBackendFromBalancer)
		if err != nil {
			logger.Warningf("lbBalancer: backend creds NewWithMode failed: %v", err)
		}
	}

	return lb
}

type lbBalancer struct {
	cc         *lbCacheClientConn
	dialTarget string // user's dial target
	target     string // same as dialTarget unless overridden in service config
	opt        balancer.BuildOptions

	usePickFirst bool

	// grpclbClientConnCreds is the creds bundle to be used to connect to grpclb
	// servers. If it's nil, use the TransportCredentials from BuildOptions
	// instead.
	grpclbClientConnCreds credentials.Bundle
	// grpclbBackendCreds is the creds bundle to be used for addresses that are
	// returned by the grpclb server. If it's nil, don't set anything when
	// creating SubConns.
	grpclbBackendCreds credentials.Bundle

	fallbackTimeout time.Duration
	doneCh          chan struct{}

	// manualResolver is used in the remote LB ClientConn inside grpclb. When
	// resolved address updates are received by grpclb, filtered updates will be
	// sent to the remote LB ClientConn through this resolver.
	manualResolver *lbManualResolver
	// The ClientConn to talk to the remote balancer.
	ccRemoteLB *remoteBalancerCCWrapper
	// backoff for calling remote balancer.
	backoff backoff.Strategy

	// Support client side load reporting. Each picker gets a reference to this,
	// and will update its content.
	clientStats *rpcStats

	mu sync.Mutex // guards everything following.
	// The full server list including drops, used to check if the newly received
	// serverList contains anything new. Each generated picker will also have a
	// reference to this list to do the first layer pick.
	fullServerList []*lbpb.Server
	// Backend addresses. It's kept so the addresses are available when
	// switching between round_robin and pickfirst.
	backendAddrs []resolver.Address
	// All backend addresses, with metadata set to nil. This list contains all
	// backend addresses in the same order and with the same duplicates as in
	// serverlist. When generating the picker, a SubConn slice with the same
	// order but with only READY SCs will be generated.
	backendAddrsWithoutMetadata []resolver.Address
	// Roundrobin functionalities.
	state    connectivity.State
	subConns map[resolver.Address]balancer.SubConn   // Used to create/remove SubConns.
	scStates map[balancer.SubConn]connectivity.State // Used to filter READY SubConns.
	picker   balancer.Picker
	// Support fallback to resolved backend addresses if there's no response
	// from remote balancer within fallbackTimeout.
	remoteBalancerConnected bool
	serverListReceived      bool
	inFallback              bool
	// resolvedBackendAddrs is resolvedAddrs minus remote balancers. It's set
	// when resolved address updates are received, and read in the goroutine
	// handling fallback.
	resolvedBackendAddrs []resolver.Address
	connErr              error // the last connection error
}

// regeneratePicker takes a snapshot of the balancer, and generates a picker from
// it. The picker
//   - always returns ErrTransientFailure if the balancer is in TransientFailure,
//   - does a two-layer roundrobin pick otherwise.
//
// Caller must hold lb.mu.
func (lb *lbBalancer) regeneratePicker(resetDrop bool) {
	if lb.state == connectivity.TransientFailure {
		lb.picker = &errPicker{err: fmt.Errorf("all SubConns are in TransientFailure, last connection error: %v", lb.connErr)}
		return
	}

	if lb.state == connectivity.Connecting {
		lb.picker = &errPicker{err: balancer.ErrNoSubConnAvailable}
		return
	}

	var readySCs []balancer.SubConn
	if lb.usePickFirst {
		for _, sc := range lb.subConns {
			readySCs = append(readySCs, sc)
			break
		}
	} else {
		for _, a := range lb.backendAddrsWithoutMetadata {
			if sc, ok := lb.subConns[a]; ok {
				if st, ok := lb.scStates[sc]; ok && st == connectivity.Ready {
					readySCs = append(readySCs, sc)
				}
			}
		}
	}

	if len(readySCs) <= 0 {
		// If there are no ready SubConns, always re-pick. This is to avoid drops
		// unless at least one SubConn is ready. Otherwise we may drop more
		// often than we want because of drops + re-picks (which become re-drops).
		//
		// This doesn't seem to be necessary after the connecting check above.
		// Kept for safety.
		lb.picker = &errPicker{err: balancer.ErrNoSubConnAvailable}
		return
	}
	if lb.inFallback {
		lb.picker = newRRPicker(readySCs)
		return
	}
	if resetDrop {
		lb.picker = newLBPicker(lb.fullServerList, readySCs, lb.clientStats)
		return
	}
	prevLBPicker, ok := lb.picker.(*lbPicker)
	if !ok {
		lb.picker = newLBPicker(lb.fullServerList, readySCs, lb.clientStats)
		return
	}
	prevLBPicker.updateReadySCs(readySCs)
}

// aggregateSubConnStates calculates the aggregated state of the SubConns in
// lb.subConns. These are the SubConns in use (when switching between fallback
// and grpclb). lb.scStates contains states for all SubConns, including those
// in cache (SubConns are cached for 10 seconds after removal).
//
// The aggregated state is:
//   - If at least one SubConn is Ready, the aggregated state is Ready;
//   - Else if at least one SubConn is Connecting or IDLE, the aggregated state is Connecting;
//   - It's OK to consider IDLE as Connecting. SubConns never stay in IDLE,
//     they start to connect immediately. But there's a race between when the
//     overall state is reported and when the new SubConn state arrives. And
//     SubConns never go back to IDLE.
//   - Else the aggregated state is TransientFailure.
func (lb *lbBalancer) aggregateSubConnStates() connectivity.State {
	var numConnecting uint64

	for _, sc := range lb.subConns {
		if state, ok := lb.scStates[sc]; ok {
			switch state {
			case connectivity.Ready:
				return connectivity.Ready
			case connectivity.Connecting, connectivity.Idle:
				numConnecting++
			}
		}
	}
	if numConnecting > 0 {
		return connectivity.Connecting
	}
	return connectivity.TransientFailure
}
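
// For example, one SubConn in Connecting plus one in TransientFailure
// aggregates to Connecting; only when no SubConn is Ready, Connecting, or
// Idle does the aggregate become TransientFailure.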

func (lb *lbBalancer) UpdateSubConnState(sc balancer.SubConn, scs balancer.SubConnState) {
	s := scs.ConnectivityState
	if logger.V(2) {
		logger.Infof("lbBalancer: handle SubConn state change: %p, %v", sc, s)
	}
	lb.mu.Lock()
	defer lb.mu.Unlock()

	oldS, ok := lb.scStates[sc]
	if !ok {
		if logger.V(2) {
			logger.Infof("lbBalancer: got state changes for an unknown SubConn: %p, %v", sc, s)
		}
		return
	}
	lb.scStates[sc] = s
	switch s {
	case connectivity.Idle:
		sc.Connect()
	case connectivity.Shutdown:
		// When an address was removed by the resolver, the balancer called
		// RemoveSubConn but kept the sc's state in scStates. Remove the state
		// for this sc here.
		delete(lb.scStates, sc)
	case connectivity.TransientFailure:
		lb.connErr = scs.ConnectionError
	}
	// Force regenerate picker if
	//  - this sc became ready from not-ready
	//  - this sc became not-ready from ready
	lb.updateStateAndPicker((oldS == connectivity.Ready) != (s == connectivity.Ready), false)

	// Enter fallback when the aggregated state is not Ready and the connection
	// to the remote balancer is lost.
	if lb.state != connectivity.Ready {
		if !lb.inFallback && !lb.remoteBalancerConnected {
			// Enter fallback.
			lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
		}
	}
}

// updateStateAndPicker re-calculates the aggregated state, and regenerates the
// picker if the overall state has changed.
//
// If forceRegeneratePicker is true, the picker will always be regenerated.
func (lb *lbBalancer) updateStateAndPicker(forceRegeneratePicker bool, resetDrop bool) {
	oldAggrState := lb.state
	lb.state = lb.aggregateSubConnStates()
	// Regenerate picker when one of the following happens:
	//  - the caller wants to regenerate
	//  - the aggregated state changed
	if forceRegeneratePicker || (lb.state != oldAggrState) {
		lb.regeneratePicker(resetDrop)
	}

	lb.cc.UpdateState(balancer.State{ConnectivityState: lb.state, Picker: lb.picker})
}

// fallbackToBackendsAfter blocks for fallbackTimeout and falls back to using
// the resolved backends (backends received from the resolver, not from the
// remote balancer) if no connection to a remote balancer was successful.
func (lb *lbBalancer) fallbackToBackendsAfter(fallbackTimeout time.Duration) {
	timer := time.NewTimer(fallbackTimeout)
	defer timer.Stop()
	select {
	case <-timer.C:
	case <-lb.doneCh:
		return
	}
	lb.mu.Lock()
	if lb.inFallback || lb.serverListReceived {
		lb.mu.Unlock()
		return
	}
	// Enter fallback.
	lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
	lb.mu.Unlock()
}

func (lb *lbBalancer) handleServiceConfig(gc *grpclbServiceConfig) {
	lb.mu.Lock()
	defer lb.mu.Unlock()

	// grpclb uses the user's dial target to populate the `Name` field of the
	// `InitialLoadBalanceRequest` message sent to the remote balancer. But when
	// grpclb is used as a child policy in the context of RLS, we want the `Name`
	// field to be populated with the value received from the RLS server. To
	// support this use case, an optional "target_name" field has been added to
	// the grpclb LB policy's config. If specified, it overrides the name of
	// the target to be sent to the remote balancer; if not, the target to be
	// sent to the balancer will continue to be obtained from the target URI
	// passed to the gRPC client channel. Whenever the target to be sent to the
	// balancer is updated, we need to restart the stream to the balancer, as
	// this target is sent in the first message on the stream.
	if gc != nil {
		target := lb.dialTarget
		if gc.TargetName != "" {
			target = gc.TargetName
		}
		if target != lb.target {
			lb.target = target
			if lb.ccRemoteLB != nil {
				lb.ccRemoteLB.cancelRemoteBalancerCall()
			}
		}
	}

	newUsePickFirst := childIsPickFirst(gc)
	if lb.usePickFirst == newUsePickFirst {
		return
	}
	if logger.V(2) {
		logger.Infof("lbBalancer: switching mode, new usePickFirst: %+v", newUsePickFirst)
	}
	lb.refreshSubConns(lb.backendAddrs, lb.inFallback, newUsePickFirst)
}
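
// As an illustrative sketch only (treat the exact JSON field casing as an
// assumption about this policy's config parser), a service config overriding
// the name sent to the remote balancer might look like:
//
//	{"loadBalancingConfig": [{"grpclb": {"targetName": "override.example.com"}}]}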

func (lb *lbBalancer) ResolverError(error) {
	// Ignore resolver errors. GRPCLB is not selected unless the resolver
	// works at least once.
}

func (lb *lbBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error {
	if logger.V(2) {
		logger.Infof("lbBalancer: UpdateClientConnState: %+v", ccs)
	}
	gc, _ := ccs.BalancerConfig.(*grpclbServiceConfig)
	lb.handleServiceConfig(gc)

	addrs := ccs.ResolverState.Addresses

	var remoteBalancerAddrs, backendAddrs []resolver.Address
	for _, a := range addrs {
		if a.Type == resolver.GRPCLB {
			a.Type = resolver.Backend
			remoteBalancerAddrs = append(remoteBalancerAddrs, a)
		} else {
			backendAddrs = append(backendAddrs, a)
		}
	}
	if sd := grpclbstate.Get(ccs.ResolverState); sd != nil {
		// Override any balancer addresses provided via
		// ccs.ResolverState.Addresses.
		remoteBalancerAddrs = sd.BalancerAddresses
	}

	if len(backendAddrs)+len(remoteBalancerAddrs) == 0 {
		// There should be at least one address, either a grpclb server or a
		// fallback backend. An empty address list is not valid.
		return balancer.ErrBadResolverState
	}

	if len(remoteBalancerAddrs) == 0 {
		if lb.ccRemoteLB != nil {
			lb.ccRemoteLB.close()
			lb.ccRemoteLB = nil
		}
	} else if lb.ccRemoteLB == nil {
		// First time receiving resolved addresses, create a cc to the remote
		// balancers.
		lb.newRemoteBalancerCCWrapper()
		// Start the fallback goroutine.
		go lb.fallbackToBackendsAfter(lb.fallbackTimeout)
	}

	if lb.ccRemoteLB != nil {
		// The cc to the remote balancers uses lb.manualResolver. Send the
		// updated remote balancer addresses to it through manualResolver.
		lb.manualResolver.UpdateState(resolver.State{Addresses: remoteBalancerAddrs})
	}

	lb.mu.Lock()
	lb.resolvedBackendAddrs = backendAddrs
	if len(remoteBalancerAddrs) == 0 || lb.inFallback {
		// If there's no remote balancer address in the ClientConn update, grpclb
		// enters fallback mode immediately.
		//
		// If a new update is received while grpclb is in fallback, update the
		// list of backends being used to the new fallback backends.
		lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
	}
	lb.mu.Unlock()
	return nil
}
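
// For reference, balancer addresses typically reach this method via a resolver
// attaching grpclb state to its update. A sketch, assuming hypothetical
// backendAddrs/balancerAddrs slices on the resolver side:
//
//	s := resolver.State{Addresses: backendAddrs}
//	s = grpclbstate.Set(s, &grpclbstate.State{BalancerAddresses: balancerAddrs})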

func (lb *lbBalancer) Close() {
	select {
	case <-lb.doneCh:
		return
	default:
	}
	close(lb.doneCh)
	if lb.ccRemoteLB != nil {
		lb.ccRemoteLB.close()
	}
	lb.cc.close()
}

func (lb *lbBalancer) ExitIdle() {}