gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/balancer/rls/internal/control_channel.go (about)

     1  /*
     2   *
     3   * Copyright 2021 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package rls
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"time"
    25  
    26  	grpc "gitee.com/ks-custle/core-gm/grpc"
    27  	"gitee.com/ks-custle/core-gm/grpc/balancer"
    28  	"gitee.com/ks-custle/core-gm/grpc/balancer/rls/internal/adaptive"
    29  	"gitee.com/ks-custle/core-gm/grpc/connectivity"
    30  	"gitee.com/ks-custle/core-gm/grpc/internal"
    31  	internalgrpclog "gitee.com/ks-custle/core-gm/grpc/internal/grpclog"
    32  	"gitee.com/ks-custle/core-gm/grpc/internal/pretty"
    33  	rlsgrpc "gitee.com/ks-custle/core-gm/grpc/internal/proto/grpc_lookup_v1"
    34  	rlspb "gitee.com/ks-custle/core-gm/grpc/internal/proto/grpc_lookup_v1"
    35  )
    36  
// newAdaptiveThrottler builds the adaptiveThrottler installed on every new
// control channel. By default it is backed by adaptive.New().
//
// NOTE(review): this is a package-level variable rather than a plain function,
// presumably so that tests can substitute a fake throttler — confirm against
// the test files.
var newAdaptiveThrottler = func() adaptiveThrottler { return adaptive.New() }
    38  
// adaptiveThrottler is the throttling behavior the control channel depends on.
// The default implementation is obtained through newAdaptiveThrottler.
type adaptiveThrottler interface {
	// ShouldThrottle reports whether the next RLS request should be dropped on
	// the client side. When it returns true, lookup does not send the request.
	ShouldThrottle() bool
	// RegisterBackendResponse feeds the outcome of a request back into the
	// throttler.
	//
	// NOTE(review): it is not called from this file; the exact meaning of the
	// throttled argument is defined by the implementation — confirm against
	// the adaptive package and the callers.
	RegisterBackendResponse(throttled bool)
}
    43  
// controlChannel is a wrapper around the gRPC channel to the RLS server
// specified in the service config.
type controlChannel struct {
	// rpcTimeout specifies the timeout for the RouteLookup RPC call. The LB
	// policy receives this value in its service config.
	rpcTimeout time.Duration
	// backToReadyCh is the channel on which an update is pushed when the
	// connectivity state changes from READY --> TRANSIENT_FAILURE --> READY.
	backToReadyCh chan struct{}
	// throttler is an adaptive throttling implementation used to avoid
	// hammering the RLS service while it is overloaded or down.
	throttler adaptiveThrottler

	// cc is the ClientConn dialed to the RLS server.
	cc *grpc.ClientConn
	// client is the RouteLookupService stub created on top of cc.
	client rlsgrpc.RouteLookupServiceClient
	// logger prefixes every message with the address of this control channel.
	logger *internalgrpclog.PrefixLogger
}
    61  
    62  func newControlChannel(rlsServerName string, rpcTimeout time.Duration, bOpts balancer.BuildOptions, backToReadyCh chan struct{}) (*controlChannel, error) {
    63  	ctrlCh := &controlChannel{
    64  		rpcTimeout:    rpcTimeout,
    65  		backToReadyCh: backToReadyCh,
    66  		throttler:     newAdaptiveThrottler(),
    67  	}
    68  	ctrlCh.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf("[rls-control-channel %p] ", ctrlCh))
    69  
    70  	dopts, err := ctrlCh.dialOpts(bOpts)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  	ctrlCh.cc, err = grpc.Dial(rlsServerName, dopts...)
    75  	if err != nil {
    76  		return nil, err
    77  	}
    78  	ctrlCh.client = rlsgrpc.NewRouteLookupServiceClient(ctrlCh.cc)
    79  	ctrlCh.logger.Infof("Control channel created to RLS server at: %v", rlsServerName)
    80  
    81  	go ctrlCh.monitorConnectivityState()
    82  	return ctrlCh, nil
    83  }
    84  
    85  // dialOpts constructs the dial options for the control plane channel.
    86  func (cc *controlChannel) dialOpts(bOpts balancer.BuildOptions) ([]grpc.DialOption, error) {
    87  	// The control plane channel will use the same authority as the parent
    88  	// channel for server authorization. This ensures that the identity of the
    89  	// RLS server and the identity of the backends is the same, so if the RLS
    90  	// config is injected by an attacker, it cannot cause leakage of private
    91  	// information contained in headers set by the application.
    92  	dopts := []grpc.DialOption{grpc.WithAuthority(bOpts.Authority)}
    93  	if bOpts.Dialer != nil {
    94  		dopts = append(dopts, grpc.WithContextDialer(bOpts.Dialer))
    95  	}
    96  
    97  	// The control channel will use the channel credentials from the parent
    98  	// channel, including any call creds associated with the channel creds.
    99  	var credsOpt grpc.DialOption
   100  	switch {
   101  	case bOpts.DialCreds != nil:
   102  		credsOpt = grpc.WithTransportCredentials(bOpts.DialCreds.Clone())
   103  	case bOpts.CredsBundle != nil:
   104  		// The "fallback" mode in google default credentials (which is the only
   105  		// type of credentials we expect to be used with RLS) uses TLS/ALTS
   106  		// creds for transport and uses the same call creds as that on the
   107  		// parent bundle.
   108  		bundle, err := bOpts.CredsBundle.NewWithMode(internal.CredsBundleModeFallback)
   109  		if err != nil {
   110  			return nil, err
   111  		}
   112  		credsOpt = grpc.WithCredentialsBundle(bundle)
   113  	default:
   114  		cc.logger.Warningf("no credentials available, using Insecure")
   115  		credsOpt = grpc.WithInsecure()
   116  	}
   117  	return append(dopts, credsOpt), nil
   118  }
   119  
// monitorConnectivityState watches the connectivity state of the control
// channel and sends on backToReadyCh every time the channel becomes READY
// again after having left READY. The very first transition to READY is not
// reported. It loops until the underlying ClientConn is observed in SHUTDOWN,
// and is meant to run in its own goroutine (newControlChannel starts it with
// `go`).
func (cc *controlChannel) monitorConnectivityState() {
	cc.logger.Infof("Starting connectivity state monitoring goroutine")
	// Since we use two mechanisms to deal with RLS server being down:
	//   - adaptive throttling for the channel as a whole
	//   - exponential backoff on a per-request basis
	// we need a way to avoid double-penalizing requests by counting failures
	// toward both mechanisms when the RLS server is unreachable.
	//
	// To accomplish this, we monitor the state of the control plane channel. If
	// the state has been TRANSIENT_FAILURE since the last time it was in state
	// READY, and it then transitions into state READY, we push on a channel
	// which is being read by the LB policy.
	//
	// The LB policy will iterate through the cache to reset the backoff
	// timeouts in all cache entries. Specifically, this means that it will
	// reset the backoff state and cancel the pending backoff timer. Note that
	// when cancelling the backoff timer, just like when the backoff timer fires
	// normally, a new picker is returned to the channel, to force it to
	// re-process any wait-for-ready RPCs that may still be queued if we failed
	// them while we were in backoff. However, we should optimize this case by
	// returning only one new picker, regardless of how many backoff timers are
	// cancelled.

	// Using the background context is fine here since we check for the ClientConn
	// entering SHUTDOWN and return early in that case.
	ctx := context.Background()

	// first is true until the channel has been seen READY once; the initial
	// transition to READY is not a "back to READY" event and is not reported.
	first := true
	for {
		// Wait for the control channel to become READY.
		for s := cc.cc.GetState(); s != connectivity.Ready; s = cc.cc.GetState() {
			if s == connectivity.Shutdown {
				return
			}
			cc.cc.WaitForStateChange(ctx, s)
		}
		cc.logger.Infof("Connectivity state is READY")

		if !first {
			cc.logger.Infof("Control channel back to READY")
			// NOTE(review): this is a plain (blocking) send; whether it can
			// stall this goroutine depends on how the caller created
			// backToReadyCh and whether it keeps reading — confirm at the
			// call site.
			cc.backToReadyCh <- struct{}{}
		}
		first = false

		// Wait for the control channel to move out of READY.
		cc.cc.WaitForStateChange(ctx, connectivity.Ready)
		if cc.cc.GetState() == connectivity.Shutdown {
			return
		}
		// The state is read again here, so the logged value may differ from
		// the one checked just above if the channel changed in between.
		cc.logger.Infof("Connectivity state is %s", cc.cc.GetState())
	}
}
   172  
   173  func (cc *controlChannel) close() {
   174  	cc.logger.Infof("Closing control channel")
   175  	cc.cc.Close()
   176  }
   177  
// lookupCallback is the signature of the function through which lookup reports
// the outcome of a RouteLookup RPC: the targets and header data taken from the
// response, and the error returned by the RPC (nil on success).
type lookupCallback func(targets []string, headerData string, err error)
   179  
   180  // lookup starts a RouteLookup RPC in a separate goroutine and returns the
   181  // results (and error, if any) in the provided callback.
   182  //
   183  // The returned boolean indicates whether the request was throttled by the
   184  // client-side adaptive throttling algorithm in which case the provided callback
   185  // will not be invoked.
   186  func (cc *controlChannel) lookup(reqKeys map[string]string, reason rlspb.RouteLookupRequest_Reason, staleHeaders string, cb lookupCallback) (throttled bool) {
   187  	if cc.throttler.ShouldThrottle() {
   188  		cc.logger.Infof("RLS request throttled by client-side adaptive throttling")
   189  		return true
   190  	}
   191  	go func() {
   192  		req := &rlspb.RouteLookupRequest{
   193  			TargetType:      "grpc",
   194  			KeyMap:          reqKeys,
   195  			Reason:          reason,
   196  			StaleHeaderData: staleHeaders,
   197  		}
   198  		cc.logger.Infof("Sending RLS request %+v", pretty.ToJSON(req))
   199  
   200  		ctx, cancel := context.WithTimeout(context.Background(), cc.rpcTimeout)
   201  		defer cancel()
   202  		resp, err := cc.client.RouteLookup(ctx, req)
   203  		cb(resp.GetTargets(), resp.GetHeaderData(), err)
   204  	}()
   205  	return false
   206  }