google.golang.org/grpc@v1.72.2/balancer/rls/control_channel.go (about)

     1  /*
     2   *
     3   * Copyright 2021 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package rls
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"time"
    25  
    26  	"google.golang.org/grpc"
    27  	"google.golang.org/grpc/balancer"
    28  	"google.golang.org/grpc/balancer/rls/internal/adaptive"
    29  	"google.golang.org/grpc/connectivity"
    30  	"google.golang.org/grpc/credentials/insecure"
    31  	"google.golang.org/grpc/internal"
    32  	"google.golang.org/grpc/internal/buffer"
    33  	internalgrpclog "google.golang.org/grpc/internal/grpclog"
    34  	"google.golang.org/grpc/internal/grpcsync"
    35  	"google.golang.org/grpc/internal/pretty"
    36  	rlsgrpc "google.golang.org/grpc/internal/proto/grpc_lookup_v1"
    37  	rlspb "google.golang.org/grpc/internal/proto/grpc_lookup_v1"
    38  )
    39  
    40  var newAdaptiveThrottler = func() adaptiveThrottler { return adaptive.New() }
    41  
// adaptiveThrottler is the set of throttling operations the control channel
// depends on. The default implementation is obtained through
// newAdaptiveThrottler.
type adaptiveThrottler interface {
	// ShouldThrottle reports whether the next RLS request should be dropped
	// on the client instead of being sent. lookup consults it before every
	// RouteLookup RPC and skips the RPC when it returns true.
	ShouldThrottle() bool
	// RegisterBackendResponse feeds the outcome of a request back into the
	// throttler. It is not called from this file.
	// NOTE(review): presumably throttled is true when the RLS server itself
	// rejected or failed the request; confirm against the adaptive package.
	RegisterBackendResponse(throttled bool)
}
    46  
// controlChannel is a wrapper around the gRPC channel to the RLS server
// specified in the service config.
type controlChannel struct {
	// rpcTimeout specifies the timeout for the RouteLookup RPC call. The LB
	// policy receives this value in its service config.
	rpcTimeout time.Duration
	// backToReadyFunc is a callback to be invoked when the connectivity state
	// changes from READY --> TRANSIENT_FAILURE --> READY.
	backToReadyFunc func()
	// throttler is an adaptive throttling implementation used to avoid
	// hammering the RLS service while it is overloaded or down.
	throttler adaptiveThrottler

	// The fields below are populated by newControlChannel:
	//   - cc is the gRPC channel to the RLS server.
	//   - client is the RouteLookupService stub created on top of cc.
	//   - logger is a prefix logger whose prefix carries this control
	//     channel's address.
	//   - connectivityStateCh is an unbounded buffer into which OnMessage
	//     pushes connectivity state updates; monitorConnectivityState drains
	//     it.
	//   - unsubscribe is the function returned when subscribing to
	//     connectivity state changes on cc; close calls it.
	//   - monitorDoneCh is closed when monitorConnectivityState returns.

	cc                  *grpc.ClientConn
	client              rlsgrpc.RouteLookupServiceClient
	logger              *internalgrpclog.PrefixLogger
	connectivityStateCh *buffer.Unbounded
	unsubscribe         func()
	monitorDoneCh       chan struct{}
}
    67  
    68  // newControlChannel creates a controlChannel to rlsServerName and uses
    69  // serviceConfig, if non-empty, as the default service config for the underlying
    70  // gRPC channel.
    71  func newControlChannel(rlsServerName, serviceConfig string, rpcTimeout time.Duration, bOpts balancer.BuildOptions, backToReadyFunc func()) (*controlChannel, error) {
    72  	ctrlCh := &controlChannel{
    73  		rpcTimeout:          rpcTimeout,
    74  		backToReadyFunc:     backToReadyFunc,
    75  		throttler:           newAdaptiveThrottler(),
    76  		connectivityStateCh: buffer.NewUnbounded(),
    77  		monitorDoneCh:       make(chan struct{}),
    78  	}
    79  	ctrlCh.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf("[rls-control-channel %p] ", ctrlCh))
    80  
    81  	dopts, err := ctrlCh.dialOpts(bOpts, serviceConfig)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	ctrlCh.cc, err = grpc.NewClient(rlsServerName, dopts...)
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  	// Subscribe to connectivity state before connecting to avoid missing initial
    90  	// updates, which are only delivered to active subscribers.
    91  	ctrlCh.unsubscribe = internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(ctrlCh.cc, ctrlCh)
    92  	ctrlCh.cc.Connect()
    93  	ctrlCh.client = rlsgrpc.NewRouteLookupServiceClient(ctrlCh.cc)
    94  	ctrlCh.logger.Infof("Control channel created to RLS server at: %v", rlsServerName)
    95  	start := make(chan struct{})
    96  	go func() {
    97  		close(start)
    98  		ctrlCh.monitorConnectivityState()
    99  	}()
   100  	<-start
   101  	return ctrlCh, nil
   102  }
   103  
   104  func (cc *controlChannel) OnMessage(msg any) {
   105  	st, ok := msg.(connectivity.State)
   106  	if !ok {
   107  		panic(fmt.Sprintf("Unexpected message type %T , wanted connectectivity.State type", msg))
   108  	}
   109  	cc.connectivityStateCh.Put(st)
   110  }
   111  
   112  // dialOpts constructs the dial options for the control plane channel.
   113  func (cc *controlChannel) dialOpts(bOpts balancer.BuildOptions, serviceConfig string) ([]grpc.DialOption, error) {
   114  	// The control plane channel will use the same authority as the parent
   115  	// channel for server authorization. This ensures that the identity of the
   116  	// RLS server and the identity of the backends is the same, so if the RLS
   117  	// config is injected by an attacker, it cannot cause leakage of private
   118  	// information contained in headers set by the application.
   119  	dopts := []grpc.DialOption{grpc.WithAuthority(bOpts.Authority)}
   120  	if bOpts.Dialer != nil {
   121  		dopts = append(dopts, grpc.WithContextDialer(bOpts.Dialer))
   122  	}
   123  	// The control channel will use the channel credentials from the parent
   124  	// channel, including any call creds associated with the channel creds.
   125  	var credsOpt grpc.DialOption
   126  	switch {
   127  	case bOpts.DialCreds != nil:
   128  		credsOpt = grpc.WithTransportCredentials(bOpts.DialCreds.Clone())
   129  	case bOpts.CredsBundle != nil:
   130  		// The "fallback" mode in google default credentials (which is the only
   131  		// type of credentials we expect to be used with RLS) uses TLS/ALTS
   132  		// creds for transport and uses the same call creds as that on the
   133  		// parent bundle.
   134  		bundle, err := bOpts.CredsBundle.NewWithMode(internal.CredsBundleModeFallback)
   135  		if err != nil {
   136  			return nil, err
   137  		}
   138  		credsOpt = grpc.WithCredentialsBundle(bundle)
   139  	default:
   140  		cc.logger.Warningf("no credentials available, using Insecure")
   141  		credsOpt = grpc.WithTransportCredentials(insecure.NewCredentials())
   142  	}
   143  	dopts = append(dopts, credsOpt)
   144  
   145  	// If the RLS LB policy's configuration specified a service config for the
   146  	// control channel, use that and disable service config fetching via the name
   147  	// resolver for the control channel.
   148  	if serviceConfig != "" {
   149  		cc.logger.Infof("Disabling service config from the name resolver and instead using: %s", serviceConfig)
   150  		dopts = append(dopts, grpc.WithDisableServiceConfig(), grpc.WithDefaultServiceConfig(serviceConfig))
   151  	}
   152  
   153  	return dopts, nil
   154  }
   155  
   156  func (cc *controlChannel) monitorConnectivityState() {
   157  	cc.logger.Infof("Starting connectivity state monitoring goroutine")
   158  	defer close(cc.monitorDoneCh)
   159  
   160  	// Since we use two mechanisms to deal with RLS server being down:
   161  	//   - adaptive throttling for the channel as a whole
   162  	//   - exponential backoff on a per-request basis
   163  	// we need a way to avoid double-penalizing requests by counting failures
   164  	// toward both mechanisms when the RLS server is unreachable.
   165  	//
   166  	// To accomplish this, we monitor the state of the control plane channel. If
   167  	// the state has been TRANSIENT_FAILURE since the last time it was in state
   168  	// READY, and it then transitions into state READY, we push on a channel
   169  	// which is being read by the LB policy.
   170  	//
   171  	// The LB the policy will iterate through the cache to reset the backoff
   172  	// timeouts in all cache entries. Specifically, this means that it will
   173  	// reset the backoff state and cancel the pending backoff timer. Note that
   174  	// when cancelling the backoff timer, just like when the backoff timer fires
   175  	// normally, a new picker is returned to the channel, to force it to
   176  	// re-process any wait-for-ready RPCs that may still be queued if we failed
   177  	// them while we were in backoff. However, we should optimize this case by
   178  	// returning only one new picker, regardless of how many backoff timers are
   179  	// cancelled.
   180  
   181  	// Wait for the control channel to become READY for the first time.
   182  	for s, ok := <-cc.connectivityStateCh.Get(); s != connectivity.Ready; s, ok = <-cc.connectivityStateCh.Get() {
   183  		if !ok {
   184  			return
   185  		}
   186  
   187  		cc.connectivityStateCh.Load()
   188  		if s == connectivity.Shutdown {
   189  			return
   190  		}
   191  	}
   192  	cc.connectivityStateCh.Load()
   193  	cc.logger.Infof("Connectivity state is READY")
   194  
   195  	for {
   196  		s, ok := <-cc.connectivityStateCh.Get()
   197  		if !ok {
   198  			return
   199  		}
   200  		cc.connectivityStateCh.Load()
   201  
   202  		if s == connectivity.Shutdown {
   203  			return
   204  		}
   205  		if s == connectivity.Ready {
   206  			cc.logger.Infof("Control channel back to READY")
   207  			cc.backToReadyFunc()
   208  		}
   209  
   210  		cc.logger.Infof("Connectivity state is %s", s)
   211  	}
   212  }
   213  
// close tears down the control channel: it drops the connectivity state
// subscription, stops the monitoring goroutine and waits for it to exit, and
// then closes the underlying gRPC channel. The order of the steps matters.
func (cc *controlChannel) close() {
	// Drop the subscription before closing the buffer that OnMessage writes
	// to.
	cc.unsubscribe()
	// Closing the buffer makes the pending receive in
	// monitorConnectivityState report !ok, which causes it to return.
	cc.connectivityStateCh.Close()
	// monitorDoneCh is closed by monitorConnectivityState when it returns, so
	// this blocks until the goroutine has exited.
	<-cc.monitorDoneCh
	cc.cc.Close()
	cc.logger.Infof("Shutdown")
}
   221  
// lookupCallback is the signature of the function through which lookup
// reports the outcome of a RouteLookup RPC. lookup invokes it with the targets
// and header data taken from the response, together with the error returned
// by the RPC. On failure err is non-nil and the other two arguments are
// whatever the response getters yield for the failed call.
type lookupCallback func(targets []string, headerData string, err error)
   223  
   224  // lookup starts a RouteLookup RPC in a separate goroutine and returns the
   225  // results (and error, if any) in the provided callback.
   226  //
   227  // The returned boolean indicates whether the request was throttled by the
   228  // client-side adaptive throttling algorithm in which case the provided callback
   229  // will not be invoked.
   230  func (cc *controlChannel) lookup(reqKeys map[string]string, reason rlspb.RouteLookupRequest_Reason, staleHeaders string, cb lookupCallback) (throttled bool) {
   231  	if cc.throttler.ShouldThrottle() {
   232  		cc.logger.Infof("RLS request throttled by client-side adaptive throttling")
   233  		return true
   234  	}
   235  	go func() {
   236  		req := &rlspb.RouteLookupRequest{
   237  			TargetType:      "grpc",
   238  			KeyMap:          reqKeys,
   239  			Reason:          reason,
   240  			StaleHeaderData: staleHeaders,
   241  		}
   242  		if cc.logger.V(2) {
   243  			cc.logger.Infof("Sending RLS request %+v", pretty.ToJSON(req))
   244  		}
   245  
   246  		ctx, cancel := context.WithTimeout(context.Background(), cc.rpcTimeout)
   247  		defer cancel()
   248  		resp, err := cc.client.RouteLookup(ctx, req)
   249  		cb(resp.GetTargets(), resp.GetHeaderData(), err)
   250  	}()
   251  	return false
   252  }