google.golang.org/grpc@v1.74.2/balancer/rls/control_channel.go (about)

     1  /*
     2   *
     3   * Copyright 2021 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package rls
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"time"
    25  
    26  	"google.golang.org/grpc"
    27  	"google.golang.org/grpc/balancer"
    28  	"google.golang.org/grpc/balancer/rls/internal/adaptive"
    29  	"google.golang.org/grpc/connectivity"
    30  	"google.golang.org/grpc/credentials/insecure"
    31  	"google.golang.org/grpc/internal"
    32  	"google.golang.org/grpc/internal/buffer"
    33  	internalgrpclog "google.golang.org/grpc/internal/grpclog"
    34  	"google.golang.org/grpc/internal/grpcsync"
    35  	"google.golang.org/grpc/internal/pretty"
    36  	rlsgrpc "google.golang.org/grpc/internal/proto/grpc_lookup_v1"
    37  	rlspb "google.golang.org/grpc/internal/proto/grpc_lookup_v1"
    38  )
    39  
    40  var newAdaptiveThrottler = func() adaptiveThrottler { return adaptive.New() }
    41  
// adaptiveThrottler is the throttling behavior required by the control
// channel. The default implementation is the one returned by adaptive.New().
type adaptiveThrottler interface {
	// ShouldThrottle reports whether the next RLS request should be dropped
	// locally. lookup() consults it before starting a RouteLookup RPC and
	// skips the RPC when it returns true.
	ShouldThrottle() bool
	// RegisterBackendResponse feeds the outcome of a request back into the
	// throttler.
	// NOTE(review): this method is not called from this file; presumably the
	// LB policy calls it from the lookup callback — confirm against the
	// caller, including the exact meaning of the throttled argument.
	RegisterBackendResponse(throttled bool)
}
    46  
// controlChannel is a wrapper around the gRPC channel to the RLS server
// specified in the service config.
//
// Instances are created by newControlChannel and torn down by close().
type controlChannel struct {
	// rpcTimeout specifies the timeout for the RouteLookup RPC call. The LB
	// policy receives this value in its service config.
	rpcTimeout time.Duration
	// backToReadyFunc is a callback to be invoked when the connectivity state
	// changes from READY --> TRANSIENT_FAILURE --> READY.
	backToReadyFunc func()
	// throttler is an adaptive throttling implementation used to avoid
	// hammering the RLS service while it is overloaded or down.
	throttler adaptiveThrottler

	// The remaining fields are set up by newControlChannel:
	//   - cc is the underlying gRPC channel to the RLS server.
	//   - client is the RouteLookupService stub created on top of cc.
	//   - logger prefixes log lines with this control channel's address.
	//   - connectivityStateCh buffers the connectivity states handed to
	//     OnMessage until monitorConnectivityState consumes them.
	//   - unsubscribe is the function returned when subscribing to cc's
	//     connectivity state changes; it is invoked from close().
	//   - monitorDoneCh is closed when monitorConnectivityState returns.
	cc                  *grpc.ClientConn
	client              rlsgrpc.RouteLookupServiceClient
	logger              *internalgrpclog.PrefixLogger
	connectivityStateCh *buffer.Unbounded
	unsubscribe         func()
	monitorDoneCh       chan struct{}
}
    67  
    68  // newControlChannel creates a controlChannel to rlsServerName and uses
    69  // serviceConfig, if non-empty, as the default service config for the underlying
    70  // gRPC channel.
    71  func newControlChannel(rlsServerName, serviceConfig string, rpcTimeout time.Duration, bOpts balancer.BuildOptions, backToReadyFunc func()) (*controlChannel, error) {
    72  	ctrlCh := &controlChannel{
    73  		rpcTimeout:          rpcTimeout,
    74  		backToReadyFunc:     backToReadyFunc,
    75  		throttler:           newAdaptiveThrottler(),
    76  		connectivityStateCh: buffer.NewUnbounded(),
    77  		monitorDoneCh:       make(chan struct{}),
    78  	}
    79  	ctrlCh.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf("[rls-control-channel %p] ", ctrlCh))
    80  
    81  	dopts, err := ctrlCh.dialOpts(bOpts, serviceConfig)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	ctrlCh.cc, err = grpc.NewClient(rlsServerName, dopts...)
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  	// Subscribe to connectivity state before connecting to avoid missing initial
    90  	// updates, which are only delivered to active subscribers.
    91  	ctrlCh.unsubscribe = internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(ctrlCh.cc, ctrlCh)
    92  	ctrlCh.cc.Connect()
    93  	ctrlCh.client = rlsgrpc.NewRouteLookupServiceClient(ctrlCh.cc)
    94  	ctrlCh.logger.Infof("Control channel created to RLS server at: %v", rlsServerName)
    95  	go ctrlCh.monitorConnectivityState()
    96  	return ctrlCh, nil
    97  }
    98  
    99  func (cc *controlChannel) OnMessage(msg any) {
   100  	st, ok := msg.(connectivity.State)
   101  	if !ok {
   102  		panic(fmt.Sprintf("Unexpected message type %T , wanted connectectivity.State type", msg))
   103  	}
   104  	cc.connectivityStateCh.Put(st)
   105  }
   106  
   107  // dialOpts constructs the dial options for the control plane channel.
   108  func (cc *controlChannel) dialOpts(bOpts balancer.BuildOptions, serviceConfig string) ([]grpc.DialOption, error) {
   109  	// The control plane channel will use the same authority as the parent
   110  	// channel for server authorization. This ensures that the identity of the
   111  	// RLS server and the identity of the backends is the same, so if the RLS
   112  	// config is injected by an attacker, it cannot cause leakage of private
   113  	// information contained in headers set by the application.
   114  	dopts := []grpc.DialOption{grpc.WithAuthority(bOpts.Authority)}
   115  	if bOpts.Dialer != nil {
   116  		dopts = append(dopts, grpc.WithContextDialer(bOpts.Dialer))
   117  	}
   118  	// The control channel will use the channel credentials from the parent
   119  	// channel, including any call creds associated with the channel creds.
   120  	var credsOpt grpc.DialOption
   121  	switch {
   122  	case bOpts.DialCreds != nil:
   123  		credsOpt = grpc.WithTransportCredentials(bOpts.DialCreds.Clone())
   124  	case bOpts.CredsBundle != nil:
   125  		// The "fallback" mode in google default credentials (which is the only
   126  		// type of credentials we expect to be used with RLS) uses TLS/ALTS
   127  		// creds for transport and uses the same call creds as that on the
   128  		// parent bundle.
   129  		bundle, err := bOpts.CredsBundle.NewWithMode(internal.CredsBundleModeFallback)
   130  		if err != nil {
   131  			return nil, err
   132  		}
   133  		credsOpt = grpc.WithCredentialsBundle(bundle)
   134  	default:
   135  		cc.logger.Warningf("no credentials available, using Insecure")
   136  		credsOpt = grpc.WithTransportCredentials(insecure.NewCredentials())
   137  	}
   138  	dopts = append(dopts, credsOpt)
   139  
   140  	// If the RLS LB policy's configuration specified a service config for the
   141  	// control channel, use that and disable service config fetching via the name
   142  	// resolver for the control channel.
   143  	if serviceConfig != "" {
   144  		cc.logger.Infof("Disabling service config from the name resolver and instead using: %s", serviceConfig)
   145  		dopts = append(dopts, grpc.WithDisableServiceConfig(), grpc.WithDefaultServiceConfig(serviceConfig))
   146  	}
   147  
   148  	return dopts, nil
   149  }
   150  
   151  func (cc *controlChannel) monitorConnectivityState() {
   152  	cc.logger.Infof("Starting connectivity state monitoring goroutine")
   153  	defer close(cc.monitorDoneCh)
   154  
   155  	// Since we use two mechanisms to deal with RLS server being down:
   156  	//   - adaptive throttling for the channel as a whole
   157  	//   - exponential backoff on a per-request basis
   158  	// we need a way to avoid double-penalizing requests by counting failures
   159  	// toward both mechanisms when the RLS server is unreachable.
   160  	//
   161  	// To accomplish this, we monitor the state of the control plane channel. If
   162  	// the state has been TRANSIENT_FAILURE since the last time it was in state
   163  	// READY, and it then transitions into state READY, we push on a channel
   164  	// which is being read by the LB policy.
   165  	//
   166  	// The LB the policy will iterate through the cache to reset the backoff
   167  	// timeouts in all cache entries. Specifically, this means that it will
   168  	// reset the backoff state and cancel the pending backoff timer. Note that
   169  	// when cancelling the backoff timer, just like when the backoff timer fires
   170  	// normally, a new picker is returned to the channel, to force it to
   171  	// re-process any wait-for-ready RPCs that may still be queued if we failed
   172  	// them while we were in backoff. However, we should optimize this case by
   173  	// returning only one new picker, regardless of how many backoff timers are
   174  	// cancelled.
   175  
   176  	// Wait for the control channel to become READY for the first time.
   177  	for s, ok := <-cc.connectivityStateCh.Get(); s != connectivity.Ready; s, ok = <-cc.connectivityStateCh.Get() {
   178  		if !ok {
   179  			return
   180  		}
   181  
   182  		cc.connectivityStateCh.Load()
   183  		if s == connectivity.Shutdown {
   184  			return
   185  		}
   186  	}
   187  	cc.connectivityStateCh.Load()
   188  	cc.logger.Infof("Connectivity state is READY")
   189  
   190  	for {
   191  		s, ok := <-cc.connectivityStateCh.Get()
   192  		if !ok {
   193  			return
   194  		}
   195  		cc.connectivityStateCh.Load()
   196  
   197  		if s == connectivity.Shutdown {
   198  			return
   199  		}
   200  		if s == connectivity.Ready {
   201  			cc.logger.Infof("Control channel back to READY")
   202  			cc.backToReadyFunc()
   203  		}
   204  
   205  		cc.logger.Infof("Connectivity state is %s", s)
   206  	}
   207  }
   208  
// close tears down the control channel: it drops the connectivity state
// subscription, stops the monitoring goroutine and waits for it to exit, and
// then closes the underlying gRPC channel. The order of the steps matters.
func (cc *controlChannel) close() {
	// Drop the subscription first; presumably this stops further OnMessage
	// deliveries into connectivityStateCh (verify against the subscriber
	// implementation).
	cc.unsubscribe()
	// Closing the buffer makes the monitoring goroutine's read from Get()
	// report !ok, which is its signal to return.
	cc.connectivityStateCh.Close()
	// Block until monitorConnectivityState has returned; it closes
	// monitorDoneCh on exit.
	<-cc.monitorDoneCh
	cc.cc.Close()
	cc.logger.Infof("Shutdown")
}
   216  
// lookupCallback is the function through which lookup reports the outcome of
// a RouteLookup RPC. targets and headerData are taken from the response's
// Targets and HeaderData fields, and err is the error returned by the RPC.
type lookupCallback func(targets []string, headerData string, err error)
   218  
   219  // lookup starts a RouteLookup RPC in a separate goroutine and returns the
   220  // results (and error, if any) in the provided callback.
   221  //
   222  // The returned boolean indicates whether the request was throttled by the
   223  // client-side adaptive throttling algorithm in which case the provided callback
   224  // will not be invoked.
   225  func (cc *controlChannel) lookup(reqKeys map[string]string, reason rlspb.RouteLookupRequest_Reason, staleHeaders string, cb lookupCallback) (throttled bool) {
   226  	if cc.throttler.ShouldThrottle() {
   227  		cc.logger.Infof("RLS request throttled by client-side adaptive throttling")
   228  		return true
   229  	}
   230  	go func() {
   231  		req := &rlspb.RouteLookupRequest{
   232  			TargetType:      "grpc",
   233  			KeyMap:          reqKeys,
   234  			Reason:          reason,
   235  			StaleHeaderData: staleHeaders,
   236  		}
   237  		if cc.logger.V(2) {
   238  			cc.logger.Infof("Sending RLS request %+v", pretty.ToJSON(req))
   239  		}
   240  
   241  		ctx, cancel := context.WithTimeout(context.Background(), cc.rpcTimeout)
   242  		defer cancel()
   243  		resp, err := cc.client.RouteLookup(ctx, req)
   244  		cb(resp.GetTargets(), resp.GetHeaderData(), err)
   245  	}()
   246  	return false
   247  }