google.golang.org/grpc@v1.72.2/xds/internal/xdsclient/transport/ads/ads_stream.go (about)

     1  /*
     2   *
     3   * Copyright 2024 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  // Package ads provides the implementation of an ADS (Aggregated Discovery
    19  // Service) stream for the xDS client.
    20  package ads
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"sync"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	"google.golang.org/grpc/codes"
    30  	"google.golang.org/grpc/grpclog"
    31  	"google.golang.org/grpc/internal/backoff"
    32  	"google.golang.org/grpc/internal/buffer"
    33  	igrpclog "google.golang.org/grpc/internal/grpclog"
    34  	"google.golang.org/grpc/internal/pretty"
    35  	"google.golang.org/grpc/xds/internal/xdsclient/transport"
    36  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
    37  	"google.golang.org/protobuf/types/known/anypb"
    38  
    39  	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
    40  	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    41  	statuspb "google.golang.org/genproto/googleapis/rpc/status"
    42  )
    43  
// perRPCVerbosityLevel is the verbosity level used in this file to gate
// per-RPC logs which print complete request or response messages. Other
// per-RPC level logs which print terse output should be at `INFO` and
// verbosity 2.
const perRPCVerbosityLevel = 9
    48  
// Response represents a response received on the ADS stream. It contains the
// type URL, version, and resources for the response. It is built by the recv
// loop from the received discovery response and handed to the event handler.
type Response struct {
	TypeURL   string       // Type URL reported by the response.
	Version   string       // Version info reported by the response.
	Resources []*anypb.Any // Resources carried by the response.
}
    56  
// DataAndErrTuple is a struct that holds a resource and an error. It is used to
// return a resource and any associated error from a function.
type DataAndErrTuple struct {
	Resource xdsresource.ResourceData // The resource data.
	Err      error                    // The error associated with the resource, if any.
}
    63  
// StreamEventHandler is an interface that defines the callbacks for events that
// occur on the ADS stream. Methods on this interface may be invoked
// concurrently and implementations need to handle them in a thread-safe manner.
//
// For OnADSResponse, the stream passes the received Response along with its
// flow control "done" callback. The returned resource names are used to update
// the watch state of those resources, and a non-nil error causes the response
// to be NACKed (except for errors of type ErrorTypeResourceTypeUnsupported, for
// which neither an ACK nor a NACK is sent).
type StreamEventHandler interface {
	OnADSStreamError(error)                           // Called when the ADS stream breaks.
	OnADSWatchExpiry(xdsresource.Type, string)        // Called when the watch timer expires for a resource.
	OnADSResponse(Response, func()) ([]string, error) // Called when a response is received on the ADS stream.
}
    72  
// WatchState is an enum that describes the watch state of a particular
// resource.
type WatchState int
    76  
const (
	// ResourceWatchStateStarted is the state where a watch for a resource was
	// started, but a request asking for that resource is yet to be sent to the
	// management server. Resources in the requested state are moved back to
	// this state by onError when the stream fails.
	ResourceWatchStateStarted WatchState = iota
	// ResourceWatchStateRequested is the state when a request has been sent for
	// the resource being watched. It is set by startWatchTimersLocked, which
	// also starts the watch expiry timer.
	ResourceWatchStateRequested
	// ResourceWatchStateReceived is the state when a response has been received
	// for the resource being watched. It is set by onRecv.
	ResourceWatchStateReceived
	// ResourceWatchStateTimeout is the state when the watch timer associated
	// with the resource expired because no response was received.
	ResourceWatchStateTimeout
)
    92  
// ResourceWatchState is the state corresponding to a resource being watched.
//
// ExpiryTimer is nil for a newly subscribed resource, and is set when a
// request naming the resource is sent out. It is reset to nil when the timer
// fires, or when it is stopped upon receipt of a response or a stream error.
type ResourceWatchState struct {
	State       WatchState  // Watch state of the resource.
	ExpiryTimer *time.Timer // Timer for the expiry of the watch.
}
    98  
// resourceTypeState is the state corresponding to a resource type. An entry is
// created by Subscribe on the first subscription for the type, with
// bufferedRequests created as a channel of capacity one.
type resourceTypeState struct {
	version             string                         // Last acked version. Should not be reset when the stream breaks.
	nonce               string                         // Last received nonce. Should be reset when the stream breaks.
	bufferedRequests    chan struct{}                  // Channel to buffer requests when writing is blocked.
	subscribedResources map[string]*ResourceWatchState // Map of subscribed resource names to their state.
	pendingWrite        bool                           // True if there is a pending write for this resource type.
}
   107  
// StreamImpl provides the functionality associated with an ADS (Aggregated
// Discovery Service) stream on the client side. It manages the lifecycle of the
// ADS stream, including creating the stream, sending requests, and handling
// responses. It also handles flow control and retries for the stream.
//
// Instances are created with NewStreamImpl and shut down with Stop.
type StreamImpl struct {
	// The following fields are initialized from arguments passed to the
	// constructor and are read-only afterwards, and hence can be accessed
	// without a mutex.
	transport          transport.Transport     // Transport to use for ADS stream.
	eventHandler       StreamEventHandler      // Callbacks into the xdsChannel.
	backoff            func(int) time.Duration // Backoff for retries, after stream failures.
	nodeProto          *v3corepb.Node          // Identifies the gRPC application.
	watchExpiryTimeout time.Duration           // Resource watch expiry timeout
	logger             *igrpclog.PrefixLogger

	// The following fields are initialized in the constructor and are not
	// written to afterwards, and hence can be accessed without a mutex.
	streamCh     chan transport.StreamingCall // New ADS streams are pushed here.
	requestCh    *buffer.Unbounded            // Subscriptions and unsubscriptions are pushed here.
	runnerDoneCh chan struct{}                // Notify completion of runner goroutine.
	cancel       context.CancelFunc           // To cancel the context passed to the runner goroutine.

	// Guards access to the below fields (and to the contents of the map).
	//
	// fc and firstRequest are re-initialized by the runner goroutine every
	// time a new streaming call is created.
	mu                sync.Mutex
	resourceTypeState map[xdsresource.Type]*resourceTypeState // Map of resource types to their state.
	fc                *adsFlowControl                         // Flow control for ADS stream.
	firstRequest      bool                                    // False after the first request is sent out.
}
   136  
// StreamOpts contains the options for creating a new ADS Stream. The values
// are copied into the StreamImpl by NewStreamImpl.
type StreamOpts struct {
	Transport          transport.Transport     // xDS transport to create the stream on.
	EventHandler       StreamEventHandler      // Callbacks for stream events.
	Backoff            func(int) time.Duration // Backoff for retries, after stream failures.
	NodeProto          *v3corepb.Node          // Node proto to identify the gRPC application.
	WatchExpiryTimeout time.Duration           // Resource watch expiry timeout.
	LogPrefix          string                  // Prefix to be used for log messages.
}
   146  
   147  // NewStreamImpl initializes a new StreamImpl instance using the given
   148  // parameters.  It also launches goroutines responsible for managing reads and
   149  // writes for messages of the underlying stream.
   150  func NewStreamImpl(opts StreamOpts) *StreamImpl {
   151  	s := &StreamImpl{
   152  		transport:          opts.Transport,
   153  		eventHandler:       opts.EventHandler,
   154  		backoff:            opts.Backoff,
   155  		nodeProto:          opts.NodeProto,
   156  		watchExpiryTimeout: opts.WatchExpiryTimeout,
   157  
   158  		streamCh:          make(chan transport.StreamingCall, 1),
   159  		requestCh:         buffer.NewUnbounded(),
   160  		runnerDoneCh:      make(chan struct{}),
   161  		resourceTypeState: make(map[xdsresource.Type]*resourceTypeState),
   162  	}
   163  
   164  	l := grpclog.Component("xds")
   165  	s.logger = igrpclog.NewPrefixLogger(l, opts.LogPrefix+fmt.Sprintf("[ads-stream %p] ", s))
   166  
   167  	ctx, cancel := context.WithCancel(context.Background())
   168  	s.cancel = cancel
   169  	go s.runner(ctx)
   170  	return s
   171  }
   172  
// Stop cancels the context passed to the runner goroutine, closes the request
// channel, and blocks until the runner goroutine has exited (signaled by the
// closing of runnerDoneCh).
//
// NOTE(review): the send goroutine spawned by runner is not waited on here. It
// returns on its own once it observes the canceled context or the closed
// request channel — confirm whether callers rely on it having exited by the
// time Stop returns.
func (s *StreamImpl) Stop() {
	s.cancel()
	s.requestCh.Close()
	<-s.runnerDoneCh
	// Logged after the runner goroutine has exited.
	s.logger.Infof("Stopping ADS stream")
}
   180  
   181  // Subscribe subscribes to the given resource. It is assumed that multiple
   182  // subscriptions for the same resource is deduped at the caller. A discovery
   183  // request is sent out on the underlying stream for the resource type when there
   184  // is sufficient flow control quota.
   185  func (s *StreamImpl) Subscribe(typ xdsresource.Type, name string) {
   186  	if s.logger.V(2) {
   187  		s.logger.Infof("Subscribing to resource %q of type %q", name, typ.TypeName())
   188  	}
   189  
   190  	s.mu.Lock()
   191  	defer s.mu.Unlock()
   192  
   193  	state, ok := s.resourceTypeState[typ]
   194  	if !ok {
   195  		// An entry in the type state map is created as part of the first
   196  		// subscription request for this type.
   197  		state = &resourceTypeState{
   198  			subscribedResources: make(map[string]*ResourceWatchState),
   199  			bufferedRequests:    make(chan struct{}, 1),
   200  		}
   201  		s.resourceTypeState[typ] = state
   202  	}
   203  
   204  	// Create state for the newly subscribed resource. The watch timer will
   205  	// be started when a request for this resource is actually sent out.
   206  	state.subscribedResources[name] = &ResourceWatchState{State: ResourceWatchStateStarted}
   207  	state.pendingWrite = true
   208  
   209  	// Send a request for the resource type with updated subscriptions.
   210  	s.requestCh.Put(typ)
   211  }
   212  
   213  // Unsubscribe cancels the subscription to the given resource. It is a no-op if
   214  // the given resource does not exist. The watch expiry timer associated with the
   215  // resource is stopped if one is active. A discovery request is sent out on the
   216  // stream for the resource type when there is sufficient flow control quota.
   217  func (s *StreamImpl) Unsubscribe(typ xdsresource.Type, name string) {
   218  	if s.logger.V(2) {
   219  		s.logger.Infof("Unsubscribing to resource %q of type %q", name, typ.TypeName())
   220  	}
   221  
   222  	s.mu.Lock()
   223  	defer s.mu.Unlock()
   224  
   225  	state, ok := s.resourceTypeState[typ]
   226  	if !ok {
   227  		return
   228  	}
   229  
   230  	rs, ok := state.subscribedResources[name]
   231  	if !ok {
   232  		return
   233  	}
   234  	if rs.ExpiryTimer != nil {
   235  		rs.ExpiryTimer.Stop()
   236  	}
   237  	delete(state.subscribedResources, name)
   238  	state.pendingWrite = true
   239  
   240  	// Send a request for the resource type with updated subscriptions.
   241  	s.requestCh.Put(typ)
   242  }
   243  
// runner is a long-running goroutine that handles the lifecycle of the ADS
// stream. It spawns another goroutine to handle writes of discovery request
// messages on the stream. Whenever an existing stream fails, it performs
// exponential backoff (if no messages were received on that stream) before
// creating a new stream.
//
// runnerDoneCh is closed when this goroutine returns, which is what Stop waits
// for.
func (s *StreamImpl) runner(ctx context.Context) {
	defer close(s.runnerDoneCh)

	go s.send(ctx)

	// runStreamWithBackoff creates one streaming call and reads from it until
	// it fails. It is invoked repeatedly by backoff.RunF.
	runStreamWithBackoff := func() error {
		stream, err := s.transport.CreateStreamingCall(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources")
		if err != nil {
			s.logger.Warningf("Failed to create a new ADS streaming RPC: %v", err)
			// No message could have been received on a stream that was never
			// created, hence msgReceived is false.
			s.onError(err, false)
			return nil
		}
		if s.logger.V(2) {
			s.logger.Infof("ADS stream created")
		}

		s.mu.Lock()
		// Flow control is a property of the underlying streaming RPC call and
		// needs to be initialized everytime a new one is created.
		s.fc = newADSFlowControl(s.logger)
		s.firstRequest = true
		s.mu.Unlock()

		// Ensure that the most recently created stream is pushed on the
		// channel for the `send` goroutine to consume. The channel has a
		// capacity of one, so any stale stream which was not consumed is
		// dropped first to make room.
		select {
		case <-s.streamCh:
		default:
		}
		s.streamCh <- stream

		// Backoff state is reset upon successful receipt of at least one
		// message from the server.
		if s.recv(ctx, stream) {
			return backoff.ErrResetBackoff
		}
		return nil
	}
	backoff.RunF(ctx, runStreamWithBackoff, s.backoff)
}
   289  
// send is a long running goroutine that handles sending discovery requests for
// two scenarios:
// - a new subscription or unsubscription request is received
// - a new stream is created after the previous one failed
//
// It returns when the context is canceled or when the request channel is
// closed.
func (s *StreamImpl) send(ctx context.Context) {
	// Stores the most recent stream instance received on streamCh.
	var stream transport.StreamingCall
	for {
		select {
		case <-ctx.Done():
			return
		case stream = <-s.streamCh:
			// A new stream was created: resend requests for all resource
			// types which have subscribed resources.
			if err := s.sendExisting(stream); err != nil {
				// Send failed, clear the current stream. Attempt to resend will
				// only be made after a new stream is created.
				stream = nil
				continue
			}
		case req, ok := <-s.requestCh.Get():
			if !ok {
				// The request channel was closed.
				return
			}
			s.requestCh.Load()

			// Only resource types are pushed on the request channel, by
			// Subscribe and Unsubscribe.
			typ := req.(xdsresource.Type)
			if err := s.sendNew(stream, typ); err != nil {
				// Same handling as above: wait for a new stream.
				stream = nil
				continue
			}
		}
	}
}
   322  
   323  // sendNew attempts to send a discovery request based on a new subscription or
   324  // unsubscription. If there is no flow control quota, the request is buffered
   325  // and will be sent later. This method also starts the watch expiry timer for
   326  // resources that were sent in the request for the first time, i.e. their watch
   327  // state is `watchStateStarted`.
   328  func (s *StreamImpl) sendNew(stream transport.StreamingCall, typ xdsresource.Type) error {
   329  	s.mu.Lock()
   330  	defer s.mu.Unlock()
   331  
   332  	// If there's no stream yet, skip the request. This request will be resent
   333  	// when a new stream is created. If no stream is created, the watcher will
   334  	// timeout (same as server not sending response back).
   335  	if stream == nil {
   336  		return nil
   337  	}
   338  
   339  	// If local processing of the most recently received response is not yet
   340  	// complete, i.e. fc.pending == true, queue this write and return early.
   341  	// This allows us to batch writes for requests which are generated as part
   342  	// of local processing of a received response.
   343  	state := s.resourceTypeState[typ]
   344  	if s.fc.pending.Load() {
   345  		select {
   346  		case state.bufferedRequests <- struct{}{}:
   347  		default:
   348  		}
   349  		return nil
   350  	}
   351  
   352  	return s.sendMessageIfWritePendingLocked(stream, typ, state)
   353  }
   354  
   355  // sendExisting sends out discovery requests for existing resources when
   356  // recovering from a broken stream.
   357  //
   358  // The stream argument is guaranteed to be non-nil.
   359  func (s *StreamImpl) sendExisting(stream transport.StreamingCall) error {
   360  	s.mu.Lock()
   361  	defer s.mu.Unlock()
   362  
   363  	for typ, state := range s.resourceTypeState {
   364  		// Reset only the nonces map when the stream restarts.
   365  		//
   366  		// xDS spec says the following. See section:
   367  		// https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#ack-nack-and-resource-type-instance-version
   368  		//
   369  		// Note that the version for a resource type is not a property of an
   370  		// individual xDS stream but rather a property of the resources
   371  		// themselves. If the stream becomes broken and the client creates a new
   372  		// stream, the client’s initial request on the new stream should
   373  		// indicate the most recent version seen by the client on the previous
   374  		// stream
   375  		state.nonce = ""
   376  
   377  		if len(state.subscribedResources) == 0 {
   378  			continue
   379  		}
   380  
   381  		state.pendingWrite = true
   382  		if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil {
   383  			return err
   384  		}
   385  	}
   386  	return nil
   387  }
   388  
   389  // sendBuffered sends out discovery requests for resources that were buffered
   390  // when they were subscribed to, because local processing of the previously
   391  // received response was not yet complete.
   392  //
   393  // The stream argument is guaranteed to be non-nil.
   394  func (s *StreamImpl) sendBuffered(stream transport.StreamingCall) error {
   395  	s.mu.Lock()
   396  	defer s.mu.Unlock()
   397  
   398  	for typ, state := range s.resourceTypeState {
   399  		select {
   400  		case <-state.bufferedRequests:
   401  			if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil {
   402  				return err
   403  			}
   404  		default:
   405  			// No buffered request.
   406  			continue
   407  		}
   408  	}
   409  	return nil
   410  }
   411  
   412  // sendMessageIfWritePendingLocked attempts to sends a discovery request to the
   413  // server, if there is a pending write for the given resource type.
   414  //
   415  // If the request is successfully sent, the pending write field is cleared and
   416  // watch timers are started for the resources in the request.
   417  //
   418  // Caller needs to hold c.mu.
   419  func (s *StreamImpl) sendMessageIfWritePendingLocked(stream transport.StreamingCall, typ xdsresource.Type, state *resourceTypeState) error {
   420  	if !state.pendingWrite {
   421  		if s.logger.V(2) {
   422  			s.logger.Infof("Skipping sending request for type %q, because all subscribed resources were already sent", typ.TypeURL())
   423  		}
   424  		return nil
   425  	}
   426  
   427  	names := resourceNames(state.subscribedResources)
   428  	if err := s.sendMessageLocked(stream, names, typ.TypeURL(), state.version, state.nonce, nil); err != nil {
   429  		return err
   430  	}
   431  	state.pendingWrite = false
   432  
   433  	// Drain the buffered requests channel because we just sent a request for this
   434  	// resource type.
   435  	select {
   436  	case <-state.bufferedRequests:
   437  	default:
   438  	}
   439  
   440  	s.startWatchTimersLocked(typ, names)
   441  	return nil
   442  }
   443  
   444  // sendMessageLocked sends a discovery request to the server, populating the
   445  // different fields of the message with the given parameters. Returns a non-nil
   446  // error if the request could not be sent.
   447  //
   448  // Caller needs to hold c.mu.
   449  func (s *StreamImpl) sendMessageLocked(stream transport.StreamingCall, names []string, url, version, nonce string, nackErr error) error {
   450  	req := &v3discoverypb.DiscoveryRequest{
   451  		ResourceNames: names,
   452  		TypeUrl:       url,
   453  		VersionInfo:   version,
   454  		ResponseNonce: nonce,
   455  	}
   456  
   457  	// The xDS protocol only requires that we send the node proto in the first
   458  	// discovery request on every stream. Sending the node proto in every
   459  	// request wastes CPU resources on the client and the server.
   460  	if s.firstRequest {
   461  		req.Node = s.nodeProto
   462  	}
   463  
   464  	if nackErr != nil {
   465  		req.ErrorDetail = &statuspb.Status{
   466  			Code: int32(codes.InvalidArgument), Message: nackErr.Error(),
   467  		}
   468  	}
   469  
   470  	if err := stream.Send(req); err != nil {
   471  		s.logger.Warningf("Sending ADS request for type %q, resources: %v, version: %q, nonce: %q failed: %v", url, names, version, nonce, err)
   472  		return err
   473  	}
   474  	s.firstRequest = false
   475  
   476  	if s.logger.V(perRPCVerbosityLevel) {
   477  		s.logger.Infof("ADS request sent: %v", pretty.ToJSON(req))
   478  	} else if s.logger.V(2) {
   479  		s.logger.Warningf("ADS request sent for type %q, resources: %v, version: %q, nonce: %q", url, names, version, nonce)
   480  	}
   481  	return nil
   482  }
   483  
// recv is responsible for receiving messages from the ADS stream.
//
// It performs the following actions:
//   - Waits for local flow control to be available before sending buffered
//     requests, if any.
//   - Receives a message from the ADS stream. If an error is encountered here,
//     it is handled by the onError method which propagates the error to all
//     watchers.
//   - Invokes the event handler's OnADSResponse method to process the message.
//   - Sends an ACK or NACK to the server based on the response.
//
// It returns a boolean indicating whether at least one message was received
// from the server. It returns when the flow control wait fails or when
// receiving a message fails.
func (s *StreamImpl) recv(ctx context.Context, stream transport.StreamingCall) bool {
	msgReceived := false
	for {
		// Wait for ADS stream level flow control to be available, and send out
		// a request if anything was buffered while we were waiting for local
		// processing of the previous response to complete.
		if !s.fc.wait(ctx) {
			if s.logger.V(2) {
				s.logger.Infof("ADS stream context canceled")
			}
			return msgReceived
		}
		// NOTE(review): the error returned by sendBuffered is dropped here —
		// presumably a broken stream also surfaces as a failure of the receive
		// below; confirm.
		s.sendBuffered(stream)

		resources, url, version, nonce, err := s.recvMessage(stream)
		if err != nil {
			s.onError(err, msgReceived)
			s.logger.Warningf("ADS stream closed: %v", err)
			return msgReceived
		}
		msgReceived = true

		// Invoke the onResponse event handler to parse the incoming message and
		// decide whether to send an ACK or NACK.
		resp := Response{
			Resources: resources,
			TypeURL:   url,
			Version:   version,
		}
		var resourceNames []string
		var nackErr error
		// Flow control is marked as pending before the response is handed to
		// the event handler, which is also given the flow control's onDone
		// callback.
		s.fc.setPending()
		resourceNames, nackErr = s.eventHandler.OnADSResponse(resp, s.fc.onDone)
		if xdsresource.ErrType(nackErr) == xdsresource.ErrorTypeResourceTypeUnsupported {
			// Based on gRFC A27, a general guiding principle is that if the
			// server sends something the client didn't actually subscribe to,
			// then the client ignores it. Here, we have received a response
			// with resources of a type that we don't know about.
			//
			// Sending a NACK doesn't really seem appropriate here, since we're
			// not actually validating what the server sent and therefore don't
			// know that it's invalid.  But we shouldn't ACK either, because we
			// don't know that it is valid.
			s.logger.Warningf("%v", nackErr)
			continue
		}

		s.onRecv(stream, resourceNames, url, version, nonce, nackErr)
	}
}
   547  
   548  func (s *StreamImpl) recvMessage(stream transport.StreamingCall) (resources []*anypb.Any, url, version, nonce string, err error) {
   549  	r, err := stream.Recv()
   550  	if err != nil {
   551  		return nil, "", "", "", err
   552  	}
   553  	resp, ok := r.(*v3discoverypb.DiscoveryResponse)
   554  	if !ok {
   555  		s.logger.Infof("Message received on ADS stream of unexpected type: %T", r)
   556  		return nil, "", "", "", fmt.Errorf("unexpected message type %T", r)
   557  	}
   558  
   559  	if s.logger.V(perRPCVerbosityLevel) {
   560  		s.logger.Infof("ADS response received: %v", pretty.ToJSON(resp))
   561  	} else if s.logger.V(2) {
   562  		s.logger.Infof("ADS response received for type %q, version %q, nonce %q", resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce())
   563  	}
   564  	return resp.GetResources(), resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce(), nil
   565  }
   566  
   567  // onRecv is invoked when a response is received from the server. The arguments
   568  // passed to this method correspond to the most recently received response.
   569  //
   570  // It performs the following actions:
   571  //   - updates resource type specific state
   572  //   - updates resource specific state for resources in the response
   573  //   - sends an ACK or NACK to the server based on the response
   574  func (s *StreamImpl) onRecv(stream transport.StreamingCall, names []string, url, version, nonce string, nackErr error) {
   575  	s.mu.Lock()
   576  	defer s.mu.Unlock()
   577  
   578  	// Lookup the resource type specific state based on the type URL.
   579  	var typ xdsresource.Type
   580  	for t := range s.resourceTypeState {
   581  		if t.TypeURL() == url {
   582  			typ = t
   583  			break
   584  		}
   585  	}
   586  	typeState, ok := s.resourceTypeState[typ]
   587  	if !ok {
   588  		s.logger.Warningf("ADS stream received a response for type %q, but no state exists for it", url)
   589  		return
   590  	}
   591  
   592  	// Update the resource type specific state. This includes:
   593  	//   - updating the nonce unconditionally
   594  	//   - updating the version only if the response is to be ACKed
   595  	previousVersion := typeState.version
   596  	typeState.nonce = nonce
   597  	if nackErr == nil {
   598  		typeState.version = version
   599  	}
   600  
   601  	// Update the resource specific state. For all resources received as
   602  	// part of this response that are in state `started` or `requested`,
   603  	// this includes:
   604  	//   - setting the watch state to watchstateReceived
   605  	//   - stopping the expiry timer, if one exists
   606  	for _, name := range names {
   607  		rs, ok := typeState.subscribedResources[name]
   608  		if !ok {
   609  			s.logger.Warningf("ADS stream received a response for resource %q, but no state exists for it", name)
   610  			continue
   611  		}
   612  		if ws := rs.State; ws == ResourceWatchStateStarted || ws == ResourceWatchStateRequested {
   613  			rs.State = ResourceWatchStateReceived
   614  			if rs.ExpiryTimer != nil {
   615  				rs.ExpiryTimer.Stop()
   616  				rs.ExpiryTimer = nil
   617  			}
   618  		}
   619  	}
   620  
   621  	// Send an ACK or NACK.
   622  	subscribedResourceNames := resourceNames(typeState.subscribedResources)
   623  	if nackErr != nil {
   624  		s.logger.Warningf("Sending NACK for resource type: %q, version: %q, nonce: %q, reason: %v", url, version, nonce, nackErr)
   625  		s.sendMessageLocked(stream, subscribedResourceNames, url, previousVersion, nonce, nackErr)
   626  		return
   627  	}
   628  
   629  	if s.logger.V(2) {
   630  		s.logger.Infof("Sending ACK for resource type: %q, version: %q, nonce: %q", url, version, nonce)
   631  	}
   632  	s.sendMessageLocked(stream, subscribedResourceNames, url, version, nonce, nil)
   633  }
   634  
   635  // onError is called when an error occurs on the ADS stream. It stops any
   636  // outstanding resource timers and resets the watch state to started for any
   637  // resources that were in the requested state. It also handles the case where
   638  // the ADS stream was closed after receiving a response, which is not
   639  // considered an error.
   640  func (s *StreamImpl) onError(err error, msgReceived bool) {
   641  	// For resources that been requested but not yet responded to by the
   642  	// management server, stop the resource timers and reset the watch state to
   643  	// watchStateStarted. This is because we don't want the expiry timer to be
   644  	// running when we don't have a stream open to the management server.
   645  	s.mu.Lock()
   646  	for _, state := range s.resourceTypeState {
   647  		for _, rs := range state.subscribedResources {
   648  			if rs.State != ResourceWatchStateRequested {
   649  				continue
   650  			}
   651  			if rs.ExpiryTimer != nil {
   652  				rs.ExpiryTimer.Stop()
   653  				rs.ExpiryTimer = nil
   654  			}
   655  			rs.State = ResourceWatchStateStarted
   656  		}
   657  	}
   658  	s.mu.Unlock()
   659  
   660  	// Note that we do not consider it an error if the ADS stream was closed
   661  	// after having received a response on the stream. This is because there
   662  	// are legitimate reasons why the server may need to close the stream during
   663  	// normal operations, such as needing to rebalance load or the underlying
   664  	// connection hitting its max connection age limit.
   665  	// (see [gRFC A9](https://github.com/grpc/proposal/blob/master/A9-server-side-conn-mgt.md)).
   666  	if msgReceived {
   667  		err = xdsresource.NewError(xdsresource.ErrTypeStreamFailedAfterRecv, err.Error())
   668  	}
   669  
   670  	s.eventHandler.OnADSStreamError(err)
   671  }
   672  
   673  // startWatchTimersLocked starts the expiry timers for the given resource names
   674  // of the specified resource type.  For each resource name, if the resource
   675  // watch state is in the "started" state, it transitions the state to
   676  // "requested" and starts an expiry timer. When the timer expires, the resource
   677  // watch state is set to "timeout" and the event handler callback is called.
   678  //
   679  // The caller must hold the s.mu lock.
   680  func (s *StreamImpl) startWatchTimersLocked(typ xdsresource.Type, names []string) {
   681  	typeState := s.resourceTypeState[typ]
   682  	for _, name := range names {
   683  		resourceState, ok := typeState.subscribedResources[name]
   684  		if !ok {
   685  			continue
   686  		}
   687  		if resourceState.State != ResourceWatchStateStarted {
   688  			continue
   689  		}
   690  		resourceState.State = ResourceWatchStateRequested
   691  
   692  		rs := resourceState
   693  		resourceState.ExpiryTimer = time.AfterFunc(s.watchExpiryTimeout, func() {
   694  			s.mu.Lock()
   695  			rs.State = ResourceWatchStateTimeout
   696  			rs.ExpiryTimer = nil
   697  			s.mu.Unlock()
   698  			s.eventHandler.OnADSWatchExpiry(typ, name)
   699  		})
   700  	}
   701  }
   702  
   703  func resourceNames(m map[string]*ResourceWatchState) []string {
   704  	ret := make([]string, len(m))
   705  	idx := 0
   706  	for name := range m {
   707  		ret[idx] = name
   708  		idx++
   709  	}
   710  	return ret
   711  }
   712  
   713  // TriggerResourceNotFoundForTesting triggers a resource not found event for the
   714  // given resource type and name.  This is intended for testing purposes only, to
   715  // simulate a resource not found scenario.
   716  func (s *StreamImpl) TriggerResourceNotFoundForTesting(typ xdsresource.Type, resourceName string) {
   717  	s.mu.Lock()
   718  
   719  	state, ok := s.resourceTypeState[typ]
   720  	if !ok {
   721  		s.mu.Unlock()
   722  		return
   723  	}
   724  	resourceState, ok := state.subscribedResources[resourceName]
   725  	if !ok {
   726  		s.mu.Unlock()
   727  		return
   728  	}
   729  
   730  	if s.logger.V(2) {
   731  		s.logger.Infof("Triggering resource not found for type: %s, resource name: %s", typ.TypeName(), resourceName)
   732  	}
   733  	resourceState.State = ResourceWatchStateTimeout
   734  	if resourceState.ExpiryTimer != nil {
   735  		resourceState.ExpiryTimer.Stop()
   736  		resourceState.ExpiryTimer = nil
   737  	}
   738  	s.mu.Unlock()
   739  	go s.eventHandler.OnADSWatchExpiry(typ, resourceName)
   740  }
   741  
   742  // ResourceWatchStateForTesting returns the ResourceWatchState for the given
   743  // resource type and name.  This is intended for testing purposes only, to
   744  // inspect the internal state of the ADS stream.
   745  func (s *StreamImpl) ResourceWatchStateForTesting(typ xdsresource.Type, resourceName string) (ResourceWatchState, error) {
   746  	s.mu.Lock()
   747  	defer s.mu.Unlock()
   748  
   749  	state, ok := s.resourceTypeState[typ]
   750  	if !ok {
   751  		return ResourceWatchState{}, fmt.Errorf("unknown resource type: %v", typ)
   752  	}
   753  	resourceState, ok := state.subscribedResources[resourceName]
   754  	if !ok {
   755  		return ResourceWatchState{}, fmt.Errorf("unknown resource name: %v", resourceName)
   756  	}
   757  	return *resourceState, nil
   758  }
   759  
// adsFlowControl implements ADS stream level flow control that enables the
// transport to block the reading of the next message off of the stream until
// the previous update is consumed by all watchers.
//
// An update is marked as outstanding with setPending(), its consumption is
// signalled with onDone(), and wait() blocks until that signal arrives (or
// returns immediately when nothing is outstanding).
//
// The lifetime of the flow control is tied to the lifetime of the stream.
type adsFlowControl struct {
	// Logger used for the verbose diagnostic emitted by onDone().
	logger *igrpclog.PrefixLogger

	// Whether the most recent update is pending consumption by all watchers.
	// Set to true by setPending() and reset to false by onDone().
	pending atomic.Bool
	// Channel used to notify when all the watchers have consumed the most
	// recent update. wait() blocks on reading a value from this channel, and
	// onDone() performs a non-blocking write to it. newADSFlowControl creates
	// it with a buffer of size one.
	readyCh chan struct{}
}
   774  
   775  // newADSFlowControl returns a new adsFlowControl.
   776  func newADSFlowControl(logger *igrpclog.PrefixLogger) *adsFlowControl {
   777  	return &adsFlowControl{
   778  		logger:  logger,
   779  		readyCh: make(chan struct{}, 1),
   780  	}
   781  }
   782  
// setPending changes the internal state to indicate that there is an update
// pending consumption by all watchers. While this state is set, wait() blocks
// until onDone() is called or the context passed to it is done.
func (fc *adsFlowControl) setPending() {
	fc.pending.Store(true)
}
   788  
   789  // wait blocks until all the watchers have consumed the most recent update and
   790  // returns true. If the context expires before that, it returns false.
   791  func (fc *adsFlowControl) wait(ctx context.Context) bool {
   792  	// If there is no pending update, there is no need to block.
   793  	if !fc.pending.Load() {
   794  		// If all watchers finished processing the most recent update before the
   795  		// `recv` goroutine made the next call to `Wait()`, there would be an
   796  		// entry in the readyCh channel that needs to be drained to ensure that
   797  		// the next call to `Wait()` doesn't unblock before it actually should.
   798  		select {
   799  		case <-fc.readyCh:
   800  		default:
   801  		}
   802  		return true
   803  	}
   804  
   805  	select {
   806  	case <-ctx.Done():
   807  		return false
   808  	case <-fc.readyCh:
   809  		return true
   810  	}
   811  }
   812  
   813  // onDone indicates that all watchers have consumed the most recent update.
   814  func (fc *adsFlowControl) onDone() {
   815  	select {
   816  	// Writes to the readyCh channel should not block ideally. The default
   817  	// branch here is to appease the paranoid mind.
   818  	case fc.readyCh <- struct{}{}:
   819  	default:
   820  		if fc.logger.V(2) {
   821  			fc.logger.Infof("ADS stream flow control readyCh is full")
   822  		}
   823  	}
   824  	fc.pending.Store(false)
   825  }