google.golang.org/grpc@v1.72.2/xds/internal/clients/xdsclient/ads_stream.go (about)

     1  /*
     2   *
     3   * Copyright 2025 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package xdsclient
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"google.golang.org/grpc/grpclog"
    29  	igrpclog "google.golang.org/grpc/internal/grpclog"
    30  	"google.golang.org/grpc/xds/internal/clients"
    31  	"google.golang.org/grpc/xds/internal/clients/internal/backoff"
    32  	"google.golang.org/grpc/xds/internal/clients/internal/buffer"
    33  	"google.golang.org/grpc/xds/internal/clients/internal/pretty"
    34  	"google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
    35  
    36  	"google.golang.org/protobuf/proto"
    37  	"google.golang.org/protobuf/types/known/anypb"
    38  
    39  	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
    40  	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    41  	cpb "google.golang.org/genproto/googleapis/rpc/code"
    42  	statuspb "google.golang.org/genproto/googleapis/rpc/status"
    43  )
    44  
const (
	// perRPCVerbosityLevel is the verbosity level at which per-RPC logs that
	// print complete request or response messages must be gated. Other
	// per-RPC level logs which print terse output should be at `INFO` and
	// verbosity 2.
	perRPCVerbosityLevel = 9
)
    51  
// response represents a response received on the ADS stream. It contains the
// type URL, version, and resources for the response. It is the value handed to
// the event handler's onResponse callback.
type response struct {
	typeURL   string       // Type URL reported by the DiscoveryResponse.
	version   string       // Version info reported by the DiscoveryResponse.
	resources []*anypb.Any // Resources carried by the DiscoveryResponse, still wrapped as Any.
}
    59  
// dataAndErrTuple is a struct that holds a resource and an error. It is used to
// return a resource and any associated error from a function.
type dataAndErrTuple struct {
	Resource ResourceData // The resource data.
	Err      error        // The error associated with the resource, if any.
}
    66  
// adsStreamEventHandler is an interface that defines the callbacks for events that
// occur on the ADS stream. Methods on this interface may be invoked
// concurrently and implementations need to handle them in a thread-safe manner.
//
// For onResponse, the stream passes the received response together with the
// flow control completion callback. The returned names are used by the stream
// to update the watch state of the corresponding resources, and the returned
// error determines whether the response is ACKed (nil) or NACKed (non-nil).
type adsStreamEventHandler interface {
	onStreamError(error)                           // Called when the ADS stream breaks.
	onWatchExpiry(ResourceType, string)            // Called when the watch timer expires for a resource.
	onResponse(response, func()) ([]string, error) // Called when a response is received on the ADS stream.
}
    75  
// watchState is an enum that describes the watch state of a particular
// resource.
type watchState int
    79  
const (
	// resourceWatchStateStarted is the state where a watch for a resource was
	// started, but a request asking for that resource is yet to be sent to the
	// management server.
	resourceWatchStateStarted watchState = iota
	// resourceWatchStateRequested is the state when a request has been sent for
	// the resource being watched.
	resourceWatchStateRequested
	// resourceWatchStateReceived is the state when a response has been received
	// for the resource being watched.
	resourceWatchStateReceived
	// resourceWatchStateTimeout is the state when the watch timer associated
	// with the resource expired because no response was received.
	resourceWatchStateTimeout
)
    95  
// resourceWatchState is the state corresponding to a resource being watched.
type resourceWatchState struct {
	State       watchState  // Watch state of the resource.
	ExpiryTimer *time.Timer // Timer for the expiry of the watch; nil when no timer is active.
}
   101  
// resourceTypeState is the state corresponding to a resource type. An entry is
// created the first time a resource of that type is subscribed to.
type resourceTypeState struct {
	version             string                         // Last acked version. Should not be reset when the stream breaks.
	nonce               string                         // Last received nonce. Should be reset when the stream breaks.
	bufferedRequests    chan struct{}                  // Channel (capacity 1) to buffer requests when writing is blocked.
	subscribedResources map[string]*resourceWatchState // Map of subscribed resource names to their state.
	pendingWrite        bool                           // True if there is a pending write for this resource type.
}
   110  
// adsStreamImpl provides the functionality associated with an ADS (Aggregated
// Discovery Service) stream on the client side. It manages the lifecycle of the
// ADS stream, including creating the stream, sending requests, and handling
// responses. It also handles flow control and retries for the stream.
type adsStreamImpl struct {
	// The following fields are initialized from arguments passed to the
	// constructor and are read-only afterwards, and hence can be accessed
	// without a mutex.
	transport          clients.Transport       // Transport to use for ADS stream.
	eventHandler       adsStreamEventHandler   // Callbacks into the xdsChannel.
	backoff            func(int) time.Duration // Backoff for retries, after stream failures.
	nodeProto          *v3corepb.Node          // Identifies the gRPC application.
	watchExpiryTimeout time.Duration           // Resource watch expiry timeout.
	logger             *igrpclog.PrefixLogger  // Logger carrying the configured prefix and the stream's address.

	// The following fields are initialized in the constructor and are not
	// written to afterwards, and hence can be accessed without a mutex.
	streamCh     chan clients.Stream // New ADS streams are pushed here.
	requestCh    *buffer.Unbounded   // Subscriptions and unsubscriptions are pushed here.
	runnerDoneCh chan struct{}       // Notify completion of runner goroutine.
	cancel       context.CancelFunc  // To cancel the context passed to the runner goroutine.

	// Guards access to the below fields (and to the contents of the map).
	mu                sync.Mutex
	resourceTypeState map[ResourceType]*resourceTypeState // Map of resource types to their state.
	fc                *adsFlowControl                     // Flow control for ADS stream; recreated for every new stream.
	firstRequest      bool                                // False after the first request is sent out.
}
   139  
// adsStreamOpts contains the options for creating a new ADS Stream. All
// fields are copied into the adsStreamImpl by newADSStreamImpl, except
// logPrefix, which is used to build the stream's logger.
type adsStreamOpts struct {
	transport          clients.Transport       // xDS transport to create the stream on.
	eventHandler       adsStreamEventHandler   // Callbacks for stream events.
	backoff            func(int) time.Duration // Backoff for retries, after stream failures.
	nodeProto          *v3corepb.Node          // Node proto to identify the gRPC application.
	watchExpiryTimeout time.Duration           // Resource watch expiry timeout.
	logPrefix          string                  // Prefix to be used for log messages.
}
   149  
   150  // newADSStreamImpl initializes a new adsStreamImpl instance using the given
   151  // parameters.  It also launches goroutines responsible for managing reads and
   152  // writes for messages of the underlying stream.
   153  func newADSStreamImpl(opts adsStreamOpts) *adsStreamImpl {
   154  	s := &adsStreamImpl{
   155  		transport:          opts.transport,
   156  		eventHandler:       opts.eventHandler,
   157  		backoff:            opts.backoff,
   158  		nodeProto:          opts.nodeProto,
   159  		watchExpiryTimeout: opts.watchExpiryTimeout,
   160  
   161  		streamCh:          make(chan clients.Stream, 1),
   162  		requestCh:         buffer.NewUnbounded(),
   163  		runnerDoneCh:      make(chan struct{}),
   164  		resourceTypeState: make(map[ResourceType]*resourceTypeState),
   165  	}
   166  
   167  	l := grpclog.Component("xds")
   168  	s.logger = igrpclog.NewPrefixLogger(l, opts.logPrefix+fmt.Sprintf("[ads-stream %p] ", s))
   169  
   170  	ctx, cancel := context.WithCancel(context.Background())
   171  	s.cancel = cancel
   172  	go s.runner(ctx)
   173  	return s
   174  }
   175  
// Stop cancels the context passed to the runner goroutine, closes the request
// channel, and blocks until the runner goroutine has exited (signaled by the
// closing of runnerDoneCh).
func (s *adsStreamImpl) Stop() {
	s.cancel()
	// Closing the request channel gives the send goroutine a second exit
	// signal, in addition to the canceled context.
	s.requestCh.Close()
	<-s.runnerDoneCh
	s.logger.Infof("Shutdown ADS stream")
}
   183  
   184  // subscribe subscribes to the given resource. It is assumed that multiple
   185  // subscriptions for the same resource is deduped at the caller. A discovery
   186  // request is sent out on the underlying stream for the resource type when there
   187  // is sufficient flow control quota.
   188  func (s *adsStreamImpl) subscribe(typ ResourceType, name string) {
   189  	if s.logger.V(2) {
   190  		s.logger.Infof("Subscribing to resource %q of type %q", name, typ.TypeName)
   191  	}
   192  
   193  	s.mu.Lock()
   194  	defer s.mu.Unlock()
   195  
   196  	state, ok := s.resourceTypeState[typ]
   197  	if !ok {
   198  		// An entry in the type state map is created as part of the first
   199  		// subscription request for this type.
   200  		state = &resourceTypeState{
   201  			subscribedResources: make(map[string]*resourceWatchState),
   202  			bufferedRequests:    make(chan struct{}, 1),
   203  		}
   204  		s.resourceTypeState[typ] = state
   205  	}
   206  
   207  	// Create state for the newly subscribed resource. The watch timer will
   208  	// be started when a request for this resource is actually sent out.
   209  	state.subscribedResources[name] = &resourceWatchState{State: resourceWatchStateStarted}
   210  	state.pendingWrite = true
   211  
   212  	// Send a request for the resource type with updated subscriptions.
   213  	s.requestCh.Put(typ)
   214  }
   215  
   216  // Unsubscribe cancels the subscription to the given resource. It is a no-op if
   217  // the given resource does not exist. The watch expiry timer associated with the
   218  // resource is stopped if one is active. A discovery request is sent out on the
   219  // stream for the resource type when there is sufficient flow control quota.
   220  func (s *adsStreamImpl) Unsubscribe(typ ResourceType, name string) {
   221  	if s.logger.V(2) {
   222  		s.logger.Infof("Unsubscribing to resource %q of type %q", name, typ.TypeName)
   223  	}
   224  
   225  	s.mu.Lock()
   226  	defer s.mu.Unlock()
   227  
   228  	state, ok := s.resourceTypeState[typ]
   229  	if !ok {
   230  		return
   231  	}
   232  
   233  	rs, ok := state.subscribedResources[name]
   234  	if !ok {
   235  		return
   236  	}
   237  	if rs.ExpiryTimer != nil {
   238  		rs.ExpiryTimer.Stop()
   239  	}
   240  	delete(state.subscribedResources, name)
   241  	state.pendingWrite = true
   242  
   243  	// Send a request for the resource type with updated subscriptions.
   244  	s.requestCh.Put(typ)
   245  }
   246  
// runner is a long-running goroutine that handles the lifecycle of the ADS
// stream. It spawns another goroutine to handle writes of discovery request
// messages on the stream. Whenever an existing stream fails, it performs
// exponential backoff (if no messages were received on that stream) before
// creating a new stream.
//
// runnerDoneCh is closed when this goroutine exits.
func (s *adsStreamImpl) runner(ctx context.Context) {
	defer close(s.runnerDoneCh)

	go s.send(ctx)

	// runStreamWithBackoff creates one stream and reads from it until it
	// breaks. It returns backoff.ErrResetBackoff if at least one message was
	// received on the stream, and nil otherwise.
	runStreamWithBackoff := func() error {
		stream, err := s.transport.NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources")
		if err != nil {
			s.logger.Warningf("Failed to create a new ADS streaming RPC: %v", err)
			s.onError(err, false)
			return nil
		}
		if s.logger.V(2) {
			s.logger.Infof("ADS stream created")
		}

		s.mu.Lock()
		// Flow control is a property of the underlying streaming RPC call and
		// needs to be initialized every time a new one is created.
		s.fc = newADSFlowControl(s.logger)
		s.firstRequest = true
		s.mu.Unlock()

		// Ensure that the most recently created stream is pushed on the
		// channel for the `send` goroutine to consume. The channel has a
		// capacity of one, so a stale, unconsumed stream is discarded first.
		select {
		case <-s.streamCh:
		default:
		}
		s.streamCh <- stream

		// Backoff state is reset upon successful receipt of at least one
		// message from the server.
		if s.recv(ctx, stream) {
			return backoff.ErrResetBackoff
		}
		return nil
	}
	backoff.RunF(ctx, runStreamWithBackoff, s.backoff)
}
   292  
   293  // send is a long running goroutine that handles sending discovery requests for
   294  // two scenarios:
   295  // - a new subscription or unsubscription request is received
   296  // - a new stream is created after the previous one failed
   297  func (s *adsStreamImpl) send(ctx context.Context) {
   298  	// Stores the most recent stream instance received on streamCh.
   299  	var stream clients.Stream
   300  	for {
   301  		select {
   302  		case <-ctx.Done():
   303  			return
   304  		case stream = <-s.streamCh:
   305  			if err := s.sendExisting(stream); err != nil {
   306  				// Send failed, clear the current stream. Attempt to resend will
   307  				// only be made after a new stream is created.
   308  				stream = nil
   309  				continue
   310  			}
   311  		case req, ok := <-s.requestCh.Get():
   312  			if !ok {
   313  				return
   314  			}
   315  			s.requestCh.Load()
   316  
   317  			typ := req.(ResourceType)
   318  			if err := s.sendNew(stream, typ); err != nil {
   319  				stream = nil
   320  				continue
   321  			}
   322  		}
   323  	}
   324  }
   325  
   326  // sendNew attempts to send a discovery request based on a new subscription or
   327  // unsubscription. If there is no flow control quota, the request is buffered
   328  // and will be sent later. This method also starts the watch expiry timer for
   329  // resources that were sent in the request for the first time, i.e. their watch
   330  // state is `watchStateStarted`.
   331  func (s *adsStreamImpl) sendNew(stream clients.Stream, typ ResourceType) error {
   332  	s.mu.Lock()
   333  	defer s.mu.Unlock()
   334  
   335  	// If there's no stream yet, skip the request. This request will be resent
   336  	// when a new stream is created. If no stream is created, the watcher will
   337  	// timeout (same as server not sending response back).
   338  	if stream == nil {
   339  		return nil
   340  	}
   341  
   342  	// If local processing of the most recently received response is not yet
   343  	// complete, i.e. fc.pending == true, queue this write and return early.
   344  	// This allows us to batch writes for requests which are generated as part
   345  	// of local processing of a received response.
   346  	state := s.resourceTypeState[typ]
   347  	if s.fc.pending.Load() {
   348  		select {
   349  		case state.bufferedRequests <- struct{}{}:
   350  		default:
   351  		}
   352  		return nil
   353  	}
   354  
   355  	return s.sendMessageIfWritePendingLocked(stream, typ, state)
   356  }
   357  
   358  // sendExisting sends out discovery requests for existing resources when
   359  // recovering from a broken stream.
   360  //
   361  // The stream argument is guaranteed to be non-nil.
   362  func (s *adsStreamImpl) sendExisting(stream clients.Stream) error {
   363  	s.mu.Lock()
   364  	defer s.mu.Unlock()
   365  
   366  	for typ, state := range s.resourceTypeState {
   367  		// Reset only the nonces map when the stream restarts.
   368  		//
   369  		// xDS spec says the following. See section:
   370  		// https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#ack-nack-and-resource-type-instance-version
   371  		//
   372  		// Note that the version for a resource type is not a property of an
   373  		// individual xDS stream but rather a property of the resources
   374  		// themselves. If the stream becomes broken and the client creates a new
   375  		// stream, the client’s initial request on the new stream should
   376  		// indicate the most recent version seen by the client on the previous
   377  		// stream
   378  		state.nonce = ""
   379  
   380  		if len(state.subscribedResources) == 0 {
   381  			continue
   382  		}
   383  
   384  		state.pendingWrite = true
   385  		if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil {
   386  			return err
   387  		}
   388  	}
   389  	return nil
   390  }
   391  
   392  // sendBuffered sends out discovery requests for resources that were buffered
   393  // when they were subscribed to, because local processing of the previously
   394  // received response was not yet complete.
   395  //
   396  // The stream argument is guaranteed to be non-nil.
   397  func (s *adsStreamImpl) sendBuffered(stream clients.Stream) error {
   398  	s.mu.Lock()
   399  	defer s.mu.Unlock()
   400  
   401  	for typ, state := range s.resourceTypeState {
   402  		select {
   403  		case <-state.bufferedRequests:
   404  			if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil {
   405  				return err
   406  			}
   407  		default:
   408  			// No buffered request.
   409  			continue
   410  		}
   411  	}
   412  	return nil
   413  }
   414  
   415  // sendMessageIfWritePendingLocked attempts to sends a discovery request to the
   416  // server, if there is a pending write for the given resource type.
   417  //
   418  // If the request is successfully sent, the pending write field is cleared and
   419  // watch timers are started for the resources in the request.
   420  //
   421  // Caller needs to hold c.mu.
   422  func (s *adsStreamImpl) sendMessageIfWritePendingLocked(stream clients.Stream, typ ResourceType, state *resourceTypeState) error {
   423  	if !state.pendingWrite {
   424  		if s.logger.V(2) {
   425  			s.logger.Infof("Skipping sending request for type %q, because all subscribed resources were already sent", typ.TypeURL)
   426  		}
   427  		return nil
   428  	}
   429  
   430  	names := resourceNames(state.subscribedResources)
   431  	if err := s.sendMessageLocked(stream, names, typ.TypeURL, state.version, state.nonce, nil); err != nil {
   432  		return err
   433  	}
   434  	state.pendingWrite = false
   435  
   436  	// Drain the buffered requests channel because we just sent a request for this
   437  	// resource type.
   438  	select {
   439  	case <-state.bufferedRequests:
   440  	default:
   441  	}
   442  
   443  	s.startWatchTimersLocked(typ, names)
   444  	return nil
   445  }
   446  
   447  // sendMessageLocked sends a discovery request to the server, populating the
   448  // different fields of the message with the given parameters. Returns a non-nil
   449  // error if the request could not be sent.
   450  //
   451  // Caller needs to hold c.mu.
   452  func (s *adsStreamImpl) sendMessageLocked(stream clients.Stream, names []string, url, version, nonce string, nackErr error) error {
   453  	req := &v3discoverypb.DiscoveryRequest{
   454  		ResourceNames: names,
   455  		TypeUrl:       url,
   456  		VersionInfo:   version,
   457  		ResponseNonce: nonce,
   458  	}
   459  
   460  	// The xDS protocol only requires that we send the node proto in the first
   461  	// discovery request on every stream. Sending the node proto in every
   462  	// request wastes CPU resources on the client and the server.
   463  	if s.firstRequest {
   464  		req.Node = s.nodeProto
   465  	}
   466  
   467  	if nackErr != nil {
   468  		req.ErrorDetail = &statuspb.Status{
   469  			Code: int32(cpb.Code_INVALID_ARGUMENT), Message: nackErr.Error(),
   470  		}
   471  	}
   472  
   473  	msg, err := proto.Marshal(req)
   474  	if err != nil {
   475  		s.logger.Warningf("Failed to marshal DiscoveryRequest: %v", err)
   476  		return err
   477  	}
   478  	if err := stream.Send(msg); err != nil {
   479  		s.logger.Warningf("Sending ADS request for type %q, resources: %v, version: %q, nonce: %q failed: %v", url, names, version, nonce, err)
   480  		return err
   481  	}
   482  	s.firstRequest = false
   483  
   484  	if s.logger.V(perRPCVerbosityLevel) {
   485  		s.logger.Infof("ADS request sent: %v", pretty.ToJSON(req))
   486  	} else if s.logger.V(2) {
   487  		s.logger.Warningf("ADS request sent for type %q, resources: %v, version: %q, nonce: %q", url, names, version, nonce)
   488  	}
   489  	return nil
   490  }
   491  
// recv is responsible for receiving messages from the ADS stream.
//
// It performs the following actions:
//   - Waits for local flow control to be available before sending buffered
//     requests, if any.
//   - Receives a message from the ADS stream. If an error is encountered here,
//     it is handled by the onError method which propagates the error to the
//     event handler.
//   - Invokes the event handler's onResponse method to process the message.
//   - Sends an ACK or NACK to the server based on the response.
//
// It returns a boolean indicating whether at least one message was received
// from the server. It returns when the context is canceled while waiting for
// flow control, or when reading from the stream fails.
func (s *adsStreamImpl) recv(ctx context.Context, stream clients.Stream) bool {
	msgReceived := false
	for {
		// Wait for ADS stream level flow control to be available, and send out
		// a request if anything was buffered while we were waiting for local
		// processing of the previous response to complete.
		if !s.fc.wait(ctx) {
			if s.logger.V(2) {
				s.logger.Infof("ADS stream context canceled")
			}
			return msgReceived
		}
		// The error returned by sendBuffered is not checked here; send
		// failures are logged by sendMessageLocked.
		s.sendBuffered(stream)

		resources, url, version, nonce, err := s.recvMessage(stream)
		if err != nil {
			s.onError(err, msgReceived)
			s.logger.Warningf("ADS stream closed: %v", err)
			return msgReceived
		}
		msgReceived = true

		// Invoke the onResponse event handler to parse the incoming message and
		// decide whether to send an ACK or NACK.
		resp := response{
			resources: resources,
			typeURL:   url,
			version:   version,
		}
		var resourceNames []string
		var nackErr error
		// Mark the update as pending before handing it to the event handler.
		// The handler is given fc.onDone to signal that processing is
		// complete.
		s.fc.setPending()
		resourceNames, nackErr = s.eventHandler.onResponse(resp, s.fc.onDone)
		if xdsresource.ErrType(nackErr) == xdsresource.ErrorTypeResourceTypeUnsupported {
			// A general guiding principle is that if the server sends
			// something the client didn't actually subscribe to, then the
			// client ignores it. Here, we have received a response with
			// resources of a type that we don't know about.
			//
			// Sending a NACK doesn't really seem appropriate here, since we're
			// not actually validating what the server sent and therefore don't
			// know that it's invalid.  But we shouldn't ACK either, because we
			// don't know that it is valid.
			s.logger.Warningf("%v", nackErr)
			continue
		}

		s.onRecv(stream, resourceNames, url, version, nonce, nackErr)
	}
}
   555  
   556  func (s *adsStreamImpl) recvMessage(stream clients.Stream) (resources []*anypb.Any, url, version, nonce string, err error) {
   557  	r, err := stream.Recv()
   558  	if err != nil {
   559  		return nil, "", "", "", err
   560  	}
   561  	var resp v3discoverypb.DiscoveryResponse
   562  	if err := proto.Unmarshal(r, &resp); err != nil {
   563  		s.logger.Infof("Failed to unmarshal response to DiscoveryResponse: %v", err)
   564  		return nil, "", "", "", fmt.Errorf("unexpected message type %T", r)
   565  	}
   566  	if s.logger.V(perRPCVerbosityLevel) {
   567  		s.logger.Infof("ADS response received: %v", pretty.ToJSON(&resp))
   568  	} else if s.logger.V(2) {
   569  		s.logger.Infof("ADS response received for type %q, version %q, nonce %q", resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce())
   570  	}
   571  	return resp.GetResources(), resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce(), nil
   572  }
   573  
   574  // onRecv is invoked when a response is received from the server. The arguments
   575  // passed to this method correspond to the most recently received response.
   576  //
   577  // It performs the following actions:
   578  //   - updates resource type specific state
   579  //   - updates resource specific state for resources in the response
   580  //   - sends an ACK or NACK to the server based on the response
   581  func (s *adsStreamImpl) onRecv(stream clients.Stream, names []string, url, version, nonce string, nackErr error) {
   582  	s.mu.Lock()
   583  	defer s.mu.Unlock()
   584  
   585  	// Lookup the resource type specific state based on the type URL.
   586  	var typ ResourceType
   587  	for t := range s.resourceTypeState {
   588  		if t.TypeURL == url {
   589  			typ = t
   590  			break
   591  		}
   592  	}
   593  	typeState, ok := s.resourceTypeState[typ]
   594  	if !ok {
   595  		s.logger.Warningf("ADS stream received a response for type %q, but no state exists for it", url)
   596  		return
   597  	}
   598  
   599  	// Update the resource type specific state. This includes:
   600  	//   - updating the nonce unconditionally
   601  	//   - updating the version only if the response is to be ACKed
   602  	previousVersion := typeState.version
   603  	typeState.nonce = nonce
   604  	if nackErr == nil {
   605  		typeState.version = version
   606  	}
   607  
   608  	// Update the resource specific state. For all resources received as
   609  	// part of this response that are in state `started` or `requested`,
   610  	// this includes:
   611  	//   - setting the watch state to watchstateReceived
   612  	//   - stopping the expiry timer, if one exists
   613  	for _, name := range names {
   614  		rs, ok := typeState.subscribedResources[name]
   615  		if !ok {
   616  			s.logger.Warningf("ADS stream received a response for resource %q, but no state exists for it", name)
   617  			continue
   618  		}
   619  		if ws := rs.State; ws == resourceWatchStateStarted || ws == resourceWatchStateRequested {
   620  			rs.State = resourceWatchStateReceived
   621  			if rs.ExpiryTimer != nil {
   622  				rs.ExpiryTimer.Stop()
   623  				rs.ExpiryTimer = nil
   624  			}
   625  		}
   626  	}
   627  
   628  	// Send an ACK or NACK.
   629  	subscribedResourceNames := resourceNames(typeState.subscribedResources)
   630  	if nackErr != nil {
   631  		s.logger.Warningf("Sending NACK for resource type: %q, version: %q, nonce: %q, reason: %v", url, version, nonce, nackErr)
   632  		s.sendMessageLocked(stream, subscribedResourceNames, url, previousVersion, nonce, nackErr)
   633  		return
   634  	}
   635  
   636  	if s.logger.V(2) {
   637  		s.logger.Infof("Sending ACK for resource type: %q, version: %q, nonce: %q", url, version, nonce)
   638  	}
   639  	s.sendMessageLocked(stream, subscribedResourceNames, url, version, nonce, nil)
   640  }
   641  
   642  // onError is called when an error occurs on the ADS stream. It stops any
   643  // outstanding resource timers and resets the watch state to started for any
   644  // resources that were in the requested state. It also handles the case where
   645  // the ADS stream was closed after receiving a response, which is not
   646  // considered an error.
   647  func (s *adsStreamImpl) onError(err error, msgReceived bool) {
   648  	// For resources that been requested but not yet responded to by the
   649  	// management server, stop the resource timers and reset the watch state to
   650  	// watchStateStarted. This is because we don't want the expiry timer to be
   651  	// running when we don't have a stream open to the management server.
   652  	s.mu.Lock()
   653  	for _, state := range s.resourceTypeState {
   654  		for _, rs := range state.subscribedResources {
   655  			if rs.State != resourceWatchStateRequested {
   656  				continue
   657  			}
   658  			if rs.ExpiryTimer != nil {
   659  				rs.ExpiryTimer.Stop()
   660  				rs.ExpiryTimer = nil
   661  			}
   662  			rs.State = resourceWatchStateStarted
   663  		}
   664  	}
   665  	s.mu.Unlock()
   666  
   667  	// Note that we do not consider it an error if the ADS stream was closed
   668  	// after having received a response on the stream. This is because there
   669  	// are legitimate reasons why the server may need to close the stream during
   670  	// normal operations, such as needing to rebalance load or the underlying
   671  	// connection hitting its max connection age limit.
   672  	// (see [gRFC A9](https://github.com/grpc/proposal/blob/master/A9-server-side-conn-mgt.md)).
   673  	if msgReceived {
   674  		err = xdsresource.NewError(xdsresource.ErrTypeStreamFailedAfterRecv, err.Error())
   675  	}
   676  
   677  	s.eventHandler.onStreamError(err)
   678  }
   679  
   680  // startWatchTimersLocked starts the expiry timers for the given resource names
   681  // of the specified resource type.  For each resource name, if the resource
   682  // watch state is in the "started" state, it transitions the state to
   683  // "requested" and starts an expiry timer. When the timer expires, the resource
   684  // watch state is set to "timeout" and the event handler callback is called.
   685  //
   686  // The caller must hold the s.mu lock.
   687  func (s *adsStreamImpl) startWatchTimersLocked(typ ResourceType, names []string) {
   688  	typeState := s.resourceTypeState[typ]
   689  	for _, name := range names {
   690  		resourceState, ok := typeState.subscribedResources[name]
   691  		if !ok {
   692  			continue
   693  		}
   694  		if resourceState.State != resourceWatchStateStarted {
   695  			continue
   696  		}
   697  		resourceState.State = resourceWatchStateRequested
   698  
   699  		rs := resourceState
   700  		resourceState.ExpiryTimer = time.AfterFunc(s.watchExpiryTimeout, func() {
   701  			s.mu.Lock()
   702  			rs.State = resourceWatchStateTimeout
   703  			rs.ExpiryTimer = nil
   704  			s.mu.Unlock()
   705  			s.eventHandler.onWatchExpiry(typ, name)
   706  		})
   707  	}
   708  }
   709  
   710  func resourceNames(m map[string]*resourceWatchState) []string {
   711  	ret := make([]string, len(m))
   712  	idx := 0
   713  	for name := range m {
   714  		ret[idx] = name
   715  		idx++
   716  	}
   717  	return ret
   718  }
   719  
// adsFlowControl implements ADS stream level flow control that enables the
// transport to block the reading of the next message off of the stream until
// the previous update is consumed by all watchers.
//
// The lifetime of the flow control is tied to the lifetime of the stream: a
// new instance is created every time a new stream is created.
type adsFlowControl struct {
	logger *igrpclog.PrefixLogger

	// Whether the most recent update is pending consumption by all watchers.
	// Set by setPending() and cleared by onDone().
	pending atomic.Bool
	// Channel used to notify when all the watchers have consumed the most
	// recent update. wait() blocks on reading a value from this channel. It
	// has a capacity of one.
	readyCh chan struct{}
}
   734  
   735  // newADSFlowControl returns a new adsFlowControl.
   736  func newADSFlowControl(logger *igrpclog.PrefixLogger) *adsFlowControl {
   737  	return &adsFlowControl{
   738  		logger:  logger,
   739  		readyCh: make(chan struct{}, 1),
   740  	}
   741  }
   742  
// setPending changes the internal state to indicate that there is an update
// pending consumption by all watchers. While this state is set, wait() blocks
// until onDone() is called or the context expires.
func (fc *adsFlowControl) setPending() {
	fc.pending.Store(true)
}
   748  
// wait blocks until all the watchers have consumed the most recent update and
// returns true. If the context expires before that, it returns false. If no
// update is pending, it returns true immediately.
func (fc *adsFlowControl) wait(ctx context.Context) bool {
	// If there is no pending update, there is no need to block.
	if !fc.pending.Load() {
		// If all watchers finished processing the most recent update before the
		// `recv` goroutine made the next call to `wait()`, there would be an
		// entry in the readyCh channel that needs to be drained to ensure that
		// the next call to `wait()` doesn't unblock before it actually should.
		select {
		case <-fc.readyCh:
		default:
		}
		return true
	}

	// An update is pending: block until onDone() signals completion or the
	// context is done.
	select {
	case <-ctx.Done():
		return false
	case <-fc.readyCh:
		return true
	}
}
   772  
// onDone indicates that all watchers have consumed the most recent update. It
// signals readyCh without blocking and then clears the pending state.
func (fc *adsFlowControl) onDone() {
	select {
	// Writes to the readyCh channel should not block ideally. The default
	// branch here is to appease the paranoid mind.
	case fc.readyCh <- struct{}{}:
	default:
		if fc.logger.V(2) {
			fc.logger.Infof("ADS stream flow control readyCh is full")
		}
	}
	// The pending state is cleared only after readyCh has been signaled.
	fc.pending.Store(false)
}