google.golang.org/grpc@v1.72.2/xds/internal/xdsclient/transport/ads/ads_stream.go (about) 1 /* 2 * 3 * Copyright 2024 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 // Package ads provides the implementation of an ADS (Aggregated Discovery 19 // Service) stream for the xDS client. 20 package ads 21 22 import ( 23 "context" 24 "fmt" 25 "sync" 26 "sync/atomic" 27 "time" 28 29 "google.golang.org/grpc/codes" 30 "google.golang.org/grpc/grpclog" 31 "google.golang.org/grpc/internal/backoff" 32 "google.golang.org/grpc/internal/buffer" 33 igrpclog "google.golang.org/grpc/internal/grpclog" 34 "google.golang.org/grpc/internal/pretty" 35 "google.golang.org/grpc/xds/internal/xdsclient/transport" 36 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" 37 "google.golang.org/protobuf/types/known/anypb" 38 39 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 40 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 41 statuspb "google.golang.org/genproto/googleapis/rpc/status" 42 ) 43 44 // Any per-RPC level logs which print complete request or response messages 45 // should be gated at this verbosity level. Other per-RPC level logs which print 46 // terse output should be at `INFO` and verbosity 2. 47 const perRPCVerbosityLevel = 9 48 49 // Response represents a response received on the ADS stream. It contains the 50 // type URL, version, and resources for the response. 51 type Response struct { 52 TypeURL string 53 Version string 54 Resources []*anypb.Any 55 } 56 57 // DataAndErrTuple is a struct that holds a resource and an error. It is used to 58 // return a resource and any associated error from a function. 59 type DataAndErrTuple struct { 60 Resource xdsresource.ResourceData 61 Err error 62 } 63 64 // StreamEventHandler is an interface that defines the callbacks for events that 65 // occur on the ADS stream. Methods on this interface may be invoked 66 // concurrently and implementations need to handle them in a thread-safe manner. 67 type StreamEventHandler interface { 68 OnADSStreamError(error) // Called when the ADS stream breaks. 69 OnADSWatchExpiry(xdsresource.Type, string) // Called when the watch timer expires for a resource. 70 OnADSResponse(Response, func()) ([]string, error) // Called when a response is received on the ADS stream. 71 } 72 73 // WatchState is a enum that describes the watch state of a particular 74 // resource. 75 type WatchState int 76 77 const ( 78 // ResourceWatchStateStarted is the state where a watch for a resource was 79 // started, but a request asking for that resource is yet to be sent to the 80 // management server. 81 ResourceWatchStateStarted WatchState = iota 82 // ResourceWatchStateRequested is the state when a request has been sent for 83 // the resource being watched. 84 ResourceWatchStateRequested 85 // ResourceWatchStateReceived is the state when a response has been received 86 // for the resource being watched. 87 ResourceWatchStateReceived 88 // ResourceWatchStateTimeout is the state when the watch timer associated 89 // with the resource expired because no response was received. 90 ResourceWatchStateTimeout 91 ) 92 93 // ResourceWatchState is the state corresponding to a resource being watched. 94 type ResourceWatchState struct { 95 State WatchState // Watch state of the resource. 96 ExpiryTimer *time.Timer // Timer for the expiry of the watch. 97 } 98 99 // State corresponding to a resource type. 100 type resourceTypeState struct { 101 version string // Last acked version. Should not be reset when the stream breaks. 102 nonce string // Last received nonce. Should be reset when the stream breaks. 103 bufferedRequests chan struct{} // Channel to buffer requests when writing is blocked. 104 subscribedResources map[string]*ResourceWatchState // Map of subscribed resource names to their state. 105 pendingWrite bool // True if there is a pending write for this resource type. 106 } 107 108 // StreamImpl provides the functionality associated with an ADS (Aggregated 109 // Discovery Service) stream on the client side. It manages the lifecycle of the 110 // ADS stream, including creating the stream, sending requests, and handling 111 // responses. It also handles flow control and retries for the stream. 112 type StreamImpl struct { 113 // The following fields are initialized from arguments passed to the 114 // constructor and are read-only afterwards, and hence can be accessed 115 // without a mutex. 116 transport transport.Transport // Transport to use for ADS stream. 117 eventHandler StreamEventHandler // Callbacks into the xdsChannel. 118 backoff func(int) time.Duration // Backoff for retries, after stream failures. 119 nodeProto *v3corepb.Node // Identifies the gRPC application. 120 watchExpiryTimeout time.Duration // Resource watch expiry timeout 121 logger *igrpclog.PrefixLogger 122 123 // The following fields are initialized in the constructor and are not 124 // written to afterwards, and hence can be accessed without a mutex. 125 streamCh chan transport.StreamingCall // New ADS streams are pushed here. 126 requestCh *buffer.Unbounded // Subscriptions and unsubscriptions are pushed here. 127 runnerDoneCh chan struct{} // Notify completion of runner goroutine. 128 cancel context.CancelFunc // To cancel the context passed to the runner goroutine. 129 130 // Guards access to the below fields (and to the contents of the map). 131 mu sync.Mutex 132 resourceTypeState map[xdsresource.Type]*resourceTypeState // Map of resource types to their state. 133 fc *adsFlowControl // Flow control for ADS stream. 134 firstRequest bool // False after the first request is sent out. 135 } 136 137 // StreamOpts contains the options for creating a new ADS Stream. 138 type StreamOpts struct { 139 Transport transport.Transport // xDS transport to create the stream on. 140 EventHandler StreamEventHandler // Callbacks for stream events. 141 Backoff func(int) time.Duration // Backoff for retries, after stream failures. 142 NodeProto *v3corepb.Node // Node proto to identify the gRPC application. 143 WatchExpiryTimeout time.Duration // Resource watch expiry timeout. 144 LogPrefix string // Prefix to be used for log messages. 145 } 146 147 // NewStreamImpl initializes a new StreamImpl instance using the given 148 // parameters. It also launches goroutines responsible for managing reads and 149 // writes for messages of the underlying stream. 150 func NewStreamImpl(opts StreamOpts) *StreamImpl { 151 s := &StreamImpl{ 152 transport: opts.Transport, 153 eventHandler: opts.EventHandler, 154 backoff: opts.Backoff, 155 nodeProto: opts.NodeProto, 156 watchExpiryTimeout: opts.WatchExpiryTimeout, 157 158 streamCh: make(chan transport.StreamingCall, 1), 159 requestCh: buffer.NewUnbounded(), 160 runnerDoneCh: make(chan struct{}), 161 resourceTypeState: make(map[xdsresource.Type]*resourceTypeState), 162 } 163 164 l := grpclog.Component("xds") 165 s.logger = igrpclog.NewPrefixLogger(l, opts.LogPrefix+fmt.Sprintf("[ads-stream %p] ", s)) 166 167 ctx, cancel := context.WithCancel(context.Background()) 168 s.cancel = cancel 169 go s.runner(ctx) 170 return s 171 } 172 173 // Stop blocks until the stream is closed and all spawned goroutines exit. 174 func (s *StreamImpl) Stop() { 175 s.cancel() 176 s.requestCh.Close() 177 <-s.runnerDoneCh 178 s.logger.Infof("Stopping ADS stream") 179 } 180 181 // Subscribe subscribes to the given resource. It is assumed that multiple 182 // subscriptions for the same resource is deduped at the caller. A discovery 183 // request is sent out on the underlying stream for the resource type when there 184 // is sufficient flow control quota. 185 func (s *StreamImpl) Subscribe(typ xdsresource.Type, name string) { 186 if s.logger.V(2) { 187 s.logger.Infof("Subscribing to resource %q of type %q", name, typ.TypeName()) 188 } 189 190 s.mu.Lock() 191 defer s.mu.Unlock() 192 193 state, ok := s.resourceTypeState[typ] 194 if !ok { 195 // An entry in the type state map is created as part of the first 196 // subscription request for this type. 197 state = &resourceTypeState{ 198 subscribedResources: make(map[string]*ResourceWatchState), 199 bufferedRequests: make(chan struct{}, 1), 200 } 201 s.resourceTypeState[typ] = state 202 } 203 204 // Create state for the newly subscribed resource. The watch timer will 205 // be started when a request for this resource is actually sent out. 206 state.subscribedResources[name] = &ResourceWatchState{State: ResourceWatchStateStarted} 207 state.pendingWrite = true 208 209 // Send a request for the resource type with updated subscriptions. 210 s.requestCh.Put(typ) 211 } 212 213 // Unsubscribe cancels the subscription to the given resource. It is a no-op if 214 // the given resource does not exist. The watch expiry timer associated with the 215 // resource is stopped if one is active. A discovery request is sent out on the 216 // stream for the resource type when there is sufficient flow control quota. 217 func (s *StreamImpl) Unsubscribe(typ xdsresource.Type, name string) { 218 if s.logger.V(2) { 219 s.logger.Infof("Unsubscribing to resource %q of type %q", name, typ.TypeName()) 220 } 221 222 s.mu.Lock() 223 defer s.mu.Unlock() 224 225 state, ok := s.resourceTypeState[typ] 226 if !ok { 227 return 228 } 229 230 rs, ok := state.subscribedResources[name] 231 if !ok { 232 return 233 } 234 if rs.ExpiryTimer != nil { 235 rs.ExpiryTimer.Stop() 236 } 237 delete(state.subscribedResources, name) 238 state.pendingWrite = true 239 240 // Send a request for the resource type with updated subscriptions. 241 s.requestCh.Put(typ) 242 } 243 244 // runner is a long-running goroutine that handles the lifecycle of the ADS 245 // stream. It spwans another goroutine to handle writes of discovery request 246 // messages on the stream. Whenever an existing stream fails, it performs 247 // exponential backoff (if no messages were received on that stream) before 248 // creating a new stream. 249 func (s *StreamImpl) runner(ctx context.Context) { 250 defer close(s.runnerDoneCh) 251 252 go s.send(ctx) 253 254 runStreamWithBackoff := func() error { 255 stream, err := s.transport.CreateStreamingCall(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources") 256 if err != nil { 257 s.logger.Warningf("Failed to create a new ADS streaming RPC: %v", err) 258 s.onError(err, false) 259 return nil 260 } 261 if s.logger.V(2) { 262 s.logger.Infof("ADS stream created") 263 } 264 265 s.mu.Lock() 266 // Flow control is a property of the underlying streaming RPC call and 267 // needs to be initialized everytime a new one is created. 268 s.fc = newADSFlowControl(s.logger) 269 s.firstRequest = true 270 s.mu.Unlock() 271 272 // Ensure that the most recently created stream is pushed on the 273 // channel for the `send` goroutine to consume. 274 select { 275 case <-s.streamCh: 276 default: 277 } 278 s.streamCh <- stream 279 280 // Backoff state is reset upon successful receipt of at least one 281 // message from the server. 282 if s.recv(ctx, stream) { 283 return backoff.ErrResetBackoff 284 } 285 return nil 286 } 287 backoff.RunF(ctx, runStreamWithBackoff, s.backoff) 288 } 289 290 // send is a long running goroutine that handles sending discovery requests for 291 // two scenarios: 292 // - a new subscription or unsubscription request is received 293 // - a new stream is created after the previous one failed 294 func (s *StreamImpl) send(ctx context.Context) { 295 // Stores the most recent stream instance received on streamCh. 296 var stream transport.StreamingCall 297 for { 298 select { 299 case <-ctx.Done(): 300 return 301 case stream = <-s.streamCh: 302 if err := s.sendExisting(stream); err != nil { 303 // Send failed, clear the current stream. Attempt to resend will 304 // only be made after a new stream is created. 305 stream = nil 306 continue 307 } 308 case req, ok := <-s.requestCh.Get(): 309 if !ok { 310 return 311 } 312 s.requestCh.Load() 313 314 typ := req.(xdsresource.Type) 315 if err := s.sendNew(stream, typ); err != nil { 316 stream = nil 317 continue 318 } 319 } 320 } 321 } 322 323 // sendNew attempts to send a discovery request based on a new subscription or 324 // unsubscription. If there is no flow control quota, the request is buffered 325 // and will be sent later. This method also starts the watch expiry timer for 326 // resources that were sent in the request for the first time, i.e. their watch 327 // state is `watchStateStarted`. 328 func (s *StreamImpl) sendNew(stream transport.StreamingCall, typ xdsresource.Type) error { 329 s.mu.Lock() 330 defer s.mu.Unlock() 331 332 // If there's no stream yet, skip the request. This request will be resent 333 // when a new stream is created. If no stream is created, the watcher will 334 // timeout (same as server not sending response back). 335 if stream == nil { 336 return nil 337 } 338 339 // If local processing of the most recently received response is not yet 340 // complete, i.e. fc.pending == true, queue this write and return early. 341 // This allows us to batch writes for requests which are generated as part 342 // of local processing of a received response. 343 state := s.resourceTypeState[typ] 344 if s.fc.pending.Load() { 345 select { 346 case state.bufferedRequests <- struct{}{}: 347 default: 348 } 349 return nil 350 } 351 352 return s.sendMessageIfWritePendingLocked(stream, typ, state) 353 } 354 355 // sendExisting sends out discovery requests for existing resources when 356 // recovering from a broken stream. 357 // 358 // The stream argument is guaranteed to be non-nil. 359 func (s *StreamImpl) sendExisting(stream transport.StreamingCall) error { 360 s.mu.Lock() 361 defer s.mu.Unlock() 362 363 for typ, state := range s.resourceTypeState { 364 // Reset only the nonces map when the stream restarts. 365 // 366 // xDS spec says the following. See section: 367 // https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#ack-nack-and-resource-type-instance-version 368 // 369 // Note that the version for a resource type is not a property of an 370 // individual xDS stream but rather a property of the resources 371 // themselves. If the stream becomes broken and the client creates a new 372 // stream, the client’s initial request on the new stream should 373 // indicate the most recent version seen by the client on the previous 374 // stream 375 state.nonce = "" 376 377 if len(state.subscribedResources) == 0 { 378 continue 379 } 380 381 state.pendingWrite = true 382 if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil { 383 return err 384 } 385 } 386 return nil 387 } 388 389 // sendBuffered sends out discovery requests for resources that were buffered 390 // when they were subscribed to, because local processing of the previously 391 // received response was not yet complete. 392 // 393 // The stream argument is guaranteed to be non-nil. 394 func (s *StreamImpl) sendBuffered(stream transport.StreamingCall) error { 395 s.mu.Lock() 396 defer s.mu.Unlock() 397 398 for typ, state := range s.resourceTypeState { 399 select { 400 case <-state.bufferedRequests: 401 if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil { 402 return err 403 } 404 default: 405 // No buffered request. 406 continue 407 } 408 } 409 return nil 410 } 411 412 // sendMessageIfWritePendingLocked attempts to sends a discovery request to the 413 // server, if there is a pending write for the given resource type. 414 // 415 // If the request is successfully sent, the pending write field is cleared and 416 // watch timers are started for the resources in the request. 417 // 418 // Caller needs to hold c.mu. 419 func (s *StreamImpl) sendMessageIfWritePendingLocked(stream transport.StreamingCall, typ xdsresource.Type, state *resourceTypeState) error { 420 if !state.pendingWrite { 421 if s.logger.V(2) { 422 s.logger.Infof("Skipping sending request for type %q, because all subscribed resources were already sent", typ.TypeURL()) 423 } 424 return nil 425 } 426 427 names := resourceNames(state.subscribedResources) 428 if err := s.sendMessageLocked(stream, names, typ.TypeURL(), state.version, state.nonce, nil); err != nil { 429 return err 430 } 431 state.pendingWrite = false 432 433 // Drain the buffered requests channel because we just sent a request for this 434 // resource type. 435 select { 436 case <-state.bufferedRequests: 437 default: 438 } 439 440 s.startWatchTimersLocked(typ, names) 441 return nil 442 } 443 444 // sendMessageLocked sends a discovery request to the server, populating the 445 // different fields of the message with the given parameters. Returns a non-nil 446 // error if the request could not be sent. 447 // 448 // Caller needs to hold c.mu. 449 func (s *StreamImpl) sendMessageLocked(stream transport.StreamingCall, names []string, url, version, nonce string, nackErr error) error { 450 req := &v3discoverypb.DiscoveryRequest{ 451 ResourceNames: names, 452 TypeUrl: url, 453 VersionInfo: version, 454 ResponseNonce: nonce, 455 } 456 457 // The xDS protocol only requires that we send the node proto in the first 458 // discovery request on every stream. Sending the node proto in every 459 // request wastes CPU resources on the client and the server. 460 if s.firstRequest { 461 req.Node = s.nodeProto 462 } 463 464 if nackErr != nil { 465 req.ErrorDetail = &statuspb.Status{ 466 Code: int32(codes.InvalidArgument), Message: nackErr.Error(), 467 } 468 } 469 470 if err := stream.Send(req); err != nil { 471 s.logger.Warningf("Sending ADS request for type %q, resources: %v, version: %q, nonce: %q failed: %v", url, names, version, nonce, err) 472 return err 473 } 474 s.firstRequest = false 475 476 if s.logger.V(perRPCVerbosityLevel) { 477 s.logger.Infof("ADS request sent: %v", pretty.ToJSON(req)) 478 } else if s.logger.V(2) { 479 s.logger.Warningf("ADS request sent for type %q, resources: %v, version: %q, nonce: %q", url, names, version, nonce) 480 } 481 return nil 482 } 483 484 // recv is responsible for receiving messages from the ADS stream. 485 // 486 // It performs the following actions: 487 // - Waits for local flow control to be available before sending buffered 488 // requests, if any. 489 // - Receives a message from the ADS stream. If an error is encountered here, 490 // it is handled by the onError method which propagates the error to all 491 // watchers. 492 // - Invokes the event handler's OnADSResponse method to process the message. 493 // - Sends an ACK or NACK to the server based on the response. 494 // 495 // It returns a boolean indicating whether at least one message was received 496 // from the server. 497 func (s *StreamImpl) recv(ctx context.Context, stream transport.StreamingCall) bool { 498 msgReceived := false 499 for { 500 // Wait for ADS stream level flow control to be available, and send out 501 // a request if anything was buffered while we were waiting for local 502 // processing of the previous response to complete. 503 if !s.fc.wait(ctx) { 504 if s.logger.V(2) { 505 s.logger.Infof("ADS stream context canceled") 506 } 507 return msgReceived 508 } 509 s.sendBuffered(stream) 510 511 resources, url, version, nonce, err := s.recvMessage(stream) 512 if err != nil { 513 s.onError(err, msgReceived) 514 s.logger.Warningf("ADS stream closed: %v", err) 515 return msgReceived 516 } 517 msgReceived = true 518 519 // Invoke the onResponse event handler to parse the incoming message and 520 // decide whether to send an ACK or NACK. 521 resp := Response{ 522 Resources: resources, 523 TypeURL: url, 524 Version: version, 525 } 526 var resourceNames []string 527 var nackErr error 528 s.fc.setPending() 529 resourceNames, nackErr = s.eventHandler.OnADSResponse(resp, s.fc.onDone) 530 if xdsresource.ErrType(nackErr) == xdsresource.ErrorTypeResourceTypeUnsupported { 531 // Based on gRFC A27, a general guiding principle is that if the 532 // server sends something the client didn't actually subscribe to, 533 // then the client ignores it. Here, we have received a response 534 // with resources of a type that we don't know about. 535 // 536 // Sending a NACK doesn't really seem appropriate here, since we're 537 // not actually validating what the server sent and therefore don't 538 // know that it's invalid. But we shouldn't ACK either, because we 539 // don't know that it is valid. 540 s.logger.Warningf("%v", nackErr) 541 continue 542 } 543 544 s.onRecv(stream, resourceNames, url, version, nonce, nackErr) 545 } 546 } 547 548 func (s *StreamImpl) recvMessage(stream transport.StreamingCall) (resources []*anypb.Any, url, version, nonce string, err error) { 549 r, err := stream.Recv() 550 if err != nil { 551 return nil, "", "", "", err 552 } 553 resp, ok := r.(*v3discoverypb.DiscoveryResponse) 554 if !ok { 555 s.logger.Infof("Message received on ADS stream of unexpected type: %T", r) 556 return nil, "", "", "", fmt.Errorf("unexpected message type %T", r) 557 } 558 559 if s.logger.V(perRPCVerbosityLevel) { 560 s.logger.Infof("ADS response received: %v", pretty.ToJSON(resp)) 561 } else if s.logger.V(2) { 562 s.logger.Infof("ADS response received for type %q, version %q, nonce %q", resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce()) 563 } 564 return resp.GetResources(), resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce(), nil 565 } 566 567 // onRecv is invoked when a response is received from the server. The arguments 568 // passed to this method correspond to the most recently received response. 569 // 570 // It performs the following actions: 571 // - updates resource type specific state 572 // - updates resource specific state for resources in the response 573 // - sends an ACK or NACK to the server based on the response 574 func (s *StreamImpl) onRecv(stream transport.StreamingCall, names []string, url, version, nonce string, nackErr error) { 575 s.mu.Lock() 576 defer s.mu.Unlock() 577 578 // Lookup the resource type specific state based on the type URL. 579 var typ xdsresource.Type 580 for t := range s.resourceTypeState { 581 if t.TypeURL() == url { 582 typ = t 583 break 584 } 585 } 586 typeState, ok := s.resourceTypeState[typ] 587 if !ok { 588 s.logger.Warningf("ADS stream received a response for type %q, but no state exists for it", url) 589 return 590 } 591 592 // Update the resource type specific state. This includes: 593 // - updating the nonce unconditionally 594 // - updating the version only if the response is to be ACKed 595 previousVersion := typeState.version 596 typeState.nonce = nonce 597 if nackErr == nil { 598 typeState.version = version 599 } 600 601 // Update the resource specific state. For all resources received as 602 // part of this response that are in state `started` or `requested`, 603 // this includes: 604 // - setting the watch state to watchstateReceived 605 // - stopping the expiry timer, if one exists 606 for _, name := range names { 607 rs, ok := typeState.subscribedResources[name] 608 if !ok { 609 s.logger.Warningf("ADS stream received a response for resource %q, but no state exists for it", name) 610 continue 611 } 612 if ws := rs.State; ws == ResourceWatchStateStarted || ws == ResourceWatchStateRequested { 613 rs.State = ResourceWatchStateReceived 614 if rs.ExpiryTimer != nil { 615 rs.ExpiryTimer.Stop() 616 rs.ExpiryTimer = nil 617 } 618 } 619 } 620 621 // Send an ACK or NACK. 622 subscribedResourceNames := resourceNames(typeState.subscribedResources) 623 if nackErr != nil { 624 s.logger.Warningf("Sending NACK for resource type: %q, version: %q, nonce: %q, reason: %v", url, version, nonce, nackErr) 625 s.sendMessageLocked(stream, subscribedResourceNames, url, previousVersion, nonce, nackErr) 626 return 627 } 628 629 if s.logger.V(2) { 630 s.logger.Infof("Sending ACK for resource type: %q, version: %q, nonce: %q", url, version, nonce) 631 } 632 s.sendMessageLocked(stream, subscribedResourceNames, url, version, nonce, nil) 633 } 634 635 // onError is called when an error occurs on the ADS stream. It stops any 636 // outstanding resource timers and resets the watch state to started for any 637 // resources that were in the requested state. It also handles the case where 638 // the ADS stream was closed after receiving a response, which is not 639 // considered an error. 640 func (s *StreamImpl) onError(err error, msgReceived bool) { 641 // For resources that been requested but not yet responded to by the 642 // management server, stop the resource timers and reset the watch state to 643 // watchStateStarted. This is because we don't want the expiry timer to be 644 // running when we don't have a stream open to the management server. 645 s.mu.Lock() 646 for _, state := range s.resourceTypeState { 647 for _, rs := range state.subscribedResources { 648 if rs.State != ResourceWatchStateRequested { 649 continue 650 } 651 if rs.ExpiryTimer != nil { 652 rs.ExpiryTimer.Stop() 653 rs.ExpiryTimer = nil 654 } 655 rs.State = ResourceWatchStateStarted 656 } 657 } 658 s.mu.Unlock() 659 660 // Note that we do not consider it an error if the ADS stream was closed 661 // after having received a response on the stream. This is because there 662 // are legitimate reasons why the server may need to close the stream during 663 // normal operations, such as needing to rebalance load or the underlying 664 // connection hitting its max connection age limit. 665 // (see [gRFC A9](https://github.com/grpc/proposal/blob/master/A9-server-side-conn-mgt.md)). 666 if msgReceived { 667 err = xdsresource.NewError(xdsresource.ErrTypeStreamFailedAfterRecv, err.Error()) 668 } 669 670 s.eventHandler.OnADSStreamError(err) 671 } 672 673 // startWatchTimersLocked starts the expiry timers for the given resource names 674 // of the specified resource type. For each resource name, if the resource 675 // watch state is in the "started" state, it transitions the state to 676 // "requested" and starts an expiry timer. When the timer expires, the resource 677 // watch state is set to "timeout" and the event handler callback is called. 678 // 679 // The caller must hold the s.mu lock. 680 func (s *StreamImpl) startWatchTimersLocked(typ xdsresource.Type, names []string) { 681 typeState := s.resourceTypeState[typ] 682 for _, name := range names { 683 resourceState, ok := typeState.subscribedResources[name] 684 if !ok { 685 continue 686 } 687 if resourceState.State != ResourceWatchStateStarted { 688 continue 689 } 690 resourceState.State = ResourceWatchStateRequested 691 692 rs := resourceState 693 resourceState.ExpiryTimer = time.AfterFunc(s.watchExpiryTimeout, func() { 694 s.mu.Lock() 695 rs.State = ResourceWatchStateTimeout 696 rs.ExpiryTimer = nil 697 s.mu.Unlock() 698 s.eventHandler.OnADSWatchExpiry(typ, name) 699 }) 700 } 701 } 702 703 func resourceNames(m map[string]*ResourceWatchState) []string { 704 ret := make([]string, len(m)) 705 idx := 0 706 for name := range m { 707 ret[idx] = name 708 idx++ 709 } 710 return ret 711 } 712 713 // TriggerResourceNotFoundForTesting triggers a resource not found event for the 714 // given resource type and name. This is intended for testing purposes only, to 715 // simulate a resource not found scenario. 716 func (s *StreamImpl) TriggerResourceNotFoundForTesting(typ xdsresource.Type, resourceName string) { 717 s.mu.Lock() 718 719 state, ok := s.resourceTypeState[typ] 720 if !ok { 721 s.mu.Unlock() 722 return 723 } 724 resourceState, ok := state.subscribedResources[resourceName] 725 if !ok { 726 s.mu.Unlock() 727 return 728 } 729 730 if s.logger.V(2) { 731 s.logger.Infof("Triggering resource not found for type: %s, resource name: %s", typ.TypeName(), resourceName) 732 } 733 resourceState.State = ResourceWatchStateTimeout 734 if resourceState.ExpiryTimer != nil { 735 resourceState.ExpiryTimer.Stop() 736 resourceState.ExpiryTimer = nil 737 } 738 s.mu.Unlock() 739 go s.eventHandler.OnADSWatchExpiry(typ, resourceName) 740 } 741 742 // ResourceWatchStateForTesting returns the ResourceWatchState for the given 743 // resource type and name. This is intended for testing purposes only, to 744 // inspect the internal state of the ADS stream. 745 func (s *StreamImpl) ResourceWatchStateForTesting(typ xdsresource.Type, resourceName string) (ResourceWatchState, error) { 746 s.mu.Lock() 747 defer s.mu.Unlock() 748 749 state, ok := s.resourceTypeState[typ] 750 if !ok { 751 return ResourceWatchState{}, fmt.Errorf("unknown resource type: %v", typ) 752 } 753 resourceState, ok := state.subscribedResources[resourceName] 754 if !ok { 755 return ResourceWatchState{}, fmt.Errorf("unknown resource name: %v", resourceName) 756 } 757 return *resourceState, nil 758 } 759 760 // adsFlowControl implements ADS stream level flow control that enables the 761 // transport to block the reading of the next message off of the stream until 762 // the previous update is consumed by all watchers. 763 // 764 // The lifetime of the flow control is tied to the lifetime of the stream. 765 type adsFlowControl struct { 766 logger *igrpclog.PrefixLogger 767 768 // Whether the most recent update is pending consumption by all watchers. 769 pending atomic.Bool 770 // Channel used to notify when all the watchers have consumed the most 771 // recent update. Wait() blocks on reading a value from this channel. 772 readyCh chan struct{} 773 } 774 775 // newADSFlowControl returns a new adsFlowControl. 776 func newADSFlowControl(logger *igrpclog.PrefixLogger) *adsFlowControl { 777 return &adsFlowControl{ 778 logger: logger, 779 readyCh: make(chan struct{}, 1), 780 } 781 } 782 783 // setPending changes the internal state to indicate that there is an update 784 // pending consumption by all watchers. 785 func (fc *adsFlowControl) setPending() { 786 fc.pending.Store(true) 787 } 788 789 // wait blocks until all the watchers have consumed the most recent update and 790 // returns true. If the context expires before that, it returns false. 791 func (fc *adsFlowControl) wait(ctx context.Context) bool { 792 // If there is no pending update, there is no need to block. 793 if !fc.pending.Load() { 794 // If all watchers finished processing the most recent update before the 795 // `recv` goroutine made the next call to `Wait()`, there would be an 796 // entry in the readyCh channel that needs to be drained to ensure that 797 // the next call to `Wait()` doesn't unblock before it actually should. 798 select { 799 case <-fc.readyCh: 800 default: 801 } 802 return true 803 } 804 805 select { 806 case <-ctx.Done(): 807 return false 808 case <-fc.readyCh: 809 return true 810 } 811 } 812 813 // onDone indicates that all watchers have consumed the most recent update. 814 func (fc *adsFlowControl) onDone() { 815 select { 816 // Writes to the readyCh channel should not block ideally. The default 817 // branch here is to appease the paranoid mind. 818 case fc.readyCh <- struct{}{}: 819 default: 820 if fc.logger.V(2) { 821 fc.logger.Infof("ADS stream flow control readyCh is full") 822 } 823 } 824 fc.pending.Store(false) 825 }