google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/ads_stream.go (about) 1 /* 2 * 3 * Copyright 2025 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package xdsclient 20 21 import ( 22 "context" 23 "fmt" 24 "sync" 25 "sync/atomic" 26 "time" 27 28 "google.golang.org/grpc/grpclog" 29 igrpclog "google.golang.org/grpc/internal/grpclog" 30 "google.golang.org/grpc/xds/internal/clients" 31 "google.golang.org/grpc/xds/internal/clients/internal/backoff" 32 "google.golang.org/grpc/xds/internal/clients/internal/buffer" 33 "google.golang.org/grpc/xds/internal/clients/internal/pretty" 34 "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" 35 36 "google.golang.org/protobuf/proto" 37 "google.golang.org/protobuf/types/known/anypb" 38 39 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 40 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 41 cpb "google.golang.org/genproto/googleapis/rpc/code" 42 statuspb "google.golang.org/genproto/googleapis/rpc/status" 43 ) 44 45 const ( 46 // Any per-RPC level logs which print complete request or response messages 47 // should be gated at this verbosity level. Other per-RPC level logs which print 48 // terse output should be at `INFO` and verbosity 2. 49 perRPCVerbosityLevel = 9 50 ) 51 52 // response represents a response received on the ADS stream. It contains the 53 // type URL, version, and resources for the response. 54 type response struct { 55 typeURL string 56 version string 57 resources []*anypb.Any 58 } 59 60 // dataAndErrTuple is a struct that holds a resource and an error. It is used to 61 // return a resource and any associated error from a function. 62 type dataAndErrTuple struct { 63 Resource ResourceData 64 Err error 65 } 66 67 // adsStreamEventHandler is an interface that defines the callbacks for events that 68 // occur on the ADS stream. Methods on this interface may be invoked 69 // concurrently and implementations need to handle them in a thread-safe manner. 70 type adsStreamEventHandler interface { 71 onStreamError(error) // Called when the ADS stream breaks. 72 onWatchExpiry(ResourceType, string) // Called when the watch timer expires for a resource. 73 onResponse(response, func()) ([]string, error) // Called when a response is received on the ADS stream. 74 } 75 76 // state corresponding to a resource type. 77 type resourceTypeState struct { 78 version string // Last acked version. Should not be reset when the stream breaks. 79 nonce string // Last received nonce. Should be reset when the stream breaks. 80 bufferedRequests chan struct{} // Channel to buffer requests when writing is blocked. 81 subscribedResources map[string]*xdsresource.ResourceWatchState // Map of subscribed resource names to their state. 82 pendingWrite bool // True if there is a pending write for this resource type. 83 } 84 85 // adsStreamImpl provides the functionality associated with an ADS (Aggregated 86 // Discovery Service) stream on the client side. It manages the lifecycle of the 87 // ADS stream, including creating the stream, sending requests, and handling 88 // responses. It also handles flow control and retries for the stream. 89 type adsStreamImpl struct { 90 // The following fields are initialized from arguments passed to the 91 // constructor and are read-only afterwards, and hence can be accessed 92 // without a mutex. 93 transport clients.Transport // Transport to use for ADS stream. 94 eventHandler adsStreamEventHandler // Callbacks into the xdsChannel. 95 backoff func(int) time.Duration // Backoff for retries, after stream failures. 96 nodeProto *v3corepb.Node // Identifies the gRPC application. 97 watchExpiryTimeout time.Duration // Resource watch expiry timeout 98 logger *igrpclog.PrefixLogger 99 100 // The following fields are initialized in the constructor and are not 101 // written to afterwards, and hence can be accessed without a mutex. 102 streamCh chan clients.Stream // New ADS streams are pushed here. 103 requestCh *buffer.Unbounded // Subscriptions and unsubscriptions are pushed here. 104 runnerDoneCh chan struct{} // Notify completion of runner goroutine. 105 cancel context.CancelFunc // To cancel the context passed to the runner goroutine. 106 107 // Guards access to the below fields (and to the contents of the map). 108 mu sync.Mutex 109 resourceTypeState map[ResourceType]*resourceTypeState // Map of resource types to their state. 110 fc *adsFlowControl // Flow control for ADS stream. 111 firstRequest bool // False after the first request is sent out. 112 } 113 114 // adsStreamOpts contains the options for creating a new ADS Stream. 115 type adsStreamOpts struct { 116 transport clients.Transport // xDS transport to create the stream on. 117 eventHandler adsStreamEventHandler // Callbacks for stream events. 118 backoff func(int) time.Duration // Backoff for retries, after stream failures. 119 nodeProto *v3corepb.Node // Node proto to identify the gRPC application. 120 watchExpiryTimeout time.Duration // Resource watch expiry timeout. 121 logPrefix string // Prefix to be used for log messages. 122 } 123 124 // newADSStreamImpl initializes a new adsStreamImpl instance using the given 125 // parameters. It also launches goroutines responsible for managing reads and 126 // writes for messages of the underlying stream. 127 func newADSStreamImpl(opts adsStreamOpts) *adsStreamImpl { 128 s := &adsStreamImpl{ 129 transport: opts.transport, 130 eventHandler: opts.eventHandler, 131 backoff: opts.backoff, 132 nodeProto: opts.nodeProto, 133 watchExpiryTimeout: opts.watchExpiryTimeout, 134 135 streamCh: make(chan clients.Stream, 1), 136 requestCh: buffer.NewUnbounded(), 137 runnerDoneCh: make(chan struct{}), 138 resourceTypeState: make(map[ResourceType]*resourceTypeState), 139 } 140 141 l := grpclog.Component("xds") 142 s.logger = igrpclog.NewPrefixLogger(l, opts.logPrefix+fmt.Sprintf("[ads-stream %p] ", s)) 143 144 ctx, cancel := context.WithCancel(context.Background()) 145 s.cancel = cancel 146 go s.runner(ctx) 147 return s 148 } 149 150 // Stop blocks until the stream is closed and all spawned goroutines exit. 151 func (s *adsStreamImpl) Stop() { 152 s.cancel() 153 s.requestCh.Close() 154 <-s.runnerDoneCh 155 s.logger.Infof("Shutdown ADS stream") 156 } 157 158 // subscribe subscribes to the given resource. It is assumed that multiple 159 // subscriptions for the same resource is deduped at the caller. A discovery 160 // request is sent out on the underlying stream for the resource type when there 161 // is sufficient flow control quota. 162 func (s *adsStreamImpl) subscribe(typ ResourceType, name string) { 163 if s.logger.V(2) { 164 s.logger.Infof("Subscribing to resource %q of type %q", name, typ.TypeName) 165 } 166 167 s.mu.Lock() 168 defer s.mu.Unlock() 169 170 state, ok := s.resourceTypeState[typ] 171 if !ok { 172 // An entry in the type state map is created as part of the first 173 // subscription request for this type. 174 state = &resourceTypeState{ 175 subscribedResources: make(map[string]*xdsresource.ResourceWatchState), 176 bufferedRequests: make(chan struct{}, 1), 177 } 178 s.resourceTypeState[typ] = state 179 } 180 181 // Create state for the newly subscribed resource. The watch timer will 182 // be started when a request for this resource is actually sent out. 183 state.subscribedResources[name] = &xdsresource.ResourceWatchState{State: xdsresource.ResourceWatchStateStarted} 184 state.pendingWrite = true 185 186 // Send a request for the resource type with updated subscriptions. 187 s.requestCh.Put(typ) 188 } 189 190 // Unsubscribe cancels the subscription to the given resource. It is a no-op if 191 // the given resource does not exist. The watch expiry timer associated with the 192 // resource is stopped if one is active. A discovery request is sent out on the 193 // stream for the resource type when there is sufficient flow control quota. 194 func (s *adsStreamImpl) Unsubscribe(typ ResourceType, name string) { 195 if s.logger.V(2) { 196 s.logger.Infof("Unsubscribing to resource %q of type %q", name, typ.TypeName) 197 } 198 199 s.mu.Lock() 200 defer s.mu.Unlock() 201 202 state, ok := s.resourceTypeState[typ] 203 if !ok { 204 return 205 } 206 207 rs, ok := state.subscribedResources[name] 208 if !ok { 209 return 210 } 211 if rs.ExpiryTimer != nil { 212 rs.ExpiryTimer.Stop() 213 } 214 delete(state.subscribedResources, name) 215 state.pendingWrite = true 216 217 // Send a request for the resource type with updated subscriptions. 218 s.requestCh.Put(typ) 219 } 220 221 // runner is a long-running goroutine that handles the lifecycle of the ADS 222 // stream. It spwans another goroutine to handle writes of discovery request 223 // messages on the stream. Whenever an existing stream fails, it performs 224 // exponential backoff (if no messages were received on that stream) before 225 // creating a new stream. 226 func (s *adsStreamImpl) runner(ctx context.Context) { 227 defer close(s.runnerDoneCh) 228 229 go s.send(ctx) 230 231 runStreamWithBackoff := func() error { 232 stream, err := s.transport.NewStream(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources") 233 if err != nil { 234 s.logger.Warningf("Failed to create a new ADS streaming RPC: %v", err) 235 s.onError(err, false) 236 return nil 237 } 238 if s.logger.V(2) { 239 s.logger.Infof("ADS stream created") 240 } 241 242 s.mu.Lock() 243 // Flow control is a property of the underlying streaming RPC call and 244 // needs to be initialized everytime a new one is created. 245 s.fc = newADSFlowControl(s.logger) 246 s.firstRequest = true 247 s.mu.Unlock() 248 249 // Ensure that the most recently created stream is pushed on the 250 // channel for the `send` goroutine to consume. 251 select { 252 case <-s.streamCh: 253 default: 254 } 255 s.streamCh <- stream 256 257 // Backoff state is reset upon successful receipt of at least one 258 // message from the server. 259 if s.recv(ctx, stream) { 260 return backoff.ErrResetBackoff 261 } 262 return nil 263 } 264 backoff.RunF(ctx, runStreamWithBackoff, s.backoff) 265 } 266 267 // send is a long running goroutine that handles sending discovery requests for 268 // two scenarios: 269 // - a new subscription or unsubscription request is received 270 // - a new stream is created after the previous one failed 271 func (s *adsStreamImpl) send(ctx context.Context) { 272 // Stores the most recent stream instance received on streamCh. 273 var stream clients.Stream 274 for { 275 select { 276 case <-ctx.Done(): 277 return 278 case stream = <-s.streamCh: 279 if err := s.sendExisting(stream); err != nil { 280 // Send failed, clear the current stream. Attempt to resend will 281 // only be made after a new stream is created. 282 stream = nil 283 continue 284 } 285 case req, ok := <-s.requestCh.Get(): 286 if !ok { 287 return 288 } 289 s.requestCh.Load() 290 291 typ := req.(ResourceType) 292 if err := s.sendNew(stream, typ); err != nil { 293 stream = nil 294 continue 295 } 296 } 297 } 298 } 299 300 // sendNew attempts to send a discovery request based on a new subscription or 301 // unsubscription. If there is no flow control quota, the request is buffered 302 // and will be sent later. This method also starts the watch expiry timer for 303 // resources that were sent in the request for the first time, i.e. their watch 304 // state is `watchStateStarted`. 305 func (s *adsStreamImpl) sendNew(stream clients.Stream, typ ResourceType) error { 306 s.mu.Lock() 307 defer s.mu.Unlock() 308 309 // If there's no stream yet, skip the request. This request will be resent 310 // when a new stream is created. If no stream is created, the watcher will 311 // timeout (same as server not sending response back). 312 if stream == nil { 313 return nil 314 } 315 316 // If local processing of the most recently received response is not yet 317 // complete, i.e. fc.pending == true, queue this write and return early. 318 // This allows us to batch writes for requests which are generated as part 319 // of local processing of a received response. 320 state := s.resourceTypeState[typ] 321 if s.fc.pending.Load() { 322 select { 323 case state.bufferedRequests <- struct{}{}: 324 default: 325 } 326 return nil 327 } 328 329 return s.sendMessageIfWritePendingLocked(stream, typ, state) 330 } 331 332 // sendExisting sends out discovery requests for existing resources when 333 // recovering from a broken stream. 334 // 335 // The stream argument is guaranteed to be non-nil. 336 func (s *adsStreamImpl) sendExisting(stream clients.Stream) error { 337 s.mu.Lock() 338 defer s.mu.Unlock() 339 340 for typ, state := range s.resourceTypeState { 341 // Reset only the nonces map when the stream restarts. 342 // 343 // xDS spec says the following. See section: 344 // https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#ack-nack-and-resource-type-instance-version 345 // 346 // Note that the version for a resource type is not a property of an 347 // individual xDS stream but rather a property of the resources 348 // themselves. If the stream becomes broken and the client creates a new 349 // stream, the client’s initial request on the new stream should 350 // indicate the most recent version seen by the client on the previous 351 // stream 352 state.nonce = "" 353 354 if len(state.subscribedResources) == 0 { 355 continue 356 } 357 358 state.pendingWrite = true 359 if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil { 360 return err 361 } 362 } 363 return nil 364 } 365 366 // sendBuffered sends out discovery requests for resources that were buffered 367 // when they were subscribed to, because local processing of the previously 368 // received response was not yet complete. 369 // 370 // The stream argument is guaranteed to be non-nil. 371 func (s *adsStreamImpl) sendBuffered(stream clients.Stream) error { 372 s.mu.Lock() 373 defer s.mu.Unlock() 374 375 for typ, state := range s.resourceTypeState { 376 select { 377 case <-state.bufferedRequests: 378 if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil { 379 return err 380 } 381 default: 382 // No buffered request. 383 continue 384 } 385 } 386 return nil 387 } 388 389 // sendMessageIfWritePendingLocked attempts to sends a discovery request to the 390 // server, if there is a pending write for the given resource type. 391 // 392 // If the request is successfully sent, the pending write field is cleared and 393 // watch timers are started for the resources in the request. 394 // 395 // Caller needs to hold c.mu. 396 func (s *adsStreamImpl) sendMessageIfWritePendingLocked(stream clients.Stream, typ ResourceType, state *resourceTypeState) error { 397 if !state.pendingWrite { 398 if s.logger.V(2) { 399 s.logger.Infof("Skipping sending request for type %q, because all subscribed resources were already sent", typ.TypeURL) 400 } 401 return nil 402 } 403 404 names := resourceNames(state.subscribedResources) 405 if err := s.sendMessageLocked(stream, names, typ.TypeURL, state.version, state.nonce, nil); err != nil { 406 return err 407 } 408 state.pendingWrite = false 409 410 // Drain the buffered requests channel because we just sent a request for this 411 // resource type. 412 select { 413 case <-state.bufferedRequests: 414 default: 415 } 416 417 s.startWatchTimersLocked(typ, names) 418 return nil 419 } 420 421 // sendMessageLocked sends a discovery request to the server, populating the 422 // different fields of the message with the given parameters. Returns a non-nil 423 // error if the request could not be sent. 424 // 425 // Caller needs to hold c.mu. 426 func (s *adsStreamImpl) sendMessageLocked(stream clients.Stream, names []string, url, version, nonce string, nackErr error) error { 427 req := &v3discoverypb.DiscoveryRequest{ 428 ResourceNames: names, 429 TypeUrl: url, 430 VersionInfo: version, 431 ResponseNonce: nonce, 432 } 433 434 // The xDS protocol only requires that we send the node proto in the first 435 // discovery request on every stream. Sending the node proto in every 436 // request wastes CPU resources on the client and the server. 437 if s.firstRequest { 438 req.Node = s.nodeProto 439 } 440 441 if nackErr != nil { 442 req.ErrorDetail = &statuspb.Status{ 443 Code: int32(cpb.Code_INVALID_ARGUMENT), Message: nackErr.Error(), 444 } 445 } 446 447 msg, err := proto.Marshal(req) 448 if err != nil { 449 s.logger.Warningf("Failed to marshal DiscoveryRequest: %v", err) 450 return err 451 } 452 if err := stream.Send(msg); err != nil { 453 s.logger.Warningf("Sending ADS request for type %q, resources: %v, version: %q, nonce: %q failed: %v", url, names, version, nonce, err) 454 return err 455 } 456 s.firstRequest = false 457 458 if s.logger.V(perRPCVerbosityLevel) { 459 s.logger.Infof("ADS request sent: %v", pretty.ToJSON(req)) 460 } else if s.logger.V(2) { 461 s.logger.Warningf("ADS request sent for type %q, resources: %v, version: %q, nonce: %q", url, names, version, nonce) 462 } 463 return nil 464 } 465 466 // recv is responsible for receiving messages from the ADS stream. 467 // 468 // It performs the following actions: 469 // - Waits for local flow control to be available before sending buffered 470 // requests, if any. 471 // - Receives a message from the ADS stream. If an error is encountered here, 472 // it is handled by the onError method which propagates the error to all 473 // watchers. 474 // - Invokes the event handler's OnADSResponse method to process the message. 475 // - Sends an ACK or NACK to the server based on the response. 476 // 477 // It returns a boolean indicating whether at least one message was received 478 // from the server. 479 func (s *adsStreamImpl) recv(ctx context.Context, stream clients.Stream) bool { 480 msgReceived := false 481 for { 482 // Wait for ADS stream level flow control to be available, and send out 483 // a request if anything was buffered while we were waiting for local 484 // processing of the previous response to complete. 485 if !s.fc.wait(ctx) { 486 if s.logger.V(2) { 487 s.logger.Infof("ADS stream context canceled") 488 } 489 return msgReceived 490 } 491 s.sendBuffered(stream) 492 493 resources, url, version, nonce, err := s.recvMessage(stream) 494 if err != nil { 495 s.onError(err, msgReceived) 496 s.logger.Warningf("ADS stream closed: %v", err) 497 return msgReceived 498 } 499 msgReceived = true 500 501 // Invoke the onResponse event handler to parse the incoming message and 502 // decide whether to send an ACK or NACK. 503 resp := response{ 504 resources: resources, 505 typeURL: url, 506 version: version, 507 } 508 var resourceNames []string 509 var nackErr error 510 s.fc.setPending() 511 resourceNames, nackErr = s.eventHandler.onResponse(resp, s.fc.onDone) 512 if xdsresource.ErrType(nackErr) == xdsresource.ErrorTypeResourceTypeUnsupported { 513 // A general guiding principle is that if the server sends 514 // something the client didn't actually subscribe to, then the 515 // client ignores it. Here, we have received a response with 516 // resources of a type that we don't know about. 517 // 518 // Sending a NACK doesn't really seem appropriate here, since we're 519 // not actually validating what the server sent and therefore don't 520 // know that it's invalid. But we shouldn't ACK either, because we 521 // don't know that it is valid. 522 s.logger.Warningf("%v", nackErr) 523 continue 524 } 525 526 s.onRecv(stream, resourceNames, url, version, nonce, nackErr) 527 } 528 } 529 530 func (s *adsStreamImpl) recvMessage(stream clients.Stream) (resources []*anypb.Any, url, version, nonce string, err error) { 531 r, err := stream.Recv() 532 if err != nil { 533 return nil, "", "", "", err 534 } 535 var resp v3discoverypb.DiscoveryResponse 536 if err := proto.Unmarshal(r, &resp); err != nil { 537 s.logger.Infof("Failed to unmarshal response to DiscoveryResponse: %v", err) 538 return nil, "", "", "", fmt.Errorf("unexpected message type %T", r) 539 } 540 if s.logger.V(perRPCVerbosityLevel) { 541 s.logger.Infof("ADS response received: %v", pretty.ToJSON(&resp)) 542 } else if s.logger.V(2) { 543 s.logger.Infof("ADS response received for type %q, version %q, nonce %q", resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce()) 544 } 545 return resp.GetResources(), resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce(), nil 546 } 547 548 // onRecv is invoked when a response is received from the server. The arguments 549 // passed to this method correspond to the most recently received response. 550 // 551 // It performs the following actions: 552 // - updates resource type specific state 553 // - updates resource specific state for resources in the response 554 // - sends an ACK or NACK to the server based on the response 555 func (s *adsStreamImpl) onRecv(stream clients.Stream, names []string, url, version, nonce string, nackErr error) { 556 s.mu.Lock() 557 defer s.mu.Unlock() 558 559 // Lookup the resource type specific state based on the type URL. 560 var typ ResourceType 561 for t := range s.resourceTypeState { 562 if t.TypeURL == url { 563 typ = t 564 break 565 } 566 } 567 typeState, ok := s.resourceTypeState[typ] 568 if !ok { 569 s.logger.Warningf("ADS stream received a response for type %q, but no state exists for it", url) 570 return 571 } 572 573 // Update the resource type specific state. This includes: 574 // - updating the nonce unconditionally 575 // - updating the version only if the response is to be ACKed 576 previousVersion := typeState.version 577 typeState.nonce = nonce 578 if nackErr == nil { 579 typeState.version = version 580 } 581 582 // Update the resource specific state. For all resources received as 583 // part of this response that are in state `started` or `requested`, 584 // this includes: 585 // - setting the watch state to watchstateReceived 586 // - stopping the expiry timer, if one exists 587 for _, name := range names { 588 rs, ok := typeState.subscribedResources[name] 589 if !ok { 590 s.logger.Warningf("ADS stream received a response for resource %q, but no state exists for it", name) 591 continue 592 } 593 if ws := rs.State; ws == xdsresource.ResourceWatchStateStarted || ws == xdsresource.ResourceWatchStateRequested { 594 rs.State = xdsresource.ResourceWatchStateReceived 595 if rs.ExpiryTimer != nil { 596 rs.ExpiryTimer.Stop() 597 rs.ExpiryTimer = nil 598 } 599 } 600 } 601 602 // Send an ACK or NACK. 603 subscribedResourceNames := resourceNames(typeState.subscribedResources) 604 if nackErr != nil { 605 s.logger.Warningf("Sending NACK for resource type: %q, version: %q, nonce: %q, reason: %v", url, version, nonce, nackErr) 606 s.sendMessageLocked(stream, subscribedResourceNames, url, previousVersion, nonce, nackErr) 607 return 608 } 609 610 if s.logger.V(2) { 611 s.logger.Infof("Sending ACK for resource type: %q, version: %q, nonce: %q", url, version, nonce) 612 } 613 s.sendMessageLocked(stream, subscribedResourceNames, url, version, nonce, nil) 614 } 615 616 // onError is called when an error occurs on the ADS stream. It stops any 617 // outstanding resource timers and resets the watch state to started for any 618 // resources that were in the requested state. It also handles the case where 619 // the ADS stream was closed after receiving a response, which is not 620 // considered an error. 621 func (s *adsStreamImpl) onError(err error, msgReceived bool) { 622 // For resources that been requested but not yet responded to by the 623 // management server, stop the resource timers and reset the watch state to 624 // watchStateStarted. This is because we don't want the expiry timer to be 625 // running when we don't have a stream open to the management server. 626 s.mu.Lock() 627 for _, state := range s.resourceTypeState { 628 for _, rs := range state.subscribedResources { 629 if rs.State != xdsresource.ResourceWatchStateRequested { 630 continue 631 } 632 if rs.ExpiryTimer != nil { 633 rs.ExpiryTimer.Stop() 634 rs.ExpiryTimer = nil 635 } 636 rs.State = xdsresource.ResourceWatchStateStarted 637 } 638 } 639 s.mu.Unlock() 640 641 // Note that we do not consider it an error if the ADS stream was closed 642 // after having received a response on the stream. This is because there 643 // are legitimate reasons why the server may need to close the stream during 644 // normal operations, such as needing to rebalance load or the underlying 645 // connection hitting its max connection age limit. 646 // (see [gRFC A9](https://github.com/grpc/proposal/blob/master/A9-server-side-conn-mgt.md)). 647 if msgReceived { 648 err = xdsresource.NewError(xdsresource.ErrTypeStreamFailedAfterRecv, err.Error()) 649 } 650 651 s.eventHandler.onStreamError(err) 652 } 653 654 // startWatchTimersLocked starts the expiry timers for the given resource names 655 // of the specified resource type. For each resource name, if the resource 656 // watch state is in the "started" state, it transitions the state to 657 // "requested" and starts an expiry timer. When the timer expires, the resource 658 // watch state is set to "timeout" and the event handler callback is called. 659 // 660 // The caller must hold the s.mu lock. 661 func (s *adsStreamImpl) startWatchTimersLocked(typ ResourceType, names []string) { 662 typeState := s.resourceTypeState[typ] 663 for _, name := range names { 664 resourceState, ok := typeState.subscribedResources[name] 665 if !ok { 666 continue 667 } 668 if resourceState.State != xdsresource.ResourceWatchStateStarted { 669 continue 670 } 671 resourceState.State = xdsresource.ResourceWatchStateRequested 672 673 rs := resourceState 674 resourceState.ExpiryTimer = time.AfterFunc(s.watchExpiryTimeout, func() { 675 s.mu.Lock() 676 rs.State = xdsresource.ResourceWatchStateTimeout 677 rs.ExpiryTimer = nil 678 s.mu.Unlock() 679 s.eventHandler.onWatchExpiry(typ, name) 680 }) 681 } 682 } 683 684 func (s *adsStreamImpl) adsResourceWatchStateForTesting(rType ResourceType, resourceName string) (xdsresource.ResourceWatchState, error) { 685 s.mu.Lock() 686 defer s.mu.Unlock() 687 688 state, ok := s.resourceTypeState[rType] 689 if !ok { 690 return xdsresource.ResourceWatchState{}, fmt.Errorf("unknown resource type: %v", rType) 691 } 692 resourceState, ok := state.subscribedResources[resourceName] 693 if !ok { 694 return xdsresource.ResourceWatchState{}, fmt.Errorf("unknown resource name: %v", resourceName) 695 } 696 return *resourceState, nil 697 } 698 699 func resourceNames(m map[string]*xdsresource.ResourceWatchState) []string { 700 ret := make([]string, len(m)) 701 idx := 0 702 for name := range m { 703 ret[idx] = name 704 idx++ 705 } 706 return ret 707 } 708 709 // adsFlowControl implements ADS stream level flow control that enables the 710 // transport to block the reading of the next message off of the stream until 711 // the previous update is consumed by all watchers. 712 // 713 // The lifetime of the flow control is tied to the lifetime of the stream. 714 type adsFlowControl struct { 715 logger *igrpclog.PrefixLogger 716 717 // Whether the most recent update is pending consumption by all watchers. 718 pending atomic.Bool 719 // Channel used to notify when all the watchers have consumed the most 720 // recent update. Wait() blocks on reading a value from this channel. 721 readyCh chan struct{} 722 } 723 724 // newADSFlowControl returns a new adsFlowControl. 725 func newADSFlowControl(logger *igrpclog.PrefixLogger) *adsFlowControl { 726 return &adsFlowControl{ 727 logger: logger, 728 readyCh: make(chan struct{}, 1), 729 } 730 } 731 732 // setPending changes the internal state to indicate that there is an update 733 // pending consumption by all watchers. 734 func (fc *adsFlowControl) setPending() { 735 fc.pending.Store(true) 736 } 737 738 // wait blocks until all the watchers have consumed the most recent update and 739 // returns true. If the context expires before that, it returns false. 740 func (fc *adsFlowControl) wait(ctx context.Context) bool { 741 // If there is no pending update, there is no need to block. 742 if !fc.pending.Load() { 743 // If all watchers finished processing the most recent update before the 744 // `recv` goroutine made the next call to `Wait()`, there would be an 745 // entry in the readyCh channel that needs to be drained to ensure that 746 // the next call to `Wait()` doesn't unblock before it actually should. 747 select { 748 case <-fc.readyCh: 749 default: 750 } 751 return true 752 } 753 754 select { 755 case <-ctx.Done(): 756 return false 757 case <-fc.readyCh: 758 return true 759 } 760 } 761 762 // onDone indicates that all watchers have consumed the most recent update. 763 func (fc *adsFlowControl) onDone() { 764 select { 765 // Writes to the readyCh channel should not block ideally. The default 766 // branch here is to appease the paranoid mind. 767 case fc.readyCh <- struct{}{}: 768 default: 769 if fc.logger.V(2) { 770 fc.logger.Infof("ADS stream flow control readyCh is full") 771 } 772 } 773 fc.pending.Store(false) 774 }