google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/test/ads_stream_flow_control_test.go (about) 1 /* 2 * 3 * Copyright 2024 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package xdsclient_test 20 21 import ( 22 "context" 23 "errors" 24 "fmt" 25 "slices" 26 "sort" 27 "testing" 28 "time" 29 30 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 31 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 32 "github.com/google/uuid" 33 "google.golang.org/grpc" 34 "google.golang.org/grpc/credentials/insecure" 35 "google.golang.org/grpc/internal/testutils/xds/e2e" 36 "google.golang.org/grpc/xds/internal/clients" 37 "google.golang.org/grpc/xds/internal/clients/xdsclient" 38 "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" 39 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" 40 ) 41 42 // blockingListenerWatcher implements xdsresource.ListenerWatcher. It writes to 43 // a channel when it receives a callback from the watch. It also makes the 44 // DoneNotifier passed to the callback available to the test, thereby enabling 45 // the test to block this watcher for as long as required. 46 type blockingListenerWatcher struct { 47 doneNotifierCh chan func() // DoneNotifier passed to the callback. 48 updateCh chan struct{} // Written to when an update is received. 49 ambientErrCh chan struct{} // Written to when an ambient error is received. 50 resourceErrCh chan struct{} // Written to when a resource error is received. 51 } 52 53 func newBLockingListenerWatcher() *blockingListenerWatcher { 54 return &blockingListenerWatcher{ 55 doneNotifierCh: make(chan func(), 1), 56 updateCh: make(chan struct{}, 1), 57 ambientErrCh: make(chan struct{}, 1), 58 resourceErrCh: make(chan struct{}, 1), 59 } 60 } 61 62 func (lw *blockingListenerWatcher) ResourceChanged(_ xdsclient.ResourceData, done func()) { 63 // Notify receipt of the update. 64 select { 65 case lw.updateCh <- struct{}{}: 66 default: 67 } 68 69 select { 70 case lw.doneNotifierCh <- done: 71 default: 72 } 73 } 74 75 func (lw *blockingListenerWatcher) ResourceError(_ error, done func()) { 76 // Notify receipt of an error. 77 select { 78 case lw.resourceErrCh <- struct{}{}: 79 default: 80 } 81 82 select { 83 case lw.doneNotifierCh <- done: 84 default: 85 } 86 } 87 88 func (lw *blockingListenerWatcher) AmbientError(_ error, done func()) { 89 // Notify receipt of an error. 90 select { 91 case lw.ambientErrCh <- struct{}{}: 92 default: 93 } 94 95 select { 96 case lw.doneNotifierCh <- done: 97 default: 98 } 99 } 100 101 type transportBuilder struct { 102 adsStreamCh chan *stream 103 } 104 105 func (b *transportBuilder) Build(si clients.ServerIdentifier) (clients.Transport, error) { 106 cc, err := grpc.NewClient(si.ServerURI, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultCallOptions(grpc.ForceCodec(&byteCodec{}))) 107 if err != nil { 108 return nil, err 109 } 110 111 return &transport{cc: cc, adsStreamCh: b.adsStreamCh}, nil 112 } 113 114 type transport struct { 115 cc *grpc.ClientConn 116 adsStreamCh chan *stream 117 } 118 119 func (t *transport) NewStream(ctx context.Context, method string) (clients.Stream, error) { 120 s, err := t.cc.NewStream(ctx, &grpc.StreamDesc{ClientStreams: true, ServerStreams: true}, method) 121 if err != nil { 122 return nil, err 123 } 124 125 stream := &stream{ 126 stream: s, 127 recvCh: make(chan struct{}, 1), 128 doneCh: make(chan struct{}), 129 } 130 t.adsStreamCh <- stream 131 132 return stream, nil 133 } 134 135 func (t *transport) Close() { 136 t.cc.Close() 137 } 138 139 type stream struct { 140 stream grpc.ClientStream 141 142 recvCh chan struct{} 143 doneCh <-chan struct{} 144 } 145 146 func (s *stream) Send(msg []byte) error { 147 return s.stream.SendMsg(msg) 148 } 149 150 func (s *stream) Recv() ([]byte, error) { 151 select { 152 case s.recvCh <- struct{}{}: 153 case <-s.doneCh: 154 return nil, errors.New("Recv() called after the test has finished") 155 } 156 157 var typedRes []byte 158 if err := s.stream.RecvMsg(&typedRes); err != nil { 159 return nil, err 160 } 161 return typedRes, nil 162 } 163 164 type byteCodec struct{} 165 166 func (c *byteCodec) Marshal(v any) ([]byte, error) { 167 if b, ok := v.([]byte); ok { 168 return b, nil 169 } 170 return nil, fmt.Errorf("transport: message is %T, but must be a []byte", v) 171 } 172 173 func (c *byteCodec) Unmarshal(data []byte, v any) error { 174 if b, ok := v.(*[]byte); ok { 175 *b = data 176 return nil 177 } 178 return fmt.Errorf("transport: target is %T, but must be *[]byte", v) 179 } 180 181 func (c *byteCodec) Name() string { 182 return "transport.byteCodec" 183 } 184 185 // Tests ADS stream level flow control with a single resource. The test does the 186 // following: 187 // - Starts a management server and configures a listener resource on it. 188 // - Creates an xDS client to the above management server, starts a couple of 189 // listener watchers for the above resource, and verifies that the update 190 // reaches these watchers. 191 // - These watchers don't invoke the onDone callback until explicitly 192 // triggered by the test. This allows the test to verify that the next 193 // Recv() call on the ADS stream does not happen until both watchers have 194 // completely processed the update, i.e invoke the onDone callback. 195 // - Resource is updated on the management server, and the test verifies that 196 // the update reaches the watchers. 197 func (s) TestADSFlowControl_ResourceUpdates_SingleResource(t *testing.T) { 198 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 199 defer cancel() 200 201 // Start an xDS management server. 202 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) 203 204 nodeID := uuid.New().String() 205 206 // Create an xDS client pointing to the above server with a test transport 207 // that allow monitoring the underlying stream through adsStreamCh. 208 adsStreamCh := make(chan *stream, 1) 209 client := createXDSClient(t, mgmtServer.Address, nodeID, &transportBuilder{adsStreamCh: adsStreamCh}) 210 211 // Configure two watchers for the same listener resource. 212 const listenerResourceName = "test-listener-resource" 213 const routeConfigurationName = "test-route-configuration-resource" 214 watcher1 := newBLockingListenerWatcher() 215 cancel1 := client.WatchResource(xdsresource.V3ListenerURL, listenerResourceName, watcher1) 216 defer cancel1() 217 watcher2 := newBLockingListenerWatcher() 218 cancel2 := client.WatchResource(xdsresource.V3ListenerURL, listenerResourceName, watcher2) 219 defer cancel2() 220 221 // Wait for the ADS stream to be created. 222 var adsStream *stream 223 select { 224 case adsStream = <-adsStreamCh: 225 case <-ctx.Done(): 226 t.Fatalf("Timed out waiting for ADS stream to be created") 227 } 228 229 // Configure the listener resource on the management server. 230 resources := e2e.UpdateOptions{ 231 NodeID: nodeID, 232 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, 233 SkipValidation: true, 234 } 235 if err := mgmtServer.Update(ctx, resources); err != nil { 236 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 237 } 238 239 // Ensure that there is a read on the stream. 240 select { 241 case <-adsStream.recvCh: 242 case <-ctx.Done(): 243 t.Fatalf("Timed out waiting for ADS stream to be read from") 244 } 245 246 // Wait for the update to reach the watchers. 247 select { 248 case <-watcher1.updateCh: 249 case <-ctx.Done(): 250 t.Fatalf("Timed out waiting for update to reach watcher 1") 251 } 252 select { 253 case <-watcher2.updateCh: 254 case <-ctx.Done(): 255 t.Fatalf("Timed out waiting for update to reach watcher 2") 256 } 257 258 // Update the listener resource on the management server to point to a new 259 // route configuration resource. 260 resources = e2e.UpdateOptions{ 261 NodeID: nodeID, 262 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, "new-route")}, 263 SkipValidation: true, 264 } 265 if err := mgmtServer.Update(ctx, resources); err != nil { 266 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 267 } 268 269 // Unblock one watcher. 270 onDone := <-watcher1.doneNotifierCh 271 onDone() 272 273 // Wait for a short duration and ensure that there is no read on the stream. 274 select { 275 case <-adsStream.recvCh: 276 t.Fatal("Recv() called on the ADS stream before all watchers have processed the previous update") 277 case <-time.After(defaultTestShortTimeout): 278 } 279 280 // Unblock the second watcher. 281 onDone = <-watcher2.doneNotifierCh 282 onDone() 283 284 // Ensure that there is a read on the stream, now that the previous update 285 // has been consumed by all watchers. 286 select { 287 case <-adsStream.recvCh: 288 case <-ctx.Done(): 289 t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream after all watchers have processed the previous update") 290 } 291 292 // Wait for the new update to reach the watchers. 293 select { 294 case <-watcher1.updateCh: 295 case <-ctx.Done(): 296 t.Fatalf("Timed out waiting for update to reach watcher 1") 297 } 298 select { 299 case <-watcher2.updateCh: 300 case <-ctx.Done(): 301 t.Fatalf("Timed out waiting for update to reach watcher 2") 302 } 303 304 // At this point, the xDS client is shut down (and the associated transport 305 // is closed) without the watchers invoking their respective onDone 306 // callbacks. This verifies that the closing a transport that has pending 307 // watchers does not block. 308 } 309 310 // Tests ADS stream level flow control with a multiple resources. The test does 311 // the following: 312 // - Starts a management server and configures two listener resources on it. 313 // - Creates an xDS client to the above management server, starts a couple of 314 // listener watchers for the two resources, and verifies that the update 315 // reaches these watchers. 316 // - These watchers don't invoke the onDone callback until explicitly 317 // triggered by the test. This allows the test to verify that the next 318 // Recv() call on the ADS stream does not happen until both watchers have 319 // completely processed the update, i.e invoke the onDone callback. 320 func (s) TestADSFlowControl_ResourceUpdates_MultipleResources(t *testing.T) { 321 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 322 defer cancel() 323 324 // Start an xDS management server. 325 const listenerResourceName1 = "test-listener-resource-1" 326 const listenerResourceName2 = "test-listener-resource-2" 327 wantResourceNames := []string{listenerResourceName1, listenerResourceName2} 328 requestCh := make(chan struct{}, 1) 329 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 330 OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { 331 if req.GetTypeUrl() != version.V3ListenerURL { 332 return nil 333 } 334 gotResourceNames := req.GetResourceNames() 335 sort.Slice(gotResourceNames, func(i, j int) bool { return req.ResourceNames[i] < req.ResourceNames[j] }) 336 if slices.Equal(gotResourceNames, wantResourceNames) { 337 // The two resource names will be part of the initial request 338 // and also the ACK. Hence, we need to make this write 339 // non-blocking. 340 select { 341 case requestCh <- struct{}{}: 342 default: 343 } 344 } 345 return nil 346 }, 347 }) 348 349 nodeID := uuid.New().String() 350 351 // Create an xDS client pointing to the above server with a test transport 352 // that allow monitoring the underlying stream through adsStreamCh. 353 adsStreamCh := make(chan *stream, 1) 354 client := createXDSClient(t, mgmtServer.Address, nodeID, &transportBuilder{adsStreamCh: adsStreamCh}) 355 356 // Configure two watchers for two different listener resources. 357 const routeConfigurationName1 = "test-route-configuration-resource-1" 358 watcher1 := newBLockingListenerWatcher() 359 cancel1 := client.WatchResource(xdsresource.V3ListenerURL, listenerResourceName1, watcher1) 360 defer cancel1() 361 const routeConfigurationName2 = "test-route-configuration-resource-2" 362 watcher2 := newBLockingListenerWatcher() 363 cancel2 := client.WatchResource(xdsresource.V3ListenerURL, listenerResourceName2, watcher2) 364 defer cancel2() 365 366 // Wait for the wrapped ADS stream to be created. 367 var adsStream *stream 368 select { 369 case adsStream = <-adsStreamCh: 370 case <-ctx.Done(): 371 t.Fatalf("Timed out waiting for ADS stream to be created") 372 } 373 374 // Ensure that there is a read on the stream. 375 select { 376 case <-adsStream.recvCh: 377 case <-ctx.Done(): 378 t.Fatalf("Timed out waiting for ADS stream to be read from") 379 } 380 381 // Wait for both resource names to be requested. 382 select { 383 case <-requestCh: 384 case <-ctx.Done(): 385 t.Fatal("Timed out waiting for both resource names to be requested") 386 } 387 388 // Configure the listener resources on the management server. 389 resources := e2e.UpdateOptions{ 390 NodeID: nodeID, 391 Listeners: []*v3listenerpb.Listener{ 392 e2e.DefaultClientListener(listenerResourceName1, routeConfigurationName1), 393 e2e.DefaultClientListener(listenerResourceName2, routeConfigurationName2), 394 }, 395 SkipValidation: true, 396 } 397 if err := mgmtServer.Update(ctx, resources); err != nil { 398 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 399 } 400 401 // At this point, we expect the management server to send both resources in 402 // the same response. So, both watchers would be notified at the same time, 403 // and no more Recv() calls should happen until both of them have invoked 404 // their respective onDone() callbacks. 405 406 // The order of callback invocations among the two watchers is not 407 // guaranteed. So, we select on both of them and unblock the first watcher 408 // whose callback is invoked. 409 var otherWatcherUpdateCh chan struct{} 410 var otherWatcherDoneCh chan func() 411 select { 412 case <-watcher1.updateCh: 413 onDone := <-watcher1.doneNotifierCh 414 onDone() 415 otherWatcherUpdateCh = watcher2.updateCh 416 otherWatcherDoneCh = watcher2.doneNotifierCh 417 case <-watcher2.updateCh: 418 onDone := <-watcher2.doneNotifierCh 419 onDone() 420 otherWatcherUpdateCh = watcher1.updateCh 421 otherWatcherDoneCh = watcher1.doneNotifierCh 422 case <-ctx.Done(): 423 t.Fatal("Timed out waiting for update to reach first watchers") 424 } 425 426 // Wait for a short duration and ensure that there is no read on the stream. 427 select { 428 case <-adsStream.recvCh: 429 t.Fatal("Recv() called on the ADS stream before all watchers have processed the previous update") 430 case <-time.After(defaultTestShortTimeout): 431 } 432 433 // Wait for the update on the second watcher and unblock it. 434 select { 435 case <-otherWatcherUpdateCh: 436 onDone := <-otherWatcherDoneCh 437 onDone() 438 case <-ctx.Done(): 439 t.Fatal("Timed out waiting for update to reach second watcher") 440 } 441 442 // Ensure that there is a read on the stream, now that the previous update 443 // has been consumed by all watchers. 444 select { 445 case <-adsStream.recvCh: 446 case <-ctx.Done(): 447 t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream after all watchers have processed the previous update") 448 } 449 } 450 451 // Test ADS stream flow control with a single resource that is expected to be 452 // NACKed by the xDS client and the watcher's ResourceError() callback is 453 // expected to be invoked because resource is not cached. Verifies that no 454 // further reads are attempted until the error is completely processed by the 455 // watcher. 456 func (s) TestADSFlowControl_ResourceErrors(t *testing.T) { 457 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 458 defer cancel() 459 460 // Start an xDS management server. 461 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) 462 463 nodeID := uuid.New().String() 464 465 // Create an xDS client pointing to the above server with a test transport 466 // that allow monitoring the underlying stream through adsStreamCh. 467 adsStreamCh := make(chan *stream, 1) 468 client := createXDSClient(t, mgmtServer.Address, nodeID, &transportBuilder{adsStreamCh: adsStreamCh}) 469 470 // Configure a watcher for a listener resource. 471 const listenerResourceName = "test-listener-resource" 472 watcher := newBLockingListenerWatcher() 473 cancel = client.WatchResource(xdsresource.V3ListenerURL, listenerResourceName, watcher) 474 defer cancel() 475 476 // Wait for the stream to be created. 477 var adsStream *stream 478 select { 479 case adsStream = <-adsStreamCh: 480 case <-ctx.Done(): 481 t.Fatalf("Timed out waiting for ADS stream to be created") 482 } 483 484 // Configure the management server to return a single listener resource 485 // which is expected to be NACKed by the client. 486 resources := e2e.UpdateOptions{ 487 NodeID: nodeID, 488 Listeners: []*v3listenerpb.Listener{badListenerResource(t, listenerResourceName)}, 489 SkipValidation: true, 490 } 491 if err := mgmtServer.Update(ctx, resources); err != nil { 492 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 493 } 494 495 // Ensure that there is a read on the stream. 496 select { 497 case <-adsStream.recvCh: 498 case <-ctx.Done(): 499 t.Fatalf("Timed out waiting for ADS stream to be read from") 500 } 501 502 // Wait for the resource error to reach the watcher. 503 select { 504 case <-watcher.resourceErrCh: 505 case <-ctx.Done(): 506 t.Fatalf("Timed out waiting for error to reach watcher") 507 } 508 509 // Wait for a short duration and ensure that there is no read on the stream. 510 select { 511 case <-adsStream.recvCh: 512 t.Fatal("Recv() called on the ADS stream before all watchers have processed the previous update") 513 case <-time.After(defaultTestShortTimeout): 514 } 515 516 // Unblock one watcher. 517 onDone := <-watcher.doneNotifierCh 518 onDone() 519 520 // Ensure that there is a read on the stream, now that the previous error 521 // has been consumed by the watcher. 522 select { 523 case <-adsStream.recvCh: 524 case <-ctx.Done(): 525 t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream after all watchers have processed the previous update") 526 } 527 } 528 529 // Test ADS stream flow control with a single resource that is deleted from the 530 // management server and therefore the watcher's ResourceError() 531 // callback is expected to be invoked. Verifies that no further reads are 532 // attempted until the callback is completely handled by the watcher. 533 func (s) TestADSFlowControl_ResourceDoesNotExist(t *testing.T) { 534 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 535 defer cancel() 536 537 // Start an xDS management server. 538 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) 539 540 nodeID := uuid.New().String() 541 542 // Create an xDS client pointing to the above server with a test transport 543 // that allow monitoring the underlying stream through adsStreamCh. 544 adsStreamCh := make(chan *stream, 1) 545 client := createXDSClient(t, mgmtServer.Address, nodeID, &transportBuilder{adsStreamCh: adsStreamCh}) 546 547 // Configure a watcher for a listener resource. 548 const listenerResourceName = "test-listener-resource" 549 const routeConfigurationName = "test-route-configuration-resource" 550 watcher := newBLockingListenerWatcher() 551 cancel = client.WatchResource(xdsresource.V3ListenerURL, listenerResourceName, watcher) 552 defer cancel() 553 554 // Wait for the ADS stream to be created. 555 var adsStream *stream 556 select { 557 case adsStream = <-adsStreamCh: 558 case <-ctx.Done(): 559 t.Fatalf("Timed out waiting for ADS stream to be created") 560 } 561 562 // Configure the listener resource on the management server. 563 resources := e2e.UpdateOptions{ 564 NodeID: nodeID, 565 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, 566 SkipValidation: true, 567 } 568 if err := mgmtServer.Update(ctx, resources); err != nil { 569 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 570 } 571 572 // Ensure that there is a read on the stream. 573 select { 574 case <-adsStream.recvCh: 575 case <-ctx.Done(): 576 t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream") 577 } 578 579 // Wait for the update to reach the watcher and unblock it. 580 select { 581 case <-watcher.updateCh: 582 onDone := <-watcher.doneNotifierCh 583 onDone() 584 case <-ctx.Done(): 585 t.Fatalf("Timed out waiting for update to reach watcher 1") 586 } 587 588 // Ensure that there is a read on the stream. 589 select { 590 case <-adsStream.recvCh: 591 case <-ctx.Done(): 592 t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream") 593 } 594 595 // Remove the listener resource on the management server. 596 resources = e2e.UpdateOptions{ 597 NodeID: nodeID, 598 Listeners: []*v3listenerpb.Listener{}, 599 SkipValidation: true, 600 } 601 if err := mgmtServer.Update(ctx, resources); err != nil { 602 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 603 } 604 605 // Wait for the resource not found callback to be invoked. 606 select { 607 case <-watcher.resourceErrCh: 608 case <-ctx.Done(): 609 t.Fatalf("Timed out waiting for resource not found callback to be invoked on the watcher") 610 } 611 612 // Wait for a short duration and ensure that there is no read on the stream. 613 select { 614 case <-adsStream.recvCh: 615 t.Fatal("Recv() called on the ADS stream before all watchers have processed the previous update") 616 case <-time.After(defaultTestShortTimeout): 617 } 618 619 // Unblock the watcher. 620 onDone := <-watcher.doneNotifierCh 621 onDone() 622 623 // Ensure that there is a read on the stream. 624 select { 625 case <-adsStream.recvCh: 626 case <-ctx.Done(): 627 t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream") 628 } 629 }