google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/test/ads_stream_backoff_test.go (about) 1 /* 2 * 3 * Copyright 2024 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package xdsclient_test 20 21 import ( 22 "context" 23 "errors" 24 "fmt" 25 "net" 26 "testing" 27 "time" 28 29 "google.golang.org/grpc/credentials/insecure" 30 "google.golang.org/grpc/xds/internal/clients/grpctransport" 31 "google.golang.org/grpc/xds/internal/clients/internal/testutils" 32 "google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e" 33 "google.golang.org/grpc/xds/internal/clients/xdsclient" 34 xdsclientinternal "google.golang.org/grpc/xds/internal/clients/xdsclient/internal" 35 "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" 36 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" 37 "google.golang.org/protobuf/testing/protocmp" 38 39 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 40 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 41 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 42 "github.com/google/go-cmp/cmp" 43 "github.com/google/go-cmp/cmp/cmpopts" 44 "github.com/google/uuid" 45 ) 46 47 func overrideStreamBackOff(t *testing.T, streamBackOff func(int) time.Duration) { 48 originalStreamBackoff := xdsclientinternal.StreamBackoff 49 xdsclientinternal.StreamBackoff = streamBackOff 50 t.Cleanup(func() { xdsclientinternal.StreamBackoff = originalStreamBackoff }) 51 } 52 53 // Creates an xDS client with the given management server address, nodeID and backoff function. 54 func createXDSClientWithBackoff(t *testing.T, mgmtServerAddress string, nodeID string, streamBackoff func(int) time.Duration) *xdsclient.XDSClient { 55 t.Helper() 56 overrideStreamBackOff(t, streamBackoff) 57 configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}} 58 return createXDSClient(t, mgmtServerAddress, nodeID, grpctransport.NewBuilder(configs)) 59 } 60 61 // Tests the case where the management server returns an error in the ADS 62 // streaming RPC. Verifies that the ADS stream is restarted after a backoff 63 // period, and that the previously requested resources are re-requested on the 64 // new stream. 65 func (s) TestADS_BackoffAfterStreamFailure(t *testing.T) { 66 // Channels used for verifying different events in the test. 67 streamCloseCh := make(chan struct{}, 1) // ADS stream is closed. 68 ldsResourcesCh := make(chan []string, 1) // Listener resource names in the discovery request. 69 backoffCh := make(chan struct{}, 1) // Backoff after stream failure. 70 71 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 72 defer cancel() 73 74 // Create an xDS management server that returns RPC errors. 75 streamErr := errors.New("ADS stream error") 76 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 77 OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { 78 // Push the requested resource names on to a channel. 79 if req.GetTypeUrl() == version.V3ListenerURL { 80 t.Logf("Received LDS request for resources: %v", req.GetResourceNames()) 81 select { 82 case ldsResourcesCh <- req.GetResourceNames(): 83 case <-ctx.Done(): 84 } 85 } 86 // Return an error everytime a request is sent on the stream. This 87 // should cause the transport to backoff before attempting to 88 // recreate the stream. 89 return streamErr 90 }, 91 // Push on a channel whenever the stream is closed. 92 OnStreamClosed: func(int64, *v3corepb.Node) { 93 select { 94 case streamCloseCh <- struct{}{}: 95 case <-ctx.Done(): 96 } 97 }, 98 }) 99 100 // Override the backoff implementation to push on a channel that is read by 101 // the test goroutine. 102 backoffCtx, backoffCancel := context.WithCancel(ctx) 103 streamBackoff := func(int) time.Duration { 104 select { 105 case backoffCh <- struct{}{}: 106 case <-backoffCtx.Done(): 107 } 108 return 0 109 } 110 defer backoffCancel() 111 112 // Create an xDS client with bootstrap pointing to the above server. 113 nodeID := uuid.New().String() 114 client := createXDSClientWithBackoff(t, mgmtServer.Address, nodeID, streamBackoff) 115 116 // Register a watch for a listener resource. 117 const listenerName = "listener" 118 lw := newListenerWatcher() 119 ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, listenerName, lw) 120 defer ldsCancel() 121 122 // Verify that an ADS stream is created and an LDS request with the above 123 // resource name is sent. 124 if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerName}); err != nil { 125 t.Fatal(err) 126 } 127 128 // Verify that the received stream error is reported to the watcher. 129 if err := verifyListenerResourceError(ctx, lw.resourceErrCh, streamErr.Error(), nodeID); err != nil { 130 t.Fatal(err) 131 } 132 133 // Verify that the stream is closed. 134 select { 135 case <-streamCloseCh: 136 case <-ctx.Done(): 137 t.Fatalf("Timeout waiting for stream to be closed after an error") 138 } 139 140 // Verify that the ADS stream backs off before recreating the stream. 141 select { 142 case <-backoffCh: 143 case <-ctx.Done(): 144 t.Fatalf("Timeout waiting for ADS stream to backoff after stream failure") 145 } 146 147 // Verify that the same resource name is re-requested on the new stream. 148 if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerName}); err != nil { 149 t.Fatal(err) 150 } 151 152 // To prevent indefinite blocking during xDS client close, which is caused 153 // by a blocking backoff channel write, cancel the backoff context early 154 // given that the test is complete. 155 backoffCancel() 156 157 } 158 159 // Tests the case where a stream breaks because the server goes down. Verifies 160 // that when the server comes back up, the same resources are re-requested, this 161 // time with the previously acked version and an empty nonce. 162 func (s) TestADS_RetriesAfterBrokenStream(t *testing.T) { 163 // Channels used for verifying different events in the test. 164 streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. 165 streamResponseCh := make(chan *v3discoverypb.DiscoveryResponse, 1) // Discovery response is received. 166 167 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 168 defer cancel() 169 170 // Create an xDS management server listening on a local port. 171 l, err := net.Listen("tcp", "localhost:0") 172 if err != nil { 173 t.Fatalf("net.Listen() failed: %v", err) 174 } 175 lis := testutils.NewRestartableListener(l) 176 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 177 Listener: lis, 178 // Push the received request on to a channel for the test goroutine to 179 // verify that it matches expectations. 180 OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { 181 select { 182 case streamRequestCh <- req: 183 case <-ctx.Done(): 184 } 185 return nil 186 }, 187 // Push the response that the management server is about to send on to a 188 // channel. The test goroutine to uses this to extract the version and 189 // nonce, expected on subsequent requests. 190 OnStreamResponse: func(_ context.Context, _ int64, _ *v3discoverypb.DiscoveryRequest, resp *v3discoverypb.DiscoveryResponse) { 191 select { 192 case streamResponseCh <- resp: 193 case <-ctx.Done(): 194 } 195 }, 196 }) 197 198 // Create a listener resource on the management server. 199 const listenerName = "listener" 200 const routeConfigName = "route-config" 201 nodeID := uuid.New().String() 202 resources := e2e.UpdateOptions{ 203 NodeID: nodeID, 204 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerName, routeConfigName)}, 205 SkipValidation: true, 206 } 207 if err := mgmtServer.Update(ctx, resources); err != nil { 208 t.Fatal(err) 209 } 210 211 // Override the backoff implementation to always return 0, to reduce test 212 // run time. Instead control when the backoff returns by blocking on a 213 // channel, that the test closes. 214 backoffCh := make(chan struct{}) 215 streamBackoff := func(int) time.Duration { 216 select { 217 case backoffCh <- struct{}{}: 218 case <-ctx.Done(): 219 } 220 return 0 221 } 222 223 // Create an xDS client pointing to the above server. 224 client := createXDSClientWithBackoff(t, mgmtServer.Address, nodeID, streamBackoff) 225 226 // Register a watch for a listener resource. 227 lw := newListenerWatcher() 228 ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, listenerName, lw) 229 defer ldsCancel() 230 231 // Verify that the initial discovery request matches expectation. 232 var gotReq *v3discoverypb.DiscoveryRequest 233 select { 234 case gotReq = <-streamRequestCh: 235 case <-ctx.Done(): 236 t.Fatalf("Timeout waiting for discovery request on the stream") 237 } 238 wantReq := &v3discoverypb.DiscoveryRequest{ 239 VersionInfo: "", 240 Node: &v3corepb.Node{ 241 Id: nodeID, 242 UserAgentName: "user-agent", 243 UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"}, 244 ClientFeatures: []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"}, 245 }, 246 ResourceNames: []string{listenerName}, 247 TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", 248 ResponseNonce: "", 249 } 250 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 251 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 252 } 253 254 // Capture the version and nonce from the response. 255 var gotResp *v3discoverypb.DiscoveryResponse 256 select { 257 case gotResp = <-streamResponseCh: 258 case <-ctx.Done(): 259 t.Fatalf("Timeout waiting for discovery response on the stream") 260 } 261 version := gotResp.GetVersionInfo() 262 nonce := gotResp.GetNonce() 263 264 // Verify that the ACK contains the appropriate version and nonce. 265 wantReq.VersionInfo = version 266 wantReq.ResponseNonce = nonce 267 select { 268 case gotReq = <-streamRequestCh: 269 case <-ctx.Done(): 270 t.Fatalf("Timeout waiting for the discovery request ACK on the stream") 271 } 272 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 273 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 274 } 275 276 // Verify the update received by the watcher. 277 wantUpdate := listenerUpdateErrTuple{ 278 update: listenerUpdate{ 279 RouteConfigName: routeConfigName}, 280 } 281 if err := verifyListenerUpdate(ctx, lw.updateCh, wantUpdate); err != nil { 282 t.Fatal(err) 283 } 284 285 // Bring down the management server to simulate a broken stream. 286 lis.Stop() 287 288 // Verify that the error callback on the watcher is not invoked. 289 verifyNoListenerUpdate(ctx, lw.updateCh) 290 291 // Wait for backoff to kick in, and unblock the first backoff attempt. 292 select { 293 case <-backoffCh: 294 case <-ctx.Done(): 295 t.Fatal("Timeout waiting for stream backoff") 296 } 297 298 // Bring up the management server. The test does not have prcecise control 299 // over when new streams to the management server will start succeeding. The 300 // ADS stream implementation will backoff as many times as required before 301 // it can successfully create a new stream. Therefore, we need to receive on 302 // the backoffCh as many times as required, and unblock the backoff 303 // implementation. 304 lis.Restart() 305 go func() { 306 for { 307 select { 308 case <-backoffCh: 309 case <-ctx.Done(): 310 return 311 } 312 } 313 }() 314 315 // Verify that the transport creates a new stream and sends out a new 316 // request which contains the previously acked version, but an empty nonce. 317 wantReq.ResponseNonce = "" 318 select { 319 case gotReq = <-streamRequestCh: 320 case <-ctx.Done(): 321 t.Fatalf("Timeout waiting for the discovery request ACK on the stream") 322 } 323 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 324 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 325 } 326 } 327 328 // Tests the case where a resource is requested before the a valid ADS stream 329 // exists. Verifies that the a discovery request is sent out for the previously 330 // requested resource once a valid stream is created. 331 func (s) TestADS_ResourceRequestedBeforeStreamCreation(t *testing.T) { 332 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 333 defer cancel() 334 335 // Channels used for verifying different events in the test. 336 streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. 337 338 // Create an xDS management server listening on a local port. 339 l, err := net.Listen("tcp", "localhost:0") 340 if err != nil { 341 t.Fatalf("net.Listen() failed: %v", err) 342 } 343 lis := testutils.NewRestartableListener(l) 344 streamErr := errors.New("ADS stream error") 345 346 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 347 Listener: lis, 348 349 // Return an error everytime a request is sent on the stream. This 350 // should cause the transport to backoff before attempting to recreate 351 // the stream. 352 OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { 353 select { 354 case streamRequestCh <- req: 355 default: 356 } 357 return streamErr 358 }, 359 }) 360 361 // Bring down the management server before creating the transport. This 362 // allows us to test the case where SendRequest() is called when there is no 363 // stream to the management server. 364 lis.Stop() 365 366 // Override the backoff implementation to always return 0, to reduce test 367 // run time. Instead control when the backoff returns by blocking on a 368 // channel, that the test closes. 369 backoffCh := make(chan struct{}, 1) 370 unblockBackoffCh := make(chan struct{}) 371 streamBackoff := func(int) time.Duration { 372 select { 373 case backoffCh <- struct{}{}: 374 default: 375 } 376 <-unblockBackoffCh 377 return 0 378 } 379 380 // Create an xDS client with bootstrap pointing to the above server. 381 nodeID := uuid.New().String() 382 client := createXDSClientWithBackoff(t, mgmtServer.Address, nodeID, streamBackoff) 383 384 // Register a watch for a listener resource. 385 const listenerName = "listener" 386 lw := newListenerWatcher() 387 ldsCancel := client.WatchResource(xdsresource.V3ListenerURL, listenerName, lw) 388 defer ldsCancel() 389 390 // The above watch results in an attempt to create a new stream, which will 391 // fail, and will result in backoff. Wait for backoff to kick in. 392 select { 393 case <-backoffCh: 394 case <-ctx.Done(): 395 t.Fatal("Timeout waiting for stream backoff") 396 } 397 398 // Bring up the connection to the management server, and unblock the backoff 399 // implementation. 400 lis.Restart() 401 close(unblockBackoffCh) 402 403 // Verify that the initial discovery request matches expectation. 404 var gotReq *v3discoverypb.DiscoveryRequest 405 select { 406 case gotReq = <-streamRequestCh: 407 case <-ctx.Done(): 408 t.Fatalf("Timeout waiting for discovery request on the stream") 409 } 410 wantReq := &v3discoverypb.DiscoveryRequest{ 411 VersionInfo: "", 412 Node: &v3corepb.Node{ 413 Id: nodeID, 414 UserAgentName: "user-agent", 415 UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"}, 416 ClientFeatures: []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"}, 417 }, 418 ResourceNames: []string{listenerName}, 419 TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", 420 ResponseNonce: "", 421 } 422 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 423 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 424 } 425 } 426 427 // waitForResourceNames waits for the wantNames to be received on namesCh. 428 // Returns a non-nil error if the context expires before that. 429 func waitForResourceNames(ctx context.Context, t *testing.T, namesCh chan []string, wantNames []string) error { 430 t.Helper() 431 432 var lastRequestedNames []string 433 for ; ; <-time.After(defaultTestShortTimeout) { 434 select { 435 case <-ctx.Done(): 436 return fmt.Errorf("timeout waiting for resources %v to be requested from the management server. Last requested resources: %v", wantNames, lastRequestedNames) 437 case gotNames := <-namesCh: 438 if cmp.Equal(gotNames, wantNames, cmpopts.EquateEmpty(), cmpopts.SortSlices(func(s1, s2 string) bool { return s1 < s2 })) { 439 return nil 440 } 441 lastRequestedNames = gotNames 442 } 443 } 444 }