google.golang.org/grpc@v1.72.2/xds/internal/xdsclient/tests/ads_stream_backoff_test.go (about) 1 /* 2 * 3 * Copyright 2024 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package xdsclient_test 20 21 import ( 22 "context" 23 "errors" 24 "fmt" 25 "testing" 26 "time" 27 28 "github.com/google/go-cmp/cmp" 29 "github.com/google/go-cmp/cmp/cmpopts" 30 "github.com/google/uuid" 31 "google.golang.org/grpc" 32 "google.golang.org/grpc/internal/testutils" 33 "google.golang.org/grpc/internal/testutils/xds/e2e" 34 "google.golang.org/grpc/internal/xds/bootstrap" 35 "google.golang.org/grpc/xds/internal/xdsclient" 36 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" 37 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" 38 "google.golang.org/protobuf/testing/protocmp" 39 40 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 41 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 42 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 43 ) 44 45 // Creates an xDS client with the given bootstrap contents and backoff function. 46 func createXDSClientWithBackoff(t *testing.T, bootstrapContents []byte, streamBackoff func(int) time.Duration) xdsclient.XDSClient { 47 t.Helper() 48 49 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 50 if err != nil { 51 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 52 } 53 pool := xdsclient.NewPool(config) 54 client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ 55 Name: t.Name(), 56 StreamBackoffAfterFailure: streamBackoff, 57 }) 58 if err != nil { 59 t.Fatalf("Failed to create xDS client: %v", err) 60 } 61 t.Cleanup(close) 62 return client 63 } 64 65 // Tests the case where the management server returns an error in the ADS 66 // streaming RPC. Verifies that the ADS stream is restarted after a backoff 67 // period, and that the previously requested resources are re-requested on the 68 // new stream. 69 func (s) TestADS_BackoffAfterStreamFailure(t *testing.T) { 70 // Channels used for verifying different events in the test. 71 streamCloseCh := make(chan struct{}, 1) // ADS stream is closed. 72 ldsResourcesCh := make(chan []string, 1) // Listener resource names in the discovery request. 73 backoffCh := make(chan struct{}, 1) // Backoff after stream failure. 74 75 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 76 defer cancel() 77 78 // Create an xDS management server that returns RPC errors. 79 streamErr := errors.New("ADS stream error") 80 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 81 OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { 82 // Push the requested resource names on to a channel. 83 if req.GetTypeUrl() == version.V3ListenerURL { 84 t.Logf("Received LDS request for resources: %v", req.GetResourceNames()) 85 select { 86 case ldsResourcesCh <- req.GetResourceNames(): 87 case <-ctx.Done(): 88 } 89 } 90 // Return an error everytime a request is sent on the stream. This 91 // should cause the transport to backoff before attempting to 92 // recreate the stream. 93 return streamErr 94 }, 95 // Push on a channel whenever the stream is closed. 96 OnStreamClosed: func(int64, *v3corepb.Node) { 97 select { 98 case streamCloseCh <- struct{}{}: 99 case <-ctx.Done(): 100 } 101 }, 102 }) 103 104 // Override the backoff implementation to push on a channel that is read by 105 // the test goroutine. 106 backoffCtx, backoffCancel := context.WithCancel(ctx) 107 streamBackoff := func(v int) time.Duration { 108 select { 109 case backoffCh <- struct{}{}: 110 case <-backoffCtx.Done(): 111 } 112 return 0 113 } 114 defer backoffCancel() 115 116 // Create an xDS client with bootstrap pointing to the above server. 117 nodeID := uuid.New().String() 118 bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) 119 client := createXDSClientWithBackoff(t, bc, streamBackoff) 120 121 // Register a watch for a listener resource. 122 const listenerName = "listener" 123 lw := newListenerWatcher() 124 ldsCancel := xdsresource.WatchListener(client, listenerName, lw) 125 defer ldsCancel() 126 127 // Verify that an ADS stream is created and an LDS request with the above 128 // resource name is sent. 129 if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerName}); err != nil { 130 t.Fatal(err) 131 } 132 133 // Verify that the received stream error is reported to the watcher. 134 if err := verifyListenerError(ctx, lw.updateCh, streamErr.Error(), nodeID); err != nil { 135 t.Fatal(err) 136 } 137 138 // Verify that the stream is closed. 139 select { 140 case <-streamCloseCh: 141 case <-ctx.Done(): 142 t.Fatalf("Timeout waiting for stream to be closed after an error") 143 } 144 145 // Verify that the ADS stream backs off before recreating the stream. 146 select { 147 case <-backoffCh: 148 case <-ctx.Done(): 149 t.Fatalf("Timeout waiting for ADS stream to backoff after stream failure") 150 } 151 152 // Verify that the same resource name is re-requested on the new stream. 153 if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerName}); err != nil { 154 t.Fatal(err) 155 } 156 157 // To prevent indefinite blocking during xDS client close, which is caused 158 // by a blocking backoff channel write, cancel the backoff context early 159 // given that the test is complete. 160 backoffCancel() 161 162 } 163 164 // Tests the case where a stream breaks because the server goes down. Verifies 165 // that when the server comes back up, the same resources are re-requested, this 166 // time with the previously acked version and an empty nonce. 167 func (s) TestADS_RetriesAfterBrokenStream(t *testing.T) { 168 // Channels used for verifying different events in the test. 169 streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. 170 streamResponseCh := make(chan *v3discoverypb.DiscoveryResponse, 1) // Discovery response is received. 171 172 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 173 defer cancel() 174 175 // Create an xDS management server listening on a local port. 176 l, err := testutils.LocalTCPListener() 177 if err != nil { 178 t.Fatalf("Failed to create a local listener for the xDS management server: %v", err) 179 } 180 lis := testutils.NewRestartableListener(l) 181 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 182 Listener: lis, 183 // Push the received request on to a channel for the test goroutine to 184 // verify that it matches expectations. 185 OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { 186 select { 187 case streamRequestCh <- req: 188 case <-ctx.Done(): 189 } 190 return nil 191 }, 192 // Push the response that the management server is about to send on to a 193 // channel. The test goroutine to uses this to extract the version and 194 // nonce, expected on subsequent requests. 195 OnStreamResponse: func(_ context.Context, _ int64, _ *v3discoverypb.DiscoveryRequest, resp *v3discoverypb.DiscoveryResponse) { 196 select { 197 case streamResponseCh <- resp: 198 case <-ctx.Done(): 199 } 200 }, 201 }) 202 203 // Create a listener resource on the management server. 204 const listenerName = "listener" 205 const routeConfigName = "route-config" 206 nodeID := uuid.New().String() 207 resources := e2e.UpdateOptions{ 208 NodeID: nodeID, 209 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerName, routeConfigName)}, 210 SkipValidation: true, 211 } 212 if err := mgmtServer.Update(ctx, resources); err != nil { 213 t.Fatal(err) 214 } 215 216 // Override the backoff implementation to always return 0, to reduce test 217 // run time. Instead control when the backoff returns by blocking on a 218 // channel, that the test closes. 219 backoffCh := make(chan struct{}) 220 streamBackoff := func(v int) time.Duration { 221 select { 222 case backoffCh <- struct{}{}: 223 case <-ctx.Done(): 224 } 225 return 0 226 } 227 228 // Create an xDS client with bootstrap pointing to the above server. 229 bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) 230 client := createXDSClientWithBackoff(t, bc, streamBackoff) 231 232 // Register a watch for a listener resource. 233 lw := newListenerWatcher() 234 ldsCancel := xdsresource.WatchListener(client, listenerName, lw) 235 defer ldsCancel() 236 237 // Verify that the initial discovery request matches expectation. 238 var gotReq *v3discoverypb.DiscoveryRequest 239 select { 240 case gotReq = <-streamRequestCh: 241 case <-ctx.Done(): 242 t.Fatalf("Timeout waiting for discovery request on the stream") 243 } 244 wantReq := &v3discoverypb.DiscoveryRequest{ 245 VersionInfo: "", 246 Node: &v3corepb.Node{ 247 Id: nodeID, 248 UserAgentName: "gRPC Go", 249 UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: grpc.Version}, 250 ClientFeatures: []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"}, 251 }, 252 ResourceNames: []string{listenerName}, 253 TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", 254 ResponseNonce: "", 255 } 256 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 257 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 258 } 259 260 // Capture the version and nonce from the response. 261 var gotResp *v3discoverypb.DiscoveryResponse 262 select { 263 case gotResp = <-streamResponseCh: 264 case <-ctx.Done(): 265 t.Fatalf("Timeout waiting for discovery response on the stream") 266 } 267 version := gotResp.GetVersionInfo() 268 nonce := gotResp.GetNonce() 269 270 // Verify that the ACK contains the appropriate version and nonce. 271 wantReq.VersionInfo = version 272 wantReq.ResponseNonce = nonce 273 select { 274 case gotReq = <-streamRequestCh: 275 case <-ctx.Done(): 276 t.Fatalf("Timeout waiting for the discovery request ACK on the stream") 277 } 278 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 279 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 280 } 281 282 // Verify the update received by the watcher. 283 wantUpdate := listenerUpdateErrTuple{ 284 update: xdsresource.ListenerUpdate{ 285 RouteConfigName: routeConfigName, 286 HTTPFilters: []xdsresource.HTTPFilter{{Name: "router"}}, 287 }, 288 } 289 if err := verifyListenerUpdate(ctx, lw.updateCh, wantUpdate); err != nil { 290 t.Fatal(err) 291 } 292 293 // Bring down the management server to simulate a broken stream. 294 lis.Stop() 295 296 // Verify that the error callback on the watcher is not invoked. 297 verifyNoListenerUpdate(ctx, lw.updateCh) 298 299 // Wait for backoff to kick in, and unblock the first backoff attempt. 300 select { 301 case <-backoffCh: 302 case <-ctx.Done(): 303 t.Fatal("Timeout waiting for stream backoff") 304 } 305 306 // Bring up the management server. The test does not have prcecise control 307 // over when new streams to the management server will start succeeding. The 308 // ADS stream implementation will backoff as many times as required before 309 // it can successfully create a new stream. Therefore, we need to receive on 310 // the backoffCh as many times as required, and unblock the backoff 311 // implementation. 312 lis.Restart() 313 go func() { 314 for { 315 select { 316 case <-backoffCh: 317 case <-ctx.Done(): 318 return 319 } 320 } 321 }() 322 323 // Verify that the transport creates a new stream and sends out a new 324 // request which contains the previously acked version, but an empty nonce. 325 wantReq.ResponseNonce = "" 326 select { 327 case gotReq = <-streamRequestCh: 328 case <-ctx.Done(): 329 t.Fatalf("Timeout waiting for the discovery request ACK on the stream") 330 } 331 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 332 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 333 } 334 } 335 336 // Tests the case where a resource is requested before the a valid ADS stream 337 // exists. Verifies that the a discovery request is sent out for the previously 338 // requested resource once a valid stream is created. 339 func (s) TestADS_ResourceRequestedBeforeStreamCreation(t *testing.T) { 340 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 341 defer cancel() 342 343 // Channels used for verifying different events in the test. 344 streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. 345 346 // Create an xDS management server listening on a local port. 347 l, err := testutils.LocalTCPListener() 348 if err != nil { 349 t.Fatalf("Failed to create a local listener: %v", err) 350 } 351 lis := testutils.NewRestartableListener(l) 352 streamErr := errors.New("ADS stream error") 353 354 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 355 Listener: lis, 356 357 // Return an error everytime a request is sent on the stream. This 358 // should cause the transport to backoff before attempting to recreate 359 // the stream. 360 OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error { 361 select { 362 case streamRequestCh <- req: 363 default: 364 } 365 return streamErr 366 }, 367 }) 368 369 // Bring down the management server before creating the transport. This 370 // allows us to test the case where SendRequest() is called when there is no 371 // stream to the management server. 372 lis.Stop() 373 374 // Override the backoff implementation to always return 0, to reduce test 375 // run time. Instead control when the backoff returns by blocking on a 376 // channel, that the test closes. 377 backoffCh := make(chan struct{}, 1) 378 unblockBackoffCh := make(chan struct{}) 379 streamBackoff := func(v int) time.Duration { 380 select { 381 case backoffCh <- struct{}{}: 382 default: 383 } 384 <-unblockBackoffCh 385 return 0 386 } 387 388 // Create an xDS client with bootstrap pointing to the above server. 389 nodeID := uuid.New().String() 390 bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) 391 client := createXDSClientWithBackoff(t, bc, streamBackoff) 392 393 // Register a watch for a listener resource. 394 const listenerName = "listener" 395 lw := newListenerWatcher() 396 ldsCancel := xdsresource.WatchListener(client, listenerName, lw) 397 defer ldsCancel() 398 399 // The above watch results in an attempt to create a new stream, which will 400 // fail, and will result in backoff. Wait for backoff to kick in. 401 select { 402 case <-backoffCh: 403 case <-ctx.Done(): 404 t.Fatal("Timeout waiting for stream backoff") 405 } 406 407 // Bring up the connection to the management server, and unblock the backoff 408 // implementation. 409 lis.Restart() 410 close(unblockBackoffCh) 411 412 // Verify that the initial discovery request matches expectation. 413 var gotReq *v3discoverypb.DiscoveryRequest 414 select { 415 case gotReq = <-streamRequestCh: 416 case <-ctx.Done(): 417 t.Fatalf("Timeout waiting for discovery request on the stream") 418 } 419 wantReq := &v3discoverypb.DiscoveryRequest{ 420 VersionInfo: "", 421 Node: &v3corepb.Node{ 422 Id: nodeID, 423 UserAgentName: "gRPC Go", 424 UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: grpc.Version}, 425 ClientFeatures: []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"}, 426 }, 427 ResourceNames: []string{listenerName}, 428 TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", 429 ResponseNonce: "", 430 } 431 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 432 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 433 } 434 } 435 436 // waitForResourceNames waits for the wantNames to be received on namesCh. 437 // Returns a non-nil error if the context expires before that. 438 func waitForResourceNames(ctx context.Context, t *testing.T, namesCh chan []string, wantNames []string) error { 439 t.Helper() 440 441 var lastRequestedNames []string 442 for ; ; <-time.After(defaultTestShortTimeout) { 443 select { 444 case <-ctx.Done(): 445 return fmt.Errorf("timeout waiting for resources %v to be requested from the management server. Last requested resources: %v", wantNames, lastRequestedNames) 446 case gotNames := <-namesCh: 447 if cmp.Equal(gotNames, wantNames, cmpopts.EquateEmpty(), cmpopts.SortSlices(func(s1, s2 string) bool { return s1 < s2 })) { 448 return nil 449 } 450 lastRequestedNames = gotNames 451 } 452 } 453 }