google.golang.org/grpc@v1.62.1/xds/internal/xdsclient/transport/transport_backoff_test.go (about) 1 /* 2 * 3 * Copyright 2022 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package transport_test 19 20 import ( 21 "context" 22 "errors" 23 "strings" 24 "testing" 25 "time" 26 27 "github.com/google/go-cmp/cmp" 28 "github.com/google/go-cmp/cmp/cmpopts" 29 "github.com/google/uuid" 30 "google.golang.org/grpc/connectivity" 31 "google.golang.org/grpc/internal/testutils" 32 "google.golang.org/grpc/internal/testutils/xds/e2e" 33 xdstestutils "google.golang.org/grpc/xds/internal/testutils" 34 "google.golang.org/grpc/xds/internal/xdsclient/transport" 35 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" 36 "google.golang.org/protobuf/testing/protocmp" 37 "google.golang.org/protobuf/types/known/anypb" 38 39 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 40 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 41 v3httppb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" 42 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 43 ) 44 45 var strSort = func(s1, s2 string) bool { return s1 < s2 } 46 47 // TestTransport_BackoffAfterStreamFailure tests the case where the management 48 // server returns an error in the ADS streaming RPC. The test verifies the 49 // following: 50 // 1. Initial discovery request matches expectation. 51 // 2. RPC error is propagated via the stream error handler. 52 // 3. When the stream is closed, the transport backs off. 53 // 4. The same discovery request is sent on the newly created stream. 54 func (s) TestTransport_BackoffAfterStreamFailure(t *testing.T) { 55 // Channels used for verifying different events in the test. 56 streamCloseCh := make(chan struct{}, 1) // ADS stream is closed. 57 streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. 58 backoffCh := make(chan struct{}, 1) // Transport backoff after stream failure. 59 streamErrCh := make(chan error, 1) // Stream error seen by the transport. 60 61 // Create an xDS management server listening on a local port. 62 streamErr := errors.New("ADS stream error") 63 mgmtServer, err := e2e.StartManagementServer(e2e.ManagementServerOptions{ 64 // Push on a channel whenever the stream is closed. 65 OnStreamClosed: func(int64, *v3corepb.Node) { 66 select { 67 case streamCloseCh <- struct{}{}: 68 default: 69 } 70 }, 71 72 // Return an error everytime a request is sent on the stream. This 73 // should cause the transport to backoff before attempting to recreate 74 // the stream. 75 OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error { 76 select { 77 case streamRequestCh <- req: 78 default: 79 } 80 return streamErr 81 }, 82 }) 83 if err != nil { 84 t.Fatalf("Failed to start xDS management server: %v", err) 85 } 86 defer mgmtServer.Stop() 87 t.Logf("Started xDS management server on %s", mgmtServer.Address) 88 89 // Override the backoff implementation to push on a channel that is read by 90 // the test goroutine. 91 transportBackoff := func(v int) time.Duration { 92 select { 93 case backoffCh <- struct{}{}: 94 default: 95 } 96 return 0 97 } 98 99 // Create a new transport. Since we are only testing backoff behavior here, 100 // we can pass a no-op data model layer implementation. 101 nodeID := uuid.New().String() 102 tr, err := transport.New(transport.Options{ 103 ServerCfg: *xdstestutils.ServerConfigForAddress(t, mgmtServer.Address), 104 OnRecvHandler: func(transport.ResourceUpdate) error { return nil }, // No data model layer validation. 105 OnErrorHandler: func(err error) { 106 select { 107 case streamErrCh <- err: 108 default: 109 } 110 }, 111 OnSendHandler: func(*transport.ResourceSendInfo) {}, 112 Backoff: transportBackoff, 113 NodeProto: &v3corepb.Node{Id: nodeID}, 114 }) 115 if err != nil { 116 t.Fatalf("Failed to create xDS transport: %v", err) 117 } 118 defer tr.Close() 119 120 // Send a discovery request through the transport. 121 const resourceName = "resource name" 122 tr.SendRequest(version.V3ListenerURL, []string{resourceName}) 123 124 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 125 defer cancel() 126 127 // Verify that the initial discovery request matches expectation. 128 var gotReq *v3discoverypb.DiscoveryRequest 129 select { 130 case gotReq = <-streamRequestCh: 131 case <-ctx.Done(): 132 t.Fatalf("Timeout waiting for discovery request on the stream") 133 } 134 wantReq := &v3discoverypb.DiscoveryRequest{ 135 VersionInfo: "", 136 Node: &v3corepb.Node{Id: nodeID}, 137 ResourceNames: []string{resourceName}, 138 TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", 139 ResponseNonce: "", 140 } 141 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 142 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 143 } 144 145 // Verify that the received stream error is reported to the user. 146 var gotErr error 147 select { 148 case gotErr = <-streamErrCh: 149 case <-ctx.Done(): 150 t.Fatalf("Timeout waiting for stream error to be reported to the user") 151 } 152 if !strings.Contains(gotErr.Error(), streamErr.Error()) { 153 t.Fatalf("Received stream error: %v, wantErr: %v", gotErr, streamErr) 154 } 155 156 // Verify that the stream is closed. 157 select { 158 case <-streamCloseCh: 159 case <-ctx.Done(): 160 t.Fatalf("Timeout waiting for stream to be closed after an error") 161 } 162 163 // Verify that the transport backs off before recreating the stream. 164 select { 165 case <-backoffCh: 166 case <-ctx.Done(): 167 t.Fatalf("Timeout waiting for transport to backoff after stream failure") 168 } 169 170 // Verify that the same discovery request is resent on the new stream. 171 select { 172 case gotReq = <-streamRequestCh: 173 case <-ctx.Done(): 174 t.Fatalf("Timeout waiting for discovery request on the stream") 175 } 176 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 177 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 178 } 179 } 180 181 // TestTransport_RetriesAfterBrokenStream tests the case where a stream breaks 182 // because the server goes down. The test verifies the following: 183 // 1. Initial discovery request matches expectation. 184 // 2. Good response from the server leads to an ACK with appropriate version. 185 // 3. Management server going down, leads to stream failure. 186 // 4. Once the management server comes back up, the same resources are 187 // re-requested, this time with an empty nonce. 188 func (s) TestTransport_RetriesAfterBrokenStream(t *testing.T) { 189 // Channels used for verifying different events in the test. 190 streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. 191 streamResponseCh := make(chan *v3discoverypb.DiscoveryResponse, 1) // Discovery response is received. 192 streamErrCh := make(chan error, 1) // Stream error seen by the transport. 193 194 // Create an xDS management server listening on a local port. 195 l, err := testutils.LocalTCPListener() 196 if err != nil { 197 t.Fatalf("Failed to create a local listener for the xDS management server: %v", err) 198 } 199 lis := testutils.NewRestartableListener(l) 200 mgmtServer, err := e2e.StartManagementServer(e2e.ManagementServerOptions{ 201 Listener: lis, 202 // Push the received request on to a channel for the test goroutine to 203 // verify that it matches expectations. 204 OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { 205 select { 206 case streamRequestCh <- req: 207 default: 208 } 209 return nil 210 }, 211 // Push the response that the management server is about to send on to a 212 // channel. The test goroutine to uses this to extract the version and 213 // nonce, expected on subsequent requests. 214 OnStreamResponse: func(_ context.Context, _ int64, _ *v3discoverypb.DiscoveryRequest, resp *v3discoverypb.DiscoveryResponse) { 215 select { 216 case streamResponseCh <- resp: 217 default: 218 } 219 }, 220 }) 221 if err != nil { 222 t.Fatalf("Failed to start xDS management server: %v", err) 223 } 224 defer mgmtServer.Stop() 225 t.Logf("Started xDS management server on %s", lis.Addr().String()) 226 227 // Configure the management server with appropriate resources. 228 apiListener := &v3listenerpb.ApiListener{ 229 ApiListener: func() *anypb.Any { 230 return testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{ 231 RouteSpecifier: &v3httppb.HttpConnectionManager_Rds{ 232 Rds: &v3httppb.Rds{ 233 ConfigSource: &v3corepb.ConfigSource{ 234 ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{Ads: &v3corepb.AggregatedConfigSource{}}, 235 }, 236 RouteConfigName: "route-configuration-name", 237 }, 238 }, 239 }) 240 }(), 241 } 242 const resourceName1 = "resource name 1" 243 const resourceName2 = "resource name 2" 244 listenerResource1 := &v3listenerpb.Listener{ 245 Name: resourceName1, 246 ApiListener: apiListener, 247 } 248 listenerResource2 := &v3listenerpb.Listener{ 249 Name: resourceName2, 250 ApiListener: apiListener, 251 } 252 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 253 defer cancel() 254 nodeID := uuid.New().String() 255 mgmtServer.Update(ctx, e2e.UpdateOptions{ 256 NodeID: nodeID, 257 Listeners: []*v3listenerpb.Listener{listenerResource1, listenerResource2}, 258 SkipValidation: true, 259 }) 260 261 // Create a new transport. Since we are only testing backoff behavior here, 262 // we can pass a no-op data model layer implementation. 263 tr, err := transport.New(transport.Options{ 264 ServerCfg: *xdstestutils.ServerConfigForAddress(t, mgmtServer.Address), 265 OnRecvHandler: func(transport.ResourceUpdate) error { return nil }, // No data model layer validation. 266 OnErrorHandler: func(err error) { 267 select { 268 case streamErrCh <- err: 269 default: 270 } 271 }, 272 OnSendHandler: func(*transport.ResourceSendInfo) {}, 273 Backoff: func(int) time.Duration { return time.Duration(0) }, // No backoff. 274 NodeProto: &v3corepb.Node{Id: nodeID}, 275 }) 276 if err != nil { 277 t.Fatalf("Failed to create xDS transport: %v", err) 278 } 279 defer tr.Close() 280 281 // Send a discovery request through the transport. 282 tr.SendRequest(version.V3ListenerURL, []string{resourceName1, resourceName2}) 283 284 // Verify that the initial discovery request matches expectation. 285 var gotReq *v3discoverypb.DiscoveryRequest 286 select { 287 case gotReq = <-streamRequestCh: 288 case <-ctx.Done(): 289 t.Fatalf("Timeout waiting for discovery request on the stream") 290 } 291 wantReq := &v3discoverypb.DiscoveryRequest{ 292 VersionInfo: "", 293 Node: &v3corepb.Node{Id: nodeID}, 294 ResourceNames: []string{resourceName1, resourceName2}, 295 TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", 296 ResponseNonce: "", 297 } 298 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform(), cmpopts.SortSlices(strSort)); diff != "" { 299 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 300 } 301 302 // Capture the version and nonce from the response. 303 var gotResp *v3discoverypb.DiscoveryResponse 304 select { 305 case gotResp = <-streamResponseCh: 306 case <-ctx.Done(): 307 t.Fatalf("Timeout waiting for discovery response on the stream") 308 } 309 version := gotResp.GetVersionInfo() 310 nonce := gotResp.GetNonce() 311 312 // Verify that the ACK contains the appropriate version and nonce. 313 wantReq.VersionInfo = version 314 wantReq.ResponseNonce = nonce 315 select { 316 case gotReq = <-streamRequestCh: 317 case <-ctx.Done(): 318 t.Fatalf("Timeout waiting for the discovery request ACK on the stream") 319 } 320 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform(), cmpopts.SortSlices(strSort)); diff != "" { 321 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 322 } 323 324 // Bring down the management server to simulate a broken stream. 325 lis.Stop() 326 327 // We don't care about the exact error here and it can vary based on which 328 // error gets reported first, the Recv() failure or the new stream creation 329 // failure. So, all we check here is whether we get an error or not. 330 select { 331 case <-streamErrCh: 332 case <-ctx.Done(): 333 t.Fatalf("Timeout waiting for stream error to be reported to the user") 334 } 335 336 // Bring up the connection to the management server. 337 lis.Restart() 338 339 // Verify that the transport creates a new stream and sends out a new 340 // request which contains the previously acked version, but an empty nonce. 341 wantReq.ResponseNonce = "" 342 select { 343 case gotReq = <-streamRequestCh: 344 case <-ctx.Done(): 345 t.Fatalf("Timeout waiting for the discovery request ACK on the stream") 346 } 347 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform(), cmpopts.SortSlices(strSort)); diff != "" { 348 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 349 } 350 } 351 352 // TestTransport_ResourceRequestedBeforeStreamCreation tests the case where a 353 // resource is requested before the transport has a valid stream. Verifies that 354 // the transport sends out the request once it has a valid stream. 355 func (s) TestTransport_ResourceRequestedBeforeStreamCreation(t *testing.T) { 356 // Channels used for verifying different events in the test. 357 streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. 358 359 // Create an xDS management server listening on a local port. 360 l, err := testutils.LocalTCPListener() 361 if err != nil { 362 t.Fatalf("Failed to create a local listener for the xDS management server: %v", err) 363 } 364 lis := testutils.NewRestartableListener(l) 365 streamErr := errors.New("ADS stream error") 366 367 mgmtServer, err := e2e.StartManagementServer(e2e.ManagementServerOptions{ 368 Listener: lis, 369 370 // Return an error everytime a request is sent on the stream. This 371 // should cause the transport to backoff before attempting to recreate 372 // the stream. 373 OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error { 374 select { 375 case streamRequestCh <- req: 376 default: 377 } 378 return streamErr 379 }, 380 }) 381 if err != nil { 382 t.Fatalf("Failed to start xDS management server: %v", err) 383 } 384 defer mgmtServer.Stop() 385 t.Logf("Started xDS management server on %s", lis.Addr().String()) 386 387 // Bring down the management server before creating the transport. This 388 // allows us to test the case where SendRequest() is called when there is no 389 // stream to the management server. 390 lis.Stop() 391 392 // Create a new transport. Since we are only testing backoff behavior here, 393 // we can pass a no-op data model layer implementation. 394 nodeID := uuid.New().String() 395 tr, err := transport.New(transport.Options{ 396 ServerCfg: *xdstestutils.ServerConfigForAddress(t, mgmtServer.Address), 397 OnRecvHandler: func(transport.ResourceUpdate) error { return nil }, // No data model layer validation. 398 OnErrorHandler: func(error) {}, // No stream error handling. 399 OnSendHandler: func(*transport.ResourceSendInfo) {}, // No on send handler 400 Backoff: func(int) time.Duration { return time.Duration(0) }, // No backoff. 401 NodeProto: &v3corepb.Node{Id: nodeID}, 402 }) 403 if err != nil { 404 t.Fatalf("Failed to create xDS transport: %v", err) 405 } 406 defer tr.Close() 407 408 // Send a discovery request through the transport. 409 const resourceName = "resource name" 410 tr.SendRequest(version.V3ListenerURL, []string{resourceName}) 411 412 // Wait until the transport has attempted to connect to the management 413 // server and has seen the connection fail. In this case, since the 414 // connection is down, and the transport creates streams with WaitForReady() 415 // set to true, stream creation will never fail (unless the context 416 // expires), and therefore we cannot rely on the stream error handler. 417 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 418 defer cancel() 419 for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { 420 if tr.ChannelConnectivityStateForTesting() == connectivity.TransientFailure { 421 break 422 } 423 } 424 425 lis.Restart() 426 427 // Verify that the initial discovery request matches expectation. 428 var gotReq *v3discoverypb.DiscoveryRequest 429 select { 430 case gotReq = <-streamRequestCh: 431 case <-ctx.Done(): 432 t.Fatalf("Timeout waiting for discovery request on the stream") 433 } 434 wantReq := &v3discoverypb.DiscoveryRequest{ 435 VersionInfo: "", 436 Node: &v3corepb.Node{Id: nodeID}, 437 ResourceNames: []string{resourceName}, 438 TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", 439 ResponseNonce: "", 440 } 441 if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { 442 t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) 443 } 444 }