google.golang.org/grpc@v1.72.2/xds/internal/xdsclient/tests/fallback_test.go (about) 1 /* 2 * 3 * Copyright 2024 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package xdsclient_test 20 21 import ( 22 "context" 23 "fmt" 24 "sync/atomic" 25 "testing" 26 "time" 27 28 "github.com/google/uuid" 29 "google.golang.org/grpc" 30 "google.golang.org/grpc/codes" 31 "google.golang.org/grpc/credentials/insecure" 32 "google.golang.org/grpc/internal" 33 "google.golang.org/grpc/internal/stubserver" 34 "google.golang.org/grpc/internal/testutils" 35 "google.golang.org/grpc/internal/testutils/xds/e2e" 36 "google.golang.org/grpc/internal/xds/bootstrap" 37 "google.golang.org/grpc/peer" 38 "google.golang.org/grpc/resolver" 39 "google.golang.org/grpc/status" 40 "google.golang.org/grpc/xds/internal/xdsclient" 41 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" 42 43 v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" 44 v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" 45 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 46 v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" 47 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 48 testgrpc "google.golang.org/grpc/interop/grpc_testing" 49 testpb "google.golang.org/grpc/interop/grpc_testing" 50 ) 51 52 // Give the fallback tests additional time to complete because they need to 53 // first identify failed connections before establishing new ones. 54 const defaultFallbackTestTimeout = 2 * defaultTestTimeout 55 56 func waitForRPCsToReachBackend(ctx context.Context, client testgrpc.TestServiceClient, backend string) error { 57 var lastErr error 58 for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { 59 var peer peer.Peer 60 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil { 61 lastErr = err 62 continue 63 } 64 // Veirfy the peer when the RPC succeeds. 65 if peer.Addr.String() == backend { 66 break 67 } 68 } 69 if ctx.Err() != nil { 70 return fmt.Errorf("timeout when waiting for RPCs to reach expected backend. Last error: %v", lastErr) 71 } 72 return nil 73 } 74 75 // Tests fallback on startup where the xDS client is unable to establish a 76 // connection to the primary server. The test verifies that the xDS client falls 77 // back to the secondary server, and when the primary comes back up, it reverts 78 // to it. The test also verifies that when all requested resources are cached 79 // from the primary, fallback is not triggered when the connection goes down. 80 func (s) TestFallback_OnStartup(t *testing.T) { 81 ctx, cancel := context.WithTimeout(context.Background(), defaultFallbackTestTimeout) 82 defer cancel() 83 84 // Create two listeners for the two management servers. The test can 85 // start/stop these listeners and can also get notified when the listener 86 // receives a connection request. 87 primaryWrappedLis := testutils.NewListenerWrapper(t, nil) 88 primaryLis := testutils.NewRestartableListener(primaryWrappedLis) 89 fallbackWrappedLis := testutils.NewListenerWrapper(t, nil) 90 fallbackLis := testutils.NewRestartableListener(fallbackWrappedLis) 91 92 // Start two management servers, primary and fallback, with the above 93 // listeners. 94 primaryManagementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: primaryLis}) 95 fallbackManagementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: fallbackLis}) 96 97 // Start two test service backends. 98 backend1 := stubserver.StartTestService(t, nil) 99 defer backend1.Stop() 100 backend2 := stubserver.StartTestService(t, nil) 101 defer backend2.Stop() 102 103 // Configure xDS resource on the primary management server, with a cluster 104 // resource that contains an endpoint for backend1. 105 nodeID := uuid.New().String() 106 const serviceName = "my-service-fallback-xds" 107 resources := e2e.DefaultClientResources(e2e.ResourceParams{ 108 DialTarget: serviceName, 109 NodeID: nodeID, 110 Host: "localhost", 111 Port: testutils.ParsePort(t, backend1.Address), 112 SecLevel: e2e.SecurityLevelNone, 113 }) 114 if err := primaryManagementServer.Update(ctx, resources); err != nil { 115 t.Fatal(err) 116 } 117 118 // Configure xDS resource on the secondary management server, with a cluster 119 // resource that contains an endpoint for backend2. Only the listener 120 // resource has the same name on both servers. 121 fallbackRouteConfigName := "fallback-route-" + serviceName 122 fallbackClusterName := "fallback-cluster-" + serviceName 123 fallbackEndpointsName := "fallback-endpoints-" + serviceName 124 resources = e2e.UpdateOptions{ 125 NodeID: nodeID, 126 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, fallbackRouteConfigName)}, 127 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(fallbackRouteConfigName, serviceName, fallbackClusterName)}, 128 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(fallbackClusterName, fallbackEndpointsName, e2e.SecurityLevelNone)}, 129 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(fallbackEndpointsName, "localhost", []uint32{testutils.ParsePort(t, backend2.Address)})}, 130 } 131 if err := fallbackManagementServer.Update(ctx, resources); err != nil { 132 t.Fatal(err) 133 } 134 135 // Shut both management servers down before starting the gRPC client to 136 // trigger fallback on startup. 137 primaryLis.Stop() 138 fallbackLis.Stop() 139 140 // Generate bootstrap configuration with the above two servers. 141 bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ 142 Servers: []byte(fmt.Sprintf(`[ 143 { 144 "server_uri": %q, 145 "channel_creds": [{"type": "insecure"}] 146 }, 147 { 148 "server_uri": %q, 149 "channel_creds": [{"type": "insecure"}] 150 }]`, primaryManagementServer.Address, fallbackManagementServer.Address)), 151 Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), 152 }) 153 if err != nil { 154 t.Fatalf("Failed to create bootstrap file: %v", err) 155 } 156 157 // Create an xDS client with the above bootstrap configuration. 158 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 159 if err != nil { 160 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 161 } 162 pool := xdsclient.NewPool(config) 163 if err != nil { 164 t.Fatalf("Failed to create xDS client: %v", err) 165 } 166 167 // Get the xDS resolver to use the above xDS client. 168 resolverBuilder := internal.NewXDSResolverWithPoolForTesting.(func(*xdsclient.Pool) (resolver.Builder, error)) 169 resolver, err := resolverBuilder(pool) 170 if err != nil { 171 t.Fatalf("Failed to create xDS resolver for testing: %v", err) 172 } 173 174 // Start a gRPC client that uses the above xDS resolver. 175 cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(resolver)) 176 if err != nil { 177 t.Fatalf("Failed to create gRPC client: %v", err) 178 } 179 defer cc.Close() 180 cc.Connect() 181 182 // Ensure that a connection is attempted to the primary. 183 if _, err := primaryWrappedLis.NewConnCh.Receive(ctx); err != nil { 184 t.Fatalf("Failure when waiting for a connection to be opened to the primary management server: %v", err) 185 } 186 187 // Ensure that a connection is attempted to the fallback. 188 if _, err := fallbackWrappedLis.NewConnCh.Receive(ctx); err != nil { 189 t.Fatalf("Failure when waiting for a connection to be opened to the primary management server: %v", err) 190 } 191 192 // Make an RPC with a shortish deadline and expect it to fail. 193 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 194 defer sCancel() 195 client := testgrpc.NewTestServiceClient(cc) 196 if _, err := client.EmptyCall(sCtx, &testpb.Empty{}, grpc.WaitForReady(true)); err == nil || status.Code(err) != codes.DeadlineExceeded { 197 t.Fatalf("EmptyCall() = %v, want DeadlineExceeded", err) 198 } 199 200 // Start the fallback server. Ensure that an RPC can succeed, and that it 201 // reaches backend2. 202 fallbackLis.Restart() 203 if err := waitForRPCsToReachBackend(ctx, client, backend2.Address); err != nil { 204 t.Fatal(err) 205 } 206 207 // Start the primary server. It can take a while before the xDS client 208 // notices this, since the ADS stream implementation uses a backoff before 209 // retrying the stream. 210 primaryLis.Restart() 211 212 // Wait for the connection to the secondary to be closed and ensure that an 213 // RPC can succeed, and that it reaches backend1. 214 c, err := fallbackWrappedLis.NewConnCh.Receive(ctx) 215 if err != nil { 216 t.Fatalf("Failure when retrieving the most recent connection to the fallback management server: %v", err) 217 } 218 conn := c.(*testutils.ConnWrapper) 219 if _, err := conn.CloseCh.Receive(ctx); err != nil { 220 t.Fatalf("Connection to fallback server not closed once primary becomes ready: %v", err) 221 } 222 if err := waitForRPCsToReachBackend(ctx, client, backend1.Address); err != nil { 223 t.Fatal(err) 224 } 225 226 // Stop the primary servers. Since all xDS resources were received from the 227 // primary (and RPCs were succeeding to the clusters returned by the 228 // primary), we will not trigger fallback. 229 primaryLis.Stop() 230 sCtx, sCancel = context.WithTimeout(ctx, defaultTestShortTimeout) 231 defer sCancel() 232 if _, err := fallbackWrappedLis.NewConnCh.Receive(sCtx); err == nil { 233 t.Fatalf("Fallback attempted when not expected to. There are no uncached resources from the primary server at this point.") 234 } 235 236 // Ensure that RPCs still succeed, and that they use the configuration 237 // received from the primary. 238 if err := waitForRPCsToReachBackend(ctx, client, backend1.Address); err != nil { 239 t.Fatal(err) 240 } 241 } 242 243 // Tests fallback when the primary management server fails during an update. 244 func (s) TestFallback_MidUpdate(t *testing.T) { 245 ctx, cancel := context.WithTimeout(context.Background(), defaultFallbackTestTimeout) 246 defer cancel() 247 248 // Create two listeners for the two management servers. The test can 249 // start/stop these listeners and can also get notified when the listener 250 // receives a connection request. 251 primaryWrappedLis := testutils.NewListenerWrapper(t, nil) 252 primaryLis := testutils.NewRestartableListener(primaryWrappedLis) 253 fallbackWrappedLis := testutils.NewListenerWrapper(t, nil) 254 fallbackLis := testutils.NewRestartableListener(fallbackWrappedLis) 255 256 // This boolean helps with triggering fallback mid update. When this boolean 257 // is set and the below defined cluster resource is requested, the primary 258 // management server shuts down the connection, forcing the client to 259 // fallback to the secondary server. 260 var closeConnOnMidUpdateClusterResource atomic.Bool 261 const ( 262 serviceName = "my-service-fallback-xds" 263 routeConfigName = "route-" + serviceName 264 clusterName = "cluster-" + serviceName 265 endpointsName = "endpoints-" + serviceName 266 midUpdateRouteConfigName = "mid-update-route-" + serviceName 267 midUpdateClusterName = "mid-update-cluster-" + serviceName 268 midUpdateEndpointsName = "mid-update-endpoints-" + serviceName 269 ) 270 271 // Start two management servers, primary and fallback, with the above 272 // listeners. 273 primaryManagementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 274 Listener: primaryLis, 275 OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error { 276 if closeConnOnMidUpdateClusterResource.Load() == false { 277 return nil 278 } 279 if req.GetTypeUrl() != version.V3ClusterURL { 280 return nil 281 } 282 for _, name := range req.GetResourceNames() { 283 if name == midUpdateClusterName { 284 primaryLis.Stop() 285 return fmt.Errorf("closing ADS stream because %q resource was requested", midUpdateClusterName) 286 } 287 } 288 return nil 289 }, 290 AllowResourceSubset: true, 291 }) 292 fallbackManagementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: fallbackLis}) 293 294 // Start three test service backends. 295 backend1 := stubserver.StartTestService(t, nil) 296 defer backend1.Stop() 297 backend2 := stubserver.StartTestService(t, nil) 298 defer backend2.Stop() 299 backend3 := stubserver.StartTestService(t, nil) 300 defer backend3.Stop() 301 302 // Configure xDS resource on the primary management server, with a cluster 303 // resource that contains an endpoint for backend1. 304 nodeID := uuid.New().String() 305 primaryResources := e2e.UpdateOptions{ 306 NodeID: nodeID, 307 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, routeConfigName)}, 308 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(routeConfigName, serviceName, clusterName)}, 309 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, endpointsName, e2e.SecurityLevelNone)}, 310 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(endpointsName, "localhost", []uint32{testutils.ParsePort(t, backend1.Address)})}, 311 } 312 if err := primaryManagementServer.Update(ctx, primaryResources); err != nil { 313 t.Fatal(err) 314 } 315 316 // Configure xDS resource on the secondary management server, with a cluster 317 // resource that contains an endpoint for backend2. Only the listener 318 // resource has the same name on both servers. 319 const ( 320 fallbackRouteConfigName = "fallback-route-" + serviceName 321 fallbackClusterName = "fallback-cluster-" + serviceName 322 fallbackEndpointsName = "fallback-endpoints-" + serviceName 323 ) 324 fallbackResources := e2e.UpdateOptions{ 325 NodeID: nodeID, 326 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, fallbackRouteConfigName)}, 327 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(fallbackRouteConfigName, serviceName, fallbackClusterName)}, 328 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(fallbackClusterName, fallbackEndpointsName, e2e.SecurityLevelNone)}, 329 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(fallbackEndpointsName, "localhost", []uint32{testutils.ParsePort(t, backend2.Address)})}, 330 } 331 if err := fallbackManagementServer.Update(ctx, fallbackResources); err != nil { 332 t.Fatal(err) 333 } 334 335 // Generate bootstrap configuration with the above two servers. 336 bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ 337 Servers: []byte(fmt.Sprintf(`[ 338 { 339 "server_uri": %q, 340 "channel_creds": [{"type": "insecure"}] 341 }, 342 { 343 "server_uri": %q, 344 "channel_creds": [{"type": "insecure"}] 345 }]`, primaryManagementServer.Address, fallbackManagementServer.Address)), 346 Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), 347 }) 348 if err != nil { 349 t.Fatalf("Failed to create bootstrap file: %v", err) 350 } 351 352 // Create an xDS client with the above bootstrap configuration. 353 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 354 if err != nil { 355 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 356 } 357 pool := xdsclient.NewPool(config) 358 if err != nil { 359 t.Fatalf("Failed to create xDS client: %v", err) 360 } 361 362 // Get the xDS resolver to use the above xDS client. 363 resolverBuilder := internal.NewXDSResolverWithPoolForTesting.(func(*xdsclient.Pool) (resolver.Builder, error)) 364 resolver, err := resolverBuilder(pool) 365 if err != nil { 366 t.Fatalf("Failed to create xDS resolver for testing: %v", err) 367 } 368 369 // Start a gRPC client that uses the above xDS resolver. 370 cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(resolver)) 371 if err != nil { 372 t.Fatalf("Failed to create gRPC client: %v", err) 373 } 374 defer cc.Close() 375 cc.Connect() 376 377 // Ensure that RPCs reach the cluster specified by the primary server and 378 // that no connection is attempted to the fallback server. 379 client := testgrpc.NewTestServiceClient(cc) 380 if err := waitForRPCsToReachBackend(ctx, client, backend1.Address); err != nil { 381 t.Fatal(err) 382 } 383 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 384 defer sCancel() 385 if _, err := fallbackWrappedLis.NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { 386 t.Fatalf("Connection attempt made to fallback server when none expected: %v", err) 387 } 388 389 // Instruct the primary server to close the connection if below defined 390 // cluster resource is requested. 391 closeConnOnMidUpdateClusterResource.Store(true) 392 393 // Update the listener resource on the primary server to point to a new 394 // route configuration that points to a new cluster that points to a new 395 // endpoints resource that contains backend3. 396 primaryResources = e2e.UpdateOptions{ 397 NodeID: nodeID, 398 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, midUpdateRouteConfigName)}, 399 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(midUpdateRouteConfigName, serviceName, midUpdateClusterName)}, 400 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(midUpdateClusterName, midUpdateEndpointsName, e2e.SecurityLevelNone)}, 401 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(midUpdateEndpointsName, "localhost", []uint32{testutils.ParsePort(t, backend3.Address)})}, 402 } 403 if err := primaryManagementServer.Update(ctx, primaryResources); err != nil { 404 t.Fatal(err) 405 } 406 407 // Ensure that a connection is attempted to the fallback (because both 408 // conditions mentioned for fallback in A71 are satisfied: connectivity 409 // failure and a watcher for an uncached resource), and that RPCs are 410 // routed to the cluster returned by the fallback server. 411 c, err := fallbackWrappedLis.NewConnCh.Receive(ctx) 412 if err != nil { 413 t.Fatalf("Failure when waiting for a connection to be opened to the fallback management server: %v", err) 414 } 415 fallbackConn := c.(*testutils.ConnWrapper) 416 if err := waitForRPCsToReachBackend(ctx, client, backend2.Address); err != nil { 417 t.Fatal(err) 418 } 419 420 // Set the primary management server to not close the connection anymore if 421 // the mid-update cluster resource is requested, and get it to start serving 422 // again. 423 closeConnOnMidUpdateClusterResource.Store(false) 424 primaryLis.Restart() 425 426 // A new snapshot, with the same resources, is pushed to the management 427 // server to get it to respond for already requested resource names. 428 if err := primaryManagementServer.Update(ctx, primaryResources); err != nil { 429 t.Fatal(err) 430 } 431 432 // Ensure that RPCs reach the backend pointed to by the new cluster. 433 if err := waitForRPCsToReachBackend(ctx, client, backend3.Address); err != nil { 434 t.Fatal(err) 435 } 436 437 // Wait for the connection to the secondary to be closed since we have 438 // reverted back to the primary. 439 if _, err := fallbackConn.CloseCh.Receive(ctx); err != nil { 440 t.Fatalf("Connection to fallback server not closed once primary becomes ready: %v", err) 441 } 442 } 443 444 // Tests fallback when the primary management server fails during startup. 445 func (s) TestFallback_MidStartup(t *testing.T) { 446 ctx, cancel := context.WithTimeout(context.Background(), defaultFallbackTestTimeout) 447 defer cancel() 448 449 // Create two listeners for the two management servers. The test can 450 // start/stop these listeners and can also get notified when the listener 451 // receives a connection request. 452 primaryWrappedLis := testutils.NewListenerWrapper(t, nil) 453 primaryLis := testutils.NewRestartableListener(primaryWrappedLis) 454 fallbackWrappedLis := testutils.NewListenerWrapper(t, nil) 455 fallbackLis := testutils.NewRestartableListener(fallbackWrappedLis) 456 457 // This boolean helps with triggering fallback during startup. When this 458 // boolean is set and a cluster resource is requested, the primary 459 // management server shuts down the connection, forcing the client to 460 // fallback to the secondary server. 461 var closeConnOnClusterResource atomic.Bool 462 closeConnOnClusterResource.Store(true) 463 464 // Start two management servers, primary and fallback, with the above 465 // listeners. 466 primaryManagementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 467 Listener: primaryLis, 468 OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error { 469 if closeConnOnClusterResource.Load() == false { 470 return nil 471 } 472 if req.GetTypeUrl() != version.V3ClusterURL { 473 return nil 474 } 475 primaryLis.Stop() 476 return fmt.Errorf("closing ADS stream because cluster resource was requested") 477 }, 478 AllowResourceSubset: true, 479 }) 480 fallbackManagementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: fallbackLis}) 481 482 // Start two test service backends. 483 backend1 := stubserver.StartTestService(t, nil) 484 defer backend1.Stop() 485 backend2 := stubserver.StartTestService(t, nil) 486 defer backend2.Stop() 487 488 // Configure xDS resource on the primary management server, with a cluster 489 // resource that contains an endpoint for backend1. 490 nodeID := uuid.New().String() 491 const serviceName = "my-service-fallback-xds" 492 primaryResources := e2e.DefaultClientResources(e2e.ResourceParams{ 493 DialTarget: serviceName, 494 NodeID: nodeID, 495 Host: "localhost", 496 Port: testutils.ParsePort(t, backend1.Address), 497 SecLevel: e2e.SecurityLevelNone, 498 }) 499 if err := primaryManagementServer.Update(ctx, primaryResources); err != nil { 500 t.Fatal(err) 501 } 502 503 // Configure xDS resource on the secondary management server, with a cluster 504 // resource that contains an endpoint for backend2. Only the listener 505 // resource has the same name on both servers. 506 fallbackRouteConfigName := "fallback-route-" + serviceName 507 fallbackClusterName := "fallback-cluster-" + serviceName 508 fallbackEndpointsName := "fallback-endpoints-" + serviceName 509 fallbackResources := e2e.UpdateOptions{ 510 NodeID: nodeID, 511 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, fallbackRouteConfigName)}, 512 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(fallbackRouteConfigName, serviceName, fallbackClusterName)}, 513 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(fallbackClusterName, fallbackEndpointsName, e2e.SecurityLevelNone)}, 514 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(fallbackEndpointsName, "localhost", []uint32{testutils.ParsePort(t, backend2.Address)})}, 515 } 516 if err := fallbackManagementServer.Update(ctx, fallbackResources); err != nil { 517 t.Fatal(err) 518 } 519 520 // Generate bootstrap configuration with the above two servers. 521 bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ 522 Servers: []byte(fmt.Sprintf(`[ 523 { 524 "server_uri": %q, 525 "channel_creds": [{"type": "insecure"}] 526 }, 527 { 528 "server_uri": %q, 529 "channel_creds": [{"type": "insecure"}] 530 }]`, primaryManagementServer.Address, fallbackManagementServer.Address)), 531 Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), 532 }) 533 if err != nil { 534 t.Fatalf("Failed to create bootstrap file: %v", err) 535 } 536 537 // Create an xDS client with the above bootstrap configuration. 538 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 539 if err != nil { 540 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 541 } 542 pool := xdsclient.NewPool(config) 543 if err != nil { 544 t.Fatalf("Failed to create xDS client: %v", err) 545 } 546 547 // Get the xDS resolver to use the above xDS client. 548 resolverBuilder := internal.NewXDSResolverWithPoolForTesting.(func(*xdsclient.Pool) (resolver.Builder, error)) 549 resolver, err := resolverBuilder(pool) 550 if err != nil { 551 t.Fatalf("Failed to create xDS resolver for testing: %v", err) 552 } 553 554 // Start a gRPC client that uses the above xDS resolver. 555 cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(resolver)) 556 if err != nil { 557 t.Fatalf("Failed to create gRPC client: %v", err) 558 } 559 defer cc.Close() 560 cc.Connect() 561 562 // Ensure that a connection is attempted to the primary. 563 if _, err := primaryWrappedLis.NewConnCh.Receive(ctx); err != nil { 564 t.Fatalf("Failure when waiting for a connection to be opened to the primary management server: %v", err) 565 } 566 567 // Ensure that a connection is attempted to the fallback. 568 c, err := fallbackWrappedLis.NewConnCh.Receive(ctx) 569 if err != nil { 570 t.Fatalf("Failure when waiting for a connection to be opened to the secondary management server: %v", err) 571 } 572 fallbackConn := c.(*testutils.ConnWrapper) 573 574 // Ensure that RPCs are routed to the cluster returned by the fallback 575 // management server. 576 client := testgrpc.NewTestServiceClient(cc) 577 if err := waitForRPCsToReachBackend(ctx, client, backend2.Address); err != nil { 578 t.Fatal(err) 579 } 580 581 // Get the primary management server to no longer close the connection when 582 // the cluster resource is requested. 583 closeConnOnClusterResource.Store(false) 584 primaryLis.Restart() 585 586 // A new snapshot, with the same resources, is pushed to the management 587 // server to get it to respond for already requested resource names. 588 if err := primaryManagementServer.Update(ctx, primaryResources); err != nil { 589 t.Fatal(err) 590 } 591 592 // Ensure that RPCs are routed to the cluster returned by the primary 593 // management server. 594 if err := waitForRPCsToReachBackend(ctx, client, backend1.Address); err != nil { 595 t.Fatal(err) 596 } 597 598 // Wait for the connection to the secondary to be closed since we have 599 // reverted back to the primary. 600 if _, err := fallbackConn.CloseCh.Receive(ctx); err != nil { 601 t.Fatalf("Connection to fallback server not closed once primary becomes ready: %v", err) 602 } 603 } 604 605 // Tests that RPCs succeed at startup when the primary management server is 606 // down, but the secondary is available. 607 func (s) TestFallback_OnStartup_RPCSuccess(t *testing.T) { 608 ctx, cancel := context.WithTimeout(context.Background(), defaultFallbackTestTimeout) 609 defer cancel() 610 611 // Create two listeners for the two management servers. The test can 612 // start/stop these listeners. 613 l, err := testutils.LocalTCPListener() 614 if err != nil { 615 t.Fatalf("Failed to create listener: %v", err) 616 } 617 primaryLis := testutils.NewRestartableListener(l) 618 l, err = testutils.LocalTCPListener() 619 if err != nil { 620 t.Fatalf("Failed to create listener: %v", err) 621 } 622 fallbackLis := testutils.NewRestartableListener(l) 623 624 // Start two management servers, primary and fallback, with the above 625 // listeners. 626 primaryManagementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: primaryLis}) 627 fallbackManagementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: fallbackLis}) 628 629 // Start two test service backends. 630 backend1 := stubserver.StartTestService(t, nil) 631 defer backend1.Stop() 632 backend2 := stubserver.StartTestService(t, nil) 633 defer backend2.Stop() 634 635 // Configure xDS resource on the primary management server, with a cluster 636 // resource that contains an endpoint for backend1. 637 nodeID := uuid.New().String() 638 const serviceName = "my-service-fallback-xds" 639 resources := e2e.DefaultClientResources(e2e.ResourceParams{ 640 DialTarget: serviceName, 641 NodeID: nodeID, 642 Host: "localhost", 643 Port: testutils.ParsePort(t, backend1.Address), 644 SecLevel: e2e.SecurityLevelNone, 645 }) 646 if err := primaryManagementServer.Update(ctx, resources); err != nil { 647 t.Fatal(err) 648 } 649 650 // Configure xDS resource on the secondary management server, with a cluster 651 // resource that contains an endpoint for backend2. Only the listener 652 // resource has the same name on both servers. 653 fallbackRouteConfigName := "fallback-route-" + serviceName 654 fallbackClusterName := "fallback-cluster-" + serviceName 655 fallbackEndpointsName := "fallback-endpoints-" + serviceName 656 resources = e2e.UpdateOptions{ 657 NodeID: nodeID, 658 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, fallbackRouteConfigName)}, 659 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(fallbackRouteConfigName, serviceName, fallbackClusterName)}, 660 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(fallbackClusterName, fallbackEndpointsName, e2e.SecurityLevelNone)}, 661 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(fallbackEndpointsName, "localhost", []uint32{testutils.ParsePort(t, backend2.Address)})}, 662 } 663 if err := fallbackManagementServer.Update(ctx, resources); err != nil { 664 t.Fatal(err) 665 } 666 667 // Shutdown the primary management server before starting the gRPC client to 668 // trigger fallback on startup. 669 primaryLis.Stop() 670 671 // Generate bootstrap configuration with the above two servers. 672 bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ 673 Servers: []byte(fmt.Sprintf(`[ 674 { 675 "server_uri": %q, 676 "channel_creds": [{"type": "insecure"}] 677 }, 678 { 679 "server_uri": %q, 680 "channel_creds": [{"type": "insecure"}] 681 }]`, primaryManagementServer.Address, fallbackManagementServer.Address)), 682 Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), 683 }) 684 if err != nil { 685 t.Fatalf("Failed to create bootstrap file: %v", err) 686 } 687 688 // Create an xDS client with the above bootstrap configuration. 689 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 690 if err != nil { 691 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 692 } 693 pool := xdsclient.NewPool(config) 694 if err != nil { 695 t.Fatalf("Failed to create xDS client: %v", err) 696 } 697 698 // Get the xDS resolver to use the above xDS client. 699 resolverBuilder := internal.NewXDSResolverWithPoolForTesting.(func(*xdsclient.Pool) (resolver.Builder, error)) 700 resolver, err := resolverBuilder(pool) 701 if err != nil { 702 t.Fatalf("Failed to create xDS resolver for testing: %v", err) 703 } 704 705 // Start a gRPC client that uses the above xDS resolver. 706 cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(resolver)) 707 if err != nil { 708 t.Fatalf("Failed to create gRPC client: %v", err) 709 } 710 defer cc.Close() 711 712 // Make an RPC (without the `wait_for_ready` call option) and expect it to 713 // succeed since the fallback management server is up and running. 714 client := testgrpc.NewTestServiceClient(cc) 715 var peer peer.Peer 716 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil { 717 t.Fatalf("EmptyCall() failed: %v", err) 718 } 719 if got, want := peer.Addr.String(), backend2.Address; got != want { 720 t.Fatalf("Unexpected peer address: got %q, want %q", got, want) 721 } 722 723 // Start the primary server. It can take a while before the xDS client 724 // notices this, since the ADS stream implementation uses a backoff before 725 // retrying the stream. 726 primaryLis.Restart() 727 if err := waitForRPCsToReachBackend(ctx, client, backend1.Address); err != nil { 728 t.Fatal(err) 729 } 730 }