google.golang.org/grpc@v1.72.2/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go (about) 1 /* 2 * Copyright 2023 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package e2e_test 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "strings" 24 "testing" 25 "time" 26 27 "github.com/google/go-cmp/cmp" 28 "github.com/google/uuid" 29 "google.golang.org/grpc" 30 "google.golang.org/grpc/balancer" 31 "google.golang.org/grpc/balancer/roundrobin" 32 "google.golang.org/grpc/codes" 33 "google.golang.org/grpc/connectivity" 34 "google.golang.org/grpc/credentials/insecure" 35 "google.golang.org/grpc/internal" 36 "google.golang.org/grpc/internal/balancer/stub" 37 iserviceconfig "google.golang.org/grpc/internal/serviceconfig" 38 "google.golang.org/grpc/internal/stubserver" 39 "google.golang.org/grpc/internal/testutils" 40 "google.golang.org/grpc/internal/testutils/xds/e2e" 41 "google.golang.org/grpc/internal/xds/bootstrap" 42 "google.golang.org/grpc/resolver" 43 "google.golang.org/grpc/resolver/manual" 44 "google.golang.org/grpc/serviceconfig" 45 "google.golang.org/grpc/status" 46 xdsinternal "google.golang.org/grpc/xds/internal" 47 "google.golang.org/grpc/xds/internal/balancer/clusterimpl" 48 "google.golang.org/grpc/xds/internal/balancer/outlierdetection" 49 "google.golang.org/grpc/xds/internal/balancer/priority" 50 "google.golang.org/grpc/xds/internal/balancer/wrrlocality" 51 "google.golang.org/grpc/xds/internal/xdsclient" 52 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" 53 "google.golang.org/protobuf/types/known/durationpb" 54 "google.golang.org/protobuf/types/known/wrapperspb" 55 56 v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" 57 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 58 v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" 59 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 60 testgrpc "google.golang.org/grpc/interop/grpc_testing" 61 testpb "google.golang.org/grpc/interop/grpc_testing" 62 63 _ "google.golang.org/grpc/xds/internal/balancer/cdsbalancer" // Register the "cds_experimental" LB policy. 64 ) 65 66 // setupAndDial performs common setup across all tests 67 // 68 // - creates an xDS client with the passed in bootstrap contents 69 // - creates a manual resolver that configures `cds_experimental` as the 70 // top-level LB policy. 71 // - creates a ClientConn to talk to the test backends 72 // 73 // Returns a function to close the ClientConn and the xDS client. 74 func setupAndDial(t *testing.T, bootstrapContents []byte) (*grpc.ClientConn, func()) { 75 t.Helper() 76 77 // Create an xDS client for use by the cluster_resolver LB policy. 78 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 79 if err != nil { 80 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 81 } 82 pool := xdsclient.NewPool(config) 83 xdsC, xdsClose, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ 84 Name: t.Name(), 85 }) 86 if err != nil { 87 t.Fatalf("Failed to create xDS client: %v", err) 88 } 89 90 // Create a manual resolver and push a service config specifying the use of 91 // the cds LB policy as the top-level LB policy, and a corresponding config 92 // with a single cluster. 93 r := manual.NewBuilderWithScheme("whatever") 94 jsonSC := fmt.Sprintf(`{ 95 "loadBalancingConfig":[{ 96 "cds_experimental":{ 97 "cluster": "%s" 98 } 99 }] 100 }`, clusterName) 101 scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC) 102 r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsC)) 103 104 // Create a ClientConn and make a successful RPC. 105 cc, err := grpc.NewClient(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r)) 106 if err != nil { 107 xdsClose() 108 t.Fatalf("grpc.NewClient() failed: %v", err) 109 } 110 cc.Connect() 111 return cc, func() { 112 xdsClose() 113 cc.Close() 114 } 115 } 116 117 // TestErrorFromParentLB_ConnectionError tests the case where the parent of the 118 // clusterresolver LB policy sends it a connection error. The parent policy, 119 // CDS LB policy, sends a connection error when the ADS stream to the management 120 // server breaks. The test verifies that there is no perceivable effect because 121 // of this connection error, and that RPCs continue to work (because the LB 122 // policies are expected to use previously received xDS resources). 123 func (s) TestErrorFromParentLB_ConnectionError(t *testing.T) { 124 // Create a listener to be used by the management server. The test will 125 // close this listener to simulate ADS stream breakage. 126 lis, err := testutils.LocalTCPListener() 127 if err != nil { 128 t.Fatalf("testutils.LocalTCPListener() failed: %v", err) 129 } 130 131 // Start an xDS management server with the above restartable listener, and 132 // push a channel when the stream is closed. 133 streamClosedCh := make(chan struct{}, 1) 134 managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 135 Listener: lis, 136 OnStreamClosed: func(int64, *v3corepb.Node) { 137 select { 138 case streamClosedCh <- struct{}{}: 139 default: 140 } 141 }, 142 }) 143 144 // Create bootstrap configuration pointing to the above management server. 145 nodeID := uuid.New().String() 146 bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address) 147 148 server := stubserver.StartTestService(t, nil) 149 defer server.Stop() 150 151 // Configure cluster and endpoints resources in the management server. 152 resources := e2e.UpdateOptions{ 153 NodeID: nodeID, 154 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, edsServiceName, e2e.SecurityLevelNone)}, 155 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsServiceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})}, 156 SkipValidation: true, 157 } 158 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 159 defer cancel() 160 if err := managementServer.Update(ctx, resources); err != nil { 161 t.Fatal(err) 162 } 163 164 // Create xDS client, configure cds_experimental LB policy with a manual 165 // resolver, and dial the test backends. 166 cc, cleanup := setupAndDial(t, bootstrapContents) 167 defer cleanup() 168 169 client := testgrpc.NewTestServiceClient(cc) 170 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 171 t.Fatalf("EmptyCall() failed: %v", err) 172 } 173 174 // Close the listener and ensure that the ADS stream breaks. 175 lis.Close() 176 select { 177 case <-ctx.Done(): 178 t.Fatal("Timeout when waiting for ADS stream to close") 179 default: 180 } 181 182 // Ensure that RPCs continue to succeed for the next second. 183 for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) { 184 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 185 t.Fatalf("EmptyCall() failed: %v", err) 186 } 187 } 188 } 189 190 // TestErrorFromParentLB_ResourceNotFound tests the case where the parent of the 191 // clusterresolver LB policy sends it a resource-not-found error. The parent 192 // policy, CDS LB policy, sends a resource-not-found error when the cluster 193 // resource associated with these LB policies is removed by the management 194 // server. The test verifies that the associated EDS is canceled and RPCs fail. 195 // It also ensures that when the Cluster resource is added back, the EDS 196 // resource is re-requested and RPCs being to succeed. 197 func (s) TestErrorFromParentLB_ResourceNotFound(t *testing.T) { 198 // Start an xDS management server that uses a couple of channels to 199 // notify the test about the following events: 200 // - an EDS requested with the expected resource name is requested 201 // - EDS resource is unrequested, i.e, an EDS request with no resource name 202 // is received, which indicates that we are no longer interested in that 203 // resource. 204 edsResourceRequestedCh := make(chan struct{}, 1) 205 edsResourceCanceledCh := make(chan struct{}, 1) 206 managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 207 OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { 208 if req.GetTypeUrl() == version.V3EndpointsURL { 209 switch len(req.GetResourceNames()) { 210 case 0: 211 select { 212 case edsResourceCanceledCh <- struct{}{}: 213 default: 214 } 215 case 1: 216 if req.GetResourceNames()[0] == edsServiceName { 217 select { 218 case edsResourceRequestedCh <- struct{}{}: 219 default: 220 } 221 } 222 default: 223 t.Errorf("Unexpected number of resources, %d, in an EDS request", len(req.GetResourceNames())) 224 } 225 } 226 return nil 227 }, 228 }) 229 230 // Create bootstrap configuration pointing to the above management server. 231 nodeID := uuid.New().String() 232 bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address) 233 234 server := stubserver.StartTestService(t, nil) 235 defer server.Stop() 236 237 // Configure cluster and endpoints resources in the management server. 238 resources := e2e.UpdateOptions{ 239 NodeID: nodeID, 240 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, edsServiceName, e2e.SecurityLevelNone)}, 241 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsServiceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})}, 242 SkipValidation: true, 243 } 244 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 245 defer cancel() 246 if err := managementServer.Update(ctx, resources); err != nil { 247 t.Fatal(err) 248 } 249 250 // Create xDS client, configure cds_experimental LB policy with a manual 251 // resolver, and dial the test backends. 252 cc, cleanup := setupAndDial(t, bootstrapContents) 253 defer cleanup() 254 255 // Wait for the EDS resource to be requested. 256 select { 257 case <-ctx.Done(): 258 t.Fatal("Timeout when waiting for EDS resource to be requested") 259 case <-edsResourceRequestedCh: 260 } 261 262 // Ensure that a successful RPC can be made. 263 client := testgrpc.NewTestServiceClient(cc) 264 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 265 t.Fatalf("EmptyCall() failed: %v", err) 266 } 267 268 // Delete the cluster resource from the management server. 269 resources.Clusters = nil 270 if err := managementServer.Update(ctx, resources); err != nil { 271 t.Fatal(err) 272 } 273 274 // Wait for the EDS resource to be not requested anymore. 275 select { 276 case <-ctx.Done(): 277 t.Fatal("Timeout when waiting for EDS resource to not requested") 278 case <-edsResourceCanceledCh: 279 } 280 281 // Ensure that RPCs start to fail with expected error. 282 wantErr := fmt.Sprintf("cluster %q not found", clusterName) 283 for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { 284 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 285 defer sCancel() 286 _, err := client.EmptyCall(sCtx, &testpb.Empty{}) 287 if status.Code(err) == codes.Unavailable && strings.Contains(err.Error(), wantErr) { 288 break 289 } 290 if err != nil { 291 t.Logf("EmptyCall failed: %v", err) 292 } 293 } 294 if ctx.Err() != nil { 295 t.Fatalf("RPCs did not fail after removal of Cluster resource") 296 } 297 298 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 299 300 // Configure cluster and endpoints resources in the management server. 301 resources = e2e.UpdateOptions{ 302 NodeID: nodeID, 303 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, edsServiceName, e2e.SecurityLevelNone)}, 304 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsServiceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})}, 305 SkipValidation: true, 306 } 307 if err := managementServer.Update(ctx, resources); err != nil { 308 t.Fatal(err) 309 } 310 311 // Wait for the EDS resource to be requested again. 312 select { 313 case <-ctx.Done(): 314 t.Fatal("Timeout when waiting for EDS resource to be requested") 315 case <-edsResourceRequestedCh: 316 } 317 318 // Ensure that a successful RPC can be made. 319 for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { 320 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 321 defer sCancel() 322 if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); err != nil { 323 t.Logf("EmptyCall failed: %v", err) 324 continue 325 } 326 break 327 } 328 if ctx.Err() != nil { 329 t.Fatalf("RPCs did not fail after removal of Cluster resource") 330 } 331 } 332 333 // Test verifies that when the received Cluster resource contains outlier 334 // detection configuration, the LB config pushed to the child policy contains 335 // the appropriate configuration for the outlier detection LB policy. 336 func (s) TestOutlierDetectionConfigPropagationToChildPolicy(t *testing.T) { 337 // Unregister the priority balancer builder for the duration of this test, 338 // and register a policy under the same name that makes the LB config 339 // pushed to it available to the test. 340 priorityBuilder := balancer.Get(priority.Name) 341 internal.BalancerUnregister(priorityBuilder.Name()) 342 lbCfgCh := make(chan serviceconfig.LoadBalancingConfig, 1) 343 stub.Register(priority.Name, stub.BalancerFuncs{ 344 Init: func(bd *stub.BalancerData) { 345 bd.Data = priorityBuilder.Build(bd.ClientConn, bd.BuildOptions) 346 }, 347 ParseConfig: func(lbCfg json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { 348 return priorityBuilder.(balancer.ConfigParser).ParseConfig(lbCfg) 349 }, 350 UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { 351 select { 352 case lbCfgCh <- ccs.BalancerConfig: 353 default: 354 } 355 bal := bd.Data.(balancer.Balancer) 356 return bal.UpdateClientConnState(ccs) 357 }, 358 Close: func(bd *stub.BalancerData) { 359 bal := bd.Data.(balancer.Balancer) 360 bal.Close() 361 }, 362 }) 363 defer balancer.Register(priorityBuilder) 364 365 managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) 366 367 // Create bootstrap configuration pointing to the above management server. 368 nodeID := uuid.New().String() 369 bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address) 370 371 server := stubserver.StartTestService(t, nil) 372 defer server.Stop() 373 374 // Configure cluster and endpoints resources in the management server. 375 cluster := e2e.DefaultCluster(clusterName, edsServiceName, e2e.SecurityLevelNone) 376 cluster.OutlierDetection = &v3clusterpb.OutlierDetection{ 377 Interval: durationpb.New(10 * time.Second), 378 BaseEjectionTime: durationpb.New(30 * time.Second), 379 MaxEjectionTime: durationpb.New(300 * time.Second), 380 MaxEjectionPercent: wrapperspb.UInt32(10), 381 SuccessRateStdevFactor: wrapperspb.UInt32(2000), 382 EnforcingSuccessRate: wrapperspb.UInt32(50), 383 SuccessRateMinimumHosts: wrapperspb.UInt32(10), 384 SuccessRateRequestVolume: wrapperspb.UInt32(50), 385 } 386 resources := e2e.UpdateOptions{ 387 NodeID: nodeID, 388 Clusters: []*v3clusterpb.Cluster{cluster}, 389 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsServiceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})}, 390 SkipValidation: true, 391 } 392 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 393 defer cancel() 394 if err := managementServer.Update(ctx, resources); err != nil { 395 t.Fatal(err) 396 } 397 398 // Create xDS client, configure cds_experimental LB policy with a manual 399 // resolver, and dial the test backends. 400 _, cleanup := setupAndDial(t, bootstrapContents) 401 defer cleanup() 402 403 // The priority configuration generated should have Outlier Detection as a 404 // direct child due to Outlier Detection being turned on. 405 wantCfg := &priority.LBConfig{ 406 Children: map[string]*priority.Child{ 407 "priority-0-0": { 408 Config: &iserviceconfig.BalancerConfig{ 409 Name: outlierdetection.Name, 410 Config: &outlierdetection.LBConfig{ 411 Interval: iserviceconfig.Duration(10 * time.Second), // default interval 412 BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), 413 MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), 414 MaxEjectionPercent: 10, 415 SuccessRateEjection: &outlierdetection.SuccessRateEjection{ 416 StdevFactor: 2000, 417 EnforcementPercentage: 50, 418 MinimumHosts: 10, 419 RequestVolume: 50, 420 }, 421 ChildPolicy: &iserviceconfig.BalancerConfig{ 422 Name: clusterimpl.Name, 423 Config: &clusterimpl.LBConfig{ 424 Cluster: clusterName, 425 EDSServiceName: edsServiceName, 426 TelemetryLabels: xdsinternal.UnknownCSMLabels, 427 ChildPolicy: &iserviceconfig.BalancerConfig{ 428 Name: wrrlocality.Name, 429 Config: &wrrlocality.LBConfig{ 430 ChildPolicy: &iserviceconfig.BalancerConfig{ 431 Name: roundrobin.Name, 432 }, 433 }, 434 }, 435 }, 436 }, 437 }, 438 }, 439 IgnoreReresolutionRequests: true, 440 }, 441 }, 442 Priorities: []string{"priority-0-0"}, 443 } 444 445 select { 446 case lbCfg := <-lbCfgCh: 447 gotCfg := lbCfg.(*priority.LBConfig) 448 if diff := cmp.Diff(wantCfg, gotCfg); diff != "" { 449 t.Fatalf("Child policy received unexpected diff in config (-want +got):\n%s", diff) 450 } 451 case <-ctx.Done(): 452 t.Fatalf("Timeout when waiting for child policy to receive its configuration") 453 } 454 }