google.golang.org/grpc@v1.72.2/xds/internal/balancer/cdsbalancer/aggregate_cluster_test.go (about) 1 /* 2 * Copyright 2021 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package cdsbalancer 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "strings" 24 "testing" 25 "time" 26 27 "google.golang.org/grpc" 28 "google.golang.org/grpc/codes" 29 "google.golang.org/grpc/connectivity" 30 "google.golang.org/grpc/internal/pretty" 31 "google.golang.org/grpc/internal/stubserver" 32 "google.golang.org/grpc/internal/testutils" 33 "google.golang.org/grpc/internal/testutils/xds/e2e" 34 "google.golang.org/grpc/serviceconfig" 35 "google.golang.org/grpc/status" 36 "google.golang.org/grpc/xds/internal" 37 "google.golang.org/grpc/xds/internal/balancer/clusterresolver" 38 39 v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" 40 v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" 41 testgrpc "google.golang.org/grpc/interop/grpc_testing" 42 testpb "google.golang.org/grpc/interop/grpc_testing" 43 ) 44 45 // makeAggregateClusterResource returns an aggregate cluster resource with the 46 // given name and list of child names. 47 func makeAggregateClusterResource(name string, childNames []string) *v3clusterpb.Cluster { 48 return e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 49 ClusterName: name, 50 Type: e2e.ClusterTypeAggregate, 51 ChildNames: childNames, 52 }) 53 } 54 55 // makeLogicalDNSClusterResource returns a LOGICAL_DNS cluster resource with the 56 // given name and given DNS host and port. 57 func makeLogicalDNSClusterResource(name, dnsHost string, dnsPort uint32) *v3clusterpb.Cluster { 58 return e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 59 ClusterName: name, 60 Type: e2e.ClusterTypeLogicalDNS, 61 DNSHostName: dnsHost, 62 DNSPort: dnsPort, 63 }) 64 } 65 66 // Tests the case where the cluster resource requested by the cds LB policy is a 67 // leaf cluster. The management server sends two updates for the same leaf 68 // cluster resource. The test verifies that the load balancing configuration 69 // pushed to the cluster_resolver LB policy contains the expected discovery 70 // mechanism corresponding to the leaf cluster, on both occasions. 71 func (s) TestAggregateClusterSuccess_LeafNode(t *testing.T) { 72 tests := []struct { 73 name string 74 firstClusterResource *v3clusterpb.Cluster 75 secondClusterResource *v3clusterpb.Cluster 76 wantFirstChildCfg serviceconfig.LoadBalancingConfig 77 wantSecondChildCfg serviceconfig.LoadBalancingConfig 78 }{ 79 { 80 name: "eds", 81 firstClusterResource: e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone), 82 secondClusterResource: e2e.DefaultCluster(clusterName, serviceName+"-new", e2e.SecurityLevelNone), 83 wantFirstChildCfg: &clusterresolver.LBConfig{ 84 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 85 Cluster: clusterName, 86 Type: clusterresolver.DiscoveryMechanismTypeEDS, 87 EDSServiceName: serviceName, 88 OutlierDetection: json.RawMessage(`{}`), 89 TelemetryLabels: internal.UnknownCSMLabels, 90 }}, 91 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 92 }, 93 wantSecondChildCfg: &clusterresolver.LBConfig{ 94 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 95 Cluster: clusterName, 96 Type: clusterresolver.DiscoveryMechanismTypeEDS, 97 EDSServiceName: serviceName + "-new", 98 OutlierDetection: json.RawMessage(`{}`), 99 TelemetryLabels: internal.UnknownCSMLabels, 100 }}, 101 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 102 }, 103 }, 104 { 105 name: "dns", 106 firstClusterResource: makeLogicalDNSClusterResource(clusterName, "dns_host", uint32(8080)), 107 secondClusterResource: makeLogicalDNSClusterResource(clusterName, "dns_host_new", uint32(8080)), 108 wantFirstChildCfg: &clusterresolver.LBConfig{ 109 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 110 Cluster: clusterName, 111 Type: clusterresolver.DiscoveryMechanismTypeLogicalDNS, 112 DNSHostname: "dns_host:8080", 113 OutlierDetection: json.RawMessage(`{}`), 114 TelemetryLabels: internal.UnknownCSMLabels, 115 }}, 116 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 117 }, 118 wantSecondChildCfg: &clusterresolver.LBConfig{ 119 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 120 Cluster: clusterName, 121 Type: clusterresolver.DiscoveryMechanismTypeLogicalDNS, 122 DNSHostname: "dns_host_new:8080", 123 OutlierDetection: json.RawMessage(`{}`), 124 TelemetryLabels: internal.UnknownCSMLabels, 125 }}, 126 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 127 }, 128 }, 129 } 130 131 for _, test := range tests { 132 t.Run(test.name, func(t *testing.T) { 133 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 134 mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t) 135 136 // Push the first cluster resource through the management server and 137 // verify the configuration pushed to the child policy. 138 resources := e2e.UpdateOptions{ 139 NodeID: nodeID, 140 Clusters: []*v3clusterpb.Cluster{test.firstClusterResource}, 141 SkipValidation: true, 142 } 143 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 144 defer cancel() 145 if err := mgmtServer.Update(ctx, resources); err != nil { 146 t.Fatal(err) 147 } 148 if err := compareLoadBalancingConfig(ctx, lbCfgCh, test.wantFirstChildCfg); err != nil { 149 t.Fatal(err) 150 } 151 152 // Push the second cluster resource through the management server and 153 // verify the configuration pushed to the child policy. 154 resources.Clusters[0] = test.secondClusterResource 155 if err := mgmtServer.Update(ctx, resources); err != nil { 156 t.Fatal(err) 157 } 158 if err := compareLoadBalancingConfig(ctx, lbCfgCh, test.wantSecondChildCfg); err != nil { 159 t.Fatal(err) 160 } 161 }) 162 } 163 } 164 165 // Tests the case where the cluster resource requested by the cds LB policy is 166 // an aggregate cluster root pointing to two child clusters, one of type EDS and 167 // the other of type LogicalDNS. The test verifies that load balancing 168 // configuration is pushed to the cluster_resolver LB policy only when all child 169 // clusters are resolved and it also verifies that the pushed configuration 170 // contains the expected discovery mechanisms. The test then updates the 171 // aggregate cluster to point to two child clusters, the same leaf cluster of 172 // type EDS and a different leaf cluster of type LogicalDNS and verifies that 173 // the load balancing configuration pushed to the cluster_resolver LB policy 174 // contains the expected discovery mechanisms. 175 func (s) TestAggregateClusterSuccess_ThenUpdateChildClusters(t *testing.T) { 176 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 177 mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t) 178 179 // Configure the management server with the aggregate cluster resource 180 // pointing to two child clusters, one EDS and one LogicalDNS. Include the 181 // resource corresponding to the EDS cluster here, but don't include 182 // resource corresponding to the LogicalDNS cluster yet. 183 resources := e2e.UpdateOptions{ 184 NodeID: nodeID, 185 Clusters: []*v3clusterpb.Cluster{ 186 makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}), 187 e2e.DefaultCluster(edsClusterName, serviceName, e2e.SecurityLevelNone), 188 }, 189 SkipValidation: true, 190 } 191 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 192 defer cancel() 193 if err := mgmtServer.Update(ctx, resources); err != nil { 194 t.Fatal(err) 195 } 196 197 // Verify that no configuration is pushed to the child policy yet, because 198 // not all clusters making up the aggregate cluster have been resolved yet. 199 select { 200 case cfg := <-lbCfgCh: 201 t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg)) 202 case <-time.After(defaultTestShortTimeout): 203 } 204 205 // Now configure the LogicalDNS cluster in the management server. This 206 // should result in configuration being pushed down to the child policy. 207 resources.Clusters = append(resources.Clusters, makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort)) 208 if err := mgmtServer.Update(ctx, resources); err != nil { 209 t.Fatal(err) 210 } 211 212 wantChildCfg := &clusterresolver.LBConfig{ 213 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{ 214 { 215 Cluster: edsClusterName, 216 Type: clusterresolver.DiscoveryMechanismTypeEDS, 217 EDSServiceName: serviceName, 218 OutlierDetection: json.RawMessage(`{}`), 219 TelemetryLabels: internal.UnknownCSMLabels, 220 }, 221 { 222 Cluster: dnsClusterName, 223 Type: clusterresolver.DiscoveryMechanismTypeLogicalDNS, 224 DNSHostname: fmt.Sprintf("%s:%d", dnsHostName, dnsPort), 225 OutlierDetection: json.RawMessage(`{}`), 226 TelemetryLabels: internal.UnknownCSMLabels, 227 }, 228 }, 229 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 230 } 231 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 232 t.Fatal(err) 233 } 234 235 const dnsClusterNameNew = dnsClusterName + "-new" 236 const dnsHostNameNew = dnsHostName + "-new" 237 resources = e2e.UpdateOptions{ 238 NodeID: nodeID, 239 Clusters: []*v3clusterpb.Cluster{ 240 makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterNameNew}), 241 e2e.DefaultCluster(edsClusterName, serviceName, e2e.SecurityLevelNone), 242 makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), 243 makeLogicalDNSClusterResource(dnsClusterNameNew, dnsHostNameNew, dnsPort), 244 }, 245 SkipValidation: true, 246 } 247 if err := mgmtServer.Update(ctx, resources); err != nil { 248 t.Fatal(err) 249 } 250 wantChildCfg = &clusterresolver.LBConfig{ 251 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{ 252 { 253 Cluster: edsClusterName, 254 Type: clusterresolver.DiscoveryMechanismTypeEDS, 255 EDSServiceName: serviceName, 256 OutlierDetection: json.RawMessage(`{}`), 257 TelemetryLabels: internal.UnknownCSMLabels, 258 }, 259 { 260 Cluster: dnsClusterNameNew, 261 Type: clusterresolver.DiscoveryMechanismTypeLogicalDNS, 262 DNSHostname: fmt.Sprintf("%s:%d", dnsHostNameNew, dnsPort), 263 OutlierDetection: json.RawMessage(`{}`), 264 TelemetryLabels: internal.UnknownCSMLabels, 265 }, 266 }, 267 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 268 } 269 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 270 t.Fatal(err) 271 } 272 } 273 274 // Tests the case where the cluster resource requested by the cds LB policy is 275 // an aggregate cluster root pointing to two child clusters, one of type EDS and 276 // the other of type LogicalDNS. The test verifies that the load balancing 277 // configuration pushed to the cluster_resolver LB policy contains the discovery 278 // mechanisms for both child clusters. The test then updates the root cluster 279 // resource requested by the cds LB policy to a leaf cluster of type EDS and 280 // verifies the load balancing configuration pushed to the cluster_resolver LB 281 // policy contains a single discovery mechanism. 282 func (s) TestAggregateClusterSuccess_ThenChangeRootToEDS(t *testing.T) { 283 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 284 mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t) 285 286 // Configure the management server with the aggregate cluster resource 287 // pointing to two child clusters. 288 resources := e2e.UpdateOptions{ 289 NodeID: nodeID, 290 Clusters: []*v3clusterpb.Cluster{ 291 makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}), 292 e2e.DefaultCluster(edsClusterName, serviceName, e2e.SecurityLevelNone), 293 makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), 294 }, 295 SkipValidation: true, 296 } 297 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 298 defer cancel() 299 if err := mgmtServer.Update(ctx, resources); err != nil { 300 t.Fatal(err) 301 } 302 303 wantChildCfg := &clusterresolver.LBConfig{ 304 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{ 305 { 306 Cluster: edsClusterName, 307 Type: clusterresolver.DiscoveryMechanismTypeEDS, 308 EDSServiceName: serviceName, 309 OutlierDetection: json.RawMessage(`{}`), 310 TelemetryLabels: internal.UnknownCSMLabels, 311 }, 312 { 313 Cluster: dnsClusterName, 314 Type: clusterresolver.DiscoveryMechanismTypeLogicalDNS, 315 DNSHostname: fmt.Sprintf("%s:%d", dnsHostName, dnsPort), 316 OutlierDetection: json.RawMessage(`{}`), 317 TelemetryLabels: internal.UnknownCSMLabels, 318 }, 319 }, 320 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 321 } 322 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 323 t.Fatal(err) 324 } 325 326 resources = e2e.UpdateOptions{ 327 NodeID: nodeID, 328 Clusters: []*v3clusterpb.Cluster{ 329 e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone), 330 makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), 331 }, 332 SkipValidation: true, 333 } 334 if err := mgmtServer.Update(ctx, resources); err != nil { 335 t.Fatal(err) 336 } 337 wantChildCfg = &clusterresolver.LBConfig{ 338 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 339 Cluster: clusterName, 340 Type: clusterresolver.DiscoveryMechanismTypeEDS, 341 EDSServiceName: serviceName, 342 OutlierDetection: json.RawMessage(`{}`), 343 TelemetryLabels: internal.UnknownCSMLabels, 344 }}, 345 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 346 } 347 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 348 t.Fatal(err) 349 } 350 } 351 352 // Tests the case where a requested cluster resource switches between being a 353 // leaf and an aggregate cluster pointing to an EDS and LogicalDNS child 354 // cluster. In each of these cases, the test verifies that the load balancing 355 // configuration pushed to the cluster_resolver LB policy contains the expected 356 // discovery mechanisms. 357 func (s) TestAggregatedClusterSuccess_SwitchBetweenLeafAndAggregate(t *testing.T) { 358 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 359 mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t) 360 361 // Start off with the requested cluster being a leaf EDS cluster. 362 resources := e2e.UpdateOptions{ 363 NodeID: nodeID, 364 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)}, 365 SkipValidation: true, 366 } 367 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 368 defer cancel() 369 if err := mgmtServer.Update(ctx, resources); err != nil { 370 t.Fatal(err) 371 } 372 wantChildCfg := &clusterresolver.LBConfig{ 373 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 374 Cluster: clusterName, 375 Type: clusterresolver.DiscoveryMechanismTypeEDS, 376 EDSServiceName: serviceName, 377 OutlierDetection: json.RawMessage(`{}`), 378 TelemetryLabels: internal.UnknownCSMLabels, 379 }}, 380 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 381 } 382 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 383 t.Fatal(err) 384 } 385 386 // Switch the requested cluster to be an aggregate cluster pointing to two 387 // child clusters. 388 resources = e2e.UpdateOptions{ 389 NodeID: nodeID, 390 Clusters: []*v3clusterpb.Cluster{ 391 makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}), 392 e2e.DefaultCluster(edsClusterName, serviceName, e2e.SecurityLevelNone), 393 makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), 394 }, 395 SkipValidation: true, 396 } 397 if err := mgmtServer.Update(ctx, resources); err != nil { 398 t.Fatal(err) 399 } 400 wantChildCfg = &clusterresolver.LBConfig{ 401 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{ 402 { 403 Cluster: edsClusterName, 404 Type: clusterresolver.DiscoveryMechanismTypeEDS, 405 EDSServiceName: serviceName, 406 OutlierDetection: json.RawMessage(`{}`), 407 TelemetryLabels: internal.UnknownCSMLabels, 408 }, 409 { 410 Cluster: dnsClusterName, 411 Type: clusterresolver.DiscoveryMechanismTypeLogicalDNS, 412 DNSHostname: fmt.Sprintf("%s:%d", dnsHostName, dnsPort), 413 OutlierDetection: json.RawMessage(`{}`), 414 TelemetryLabels: internal.UnknownCSMLabels, 415 }, 416 }, 417 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 418 } 419 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 420 t.Fatal(err) 421 } 422 423 // Switch the cluster back to a leaf EDS cluster. 424 resources = e2e.UpdateOptions{ 425 NodeID: nodeID, 426 Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)}, 427 SkipValidation: true, 428 } 429 if err := mgmtServer.Update(ctx, resources); err != nil { 430 t.Fatal(err) 431 } 432 wantChildCfg = &clusterresolver.LBConfig{ 433 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 434 Cluster: clusterName, 435 Type: clusterresolver.DiscoveryMechanismTypeEDS, 436 EDSServiceName: serviceName, 437 OutlierDetection: json.RawMessage(`{}`), 438 TelemetryLabels: internal.UnknownCSMLabels, 439 }}, 440 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 441 } 442 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 443 t.Fatal(err) 444 } 445 } 446 447 // Tests the scenario where an aggregate cluster exceeds the maximum depth, 448 // which is 16. Verifies that the channel moves to TRANSIENT_FAILURE, and the 449 // error is propagated to RPC callers. The test then modifies the graph to no 450 // longer exceed maximum depth, but be at the maximum allowed depth, and 451 // verifies that an RPC can be made successfully. 452 func (s) TestAggregatedClusterFailure_ExceedsMaxStackDepth(t *testing.T) { 453 mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t) 454 455 resources := e2e.UpdateOptions{ 456 NodeID: nodeID, 457 Clusters: []*v3clusterpb.Cluster{ 458 makeAggregateClusterResource(clusterName, []string{clusterName + "-1"}), 459 makeAggregateClusterResource(clusterName+"-1", []string{clusterName + "-2"}), 460 makeAggregateClusterResource(clusterName+"-2", []string{clusterName + "-3"}), 461 makeAggregateClusterResource(clusterName+"-3", []string{clusterName + "-4"}), 462 makeAggregateClusterResource(clusterName+"-4", []string{clusterName + "-5"}), 463 makeAggregateClusterResource(clusterName+"-5", []string{clusterName + "-6"}), 464 makeAggregateClusterResource(clusterName+"-6", []string{clusterName + "-7"}), 465 makeAggregateClusterResource(clusterName+"-7", []string{clusterName + "-8"}), 466 makeAggregateClusterResource(clusterName+"-8", []string{clusterName + "-9"}), 467 makeAggregateClusterResource(clusterName+"-9", []string{clusterName + "-10"}), 468 makeAggregateClusterResource(clusterName+"-10", []string{clusterName + "-11"}), 469 makeAggregateClusterResource(clusterName+"-11", []string{clusterName + "-12"}), 470 makeAggregateClusterResource(clusterName+"-12", []string{clusterName + "-13"}), 471 makeAggregateClusterResource(clusterName+"-13", []string{clusterName + "-14"}), 472 makeAggregateClusterResource(clusterName+"-14", []string{clusterName + "-15"}), 473 makeAggregateClusterResource(clusterName+"-15", []string{clusterName + "-16"}), 474 e2e.DefaultCluster(clusterName+"-16", serviceName, e2e.SecurityLevelNone), 475 }, 476 SkipValidation: true, 477 } 478 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 479 defer cancel() 480 if err := mgmtServer.Update(ctx, resources); err != nil { 481 t.Fatal(err) 482 } 483 484 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 485 486 const wantErr = "aggregate cluster graph exceeds max depth" 487 client := testgrpc.NewTestServiceClient(cc) 488 _, err := client.EmptyCall(ctx, &testpb.Empty{}) 489 if code := status.Code(err); code != codes.Unavailable { 490 t.Fatalf("EmptyCall() failed with code: %v, want %v", code, codes.Unavailable) 491 } 492 if err != nil && !strings.Contains(err.Error(), wantErr) { 493 t.Fatalf("EmptyCall() failed with err: %v, want err containing: %v", err, wantErr) 494 } 495 496 // Start a test service backend. 497 server := stubserver.StartTestService(t, nil) 498 t.Cleanup(server.Stop) 499 500 // Update the aggregate cluster resource to no longer exceed max depth, and 501 // be at the maximum depth allowed. 502 resources = e2e.UpdateOptions{ 503 NodeID: nodeID, 504 Clusters: []*v3clusterpb.Cluster{ 505 makeAggregateClusterResource(clusterName, []string{clusterName + "-1"}), 506 makeAggregateClusterResource(clusterName+"-1", []string{clusterName + "-2"}), 507 makeAggregateClusterResource(clusterName+"-2", []string{clusterName + "-3"}), 508 makeAggregateClusterResource(clusterName+"-3", []string{clusterName + "-4"}), 509 makeAggregateClusterResource(clusterName+"-4", []string{clusterName + "-5"}), 510 makeAggregateClusterResource(clusterName+"-5", []string{clusterName + "-6"}), 511 makeAggregateClusterResource(clusterName+"-6", []string{clusterName + "-7"}), 512 makeAggregateClusterResource(clusterName+"-7", []string{clusterName + "-8"}), 513 makeAggregateClusterResource(clusterName+"-8", []string{clusterName + "-9"}), 514 makeAggregateClusterResource(clusterName+"-9", []string{clusterName + "-10"}), 515 makeAggregateClusterResource(clusterName+"-10", []string{clusterName + "-11"}), 516 makeAggregateClusterResource(clusterName+"-11", []string{clusterName + "-12"}), 517 makeAggregateClusterResource(clusterName+"-12", []string{clusterName + "-13"}), 518 makeAggregateClusterResource(clusterName+"-13", []string{clusterName + "-14"}), 519 makeAggregateClusterResource(clusterName+"-14", []string{clusterName + "-15"}), 520 e2e.DefaultCluster(clusterName+"-15", serviceName, e2e.SecurityLevelNone), 521 }, 522 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})}, 523 SkipValidation: true, 524 } 525 if err := mgmtServer.Update(ctx, resources); err != nil { 526 t.Fatal(err) 527 } 528 529 // Verify that a successful RPC can be made. 530 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil { 531 t.Fatalf("EmptyCall() failed: %v", err) 532 } 533 } 534 535 // Tests a diamond shaped aggregate cluster (A->[B,C]; B->D; C->D). Verifies 536 // that the load balancing configuration pushed to the cluster_resolver LB 537 // policy specifies cluster D only once. Also verifies that configuration is 538 // pushed only after all child clusters are resolved. 539 func (s) TestAggregatedClusterSuccess_DiamondDependency(t *testing.T) { 540 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 541 mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t) 542 543 // Configure the management server with an aggregate cluster resource having 544 // a diamond dependency pattern, (A->[B,C]; B->D; C->D). Includes resources 545 // for cluster A, B and D, but don't include the resource for cluster C yet. 546 // This will help us verify that no configuration is pushed to the child 547 // policy until the whole cluster graph is resolved. 548 const ( 549 clusterNameA = clusterName // cluster name in cds LB policy config 550 clusterNameB = clusterName + "-B" 551 clusterNameC = clusterName + "-C" 552 clusterNameD = clusterName + "-D" 553 ) 554 resources := e2e.UpdateOptions{ 555 NodeID: nodeID, 556 Clusters: []*v3clusterpb.Cluster{ 557 makeAggregateClusterResource(clusterNameA, []string{clusterNameB, clusterNameC}), 558 makeAggregateClusterResource(clusterNameB, []string{clusterNameD}), 559 e2e.DefaultCluster(clusterNameD, serviceName, e2e.SecurityLevelNone), 560 }, 561 SkipValidation: true, 562 } 563 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 564 defer cancel() 565 if err := mgmtServer.Update(ctx, resources); err != nil { 566 t.Fatal(err) 567 } 568 569 // Verify that no configuration is pushed to the child policy yet, because 570 // not all clusters making up the aggregate cluster have been resolved yet. 571 select { 572 case cfg := <-lbCfgCh: 573 t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg)) 574 case <-time.After(defaultTestShortTimeout): 575 } 576 577 // Now configure the resource for cluster C in the management server, 578 // thereby completing the cluster graph. This should result in configuration 579 // being pushed down to the child policy. 580 resources.Clusters = append(resources.Clusters, makeAggregateClusterResource(clusterNameC, []string{clusterNameD})) 581 if err := mgmtServer.Update(ctx, resources); err != nil { 582 t.Fatal(err) 583 } 584 585 wantChildCfg := &clusterresolver.LBConfig{ 586 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 587 Cluster: clusterNameD, 588 Type: clusterresolver.DiscoveryMechanismTypeEDS, 589 EDSServiceName: serviceName, 590 OutlierDetection: json.RawMessage(`{}`), 591 TelemetryLabels: internal.UnknownCSMLabels, 592 }}, 593 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 594 } 595 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 596 t.Fatal(err) 597 } 598 } 599 600 // Tests the case where the aggregate cluster graph contains duplicates (A->[B, 601 // C]; B->[C, D]). Verifies that the load balancing configuration pushed to the 602 // cluster_resolver LB policy does not contain duplicates, and that the 603 // discovery mechanism corresponding to cluster C is of higher priority than the 604 // discovery mechanism for cluster D. Also verifies that the configuration is 605 // pushed only after all child clusters are resolved. 606 func (s) TestAggregatedClusterSuccess_IgnoreDups(t *testing.T) { 607 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 608 mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t) 609 610 // Configure the management server with an aggregate cluster resource that 611 // has duplicates in the graph, (A->[B, C]; B->[C, D]). Include resources 612 // for clusters A, B and D, but don't configure the resource for cluster C 613 // yet. This will help us verify that no configuration is pushed to the 614 // child policy until the whole cluster graph is resolved. 615 const ( 616 clusterNameA = clusterName // cluster name in cds LB policy config 617 clusterNameB = clusterName + "-B" 618 clusterNameC = clusterName + "-C" 619 clusterNameD = clusterName + "-D" 620 ) 621 resources := e2e.UpdateOptions{ 622 NodeID: nodeID, 623 Clusters: []*v3clusterpb.Cluster{ 624 makeAggregateClusterResource(clusterNameA, []string{clusterNameB, clusterNameC}), 625 makeAggregateClusterResource(clusterNameB, []string{clusterNameC, clusterNameD}), 626 e2e.DefaultCluster(clusterNameD, serviceName, e2e.SecurityLevelNone), 627 }, 628 SkipValidation: true, 629 } 630 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 631 defer cancel() 632 if err := mgmtServer.Update(ctx, resources); err != nil { 633 t.Fatal(err) 634 } 635 636 // Verify that no configuration is pushed to the child policy yet, because 637 // not all clusters making up the aggregate cluster have been resolved yet. 638 select { 639 case cfg := <-lbCfgCh: 640 t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg)) 641 case <-time.After(defaultTestShortTimeout): 642 } 643 644 // Now configure the resource for cluster C in the management server, 645 // thereby completing the cluster graph. This should result in configuration 646 // being pushed down to the child policy. 647 resources.Clusters = append(resources.Clusters, e2e.DefaultCluster(clusterNameC, serviceName, e2e.SecurityLevelNone)) 648 if err := mgmtServer.Update(ctx, resources); err != nil { 649 t.Fatal(err) 650 } 651 652 wantChildCfg := &clusterresolver.LBConfig{ 653 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{ 654 { 655 Cluster: clusterNameC, 656 Type: clusterresolver.DiscoveryMechanismTypeEDS, 657 EDSServiceName: serviceName, 658 OutlierDetection: json.RawMessage(`{}`), 659 TelemetryLabels: internal.UnknownCSMLabels, 660 }, 661 { 662 Cluster: clusterNameD, 663 Type: clusterresolver.DiscoveryMechanismTypeEDS, 664 EDSServiceName: serviceName, 665 OutlierDetection: json.RawMessage(`{}`), 666 TelemetryLabels: internal.UnknownCSMLabels, 667 }, 668 }, 669 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 670 } 671 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 672 t.Fatal(err) 673 } 674 } 675 676 // Tests the scenario where the aggregate cluster graph has a node that has 677 // child node of itself. The case for this is A -> A, and since there is no base 678 // cluster (EDS or Logical DNS), no configuration should be pushed to the child 679 // policy. The channel is expected to move to TRANSIENT_FAILURE and RPCs are 680 // expected to fail with code UNAVAILABLE and an error message specifying that 681 // the aggregate cluster graph has no leaf clusters. Then the test updates A -> B, 682 // where B is a leaf EDS cluster. Verifies that configuration is pushed to the 683 // child policy and that an RPC can be successfully made. 684 func (s) TestAggregatedCluster_NodeChildOfItself(t *testing.T) { 685 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 686 mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t) 687 688 const ( 689 clusterNameA = clusterName // cluster name in cds LB policy config 690 clusterNameB = clusterName + "-B" 691 ) 692 // Configure the management server with an aggregate cluster resource whose 693 // child is itself. 694 resources := e2e.UpdateOptions{ 695 NodeID: nodeID, 696 Clusters: []*v3clusterpb.Cluster{makeAggregateClusterResource(clusterNameA, []string{clusterNameA})}, 697 SkipValidation: true, 698 } 699 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 700 defer cancel() 701 if err := mgmtServer.Update(ctx, resources); err != nil { 702 t.Fatal(err) 703 } 704 705 select { 706 case cfg := <-lbCfgCh: 707 t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg)) 708 case <-time.After(defaultTestShortTimeout): 709 } 710 711 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 712 713 // Verify that the RPC fails with expected code. 714 client := testgrpc.NewTestServiceClient(cc) 715 _, err := client.EmptyCall(ctx, &testpb.Empty{}) 716 if gotCode, wantCode := status.Code(err), codes.Unavailable; gotCode != wantCode { 717 t.Fatalf("EmptyCall() failed with code: %v, want %v", gotCode, wantCode) 718 } 719 const wantErr = "aggregate cluster graph has no leaf clusters" 720 if !strings.Contains(err.Error(), wantErr) { 721 t.Fatalf("EmptyCall() failed with err: %v, want error containing %s", err, wantErr) 722 } 723 724 // Start a test service backend. 725 server := stubserver.StartTestService(t, nil) 726 t.Cleanup(server.Stop) 727 728 // Update the aggregate cluster to point to a leaf EDS cluster. 729 resources = e2e.UpdateOptions{ 730 NodeID: nodeID, 731 Clusters: []*v3clusterpb.Cluster{ 732 makeAggregateClusterResource(clusterNameA, []string{clusterNameB}), 733 e2e.DefaultCluster(clusterNameB, serviceName, e2e.SecurityLevelNone), 734 }, 735 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})}, 736 SkipValidation: true, 737 } 738 if err := mgmtServer.Update(ctx, resources); err != nil { 739 t.Fatal(err) 740 } 741 742 // Verify the configuration pushed to the child policy. 743 wantChildCfg := &clusterresolver.LBConfig{ 744 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 745 Cluster: clusterNameB, 746 Type: clusterresolver.DiscoveryMechanismTypeEDS, 747 EDSServiceName: serviceName, 748 OutlierDetection: json.RawMessage(`{}`), 749 TelemetryLabels: internal.UnknownCSMLabels, 750 }}, 751 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 752 } 753 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 754 t.Fatal(err) 755 } 756 757 // Verify that a successful RPC can be made. 758 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil { 759 t.Fatalf("EmptyCall() failed: %v", err) 760 } 761 } 762 763 // Tests the scenario where the aggregate cluster graph contains a cycle and 764 // contains no leaf clusters. The case used here is [A -> B, B -> A]. As there 765 // are no leaf clusters in this graph, no configuration should be pushed to the 766 // child policy. The channel is expected to move to TRANSIENT_FAILURE and RPCs 767 // are expected to fail with code UNAVAILABLE and an error message specifying 768 // that the aggregate cluster graph has no leaf clusters. 769 func (s) TestAggregatedCluster_CycleWithNoLeafNode(t *testing.T) { 770 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 771 mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t) 772 773 const ( 774 clusterNameA = clusterName // cluster name in cds LB policy config 775 clusterNameB = clusterName + "-B" 776 ) 777 // Configure the management server with an aggregate cluster resource graph 778 // that contains a cycle and no leaf clusters. 779 resources := e2e.UpdateOptions{ 780 NodeID: nodeID, 781 Clusters: []*v3clusterpb.Cluster{ 782 makeAggregateClusterResource(clusterNameA, []string{clusterNameB}), 783 makeAggregateClusterResource(clusterNameB, []string{clusterNameA}), 784 }, 785 SkipValidation: true, 786 } 787 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 788 defer cancel() 789 if err := mgmtServer.Update(ctx, resources); err != nil { 790 t.Fatal(err) 791 } 792 793 select { 794 case cfg := <-lbCfgCh: 795 t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg)) 796 case <-time.After(defaultTestShortTimeout): 797 } 798 799 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 800 801 // Verify that the RPC fails with expected code. 802 client := testgrpc.NewTestServiceClient(cc) 803 _, err := client.EmptyCall(ctx, &testpb.Empty{}) 804 if gotCode, wantCode := status.Code(err), codes.Unavailable; gotCode != wantCode { 805 t.Fatalf("EmptyCall() failed with code: %v, want %v", gotCode, wantCode) 806 } 807 const wantErr = "aggregate cluster graph has no leaf clusters" 808 if !strings.Contains(err.Error(), wantErr) { 809 t.Fatalf("EmptyCall() failed with err: %v, want %s", err, wantErr) 810 } 811 } 812 813 // Tests the scenario where the aggregate cluster graph contains a cycle and 814 // also contains a leaf cluster. The case used here is [A -> B, B -> A, C]. As 815 // there is a leaf cluster in this graph , configuration should be pushed to the 816 // child policy and RPCs should get routed to that leaf cluster. 817 func (s) TestAggregatedCluster_CycleWithLeafNode(t *testing.T) { 818 lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t) 819 mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t) 820 821 // Start a test service backend. 822 server := stubserver.StartTestService(t, nil) 823 t.Cleanup(server.Stop) 824 825 const ( 826 clusterNameA = clusterName // cluster name in cds LB policy config 827 clusterNameB = clusterName + "-B" 828 clusterNameC = clusterName + "-C" 829 ) 830 // Configure the management server with an aggregate cluster resource graph 831 // that contains a cycle, but also contains a leaf cluster. 832 resources := e2e.UpdateOptions{ 833 NodeID: nodeID, 834 Clusters: []*v3clusterpb.Cluster{ 835 makeAggregateClusterResource(clusterNameA, []string{clusterNameB}), 836 makeAggregateClusterResource(clusterNameB, []string{clusterNameA, clusterNameC}), 837 e2e.DefaultCluster(clusterNameC, serviceName, e2e.SecurityLevelNone), 838 }, 839 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})}, 840 SkipValidation: true, 841 } 842 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 843 defer cancel() 844 if err := mgmtServer.Update(ctx, resources); err != nil { 845 t.Fatal(err) 846 } 847 848 // Verify the configuration pushed to the child policy. 849 wantChildCfg := &clusterresolver.LBConfig{ 850 DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{ 851 Cluster: clusterNameC, 852 Type: clusterresolver.DiscoveryMechanismTypeEDS, 853 EDSServiceName: serviceName, 854 OutlierDetection: json.RawMessage(`{}`), 855 TelemetryLabels: internal.UnknownCSMLabels, 856 }}, 857 XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`), 858 } 859 if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil { 860 t.Fatal(err) 861 } 862 863 // Verify that a successful RPC can be made. 864 client := testgrpc.NewTestServiceClient(cc) 865 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil { 866 t.Fatalf("EmptyCall() failed: %v", err) 867 } 868 }