google.golang.org/grpc@v1.72.2/xds/internal/balancer/cdsbalancer/aggregate_cluster_test.go (about)

     1  /*
     2   * Copyright 2021 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cdsbalancer
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"strings"
    24  	"testing"
    25  	"time"
    26  
    27  	"google.golang.org/grpc"
    28  	"google.golang.org/grpc/codes"
    29  	"google.golang.org/grpc/connectivity"
    30  	"google.golang.org/grpc/internal/pretty"
    31  	"google.golang.org/grpc/internal/stubserver"
    32  	"google.golang.org/grpc/internal/testutils"
    33  	"google.golang.org/grpc/internal/testutils/xds/e2e"
    34  	"google.golang.org/grpc/serviceconfig"
    35  	"google.golang.org/grpc/status"
    36  	"google.golang.org/grpc/xds/internal"
    37  	"google.golang.org/grpc/xds/internal/balancer/clusterresolver"
    38  
    39  	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
    40  	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
    41  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    42  	testpb "google.golang.org/grpc/interop/grpc_testing"
    43  )
    44  
    45  // makeAggregateClusterResource returns an aggregate cluster resource with the
    46  // given name and list of child names.
    47  func makeAggregateClusterResource(name string, childNames []string) *v3clusterpb.Cluster {
    48  	return e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
    49  		ClusterName: name,
    50  		Type:        e2e.ClusterTypeAggregate,
    51  		ChildNames:  childNames,
    52  	})
    53  }
    54  
    55  // makeLogicalDNSClusterResource returns a LOGICAL_DNS cluster resource with the
    56  // given name and given DNS host and port.
    57  func makeLogicalDNSClusterResource(name, dnsHost string, dnsPort uint32) *v3clusterpb.Cluster {
    58  	return e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
    59  		ClusterName: name,
    60  		Type:        e2e.ClusterTypeLogicalDNS,
    61  		DNSHostName: dnsHost,
    62  		DNSPort:     dnsPort,
    63  	})
    64  }
    65  
    66  // Tests the case where the cluster resource requested by the cds LB policy is a
    67  // leaf cluster. The management server sends two updates for the same leaf
    68  // cluster resource. The test verifies that the load balancing configuration
    69  // pushed to the cluster_resolver LB policy contains the expected discovery
    70  // mechanism corresponding to the leaf cluster, on both occasions.
    71  func (s) TestAggregateClusterSuccess_LeafNode(t *testing.T) {
    72  	tests := []struct {
    73  		name                  string
    74  		firstClusterResource  *v3clusterpb.Cluster
    75  		secondClusterResource *v3clusterpb.Cluster
    76  		wantFirstChildCfg     serviceconfig.LoadBalancingConfig
    77  		wantSecondChildCfg    serviceconfig.LoadBalancingConfig
    78  	}{
    79  		{
    80  			name:                  "eds",
    81  			firstClusterResource:  e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone),
    82  			secondClusterResource: e2e.DefaultCluster(clusterName, serviceName+"-new", e2e.SecurityLevelNone),
    83  			wantFirstChildCfg: &clusterresolver.LBConfig{
    84  				DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
    85  					Cluster:          clusterName,
    86  					Type:             clusterresolver.DiscoveryMechanismTypeEDS,
    87  					EDSServiceName:   serviceName,
    88  					OutlierDetection: json.RawMessage(`{}`),
    89  					TelemetryLabels:  internal.UnknownCSMLabels,
    90  				}},
    91  				XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
    92  			},
    93  			wantSecondChildCfg: &clusterresolver.LBConfig{
    94  				DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
    95  					Cluster:          clusterName,
    96  					Type:             clusterresolver.DiscoveryMechanismTypeEDS,
    97  					EDSServiceName:   serviceName + "-new",
    98  					OutlierDetection: json.RawMessage(`{}`),
    99  					TelemetryLabels:  internal.UnknownCSMLabels,
   100  				}},
   101  				XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   102  			},
   103  		},
   104  		{
   105  			name:                  "dns",
   106  			firstClusterResource:  makeLogicalDNSClusterResource(clusterName, "dns_host", uint32(8080)),
   107  			secondClusterResource: makeLogicalDNSClusterResource(clusterName, "dns_host_new", uint32(8080)),
   108  			wantFirstChildCfg: &clusterresolver.LBConfig{
   109  				DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   110  					Cluster:          clusterName,
   111  					Type:             clusterresolver.DiscoveryMechanismTypeLogicalDNS,
   112  					DNSHostname:      "dns_host:8080",
   113  					OutlierDetection: json.RawMessage(`{}`),
   114  					TelemetryLabels:  internal.UnknownCSMLabels,
   115  				}},
   116  				XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   117  			},
   118  			wantSecondChildCfg: &clusterresolver.LBConfig{
   119  				DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   120  					Cluster:          clusterName,
   121  					Type:             clusterresolver.DiscoveryMechanismTypeLogicalDNS,
   122  					DNSHostname:      "dns_host_new:8080",
   123  					OutlierDetection: json.RawMessage(`{}`),
   124  					TelemetryLabels:  internal.UnknownCSMLabels,
   125  				}},
   126  				XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   127  			},
   128  		},
   129  	}
   130  
   131  	for _, test := range tests {
   132  		t.Run(test.name, func(t *testing.T) {
   133  			lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   134  			mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t)
   135  
   136  			// Push the first cluster resource through the management server and
   137  			// verify the configuration pushed to the child policy.
   138  			resources := e2e.UpdateOptions{
   139  				NodeID:         nodeID,
   140  				Clusters:       []*v3clusterpb.Cluster{test.firstClusterResource},
   141  				SkipValidation: true,
   142  			}
   143  			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   144  			defer cancel()
   145  			if err := mgmtServer.Update(ctx, resources); err != nil {
   146  				t.Fatal(err)
   147  			}
   148  			if err := compareLoadBalancingConfig(ctx, lbCfgCh, test.wantFirstChildCfg); err != nil {
   149  				t.Fatal(err)
   150  			}
   151  
   152  			// Push the second cluster resource through the management server and
   153  			// verify the configuration pushed to the child policy.
   154  			resources.Clusters[0] = test.secondClusterResource
   155  			if err := mgmtServer.Update(ctx, resources); err != nil {
   156  				t.Fatal(err)
   157  			}
   158  			if err := compareLoadBalancingConfig(ctx, lbCfgCh, test.wantSecondChildCfg); err != nil {
   159  				t.Fatal(err)
   160  			}
   161  		})
   162  	}
   163  }
   164  
   165  // Tests the case where the cluster resource requested by the cds LB policy is
   166  // an aggregate cluster root pointing to two child clusters, one of type EDS and
   167  // the other of type LogicalDNS. The test verifies that load balancing
   168  // configuration is pushed to the cluster_resolver LB policy only when all child
   169  // clusters are resolved and it also verifies that the pushed configuration
   170  // contains the expected discovery mechanisms. The test then updates the
   171  // aggregate cluster to point to two child clusters, the same leaf cluster of
   172  // type EDS and a different leaf cluster of type LogicalDNS and verifies that
   173  // the load balancing configuration pushed to the cluster_resolver LB policy
   174  // contains the expected discovery mechanisms.
   175  func (s) TestAggregateClusterSuccess_ThenUpdateChildClusters(t *testing.T) {
   176  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   177  	mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t)
   178  
   179  	// Configure the management server with the aggregate cluster resource
   180  	// pointing to two child clusters, one EDS and one LogicalDNS. Include the
   181  	// resource corresponding to the EDS cluster here, but don't include
   182  	// resource corresponding to the LogicalDNS cluster yet.
   183  	resources := e2e.UpdateOptions{
   184  		NodeID: nodeID,
   185  		Clusters: []*v3clusterpb.Cluster{
   186  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
   187  			e2e.DefaultCluster(edsClusterName, serviceName, e2e.SecurityLevelNone),
   188  		},
   189  		SkipValidation: true,
   190  	}
   191  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   192  	defer cancel()
   193  	if err := mgmtServer.Update(ctx, resources); err != nil {
   194  		t.Fatal(err)
   195  	}
   196  
   197  	// Verify that no configuration is pushed to the child policy yet, because
   198  	// not all clusters making up the aggregate cluster have been resolved yet.
   199  	select {
   200  	case cfg := <-lbCfgCh:
   201  		t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg))
   202  	case <-time.After(defaultTestShortTimeout):
   203  	}
   204  
   205  	// Now configure the LogicalDNS cluster in the management server. This
   206  	// should result in configuration being pushed down to the child policy.
   207  	resources.Clusters = append(resources.Clusters, makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort))
   208  	if err := mgmtServer.Update(ctx, resources); err != nil {
   209  		t.Fatal(err)
   210  	}
   211  
   212  	wantChildCfg := &clusterresolver.LBConfig{
   213  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{
   214  			{
   215  				Cluster:          edsClusterName,
   216  				Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   217  				EDSServiceName:   serviceName,
   218  				OutlierDetection: json.RawMessage(`{}`),
   219  				TelemetryLabels:  internal.UnknownCSMLabels,
   220  			},
   221  			{
   222  				Cluster:          dnsClusterName,
   223  				Type:             clusterresolver.DiscoveryMechanismTypeLogicalDNS,
   224  				DNSHostname:      fmt.Sprintf("%s:%d", dnsHostName, dnsPort),
   225  				OutlierDetection: json.RawMessage(`{}`),
   226  				TelemetryLabels:  internal.UnknownCSMLabels,
   227  			},
   228  		},
   229  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   230  	}
   231  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   232  		t.Fatal(err)
   233  	}
   234  
   235  	const dnsClusterNameNew = dnsClusterName + "-new"
   236  	const dnsHostNameNew = dnsHostName + "-new"
   237  	resources = e2e.UpdateOptions{
   238  		NodeID: nodeID,
   239  		Clusters: []*v3clusterpb.Cluster{
   240  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterNameNew}),
   241  			e2e.DefaultCluster(edsClusterName, serviceName, e2e.SecurityLevelNone),
   242  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   243  			makeLogicalDNSClusterResource(dnsClusterNameNew, dnsHostNameNew, dnsPort),
   244  		},
   245  		SkipValidation: true,
   246  	}
   247  	if err := mgmtServer.Update(ctx, resources); err != nil {
   248  		t.Fatal(err)
   249  	}
   250  	wantChildCfg = &clusterresolver.LBConfig{
   251  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{
   252  			{
   253  				Cluster:          edsClusterName,
   254  				Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   255  				EDSServiceName:   serviceName,
   256  				OutlierDetection: json.RawMessage(`{}`),
   257  				TelemetryLabels:  internal.UnknownCSMLabels,
   258  			},
   259  			{
   260  				Cluster:          dnsClusterNameNew,
   261  				Type:             clusterresolver.DiscoveryMechanismTypeLogicalDNS,
   262  				DNSHostname:      fmt.Sprintf("%s:%d", dnsHostNameNew, dnsPort),
   263  				OutlierDetection: json.RawMessage(`{}`),
   264  				TelemetryLabels:  internal.UnknownCSMLabels,
   265  			},
   266  		},
   267  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   268  	}
   269  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   270  		t.Fatal(err)
   271  	}
   272  }
   273  
   274  // Tests the case where the cluster resource requested by the cds LB policy is
   275  // an aggregate cluster root pointing to two child clusters, one of type EDS and
   276  // the other of type LogicalDNS. The test verifies that the load balancing
   277  // configuration pushed to the cluster_resolver LB policy contains the discovery
   278  // mechanisms for both child clusters. The test then updates the root cluster
   279  // resource requested by the cds LB policy to a leaf cluster of type EDS and
   280  // verifies the load balancing configuration pushed to the cluster_resolver LB
   281  // policy contains a single discovery mechanism.
   282  func (s) TestAggregateClusterSuccess_ThenChangeRootToEDS(t *testing.T) {
   283  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   284  	mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t)
   285  
   286  	// Configure the management server with the aggregate cluster resource
   287  	// pointing to two child clusters.
   288  	resources := e2e.UpdateOptions{
   289  		NodeID: nodeID,
   290  		Clusters: []*v3clusterpb.Cluster{
   291  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
   292  			e2e.DefaultCluster(edsClusterName, serviceName, e2e.SecurityLevelNone),
   293  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   294  		},
   295  		SkipValidation: true,
   296  	}
   297  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   298  	defer cancel()
   299  	if err := mgmtServer.Update(ctx, resources); err != nil {
   300  		t.Fatal(err)
   301  	}
   302  
   303  	wantChildCfg := &clusterresolver.LBConfig{
   304  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{
   305  			{
   306  				Cluster:          edsClusterName,
   307  				Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   308  				EDSServiceName:   serviceName,
   309  				OutlierDetection: json.RawMessage(`{}`),
   310  				TelemetryLabels:  internal.UnknownCSMLabels,
   311  			},
   312  			{
   313  				Cluster:          dnsClusterName,
   314  				Type:             clusterresolver.DiscoveryMechanismTypeLogicalDNS,
   315  				DNSHostname:      fmt.Sprintf("%s:%d", dnsHostName, dnsPort),
   316  				OutlierDetection: json.RawMessage(`{}`),
   317  				TelemetryLabels:  internal.UnknownCSMLabels,
   318  			},
   319  		},
   320  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   321  	}
   322  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   323  		t.Fatal(err)
   324  	}
   325  
   326  	resources = e2e.UpdateOptions{
   327  		NodeID: nodeID,
   328  		Clusters: []*v3clusterpb.Cluster{
   329  			e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone),
   330  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   331  		},
   332  		SkipValidation: true,
   333  	}
   334  	if err := mgmtServer.Update(ctx, resources); err != nil {
   335  		t.Fatal(err)
   336  	}
   337  	wantChildCfg = &clusterresolver.LBConfig{
   338  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   339  			Cluster:          clusterName,
   340  			Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   341  			EDSServiceName:   serviceName,
   342  			OutlierDetection: json.RawMessage(`{}`),
   343  			TelemetryLabels:  internal.UnknownCSMLabels,
   344  		}},
   345  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   346  	}
   347  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   348  		t.Fatal(err)
   349  	}
   350  }
   351  
   352  // Tests the case where a requested cluster resource switches between being a
   353  // leaf and an aggregate cluster pointing to an EDS and LogicalDNS child
   354  // cluster. In each of these cases, the test verifies that the load balancing
   355  // configuration pushed to the cluster_resolver LB policy contains the expected
   356  // discovery mechanisms.
   357  func (s) TestAggregatedClusterSuccess_SwitchBetweenLeafAndAggregate(t *testing.T) {
   358  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   359  	mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t)
   360  
   361  	// Start off with the requested cluster being a leaf EDS cluster.
   362  	resources := e2e.UpdateOptions{
   363  		NodeID:         nodeID,
   364  		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)},
   365  		SkipValidation: true,
   366  	}
   367  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   368  	defer cancel()
   369  	if err := mgmtServer.Update(ctx, resources); err != nil {
   370  		t.Fatal(err)
   371  	}
   372  	wantChildCfg := &clusterresolver.LBConfig{
   373  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   374  			Cluster:          clusterName,
   375  			Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   376  			EDSServiceName:   serviceName,
   377  			OutlierDetection: json.RawMessage(`{}`),
   378  			TelemetryLabels:  internal.UnknownCSMLabels,
   379  		}},
   380  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   381  	}
   382  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   383  		t.Fatal(err)
   384  	}
   385  
   386  	// Switch the requested cluster to be an aggregate cluster pointing to two
   387  	// child clusters.
   388  	resources = e2e.UpdateOptions{
   389  		NodeID: nodeID,
   390  		Clusters: []*v3clusterpb.Cluster{
   391  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
   392  			e2e.DefaultCluster(edsClusterName, serviceName, e2e.SecurityLevelNone),
   393  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   394  		},
   395  		SkipValidation: true,
   396  	}
   397  	if err := mgmtServer.Update(ctx, resources); err != nil {
   398  		t.Fatal(err)
   399  	}
   400  	wantChildCfg = &clusterresolver.LBConfig{
   401  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{
   402  			{
   403  				Cluster:          edsClusterName,
   404  				Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   405  				EDSServiceName:   serviceName,
   406  				OutlierDetection: json.RawMessage(`{}`),
   407  				TelemetryLabels:  internal.UnknownCSMLabels,
   408  			},
   409  			{
   410  				Cluster:          dnsClusterName,
   411  				Type:             clusterresolver.DiscoveryMechanismTypeLogicalDNS,
   412  				DNSHostname:      fmt.Sprintf("%s:%d", dnsHostName, dnsPort),
   413  				OutlierDetection: json.RawMessage(`{}`),
   414  				TelemetryLabels:  internal.UnknownCSMLabels,
   415  			},
   416  		},
   417  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   418  	}
   419  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   420  		t.Fatal(err)
   421  	}
   422  
   423  	// Switch the cluster back to a leaf EDS cluster.
   424  	resources = e2e.UpdateOptions{
   425  		NodeID:         nodeID,
   426  		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)},
   427  		SkipValidation: true,
   428  	}
   429  	if err := mgmtServer.Update(ctx, resources); err != nil {
   430  		t.Fatal(err)
   431  	}
   432  	wantChildCfg = &clusterresolver.LBConfig{
   433  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   434  			Cluster:          clusterName,
   435  			Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   436  			EDSServiceName:   serviceName,
   437  			OutlierDetection: json.RawMessage(`{}`),
   438  			TelemetryLabels:  internal.UnknownCSMLabels,
   439  		}},
   440  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   441  	}
   442  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   443  		t.Fatal(err)
   444  	}
   445  }
   446  
   447  // Tests the scenario where an aggregate cluster exceeds the maximum depth,
   448  // which is 16. Verifies that the channel moves to TRANSIENT_FAILURE, and the
   449  // error is propagated to RPC callers. The test then modifies the graph to no
   450  // longer exceed maximum depth, but be at the maximum allowed depth, and
   451  // verifies that an RPC can be made successfully.
   452  func (s) TestAggregatedClusterFailure_ExceedsMaxStackDepth(t *testing.T) {
   453  	mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t)
   454  
   455  	resources := e2e.UpdateOptions{
   456  		NodeID: nodeID,
   457  		Clusters: []*v3clusterpb.Cluster{
   458  			makeAggregateClusterResource(clusterName, []string{clusterName + "-1"}),
   459  			makeAggregateClusterResource(clusterName+"-1", []string{clusterName + "-2"}),
   460  			makeAggregateClusterResource(clusterName+"-2", []string{clusterName + "-3"}),
   461  			makeAggregateClusterResource(clusterName+"-3", []string{clusterName + "-4"}),
   462  			makeAggregateClusterResource(clusterName+"-4", []string{clusterName + "-5"}),
   463  			makeAggregateClusterResource(clusterName+"-5", []string{clusterName + "-6"}),
   464  			makeAggregateClusterResource(clusterName+"-6", []string{clusterName + "-7"}),
   465  			makeAggregateClusterResource(clusterName+"-7", []string{clusterName + "-8"}),
   466  			makeAggregateClusterResource(clusterName+"-8", []string{clusterName + "-9"}),
   467  			makeAggregateClusterResource(clusterName+"-9", []string{clusterName + "-10"}),
   468  			makeAggregateClusterResource(clusterName+"-10", []string{clusterName + "-11"}),
   469  			makeAggregateClusterResource(clusterName+"-11", []string{clusterName + "-12"}),
   470  			makeAggregateClusterResource(clusterName+"-12", []string{clusterName + "-13"}),
   471  			makeAggregateClusterResource(clusterName+"-13", []string{clusterName + "-14"}),
   472  			makeAggregateClusterResource(clusterName+"-14", []string{clusterName + "-15"}),
   473  			makeAggregateClusterResource(clusterName+"-15", []string{clusterName + "-16"}),
   474  			e2e.DefaultCluster(clusterName+"-16", serviceName, e2e.SecurityLevelNone),
   475  		},
   476  		SkipValidation: true,
   477  	}
   478  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   479  	defer cancel()
   480  	if err := mgmtServer.Update(ctx, resources); err != nil {
   481  		t.Fatal(err)
   482  	}
   483  
   484  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   485  
   486  	const wantErr = "aggregate cluster graph exceeds max depth"
   487  	client := testgrpc.NewTestServiceClient(cc)
   488  	_, err := client.EmptyCall(ctx, &testpb.Empty{})
   489  	if code := status.Code(err); code != codes.Unavailable {
   490  		t.Fatalf("EmptyCall() failed with code: %v, want %v", code, codes.Unavailable)
   491  	}
   492  	if err != nil && !strings.Contains(err.Error(), wantErr) {
   493  		t.Fatalf("EmptyCall() failed with err: %v, want err containing: %v", err, wantErr)
   494  	}
   495  
   496  	// Start a test service backend.
   497  	server := stubserver.StartTestService(t, nil)
   498  	t.Cleanup(server.Stop)
   499  
   500  	// Update the aggregate cluster resource to no longer exceed max depth, and
   501  	// be at the maximum depth allowed.
   502  	resources = e2e.UpdateOptions{
   503  		NodeID: nodeID,
   504  		Clusters: []*v3clusterpb.Cluster{
   505  			makeAggregateClusterResource(clusterName, []string{clusterName + "-1"}),
   506  			makeAggregateClusterResource(clusterName+"-1", []string{clusterName + "-2"}),
   507  			makeAggregateClusterResource(clusterName+"-2", []string{clusterName + "-3"}),
   508  			makeAggregateClusterResource(clusterName+"-3", []string{clusterName + "-4"}),
   509  			makeAggregateClusterResource(clusterName+"-4", []string{clusterName + "-5"}),
   510  			makeAggregateClusterResource(clusterName+"-5", []string{clusterName + "-6"}),
   511  			makeAggregateClusterResource(clusterName+"-6", []string{clusterName + "-7"}),
   512  			makeAggregateClusterResource(clusterName+"-7", []string{clusterName + "-8"}),
   513  			makeAggregateClusterResource(clusterName+"-8", []string{clusterName + "-9"}),
   514  			makeAggregateClusterResource(clusterName+"-9", []string{clusterName + "-10"}),
   515  			makeAggregateClusterResource(clusterName+"-10", []string{clusterName + "-11"}),
   516  			makeAggregateClusterResource(clusterName+"-11", []string{clusterName + "-12"}),
   517  			makeAggregateClusterResource(clusterName+"-12", []string{clusterName + "-13"}),
   518  			makeAggregateClusterResource(clusterName+"-13", []string{clusterName + "-14"}),
   519  			makeAggregateClusterResource(clusterName+"-14", []string{clusterName + "-15"}),
   520  			e2e.DefaultCluster(clusterName+"-15", serviceName, e2e.SecurityLevelNone),
   521  		},
   522  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
   523  		SkipValidation: true,
   524  	}
   525  	if err := mgmtServer.Update(ctx, resources); err != nil {
   526  		t.Fatal(err)
   527  	}
   528  
   529  	// Verify that a successful RPC can be made.
   530  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   531  		t.Fatalf("EmptyCall() failed: %v", err)
   532  	}
   533  }
   534  
   535  // Tests a diamond shaped aggregate cluster (A->[B,C]; B->D; C->D). Verifies
   536  // that the load balancing configuration pushed to the cluster_resolver LB
   537  // policy specifies cluster D only once. Also verifies that configuration is
   538  // pushed only after all child clusters are resolved.
   539  func (s) TestAggregatedClusterSuccess_DiamondDependency(t *testing.T) {
   540  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   541  	mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t)
   542  
   543  	// Configure the management server with an aggregate cluster resource having
   544  	// a diamond dependency pattern, (A->[B,C]; B->D; C->D). Includes resources
   545  	// for cluster A, B and D, but don't include the resource for cluster C yet.
   546  	// This will help us verify that no configuration is pushed to the child
   547  	// policy until the whole cluster graph is resolved.
   548  	const (
   549  		clusterNameA = clusterName // cluster name in cds LB policy config
   550  		clusterNameB = clusterName + "-B"
   551  		clusterNameC = clusterName + "-C"
   552  		clusterNameD = clusterName + "-D"
   553  	)
   554  	resources := e2e.UpdateOptions{
   555  		NodeID: nodeID,
   556  		Clusters: []*v3clusterpb.Cluster{
   557  			makeAggregateClusterResource(clusterNameA, []string{clusterNameB, clusterNameC}),
   558  			makeAggregateClusterResource(clusterNameB, []string{clusterNameD}),
   559  			e2e.DefaultCluster(clusterNameD, serviceName, e2e.SecurityLevelNone),
   560  		},
   561  		SkipValidation: true,
   562  	}
   563  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   564  	defer cancel()
   565  	if err := mgmtServer.Update(ctx, resources); err != nil {
   566  		t.Fatal(err)
   567  	}
   568  
   569  	// Verify that no configuration is pushed to the child policy yet, because
   570  	// not all clusters making up the aggregate cluster have been resolved yet.
   571  	select {
   572  	case cfg := <-lbCfgCh:
   573  		t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg))
   574  	case <-time.After(defaultTestShortTimeout):
   575  	}
   576  
   577  	// Now configure the resource for cluster C in the management server,
   578  	// thereby completing the cluster graph. This should result in configuration
   579  	// being pushed down to the child policy.
   580  	resources.Clusters = append(resources.Clusters, makeAggregateClusterResource(clusterNameC, []string{clusterNameD}))
   581  	if err := mgmtServer.Update(ctx, resources); err != nil {
   582  		t.Fatal(err)
   583  	}
   584  
   585  	wantChildCfg := &clusterresolver.LBConfig{
   586  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   587  			Cluster:          clusterNameD,
   588  			Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   589  			EDSServiceName:   serviceName,
   590  			OutlierDetection: json.RawMessage(`{}`),
   591  			TelemetryLabels:  internal.UnknownCSMLabels,
   592  		}},
   593  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   594  	}
   595  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   596  		t.Fatal(err)
   597  	}
   598  }
   599  
   600  // Tests the case where the aggregate cluster graph contains duplicates (A->[B,
   601  // C]; B->[C, D]). Verifies that the load balancing configuration pushed to the
   602  // cluster_resolver LB policy does not contain duplicates, and that the
   603  // discovery mechanism corresponding to cluster C is of higher priority than the
   604  // discovery mechanism for cluster D. Also verifies that the configuration is
   605  // pushed only after all child clusters are resolved.
   606  func (s) TestAggregatedClusterSuccess_IgnoreDups(t *testing.T) {
   607  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   608  	mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t)
   609  
   610  	// Configure the management server with an aggregate cluster resource that
   611  	// has duplicates in the graph, (A->[B, C]; B->[C, D]). Include resources
   612  	// for clusters A, B and D, but don't configure the resource for cluster C
   613  	// yet. This will help us verify that no configuration is pushed to the
   614  	// child policy until the whole cluster graph is resolved.
   615  	const (
   616  		clusterNameA = clusterName // cluster name in cds LB policy config
   617  		clusterNameB = clusterName + "-B"
   618  		clusterNameC = clusterName + "-C"
   619  		clusterNameD = clusterName + "-D"
   620  	)
   621  	resources := e2e.UpdateOptions{
   622  		NodeID: nodeID,
   623  		Clusters: []*v3clusterpb.Cluster{
   624  			makeAggregateClusterResource(clusterNameA, []string{clusterNameB, clusterNameC}),
   625  			makeAggregateClusterResource(clusterNameB, []string{clusterNameC, clusterNameD}),
   626  			e2e.DefaultCluster(clusterNameD, serviceName, e2e.SecurityLevelNone),
   627  		},
   628  		SkipValidation: true,
   629  	}
   630  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   631  	defer cancel()
   632  	if err := mgmtServer.Update(ctx, resources); err != nil {
   633  		t.Fatal(err)
   634  	}
   635  
   636  	// Verify that no configuration is pushed to the child policy yet, because
   637  	// not all clusters making up the aggregate cluster have been resolved yet.
   638  	select {
   639  	case cfg := <-lbCfgCh:
   640  		t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg))
   641  	case <-time.After(defaultTestShortTimeout):
   642  	}
   643  
   644  	// Now configure the resource for cluster C in the management server,
   645  	// thereby completing the cluster graph. This should result in configuration
   646  	// being pushed down to the child policy.
   647  	resources.Clusters = append(resources.Clusters, e2e.DefaultCluster(clusterNameC, serviceName, e2e.SecurityLevelNone))
   648  	if err := mgmtServer.Update(ctx, resources); err != nil {
   649  		t.Fatal(err)
   650  	}
   651  
   652  	wantChildCfg := &clusterresolver.LBConfig{
   653  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{
   654  			{
   655  				Cluster:          clusterNameC,
   656  				Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   657  				EDSServiceName:   serviceName,
   658  				OutlierDetection: json.RawMessage(`{}`),
   659  				TelemetryLabels:  internal.UnknownCSMLabels,
   660  			},
   661  			{
   662  				Cluster:          clusterNameD,
   663  				Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   664  				EDSServiceName:   serviceName,
   665  				OutlierDetection: json.RawMessage(`{}`),
   666  				TelemetryLabels:  internal.UnknownCSMLabels,
   667  			},
   668  		},
   669  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   670  	}
   671  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   672  		t.Fatal(err)
   673  	}
   674  }
   675  
   676  // Tests the scenario where the aggregate cluster graph has a node that has
   677  // child node of itself. The case for this is A -> A, and since there is no base
   678  // cluster (EDS or Logical DNS), no configuration should be pushed to the child
   679  // policy.  The channel is expected to move to TRANSIENT_FAILURE and RPCs are
   680  // expected to fail with code UNAVAILABLE and an error message specifying that
   681  // the aggregate cluster graph has no leaf clusters.  Then the test updates A -> B,
   682  // where B is a leaf EDS cluster. Verifies that configuration is pushed to the
   683  // child policy and that an RPC can be successfully made.
   684  func (s) TestAggregatedCluster_NodeChildOfItself(t *testing.T) {
   685  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   686  	mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t)
   687  
   688  	const (
   689  		clusterNameA = clusterName // cluster name in cds LB policy config
   690  		clusterNameB = clusterName + "-B"
   691  	)
   692  	// Configure the management server with an aggregate cluster resource whose
   693  	// child is itself.
   694  	resources := e2e.UpdateOptions{
   695  		NodeID:         nodeID,
   696  		Clusters:       []*v3clusterpb.Cluster{makeAggregateClusterResource(clusterNameA, []string{clusterNameA})},
   697  		SkipValidation: true,
   698  	}
   699  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   700  	defer cancel()
   701  	if err := mgmtServer.Update(ctx, resources); err != nil {
   702  		t.Fatal(err)
   703  	}
   704  
   705  	select {
   706  	case cfg := <-lbCfgCh:
   707  		t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg))
   708  	case <-time.After(defaultTestShortTimeout):
   709  	}
   710  
   711  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   712  
   713  	// Verify that the RPC fails with expected code.
   714  	client := testgrpc.NewTestServiceClient(cc)
   715  	_, err := client.EmptyCall(ctx, &testpb.Empty{})
   716  	if gotCode, wantCode := status.Code(err), codes.Unavailable; gotCode != wantCode {
   717  		t.Fatalf("EmptyCall() failed with code: %v, want %v", gotCode, wantCode)
   718  	}
   719  	const wantErr = "aggregate cluster graph has no leaf clusters"
   720  	if !strings.Contains(err.Error(), wantErr) {
   721  		t.Fatalf("EmptyCall() failed with err: %v, want error containing %s", err, wantErr)
   722  	}
   723  
   724  	// Start a test service backend.
   725  	server := stubserver.StartTestService(t, nil)
   726  	t.Cleanup(server.Stop)
   727  
   728  	// Update the aggregate cluster to point to a leaf EDS cluster.
   729  	resources = e2e.UpdateOptions{
   730  		NodeID: nodeID,
   731  		Clusters: []*v3clusterpb.Cluster{
   732  			makeAggregateClusterResource(clusterNameA, []string{clusterNameB}),
   733  			e2e.DefaultCluster(clusterNameB, serviceName, e2e.SecurityLevelNone),
   734  		},
   735  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
   736  		SkipValidation: true,
   737  	}
   738  	if err := mgmtServer.Update(ctx, resources); err != nil {
   739  		t.Fatal(err)
   740  	}
   741  
   742  	// Verify the configuration pushed to the child policy.
   743  	wantChildCfg := &clusterresolver.LBConfig{
   744  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   745  			Cluster:          clusterNameB,
   746  			Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   747  			EDSServiceName:   serviceName,
   748  			OutlierDetection: json.RawMessage(`{}`),
   749  			TelemetryLabels:  internal.UnknownCSMLabels,
   750  		}},
   751  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   752  	}
   753  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   754  		t.Fatal(err)
   755  	}
   756  
   757  	// Verify that a successful RPC can be made.
   758  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   759  		t.Fatalf("EmptyCall() failed: %v", err)
   760  	}
   761  }
   762  
   763  // Tests the scenario where the aggregate cluster graph contains a cycle and
   764  // contains no leaf clusters. The case used here is [A -> B, B -> A]. As there
   765  // are no leaf clusters in this graph, no configuration should be pushed to the
   766  // child policy. The channel is expected to move to TRANSIENT_FAILURE and RPCs
   767  // are expected to fail with code UNAVAILABLE and an error message specifying
   768  // that the aggregate cluster graph has no leaf clusters.
   769  func (s) TestAggregatedCluster_CycleWithNoLeafNode(t *testing.T) {
   770  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   771  	mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t)
   772  
   773  	const (
   774  		clusterNameA = clusterName // cluster name in cds LB policy config
   775  		clusterNameB = clusterName + "-B"
   776  	)
   777  	// Configure the management server with an aggregate cluster resource graph
   778  	// that contains a cycle and no leaf clusters.
   779  	resources := e2e.UpdateOptions{
   780  		NodeID: nodeID,
   781  		Clusters: []*v3clusterpb.Cluster{
   782  			makeAggregateClusterResource(clusterNameA, []string{clusterNameB}),
   783  			makeAggregateClusterResource(clusterNameB, []string{clusterNameA}),
   784  		},
   785  		SkipValidation: true,
   786  	}
   787  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   788  	defer cancel()
   789  	if err := mgmtServer.Update(ctx, resources); err != nil {
   790  		t.Fatal(err)
   791  	}
   792  
   793  	select {
   794  	case cfg := <-lbCfgCh:
   795  		t.Fatalf("Child policy received configuration when not expected to: %s", pretty.ToJSON(cfg))
   796  	case <-time.After(defaultTestShortTimeout):
   797  	}
   798  
   799  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   800  
   801  	// Verify that the RPC fails with expected code.
   802  	client := testgrpc.NewTestServiceClient(cc)
   803  	_, err := client.EmptyCall(ctx, &testpb.Empty{})
   804  	if gotCode, wantCode := status.Code(err), codes.Unavailable; gotCode != wantCode {
   805  		t.Fatalf("EmptyCall() failed with code: %v, want %v", gotCode, wantCode)
   806  	}
   807  	const wantErr = "aggregate cluster graph has no leaf clusters"
   808  	if !strings.Contains(err.Error(), wantErr) {
   809  		t.Fatalf("EmptyCall() failed with err: %v, want %s", err, wantErr)
   810  	}
   811  }
   812  
   813  // Tests the scenario where the aggregate cluster graph contains a cycle and
   814  // also contains a leaf cluster. The case used here is [A -> B, B -> A, C]. As
   815  // there is a leaf cluster in this graph , configuration should be pushed to the
   816  // child policy and RPCs should get routed to that leaf cluster.
   817  func (s) TestAggregatedCluster_CycleWithLeafNode(t *testing.T) {
   818  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   819  	mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t)
   820  
   821  	// Start a test service backend.
   822  	server := stubserver.StartTestService(t, nil)
   823  	t.Cleanup(server.Stop)
   824  
   825  	const (
   826  		clusterNameA = clusterName // cluster name in cds LB policy config
   827  		clusterNameB = clusterName + "-B"
   828  		clusterNameC = clusterName + "-C"
   829  	)
   830  	// Configure the management server with an aggregate cluster resource graph
   831  	// that contains a cycle, but also contains a leaf cluster.
   832  	resources := e2e.UpdateOptions{
   833  		NodeID: nodeID,
   834  		Clusters: []*v3clusterpb.Cluster{
   835  			makeAggregateClusterResource(clusterNameA, []string{clusterNameB}),
   836  			makeAggregateClusterResource(clusterNameB, []string{clusterNameA, clusterNameC}),
   837  			e2e.DefaultCluster(clusterNameC, serviceName, e2e.SecurityLevelNone),
   838  		},
   839  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
   840  		SkipValidation: true,
   841  	}
   842  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   843  	defer cancel()
   844  	if err := mgmtServer.Update(ctx, resources); err != nil {
   845  		t.Fatal(err)
   846  	}
   847  
   848  	// Verify the configuration pushed to the child policy.
   849  	wantChildCfg := &clusterresolver.LBConfig{
   850  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   851  			Cluster:          clusterNameC,
   852  			Type:             clusterresolver.DiscoveryMechanismTypeEDS,
   853  			EDSServiceName:   serviceName,
   854  			OutlierDetection: json.RawMessage(`{}`),
   855  			TelemetryLabels:  internal.UnknownCSMLabels,
   856  		}},
   857  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   858  	}
   859  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   860  		t.Fatal(err)
   861  	}
   862  
   863  	// Verify that a successful RPC can be made.
   864  	client := testgrpc.NewTestServiceClient(cc)
   865  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   866  		t.Fatalf("EmptyCall() failed: %v", err)
   867  	}
   868  }