google.golang.org/grpc@v1.72.2/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go (about)

     1  /*
     2   * Copyright 2019 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cdsbalancer
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"net"
    24  	"strings"
    25  	"testing"
    26  	"time"
    27  
    28  	"github.com/google/go-cmp/cmp"
    29  	"github.com/google/uuid"
    30  	"google.golang.org/grpc"
    31  	"google.golang.org/grpc/balancer"
    32  	"google.golang.org/grpc/codes"
    33  	"google.golang.org/grpc/connectivity"
    34  	"google.golang.org/grpc/credentials/insecure"
    35  	"google.golang.org/grpc/internal"
    36  	"google.golang.org/grpc/internal/balancer/stub"
    37  	"google.golang.org/grpc/internal/grpctest"
    38  	"google.golang.org/grpc/internal/stubserver"
    39  	"google.golang.org/grpc/internal/testutils"
    40  	"google.golang.org/grpc/internal/testutils/xds/e2e"
    41  	"google.golang.org/grpc/internal/xds/bootstrap"
    42  	"google.golang.org/grpc/resolver"
    43  	"google.golang.org/grpc/resolver/manual"
    44  	"google.golang.org/grpc/serviceconfig"
    45  	"google.golang.org/grpc/status"
    46  	xdsinternal "google.golang.org/grpc/xds/internal"
    47  	"google.golang.org/grpc/xds/internal/balancer/clusterresolver"
    48  	"google.golang.org/grpc/xds/internal/xdsclient"
    49  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
    50  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version"
    51  	"google.golang.org/protobuf/types/known/durationpb"
    52  	"google.golang.org/protobuf/types/known/wrapperspb"
    53  
    54  	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
    55  	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
    56  	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
    57  	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    58  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    59  	testpb "google.golang.org/grpc/interop/grpc_testing"
    60  
    61  	_ "google.golang.org/grpc/xds/internal/balancer/ringhash" // Register the ring_hash LB policy
    62  )
    63  
// Names, addresses and timeouts shared by the tests in this file:
//   - clusterName is the cluster name placed in the cds LB policy
//     configuration; edsClusterName and dnsClusterName are derived from it by
//     appending a suffix.
//   - serviceName is used as the EDS service name of test cluster resources.
//   - dnsHostName and dnsPort are presumably the address used for DNS-type
//     clusters (TODO confirm against their callers).
//   - defaultTestTimeout bounds operations that are expected to complete.
//   - defaultTestShortTimeout bounds waits for events that must not occur.
const (
	clusterName             = "cluster1"
	edsClusterName          = clusterName + "-eds"
	dnsClusterName          = clusterName + "-dns"
	serviceName             = "service1"
	dnsHostName             = "dns_host"
	dnsPort                 = uint32(8080)
	defaultTestTimeout      = 5 * time.Second
	defaultTestShortTimeout = 10 * time.Millisecond // For events expected to *not* happen.
)
    74  
// s is the receiver type on which the tests in this file are declared as
// methods. It embeds grpctest.Tester.
type s struct {
	grpctest.Tester
}

// Test is the top-level test function that hands an s value to
// grpctest.RunSubTests (which presumably runs every Test* method of s as a
// subtest; see the grpctest package to confirm).
func Test(t *testing.T) {
	grpctest.RunSubTests(t, s{})
}
    82  
    83  func waitForResourceNames(ctx context.Context, resourceNamesCh chan []string, wantNames []string) error {
    84  	for ctx.Err() == nil {
    85  		select {
    86  		case <-ctx.Done():
    87  		case gotNames := <-resourceNamesCh:
    88  			if cmp.Equal(gotNames, wantNames) {
    89  				return nil
    90  			}
    91  		}
    92  	}
    93  	if ctx.Err() != nil {
    94  		return fmt.Errorf("Timeout when waiting for appropriate Cluster resources to be requested")
    95  	}
    96  	return nil
    97  }
    98  
    99  // Registers a wrapped cluster_resolver LB policy (child policy of the cds LB
   100  // policy) for the duration of this test that retains all the functionality of
   101  // the former, but makes certain events available for inspection by the test.
   102  //
   103  // Returns the following:
   104  // - a channel to read received load balancing configuration
   105  // - a channel to read received resolver error
   106  // - a channel that is closed when ExitIdle() is called
   107  // - a channel that is closed when the balancer is closed
   108  func registerWrappedClusterResolverPolicy(t *testing.T) (chan serviceconfig.LoadBalancingConfig, chan error, chan struct{}, chan struct{}) {
   109  	clusterresolverBuilder := balancer.Get(clusterresolver.Name)
   110  	internal.BalancerUnregister(clusterresolverBuilder.Name())
   111  
   112  	lbCfgCh := make(chan serviceconfig.LoadBalancingConfig, 1)
   113  	resolverErrCh := make(chan error, 1)
   114  	exitIdleCh := make(chan struct{})
   115  	closeCh := make(chan struct{})
   116  
   117  	stub.Register(clusterresolver.Name, stub.BalancerFuncs{
   118  		Init: func(bd *stub.BalancerData) {
   119  			bd.Data = clusterresolverBuilder.Build(bd.ClientConn, bd.BuildOptions)
   120  		},
   121  		ParseConfig: func(lbCfg json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
   122  			return clusterresolverBuilder.(balancer.ConfigParser).ParseConfig(lbCfg)
   123  		},
   124  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
   125  			select {
   126  			case lbCfgCh <- ccs.BalancerConfig:
   127  			default:
   128  			}
   129  			bal := bd.Data.(balancer.Balancer)
   130  			return bal.UpdateClientConnState(ccs)
   131  		},
   132  		ResolverError: func(bd *stub.BalancerData, err error) {
   133  			select {
   134  			case resolverErrCh <- err:
   135  			default:
   136  			}
   137  			bal := bd.Data.(balancer.Balancer)
   138  			bal.ResolverError(err)
   139  		},
   140  		ExitIdle: func(bd *stub.BalancerData) {
   141  			bal := bd.Data.(balancer.Balancer)
   142  			bal.(balancer.ExitIdler).ExitIdle()
   143  			close(exitIdleCh)
   144  		},
   145  		Close: func(bd *stub.BalancerData) {
   146  			bal := bd.Data.(balancer.Balancer)
   147  			bal.Close()
   148  			close(closeCh)
   149  		},
   150  	})
   151  	t.Cleanup(func() { balancer.Register(clusterresolverBuilder) })
   152  
   153  	return lbCfgCh, resolverErrCh, exitIdleCh, closeCh
   154  }
   155  
   156  // Registers a wrapped cds LB policy for the duration of this test that retains
   157  // all the functionality of the original cds LB policy, but makes the newly
   158  // built policy available to the test to directly invoke any balancer methods.
   159  //
   160  // Returns a channel on which the newly built cds LB policy is written to.
   161  func registerWrappedCDSPolicy(t *testing.T) chan balancer.Balancer {
   162  	cdsBuilder := balancer.Get(cdsName)
   163  	internal.BalancerUnregister(cdsBuilder.Name())
   164  	cdsBalancerCh := make(chan balancer.Balancer, 1)
   165  	stub.Register(cdsBuilder.Name(), stub.BalancerFuncs{
   166  		Init: func(bd *stub.BalancerData) {
   167  			bal := cdsBuilder.Build(bd.ClientConn, bd.BuildOptions)
   168  			bd.Data = bal
   169  			cdsBalancerCh <- bal
   170  		},
   171  		ParseConfig: func(lbCfg json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
   172  			return cdsBuilder.(balancer.ConfigParser).ParseConfig(lbCfg)
   173  		},
   174  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
   175  			bal := bd.Data.(balancer.Balancer)
   176  			return bal.UpdateClientConnState(ccs)
   177  		},
   178  		Close: func(bd *stub.BalancerData) {
   179  			bal := bd.Data.(balancer.Balancer)
   180  			bal.Close()
   181  		},
   182  	})
   183  	t.Cleanup(func() { balancer.Register(cdsBuilder) })
   184  
   185  	return cdsBalancerCh
   186  }
   187  
   188  // Performs the following setup required for tests:
   189  //   - Spins up an xDS management server
   190  //   - Creates an xDS client talking to this management server
   191  //   - Creates a manual resolver that configures the cds LB policy as the
   192  //     top-level policy, and pushes an initial configuration to it
   193  //   - Creates a gRPC channel with the above manual resolver
   194  //
   195  // Returns the following:
   196  //   - the xDS management server
   197  //   - the nodeID expected by the management server
   198  //   - the grpc channel to the test backend service
   199  //   - the manual resolver configured on the channel
   200  //   - the xDS client used the grpc channel
   201  //   - a channel on which requested cluster resource names are sent
   202  //   - a channel used to signal that previously requested cluster resources are
   203  //     no longer requested
   204  func setupWithManagementServer(t *testing.T) (*e2e.ManagementServer, string, *grpc.ClientConn, *manual.Resolver, xdsclient.XDSClient, chan []string, chan struct{}) {
   205  	return setupWithManagementServerAndListener(t, nil)
   206  }
   207  
   208  // Same as setupWithManagementServer, but also allows the caller to specify
   209  // a listener to be used by the management server.
   210  func setupWithManagementServerAndListener(t *testing.T, lis net.Listener) (*e2e.ManagementServer, string, *grpc.ClientConn, *manual.Resolver, xdsclient.XDSClient, chan []string, chan struct{}) {
   211  	t.Helper()
   212  
   213  	cdsResourceRequestedCh := make(chan []string, 1)
   214  	cdsResourceCanceledCh := make(chan struct{}, 1)
   215  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
   216  		Listener: lis,
   217  		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
   218  			if req.GetTypeUrl() == version.V3ClusterURL {
   219  				switch len(req.GetResourceNames()) {
   220  				case 0:
   221  					select {
   222  					case cdsResourceCanceledCh <- struct{}{}:
   223  					default:
   224  					}
   225  				default:
   226  					select {
   227  					case cdsResourceRequestedCh <- req.GetResourceNames():
   228  					default:
   229  					}
   230  				}
   231  			}
   232  			return nil
   233  		},
   234  		// Required for aggregate clusters as all resources cannot be requested
   235  		// at once.
   236  		AllowResourceSubset: true,
   237  	})
   238  
   239  	// Create bootstrap configuration pointing to the above management server.
   240  	nodeID := uuid.New().String()
   241  	bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address)
   242  
   243  	config, err := bootstrap.NewConfigFromContents(bc)
   244  	if err != nil {
   245  		t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bc), err)
   246  	}
   247  	pool := xdsclient.NewPool(config)
   248  	xdsC, xdsClose, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{
   249  		Name: t.Name(),
   250  	})
   251  	if err != nil {
   252  		t.Fatalf("Failed to create xDS client: %v", err)
   253  	}
   254  	t.Cleanup(xdsClose)
   255  
   256  	r := manual.NewBuilderWithScheme("whatever")
   257  	jsonSC := fmt.Sprintf(`{
   258  			"loadBalancingConfig":[{
   259  				"cds_experimental":{
   260  					"cluster": "%s"
   261  				}
   262  			}]
   263  		}`, clusterName)
   264  	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC)
   265  	r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsC))
   266  
   267  	cc, err := grpc.NewClient(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r))
   268  	if err != nil {
   269  		t.Fatalf("grpc.NewClient(%q) = %v", lis.Addr().String(), err)
   270  	}
   271  	cc.Connect()
   272  	t.Cleanup(func() { cc.Close() })
   273  
   274  	return mgmtServer, nodeID, cc, r, xdsC, cdsResourceRequestedCh, cdsResourceCanceledCh
   275  }
   276  
   277  // Helper function to compare the load balancing configuration received on the
   278  // channel with the expected one. Both configs are marshalled to JSON and then
   279  // compared.
   280  //
   281  // Returns an error if marshalling to JSON fails, or if the load balancing
   282  // configurations don't match, or if the context deadline expires before reading
   283  // a child policy configuration off of the lbCfgCh.
   284  func compareLoadBalancingConfig(ctx context.Context, lbCfgCh chan serviceconfig.LoadBalancingConfig, wantChildCfg serviceconfig.LoadBalancingConfig) error {
   285  	wantJSON, err := json.Marshal(wantChildCfg)
   286  	if err != nil {
   287  		return fmt.Errorf("failed to marshal expected child config to JSON: %v", err)
   288  	}
   289  	select {
   290  	case lbCfg := <-lbCfgCh:
   291  		gotJSON, err := json.Marshal(lbCfg)
   292  		if err != nil {
   293  			return fmt.Errorf("failed to marshal received LB config into JSON: %v", err)
   294  		}
   295  		if diff := cmp.Diff(wantJSON, gotJSON); diff != "" {
   296  			return fmt.Errorf("child policy received unexpected diff in config (-want +got):\n%s", diff)
   297  		}
   298  	case <-ctx.Done():
   299  		return fmt.Errorf("timeout when waiting for child policy to receive its configuration")
   300  	}
   301  	return nil
   302  }
   303  
   304  func verifyRPCError(gotErr error, wantCode codes.Code, wantErr, wantNodeID string) error {
   305  	if gotErr == nil {
   306  		return fmt.Errorf("RPC succeeded when expecting an error with code %v, message %q and nodeID %q", wantCode, wantErr, wantNodeID)
   307  	}
   308  	if gotCode := status.Code(gotErr); gotCode != wantCode {
   309  		return fmt.Errorf("RPC failed with code: %v, want code %v", gotCode, wantCode)
   310  	}
   311  	if !strings.Contains(gotErr.Error(), wantErr) {
   312  		return fmt.Errorf("RPC failed with error: %v, want %q", gotErr, wantErr)
   313  	}
   314  	if !strings.Contains(gotErr.Error(), wantNodeID) {
   315  		return fmt.Errorf("RPC failed with error: %v, want nodeID %q", gotErr, wantNodeID)
   316  	}
   317  	return nil
   318  }
   319  
   320  // Tests the functionality that handles LB policy configuration. Verifies that
   321  // the appropriate xDS resource is requested corresponding to the provided LB
   322  // policy configuration. Also verifies that when the LB policy receives the same
   323  // configuration again, it does not send out a new request, and when the
   324  // configuration changes, it stops requesting the old cluster resource and
   325  // starts requesting the new one.
   326  func (s) TestConfigurationUpdate_Success(t *testing.T) {
   327  	_, _, _, r, xdsClient, cdsResourceRequestedCh, _ := setupWithManagementServer(t)
   328  
   329  	// Verify that the specified cluster resource is requested.
   330  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   331  	defer cancel()
   332  	wantNames := []string{clusterName}
   333  	if err := waitForResourceNames(ctx, cdsResourceRequestedCh, wantNames); err != nil {
   334  		t.Fatal(err)
   335  	}
   336  
   337  	// Push the same configuration again.
   338  	jsonSC := fmt.Sprintf(`{
   339  			"loadBalancingConfig":[{
   340  				"cds_experimental":{
   341  					"cluster": "%s"
   342  				}
   343  			}]
   344  		}`, clusterName)
   345  	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC)
   346  	r.UpdateState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsClient))
   347  
   348  	// Verify that a new CDS request is not sent.
   349  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   350  	defer sCancel()
   351  	select {
   352  	case <-sCtx.Done():
   353  	case gotNames := <-cdsResourceRequestedCh:
   354  		t.Fatalf("CDS resources %v requested when none expected", gotNames)
   355  	}
   356  
   357  	// Push an updated configuration with a different cluster name.
   358  	newClusterName := clusterName + "-new"
   359  	jsonSC = fmt.Sprintf(`{
   360  			"loadBalancingConfig":[{
   361  				"cds_experimental":{
   362  					"cluster": "%s"
   363  				}
   364  			}]
   365  		}`, newClusterName)
   366  	scpr = internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC)
   367  	r.UpdateState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsClient))
   368  
   369  	// Verify that the new cluster name is requested and the old one is no
   370  	// longer requested.
   371  	wantNames = []string{newClusterName}
   372  	if err := waitForResourceNames(ctx, cdsResourceRequestedCh, wantNames); err != nil {
   373  		t.Fatal(err)
   374  	}
   375  }
   376  
   377  // Tests the case where a configuration with an empty cluster name is pushed to
   378  // the CDS LB policy. Verifies that ErrBadResolverState is returned.
   379  func (s) TestConfigurationUpdate_EmptyCluster(t *testing.T) {
   380  	// Setup a management server and an xDS client to talk to it.
   381  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
   382  
   383  	// Create bootstrap configuration pointing to the above management server.
   384  	nodeID := uuid.New().String()
   385  	bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address)
   386  
   387  	config, err := bootstrap.NewConfigFromContents(bc)
   388  	if err != nil {
   389  		t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bc), err)
   390  	}
   391  	pool := xdsclient.NewPool(config)
   392  	xdsClient, xdsClose, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{
   393  		Name: t.Name(),
   394  	})
   395  	if err != nil {
   396  		t.Fatalf("Failed to create xDS client: %v", err)
   397  	}
   398  	t.Cleanup(xdsClose)
   399  
   400  	// Create a manual resolver that configures the CDS LB policy as the
   401  	// top-level LB policy on the channel, and pushes a configuration with an
   402  	// empty cluster name. Also, register a callback with the manual resolver to
   403  	// receive the error returned by the balancer when a configuration with an
   404  	// empty cluster name is pushed.
   405  	r := manual.NewBuilderWithScheme("whatever")
   406  	updateStateErrCh := make(chan error, 1)
   407  	r.UpdateStateCallback = func(err error) { updateStateErrCh <- err }
   408  	jsonSC := `{
   409  			"loadBalancingConfig":[{
   410  				"cds_experimental":{
   411  					"cluster": ""
   412  				}
   413  			}]
   414  		}`
   415  	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC)
   416  	r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsClient))
   417  
   418  	// Create a ClientConn with the above manual resolver.
   419  	cc, err := grpc.NewClient(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r))
   420  	if err != nil {
   421  		t.Fatalf("grpc.NewClient() failed: %v", err)
   422  	}
   423  	cc.Connect()
   424  	t.Cleanup(func() { cc.Close() })
   425  
   426  	select {
   427  	case <-time.After(defaultTestTimeout):
   428  		t.Fatalf("Timed out waiting for error from the LB policy")
   429  	case err := <-updateStateErrCh:
   430  		if err != balancer.ErrBadResolverState {
   431  			t.Fatalf("For a configuration update with an empty cluster name, got error %v from the LB policy, want %v", err, balancer.ErrBadResolverState)
   432  		}
   433  	}
   434  }
   435  
   436  // Tests the case where a configuration with a missing xDS client is pushed to
   437  // the CDS LB policy. Verifies that ErrBadResolverState is returned.
   438  func (s) TestConfigurationUpdate_MissingXdsClient(t *testing.T) {
   439  	// Create a manual resolver that configures the CDS LB policy as the
   440  	// top-level LB policy on the channel, and pushes a configuration that is
   441  	// missing the xDS client.  Also, register a callback with the manual
   442  	// resolver to receive the error returned by the balancer.
   443  	r := manual.NewBuilderWithScheme("whatever")
   444  	updateStateErrCh := make(chan error, 1)
   445  	r.UpdateStateCallback = func(err error) { updateStateErrCh <- err }
   446  	jsonSC := `{
   447  			"loadBalancingConfig":[{
   448  				"cds_experimental":{
   449  					"cluster": "foo"
   450  				}
   451  			}]
   452  		}`
   453  	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC)
   454  	r.InitialState(resolver.State{ServiceConfig: scpr})
   455  
   456  	// Create a ClientConn with the above manual resolver.
   457  	cc, err := grpc.NewClient(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r))
   458  	if err != nil {
   459  		t.Fatalf("grpc.NewClient() failed: %v", err)
   460  	}
   461  	cc.Connect()
   462  	t.Cleanup(func() { cc.Close() })
   463  
   464  	select {
   465  	case <-time.After(defaultTestTimeout):
   466  		t.Fatalf("Timed out waiting for error from the LB policy")
   467  	case err := <-updateStateErrCh:
   468  		if err != balancer.ErrBadResolverState {
   469  			t.Fatalf("For a configuration update missing the xDS client, got error %v from the LB policy, want %v", err, balancer.ErrBadResolverState)
   470  		}
   471  	}
   472  }
   473  
// Tests success scenarios where the cds LB policy receives a cluster resource
// from the management server. Verifies that the load balancing configuration
// pushed to the child is as expected.
//
// Each table entry pairs a cluster resource served by the management server
// with the configuration that the wrapped cluster_resolver child policy is
// expected to receive. The comparison is done by compareLoadBalancingConfig.
func (s) TestClusterUpdate_Success(t *testing.T) {
	tests := []struct {
		name            string
		clusterResource *v3clusterpb.Cluster
		wantChildCfg    serviceconfig.LoadBalancingConfig
	}{
		{
			// Thresholds are given for two priorities, but only the DEFAULT
			// priority carries a MaxRequests value; that value (512) is what
			// is expected as MaxConcurrentRequests in the child config.
			name: "happy-case-with-circuit-breakers",
			clusterResource: func() *v3clusterpb.Cluster {
				c := e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)
				c.CircuitBreakers = &v3clusterpb.CircuitBreakers{
					Thresholds: []*v3clusterpb.CircuitBreakers_Thresholds{
						{
							Priority:    v3corepb.RoutingPriority_DEFAULT,
							MaxRequests: wrapperspb.UInt32(512),
						},
						{
							Priority:    v3corepb.RoutingPriority_HIGH,
							MaxRequests: nil,
						},
					},
				}
				return c
			}(),
			wantChildCfg: &clusterresolver.LBConfig{
				DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
					Cluster:               clusterName,
					Type:                  clusterresolver.DiscoveryMechanismTypeEDS,
					EDSServiceName:        serviceName,
					MaxConcurrentRequests: newUint32(512),
					OutlierDetection:      json.RawMessage(`{}`),
					TelemetryLabels:       xdsinternal.UnknownCSMLabels,
				}},
				XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
			},
		},
		{
			// Ring hash policy with explicit ring sizes; the same sizes are
			// expected in the ring_hash_experimental child policy config.
			name: "happy-case-with-ring-hash-lb-policy",
			clusterResource: func() *v3clusterpb.Cluster {
				c := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
					ClusterName:   clusterName,
					ServiceName:   serviceName,
					SecurityLevel: e2e.SecurityLevelNone,
					Policy:        e2e.LoadBalancingPolicyRingHash,
				})
				c.LbConfig = &v3clusterpb.Cluster_RingHashLbConfig_{
					RingHashLbConfig: &v3clusterpb.Cluster_RingHashLbConfig{
						MinimumRingSize: &wrapperspb.UInt64Value{Value: 100},
						MaximumRingSize: &wrapperspb.UInt64Value{Value: 1000},
					},
				}
				return c
			}(),
			wantChildCfg: &clusterresolver.LBConfig{
				DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
					Cluster:          clusterName,
					Type:             clusterresolver.DiscoveryMechanismTypeEDS,
					EDSServiceName:   serviceName,
					OutlierDetection: json.RawMessage(`{}`),
					TelemetryLabels:  xdsinternal.UnknownCSMLabels,
				}},
				XDSLBPolicy: json.RawMessage(`[{"ring_hash_experimental": {"minRingSize":100, "maxRingSize":1000}}]`),
			},
		},
		{
			// An empty OutlierDetection message is expected to produce an
			// outlier detection config holding an empty successRateEjection.
			// No ring sizes are set on the resource here; the expected sizes
			// (1024 and 8388608) are presumably the defaults.
			name: "happy-case-outlier-detection-xds-defaults", // OD proto set but no proto fields set
			clusterResource: func() *v3clusterpb.Cluster {
				c := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
					ClusterName:   clusterName,
					ServiceName:   serviceName,
					SecurityLevel: e2e.SecurityLevelNone,
					Policy:        e2e.LoadBalancingPolicyRingHash,
				})
				c.OutlierDetection = &v3clusterpb.OutlierDetection{}
				return c
			}(),
			wantChildCfg: &clusterresolver.LBConfig{
				DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
					Cluster:          clusterName,
					Type:             clusterresolver.DiscoveryMechanismTypeEDS,
					EDSServiceName:   serviceName,
					OutlierDetection: json.RawMessage(`{"successRateEjection":{}}`),
					TelemetryLabels:  xdsinternal.UnknownCSMLabels,
				}},
				XDSLBPolicy: json.RawMessage(`[{"ring_hash_experimental": {"minRingSize":1024, "maxRingSize":8388608}}]`),
			},
		},
		{
			// Every OutlierDetection field set below has a counterpart in the
			// expected JSON: the success-rate and failure-percentage fields
			// are grouped under successRateEjection and
			// failurePercentageEjection respectively.
			name: "happy-case-outlier-detection-all-fields-set",
			clusterResource: func() *v3clusterpb.Cluster {
				c := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
					ClusterName:   clusterName,
					ServiceName:   serviceName,
					SecurityLevel: e2e.SecurityLevelNone,
					Policy:        e2e.LoadBalancingPolicyRingHash,
				})
				c.OutlierDetection = &v3clusterpb.OutlierDetection{
					Interval:                       durationpb.New(10 * time.Second),
					BaseEjectionTime:               durationpb.New(30 * time.Second),
					MaxEjectionTime:                durationpb.New(300 * time.Second),
					MaxEjectionPercent:             wrapperspb.UInt32(10),
					SuccessRateStdevFactor:         wrapperspb.UInt32(1900),
					EnforcingSuccessRate:           wrapperspb.UInt32(100),
					SuccessRateMinimumHosts:        wrapperspb.UInt32(5),
					SuccessRateRequestVolume:       wrapperspb.UInt32(100),
					FailurePercentageThreshold:     wrapperspb.UInt32(85),
					EnforcingFailurePercentage:     wrapperspb.UInt32(5),
					FailurePercentageMinimumHosts:  wrapperspb.UInt32(5),
					FailurePercentageRequestVolume: wrapperspb.UInt32(50),
				}
				return c
			}(),
			wantChildCfg: &clusterresolver.LBConfig{
				DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
					Cluster:        clusterName,
					Type:           clusterresolver.DiscoveryMechanismTypeEDS,
					EDSServiceName: serviceName,
					OutlierDetection: json.RawMessage(`{
						"interval": "10s",
						"baseEjectionTime": "30s",
						"maxEjectionTime": "300s",
						"maxEjectionPercent": 10,
						"successRateEjection": {
							"stdevFactor": 1900,
							"enforcementPercentage": 100,
							"minimumHosts": 5,
							"requestVolume": 100
						},
						"failurePercentageEjection": {
							"threshold": 85,
							"enforcementPercentage": 5,
							"minimumHosts": 5,
							"requestVolume": 50
						}
					}`),
					TelemetryLabels: xdsinternal.UnknownCSMLabels,
				}},
				XDSLBPolicy: json.RawMessage(`[{"ring_hash_experimental": {"minRingSize":1024, "maxRingSize":8388608}}]`),
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// Every subtest installs its own wrapped child policy and sets up
			// its own management server, xDS client and channel.
			lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
			mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t)

			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
			defer cancel()
			// Only a cluster resource is configured on the management server,
			// and the update is sent with SkipValidation set.
			if err := mgmtServer.Update(ctx, e2e.UpdateOptions{
				NodeID:         nodeID,
				Clusters:       []*v3clusterpb.Cluster{test.clusterResource},
				SkipValidation: true,
			}); err != nil {
				t.Fatal(err)
			}

			if err := compareLoadBalancingConfig(ctx, lbCfgCh, test.wantChildCfg); err != nil {
				t.Fatal(err)
			}
		})
	}
}
   640  
   641  // Tests a single success scenario where the cds LB policy receives a cluster
   642  // resource from the management server with LRS enabled. Verifies that the load
   643  // balancing configuration pushed to the child is as expected.
   644  func (s) TestClusterUpdate_SuccessWithLRS(t *testing.T) {
   645  	lbCfgCh, _, _, _ := registerWrappedClusterResolverPolicy(t)
   646  	mgmtServer, nodeID, _, _, _, _, _ := setupWithManagementServer(t)
   647  
   648  	clusterResource := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
   649  		ClusterName: clusterName,
   650  		ServiceName: serviceName,
   651  		EnableLRS:   true,
   652  	})
   653  	lrsServerCfg, err := bootstrap.ServerConfigForTesting(bootstrap.ServerConfigTestingOptions{URI: fmt.Sprintf("passthrough:///%s", mgmtServer.Address)})
   654  	if err != nil {
   655  		t.Fatalf("Failed to create LRS server config for testing: %v", err)
   656  	}
   657  
   658  	wantChildCfg := &clusterresolver.LBConfig{
   659  		DiscoveryMechanisms: []clusterresolver.DiscoveryMechanism{{
   660  			Cluster:             clusterName,
   661  			Type:                clusterresolver.DiscoveryMechanismTypeEDS,
   662  			EDSServiceName:      serviceName,
   663  			LoadReportingServer: lrsServerCfg,
   664  			OutlierDetection:    json.RawMessage(`{}`),
   665  			TelemetryLabels:     xdsinternal.UnknownCSMLabels,
   666  		}},
   667  		XDSLBPolicy: json.RawMessage(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`),
   668  	}
   669  
   670  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   671  	defer cancel()
   672  	if err := mgmtServer.Update(ctx, e2e.UpdateOptions{
   673  		NodeID:         nodeID,
   674  		Clusters:       []*v3clusterpb.Cluster{clusterResource},
   675  		SkipValidation: true,
   676  	}); err != nil {
   677  		t.Fatal(err)
   678  	}
   679  
   680  	if err := compareLoadBalancingConfig(ctx, lbCfgCh, wantChildCfg); err != nil {
   681  		t.Fatal(err)
   682  	}
   683  }
   684  
   685  // Tests scenarios for a bad cluster update received from the management server.
   686  //
   687  //   - when a bad cluster resource update is received without any previous good
   688  //     update from the management server, the cds LB policy is expected to put
   689  //     the channel in TRANSIENT_FAILURE and fail RPCs with the NACK error.
   690  //   - when a bad cluster resource update is received after a previous good
   691  //     update from the management server, the cds LB policy is expected to keep
   692  //     using the previous good update and push the error to its child policy.
   693  func (s) TestClusterUpdate_Failure(t *testing.T) {
   694  	_, resolverErrCh, _, _ := registerWrappedClusterResolverPolicy(t)
   695  	mgmtServer, nodeID, cc, _, _, cdsResourceRequestedCh, cdsResourceCanceledCh := setupWithManagementServer(t)
   696  
   697  	// Verify that the specified cluster resource is requested.
   698  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   699  	defer cancel()
   700  	wantNames := []string{clusterName}
   701  	if err := waitForResourceNames(ctx, cdsResourceRequestedCh, wantNames); err != nil {
   702  		t.Fatal(err)
   703  	}
   704  
   705  	// Configure the management server to return a cluster resource that
   706  	// contains a config_source_specifier for the `lrs_server` field which is not
   707  	// set to `self`, and hence is expected to be NACKed by the client.
   708  	cluster := e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)
   709  	cluster.LrsServer = &v3corepb.ConfigSource{ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{}}
   710  	resources := e2e.UpdateOptions{
   711  		NodeID:         nodeID,
   712  		Clusters:       []*v3clusterpb.Cluster{cluster},
   713  		SkipValidation: true,
   714  	}
   715  	if err := mgmtServer.Update(ctx, resources); err != nil {
   716  		t.Fatal(err)
   717  	}
   718  
   719  	// Verify that the cluster resource watch is not cancelled within the short timeout.
   720  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   721  	defer sCancel()
   722  	select {
   723  	case <-sCtx.Done():
   724  	case <-cdsResourceCanceledCh:
   725  		t.Fatal("Watch for cluster resource is cancelled when not expected to")
   726  	}
   727  
   728  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   729  
   730  	// Ensure that the NACK error and the xDS node ID are propagated to the RPC
   731  	// caller.
   732  	const wantClusterNACKErr = "unsupported config_source_specifier"
   733  	client := testgrpc.NewTestServiceClient(cc)
   734  	_, err := client.EmptyCall(ctx, &testpb.Empty{})
   735  	if err := verifyRPCError(err, codes.Unavailable, wantClusterNACKErr, nodeID); err != nil {
   736  		t.Fatal(err)
   737  	}
   738  
   739  	// Start a test service backend.
   740  	server := stubserver.StartTestService(t, nil)
   741  	t.Cleanup(server.Stop)
   742  
   743  	// Configure good cluster and endpoints resources in the management server.
   744  	resources = e2e.UpdateOptions{
   745  		NodeID:         nodeID,
   746  		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)},
   747  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
   748  		SkipValidation: true,
   749  	}
   750  	if err := mgmtServer.Update(ctx, resources); err != nil {
   751  		t.Fatal(err)
   752  	}
   753  
   754  	// Verify that a successful RPC can be made.
   755  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   756  		t.Fatalf("EmptyCall() failed: %v", err)
   757  	}
   758  
   759  	// Send the bad cluster resource again, now that a good update was received.
   760  	resources = e2e.UpdateOptions{
   761  		NodeID:         nodeID,
   762  		Clusters:       []*v3clusterpb.Cluster{cluster},
   763  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
   764  		SkipValidation: true,
   765  	}
   766  	if err := mgmtServer.Update(ctx, resources); err != nil {
   767  		t.Fatal(err)
   768  	}
   769  
   770  	// Verify that the cluster resource watch is not cancelled within the short timeout.
   771  	sCtx, sCancel = context.WithTimeout(ctx, defaultTestShortTimeout)
   772  	defer sCancel()
   773  	select {
   774  	case <-sCtx.Done():
   775  	case <-cdsResourceCanceledCh:
   776  		t.Fatal("Watch for cluster resource is cancelled when not expected to")
   777  	}
   778  
   779  	// Verify that a successful RPC can be made, using the previously received
   780  	// good configuration.
   781  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   782  		t.Fatalf("EmptyCall() failed: %v", err)
   783  	}
   784  
   785  	// Verify that the NACK error is pushed to the child policy as a resolver error.
   786  	select {
   787  	case err := <-resolverErrCh:
   788  		if !strings.Contains(err.Error(), wantClusterNACKErr) {
   789  			t.Fatalf("Error pushed to child policy is %v, want %v", err, wantClusterNACKErr)
   790  		}
   791  	case <-ctx.Done():
   792  		t.Fatal("Timeout when waiting for resolver error to be pushed to the child policy")
   793  	}
   794  }
   795  
   796  // Tests the following scenarios for resolver errors:
   797  //   - when a resolver error is received without any previous good update from the
   798  //     management server, the cds LB policy is expected to put the channel in
   799  //     TRANSIENT_FAILURE.
   800  //   - when a resolver error is received (one that is not a resource-not-found
   801  //     error), with a previous good update from the management server, the cds LB
   802  //     policy is expected to push the error down to the child policy, but is
   803  //     expected to continue to use the previously received good configuration.
   804  //   - when a resolver error is received (one that is a resource-not-found
   805  //     error, which is usually the case when the LDS resource is removed),
   806  //     with a previous good update from the management server, the child policy
   807  //     is expected to be closed and the channel put in TRANSIENT_FAILURE. The
   808  //     CDS watch is also expected to be cancelled.
   809  func (s) TestResolverError(t *testing.T) {
   810  	_, resolverErrCh, _, childPolicyCloseCh := registerWrappedClusterResolverPolicy(t)
   811  	lis := testutils.NewListenerWrapper(t, nil)
   812  	mgmtServer, nodeID, cc, r, _, cdsResourceRequestedCh, cdsResourceCanceledCh := setupWithManagementServerAndListener(t, lis)
   813  
   814  	// Grab the wrapped connection from the listener wrapper. This will be used
   815  	// to verify the connection is closed.
   816  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   817  	defer cancel()
   818  	val, err := lis.NewConnCh.Receive(ctx)
   819  	if err != nil {
   820  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   821  	}
   822  	conn := val.(*testutils.ConnWrapper)
   823  
   824  	// Verify that the specified cluster resource is requested.
   825  	wantNames := []string{clusterName}
   826  	if err := waitForResourceNames(ctx, cdsResourceRequestedCh, wantNames); err != nil {
   827  		t.Fatal(err)
   828  	}
   829  
   830  	// Push a resolver error that is not a resource-not-found error. Here, we
   831  	// assume that errors from the xDS client or from the xDS resolver contain
   832  	// the xDS node ID.
   833  	resolverErr := fmt.Errorf("[xds node id: %s]: resolver-error-not-a-resource-not-found-error", nodeID)
   834  	r.CC().ReportError(resolverErr)
   835  
   836  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   837  
   838  	// Drain (without blocking) any error already pushed to the child policy.
   839  	select {
   840  	case <-resolverErrCh:
   841  	default:
   842  	}
   843  
   844  	// Ensure that the resolver error is propagated to the RPC caller.
   845  	client := testgrpc.NewTestServiceClient(cc)
   846  	_, err = client.EmptyCall(ctx, &testpb.Empty{})
   847  	if err := verifyRPCError(err, codes.Unavailable, resolverErr.Error(), nodeID); err != nil {
   848  		t.Fatal(err)
   849  	}
   850  
   851  	// Also verify that the watch for the cluster resource is not cancelled.
   852  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   853  	defer sCancel()
   854  	select {
   855  	case <-sCtx.Done():
   856  	case <-cdsResourceCanceledCh:
   857  		t.Fatal("Watch for cluster resource is cancelled when not expected to")
   858  	}
   859  
   860  	// Start a test service backend.
   861  	server := stubserver.StartTestService(t, nil)
   862  	t.Cleanup(server.Stop)
   863  
   864  	// Configure good cluster and endpoints resources in the management server.
   865  	resources := e2e.UpdateOptions{
   866  		NodeID:         nodeID,
   867  		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)},
   868  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
   869  		SkipValidation: true,
   870  	}
   871  	if err := mgmtServer.Update(ctx, resources); err != nil {
   872  		t.Fatal(err)
   873  	}
   874  
   875  	// Verify that a successful RPC can be made.
   876  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   877  		t.Fatalf("EmptyCall() failed: %v", err)
   878  	}
   879  
   880  	// Again push a resolver error that is not a resource-not-found error.
   881  	r.CC().ReportError(resolverErr)
   882  
   883  	// And again verify that the watch for the cluster resource is not
   884  	// cancelled.
   885  	sCtx, sCancel = context.WithTimeout(ctx, defaultTestShortTimeout)
   886  	defer sCancel()
   887  	select {
   888  	case <-sCtx.Done():
   889  	case <-cdsResourceCanceledCh:
   890  		t.Fatal("Watch for cluster resource is cancelled when not expected to")
   891  	}
   892  
   893  	// Verify that a successful RPC can be made, using the previously received
   894  	// good configuration.
   895  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   896  		t.Fatalf("EmptyCall() failed: %v", err)
   897  	}
   898  
   899  	// Verify that the very same resolver error is pushed to the child policy.
   900  	select {
   901  	case err := <-resolverErrCh:
   902  		if err != resolverErr {
   903  			t.Fatalf("Error pushed to child policy is %v, want %v", err, resolverErr)
   904  		}
   905  	case <-ctx.Done():
   906  		t.Fatal("Timeout when waiting for resolver error to be pushed to the child policy")
   907  	}
   908  
   909  	// Push a resource-not-found-error this time around. Our xDS resolver does
   910  	// not send this error though. When an LDS or RDS resource is missing, the
   911  	// xDS resolver instead sends an erroring config selector which returns an
   912  	// error at RPC time with the xDS node ID, for new RPCs. Once ongoing RPCs
   913  	// complete, the xDS resolver will send an empty service config with no
   914  	// addresses, which will result in pick_first being configured on the
   915  	// channel. And pick_first will put the channel in TRANSIENT_FAILURE since
   916  	// it would have received an update with no addresses.
   917  	resolverErr = fmt.Errorf("[xds node id: %s]: %w", nodeID, xdsresource.NewError(xdsresource.ErrorTypeResourceNotFound, "xds resource not found error"))
   918  	r.CC().ReportError(resolverErr)
   919  
   920  	// Wait for the CDS resource to be not requested anymore, or the connection
   921  	// to the management server to be closed (which happens as part of the last
   922  	// resource watch being canceled).
   923  	select {
   924  	case <-ctx.Done():
   925  		t.Fatal("Timeout when waiting for CDS resource to be not requested")
   926  	case <-cdsResourceCanceledCh:
   927  	case <-conn.CloseCh.C:
   928  	}
   929  
   930  	// Verify that the child policy is closed.
   931  	select {
   932  	case <-childPolicyCloseCh:
   933  	case <-ctx.Done():
   934  		t.Fatal("Timeout when waiting for child policy to be closed")
   935  	}
   936  
   937  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   938  
   939  	// Ensure that the resolver error is propagated to the RPC caller.
   940  	_, err = client.EmptyCall(ctx, &testpb.Empty{})
   941  	if err := verifyRPCError(err, codes.Unavailable, resolverErr.Error(), nodeID); err != nil {
   942  		t.Fatal(err)
   943  	}
   944  }
   945  
   946  // Tests scenarios involving removal of a cluster resource from the management
   947  // server.
   948  //
   949  //   - when the cluster resource is removed after a previous good
   950  //     update from the management server, the cds LB policy is expected to put
   951  //     the channel in TRANSIENT_FAILURE and fail RPCs with a meaningful error.
   952  //   - when the cluster resource is re-sent by the management server, RPCs
   953  //     should start succeeding.
   954  func (s) TestClusterUpdate_ResourceNotFound(t *testing.T) {
   955  	mgmtServer, nodeID, cc, _, _, cdsResourceRequestedCh, cdsResourceCanceledCh := setupWithManagementServer(t)
   956  
   957  	// Verify that the specified cluster resource is requested.
   958  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   959  	defer cancel()
   960  	wantNames := []string{clusterName}
   961  	if err := waitForResourceNames(ctx, cdsResourceRequestedCh, wantNames); err != nil {
   962  		t.Fatal(err)
   963  	}
   964  
   965  	// Start a test service backend.
   966  	server := stubserver.StartTestService(t, nil)
   967  	t.Cleanup(server.Stop)
   968  
   969  	// Configure cluster and endpoints resources in the management server.
   970  	resources := e2e.UpdateOptions{
   971  		NodeID:         nodeID,
   972  		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)},
   973  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
   974  		SkipValidation: true,
   975  	}
   976  	if err := mgmtServer.Update(ctx, resources); err != nil {
   977  		t.Fatal(err)
   978  	}
   979  
   980  	// Verify that a successful RPC can be made.
   981  	client := testgrpc.NewTestServiceClient(cc)
   982  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   983  		t.Fatalf("EmptyCall() failed: %v", err)
   984  	}
   985  
   986  	// Remove the cluster resource from the management server, triggering a
   987  	// resource-not-found error.
   988  	resources.Clusters = nil
   989  	if err := mgmtServer.Update(ctx, resources); err != nil {
   990  		t.Fatal(err)
   991  	}
   992  
   993  	// Verify that the cluster resource watch is kept open despite the removal.
   994  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   995  	defer sCancel()
   996  	select {
   997  	case <-sCtx.Done():
   998  	case <-cdsResourceCanceledCh:
   999  		t.Fatal("Watch for cluster resource is cancelled when not expected to")
  1000  	}
  1001  
  1002  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
  1003  
  1004  	// Ensure RPC fails with Unavailable status code and the error message is
  1005  	// meaningful and contains the xDS node ID.
  1006  	wantErr := fmt.Sprintf("cluster %q not found", clusterName)
  1007  	_, err := client.EmptyCall(ctx, &testpb.Empty{})
  1008  	if err := verifyRPCError(err, codes.Unavailable, wantErr, nodeID); err != nil {
  1009  		t.Fatal(err)
  1010  	}
  1011  
  1012  	// Re-add the cluster resource to the management server.
  1013  	resources = e2e.UpdateOptions{
  1014  		NodeID:         nodeID,
  1015  		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)},
  1016  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
  1017  		SkipValidation: true,
  1018  	}
  1019  	if err := mgmtServer.Update(ctx, resources); err != nil {
  1020  		t.Fatal(err)
  1021  	}
  1022  
  1023  	// Verify that RPCs succeed again now that the cluster resource is back.
  1024  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
  1025  		t.Fatalf("EmptyCall() failed: %v", err)
  1026  	}
  1027  }
  1028  
  1029  // Tests that when the cds LB policy is closed, its child policy is closed as
  1030  // well.
  1031  func (s) TestClose(t *testing.T) {
  1032  	balCh := registerWrappedCDSPolicy(t)
  1033  	_, _, _, childClosedCh := registerWrappedClusterResolverPolicy(t)
  1034  	mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t)
  1035  
  1036  	// Bring up a backend for the test service; it is stopped at cleanup time.
  1037  	backend := stubserver.StartTestService(t, nil)
  1038  	t.Cleanup(backend.Stop)
  1039  
  1040  	// Push cluster and endpoints resources pointing at the backend.
  1041  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1042  	defer cancel()
  1043  	backendPort := testutils.ParsePort(t, backend.Address)
  1044  	if err := mgmtServer.Update(ctx, e2e.UpdateOptions{
  1045  		NodeID:         nodeID,
  1046  		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)},
  1047  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{backendPort})},
  1048  		SkipValidation: true,
  1049  	}); err != nil {
  1050  		t.Fatal(err)
  1051  	}
  1052  
  1053  	// An RPC must succeed once the resources have been received.
  1054  	testClient := testgrpc.NewTestServiceClient(cc)
  1055  	if _, err := testClient.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
  1056  		t.Fatalf("EmptyCall() failed: %v", err)
  1057  	}
  1058  
  1059  	// Retrieve the cds LB policy once it has been created, and close it.
  1060  	var cdsPolicy balancer.Balancer
  1061  	select {
  1062  	case <-ctx.Done():
  1063  		t.Fatal("Timeout when waiting for cds LB policy to be created")
  1064  	case cdsPolicy = <-balCh:
  1065  	}
  1066  	cdsPolicy.Close()
  1067  
  1068  	// Closing the cds LB policy must result in the child policy being closed.
  1069  	select {
  1070  	case <-childClosedCh:
  1071  	case <-ctx.Done():
  1072  		t.Fatal("Timeout when waiting for the child policy to be closed")
  1073  	}
  1074  }
  1075  
  1076  // Tests that calling ExitIdle on the cds LB policy results in the call being
  1077  // propagated to the child policy.
  1078  func (s) TestExitIdle(t *testing.T) {
  1079  	cdsBalancerCh := registerWrappedCDSPolicy(t)
  1080  	_, _, exitIdleCh, _ := registerWrappedClusterResolverPolicy(t)
  1081  	mgmtServer, nodeID, cc, _, _, _, _ := setupWithManagementServer(t)
  1082  
  1083  	// Start a test service backend.
  1084  	server := stubserver.StartTestService(t, nil)
  1085  	t.Cleanup(server.Stop)
  1086  
  1087  	// Configure cluster and endpoints resources in the management server.
  1088  	resources := e2e.UpdateOptions{
  1089  		NodeID:         nodeID,
  1090  		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, serviceName, e2e.SecurityLevelNone)},
  1091  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(serviceName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
  1092  		SkipValidation: true,
  1093  	}
  1094  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1095  	defer cancel()
  1096  	if err := mgmtServer.Update(ctx, resources); err != nil {
  1097  		t.Fatal(err)
  1098  	}
  1099  
  1100  	// Verify that a successful RPC can be made.
  1101  	client := testgrpc.NewTestServiceClient(cc)
  1102  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
  1103  		t.Fatalf("EmptyCall() failed: %v", err)
  1104  	}
  1105  
  1106  	// Retrieve the cds LB policy and call ExitIdle() on it.
  1107  	var cdsBal balancer.Balancer
  1108  	select {
  1109  	case cdsBal = <-cdsBalancerCh:
  1110  	case <-ctx.Done():
  1111  		t.Fatal("Timeout when waiting for cds LB policy to be created")
  1112  	}
  1113  	cdsBal.(balancer.ExitIdler).ExitIdle()
  1114  
  1115  	// Wait for ExitIdle to be called on the child policy.
  1116  	select {
  1117  	case <-ctx.Done():
  1118  		t.Fatal("Timeout when waiting for ExitIdle to be called on the child policy")
  1119  	case <-exitIdleCh:
  1120  	}
  1121  }
  1122  
  1123  // TestParseConfig checks ParseConfig() of the CDS balancer on good and bad JSON.
  1124  func (s) TestParseConfig(t *testing.T) {
  1125  	builder := balancer.Get(cdsName)
  1126  	if builder == nil {
  1127  		t.Fatalf("balancer.Get(%q) returned nil", cdsName)
  1128  	}
  1129  	cfgParser, isParser := builder.(balancer.ConfigParser)
  1130  	if !isParser {
  1131  		t.Fatalf("balancer %q does not implement the ConfigParser interface", cdsName)
  1132  	}
  1133  
  1134  	testCases := []struct {
  1135  		name    string
  1136  		input   json.RawMessage
  1137  		wantCfg serviceconfig.LoadBalancingConfig
  1138  		wantErr bool
  1139  	}{
  1140  		{
  1141  			name:    "good-config",
  1142  			input:   json.RawMessage(`{"Cluster": "cluster1"}`),
  1143  			wantCfg: &lbConfig{ClusterName: "cluster1"},
  1144  		},
  1145  		{
  1146  			name:    "unknown-fields-in-config",
  1147  			input:   json.RawMessage(`{"Unknown": "foobar"}`),
  1148  			wantCfg: &lbConfig{ClusterName: ""},
  1149  		},
  1150  		{
  1151  			name:    "empty-config",
  1152  			input:   json.RawMessage(""),
  1153  			wantErr: true,
  1154  		},
  1155  		{
  1156  			name:    "bad-config",
  1157  			input:   json.RawMessage(`{"Cluster": 5}`),
  1158  			wantErr: true,
  1159  		},
  1160  	}
  1161  
  1162  	for _, tc := range testCases {
  1163  		t.Run(tc.name, func(t *testing.T) {
  1164  			cfg, err := cfgParser.ParseConfig(tc.input)
  1165  			switch {
  1166  			case (err != nil) != tc.wantErr:
  1167  				t.Fatalf("ParseConfig(%v) = %v, wantErr %v", string(tc.input), err, tc.wantErr)
  1168  			case tc.wantErr:
  1169  				// Expected failure; there is no parsed config to compare.
  1170  				return
  1171  			case !cmp.Equal(cfg, tc.wantCfg):
  1172  				t.Fatalf("ParseConfig(%v) = %v, want %v", string(tc.input), cfg, tc.wantCfg)
  1173  			}
  1174  		})
  1175  	}
  1176  }
  1177  
  1178  // newUint32 returns a pointer to a uint32 variable holding the value i. Each
  1179  // call yields a distinct pointer.
  1180  func newUint32(i uint32) *uint32 { return &i }