google.golang.org/grpc@v1.72.2/xds/internal/balancer/clusterimpl/balancer_test.go (about)

     1  /*
     2   *
     3   * Copyright 2020 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package clusterimpl
    20  
    21  import (
    22  	"context"
    23  	"encoding/json"
    24  	"errors"
    25  	"fmt"
    26  	"strings"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/google/go-cmp/cmp"
    31  	"github.com/google/go-cmp/cmp/cmpopts"
    32  	"google.golang.org/grpc/balancer"
    33  	"google.golang.org/grpc/balancer/base"
    34  	"google.golang.org/grpc/balancer/roundrobin"
    35  	"google.golang.org/grpc/connectivity"
    36  	"google.golang.org/grpc/internal"
    37  	"google.golang.org/grpc/internal/balancer/stub"
    38  	"google.golang.org/grpc/internal/grpctest"
    39  	internalserviceconfig "google.golang.org/grpc/internal/serviceconfig"
    40  	"google.golang.org/grpc/internal/testutils"
    41  	"google.golang.org/grpc/internal/xds"
    42  	"google.golang.org/grpc/internal/xds/bootstrap"
    43  	"google.golang.org/grpc/resolver"
    44  	"google.golang.org/grpc/serviceconfig"
    45  	xdsinternal "google.golang.org/grpc/xds/internal"
    46  	"google.golang.org/grpc/xds/internal/testutils/fakeclient"
    47  	"google.golang.org/grpc/xds/internal/xdsclient"
    48  	"google.golang.org/grpc/xds/internal/xdsclient/load"
    49  
    50  	v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3"
    51  )
    52  
    53  const (
    54  	defaultTestTimeout      = 5 * time.Second
    55  	defaultShortTestTimeout = 100 * time.Microsecond
    56  
    57  	testClusterName = "test-cluster"
    58  	testServiceName = "test-eds-service"
    59  
    60  	testNamedMetricsKey1 = "test-named1"
    61  	testNamedMetricsKey2 = "test-named2"
    62  )
    63  
    64  var (
    65  	testBackendEndpoints = []resolver.Endpoint{{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}}}
    66  	cmpOpts              = cmp.Options{
    67  		cmpopts.EquateEmpty(),
    68  		cmpopts.IgnoreFields(load.Data{}, "ReportInterval"),
    69  	}
    70  	toleranceCmpOpt = cmpopts.EquateApprox(0, 1e-5)
    71  )
    72  
// s is the receiver type for the tests in this file. It embeds
// grpctest.Tester and is handed to grpctest.RunSubTests by Test.
type s struct {
	grpctest.Tester
}
    76  
// Test is the single entry point seen by "go test"; it passes an s value to
// grpctest.RunSubTests, which is expected to run the methods defined on s.
func Test(t *testing.T) {
	grpctest.RunSubTests(t, s{})
}
    80  
// init replaces the package-level NewRandomWRR with the test implementation
// from testutils for every test in this package.
// NOTE(review): the drop tests assert a fixed drop pattern (e.g. every even
// pick), so the test WRR is presumably deterministic — confirm.
func init() {
	NewRandomWRR = testutils.NewTestWRR
}
    84  
    85  // TestDropByCategory verifies that the balancer correctly drops the picks, and
    86  // that the drops are reported.
    87  func (s) TestDropByCategory(t *testing.T) {
    88  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
    89  	defer cancel()
    90  
    91  	defer xdsclient.ClearCounterForTesting(testClusterName, testServiceName)
    92  	xdsC := fakeclient.NewClient()
    93  
    94  	builder := balancer.Get(Name)
    95  	cc := testutils.NewBalancerClientConn(t)
    96  	b := builder.Build(cc, balancer.BuildOptions{})
    97  	defer b.Close()
    98  
    99  	const (
   100  		dropReason      = "test-dropping-category"
   101  		dropNumerator   = 1
   102  		dropDenominator = 2
   103  	)
   104  	testLRSServerConfig, err := bootstrap.ServerConfigForTesting(bootstrap.ServerConfigTestingOptions{
   105  		URI:          "trafficdirector.googleapis.com:443",
   106  		ChannelCreds: []bootstrap.ChannelCreds{{Type: "google_default"}},
   107  	})
   108  	if err != nil {
   109  		t.Fatalf("Failed to create LRS server config for testing: %v", err)
   110  	}
   111  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   112  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   113  		BalancerConfig: &LBConfig{
   114  			Cluster:             testClusterName,
   115  			EDSServiceName:      testServiceName,
   116  			LoadReportingServer: testLRSServerConfig,
   117  			DropCategories: []DropConfig{{
   118  				Category:           dropReason,
   119  				RequestsPerMillion: million * dropNumerator / dropDenominator,
   120  			}},
   121  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   122  				Name: roundrobin.Name,
   123  			},
   124  		},
   125  	}); err != nil {
   126  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   127  	}
   128  
   129  	got, err := xdsC.WaitForReportLoad(ctx)
   130  	if err != nil {
   131  		t.Fatalf("xdsClient.ReportLoad failed with error: %v", err)
   132  	}
   133  	if got.Server != testLRSServerConfig {
   134  		t.Fatalf("xdsClient.ReportLoad called with {%q}: want {%q}", got.Server, testLRSServerConfig)
   135  	}
   136  
   137  	sc1 := <-cc.NewSubConnCh
   138  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
   139  	// This should get the connecting picker.
   140  	if err := cc.WaitForPickerWithErr(ctx, balancer.ErrNoSubConnAvailable); err != nil {
   141  		t.Fatal(err.Error())
   142  	}
   143  
   144  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready})
   145  	// Test pick with one backend.
   146  
   147  	const rpcCount = 24
   148  	if err := cc.WaitForPicker(ctx, func(p balancer.Picker) error {
   149  		for i := 0; i < rpcCount; i++ {
   150  			gotSCSt, err := p.Pick(balancer.PickInfo{})
   151  			// Even RPCs are dropped.
   152  			if i%2 == 0 {
   153  				if err == nil || !strings.Contains(err.Error(), "dropped") {
   154  					return fmt.Errorf("pick.Pick, got %v, %v, want error RPC dropped", gotSCSt, err)
   155  				}
   156  				continue
   157  			}
   158  			if err != nil || gotSCSt.SubConn != sc1 {
   159  				return fmt.Errorf("picker.Pick, got %v, %v, want SubConn=%v", gotSCSt, err, sc1)
   160  			}
   161  			if gotSCSt.Done == nil {
   162  				continue
   163  			}
   164  			// Fail 1/4th of the requests that are not dropped.
   165  			if i%8 == 1 {
   166  				gotSCSt.Done(balancer.DoneInfo{Err: fmt.Errorf("test error")})
   167  			} else {
   168  				gotSCSt.Done(balancer.DoneInfo{})
   169  			}
   170  		}
   171  		return nil
   172  	}); err != nil {
   173  		t.Fatal(err.Error())
   174  	}
   175  
   176  	// Dump load data from the store and compare with expected counts.
   177  	loadStore := xdsC.LoadStore()
   178  	if loadStore == nil {
   179  		t.Fatal("loadStore is nil in xdsClient")
   180  	}
   181  	const dropCount = rpcCount * dropNumerator / dropDenominator
   182  	wantStatsData0 := []*load.Data{{
   183  		Cluster:    testClusterName,
   184  		Service:    testServiceName,
   185  		TotalDrops: dropCount,
   186  		Drops:      map[string]uint64{dropReason: dropCount},
   187  		LocalityStats: map[string]load.LocalityData{
   188  			assertString(xdsinternal.LocalityID{}.ToString): {RequestStats: load.RequestData{
   189  				Succeeded: (rpcCount - dropCount) * 3 / 4,
   190  				Errored:   (rpcCount - dropCount) / 4,
   191  				Issued:    rpcCount - dropCount,
   192  			}},
   193  		},
   194  	}}
   195  
   196  	gotStatsData0 := loadStore.Stats([]string{testClusterName})
   197  	if diff := cmp.Diff(gotStatsData0, wantStatsData0, cmpOpts); diff != "" {
   198  		t.Fatalf("got unexpected reports, diff (-got, +want): %v", diff)
   199  	}
   200  
   201  	// Send an update with new drop configs.
   202  	const (
   203  		dropReason2      = "test-dropping-category-2"
   204  		dropNumerator2   = 1
   205  		dropDenominator2 = 4
   206  	)
   207  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   208  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   209  		BalancerConfig: &LBConfig{
   210  			Cluster:             testClusterName,
   211  			EDSServiceName:      testServiceName,
   212  			LoadReportingServer: testLRSServerConfig,
   213  			DropCategories: []DropConfig{{
   214  				Category:           dropReason2,
   215  				RequestsPerMillion: million * dropNumerator2 / dropDenominator2,
   216  			}},
   217  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   218  				Name: roundrobin.Name,
   219  			},
   220  		},
   221  	}); err != nil {
   222  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   223  	}
   224  
   225  	if err := cc.WaitForPicker(ctx, func(p balancer.Picker) error {
   226  		for i := 0; i < rpcCount; i++ {
   227  			gotSCSt, err := p.Pick(balancer.PickInfo{})
   228  			// Even RPCs are dropped.
   229  			if i%4 == 0 {
   230  				if err == nil || !strings.Contains(err.Error(), "dropped") {
   231  					return fmt.Errorf("pick.Pick, got %v, %v, want error RPC dropped", gotSCSt, err)
   232  				}
   233  				continue
   234  			}
   235  			if err != nil || gotSCSt.SubConn != sc1 {
   236  				return fmt.Errorf("picker.Pick, got %v, %v, want SubConn=%v", gotSCSt, err, sc1)
   237  			}
   238  			if gotSCSt.Done != nil {
   239  				gotSCSt.Done(balancer.DoneInfo{})
   240  			}
   241  		}
   242  		return nil
   243  	}); err != nil {
   244  		t.Fatal(err.Error())
   245  	}
   246  
   247  	const dropCount2 = rpcCount * dropNumerator2 / dropDenominator2
   248  	wantStatsData1 := []*load.Data{{
   249  		Cluster:    testClusterName,
   250  		Service:    testServiceName,
   251  		TotalDrops: dropCount2,
   252  		Drops:      map[string]uint64{dropReason2: dropCount2},
   253  		LocalityStats: map[string]load.LocalityData{
   254  			assertString(xdsinternal.LocalityID{}.ToString): {RequestStats: load.RequestData{
   255  				Succeeded: rpcCount - dropCount2,
   256  				Issued:    rpcCount - dropCount2,
   257  			}},
   258  		},
   259  	}}
   260  
   261  	gotStatsData1 := loadStore.Stats([]string{testClusterName})
   262  	if diff := cmp.Diff(gotStatsData1, wantStatsData1, cmpOpts); diff != "" {
   263  		t.Fatalf("got unexpected reports, diff (-got, +want): %v", diff)
   264  	}
   265  }
   266  
   267  // TestDropCircuitBreaking verifies that the balancer correctly drops the picks
   268  // due to circuit breaking, and that the drops are reported.
   269  func (s) TestDropCircuitBreaking(t *testing.T) {
   270  	defer xdsclient.ClearCounterForTesting(testClusterName, testServiceName)
   271  	xdsC := fakeclient.NewClient()
   272  
   273  	builder := balancer.Get(Name)
   274  	cc := testutils.NewBalancerClientConn(t)
   275  	b := builder.Build(cc, balancer.BuildOptions{})
   276  	defer b.Close()
   277  
   278  	var maxRequest uint32 = 50
   279  	testLRSServerConfig, err := bootstrap.ServerConfigForTesting(bootstrap.ServerConfigTestingOptions{
   280  		URI:          "trafficdirector.googleapis.com:443",
   281  		ChannelCreds: []bootstrap.ChannelCreds{{Type: "google_default"}},
   282  	})
   283  	if err != nil {
   284  		t.Fatalf("Failed to create LRS server config for testing: %v", err)
   285  	}
   286  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   287  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   288  		BalancerConfig: &LBConfig{
   289  			Cluster:               testClusterName,
   290  			EDSServiceName:        testServiceName,
   291  			LoadReportingServer:   testLRSServerConfig,
   292  			MaxConcurrentRequests: &maxRequest,
   293  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   294  				Name: roundrobin.Name,
   295  			},
   296  		},
   297  	}); err != nil {
   298  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   299  	}
   300  
   301  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   302  	defer cancel()
   303  
   304  	got, err := xdsC.WaitForReportLoad(ctx)
   305  	if err != nil {
   306  		t.Fatalf("xdsClient.ReportLoad failed with error: %v", err)
   307  	}
   308  	if got.Server != testLRSServerConfig {
   309  		t.Fatalf("xdsClient.ReportLoad called with {%q}: want {%q}", got.Server, testLRSServerConfig)
   310  	}
   311  
   312  	sc1 := <-cc.NewSubConnCh
   313  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
   314  	// This should get the connecting picker.
   315  	if err := cc.WaitForPickerWithErr(ctx, balancer.ErrNoSubConnAvailable); err != nil {
   316  		t.Fatal(err.Error())
   317  	}
   318  
   319  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready})
   320  	// Test pick with one backend.
   321  	const rpcCount = 100
   322  	if err := cc.WaitForPicker(ctx, func(p balancer.Picker) error {
   323  		dones := []func(){}
   324  		for i := 0; i < rpcCount; i++ {
   325  			gotSCSt, err := p.Pick(balancer.PickInfo{})
   326  			if i < 50 && err != nil {
   327  				return fmt.Errorf("The first 50%% picks should be non-drops, got error %v", err)
   328  			} else if i > 50 && err == nil {
   329  				return fmt.Errorf("The second 50%% picks should be drops, got error <nil>")
   330  			}
   331  			dones = append(dones, func() {
   332  				if gotSCSt.Done != nil {
   333  					gotSCSt.Done(balancer.DoneInfo{})
   334  				}
   335  			})
   336  		}
   337  		for _, done := range dones {
   338  			done()
   339  		}
   340  
   341  		dones = []func(){}
   342  		// Pick without drops.
   343  		for i := 0; i < 50; i++ {
   344  			gotSCSt, err := p.Pick(balancer.PickInfo{})
   345  			if err != nil {
   346  				t.Errorf("The third 50%% picks should be non-drops, got error %v", err)
   347  			}
   348  			dones = append(dones, func() {
   349  				if gotSCSt.Done != nil {
   350  					// Fail these requests to test error counts in the load
   351  					// report.
   352  					gotSCSt.Done(balancer.DoneInfo{Err: fmt.Errorf("test error")})
   353  				}
   354  			})
   355  		}
   356  		for _, done := range dones {
   357  			done()
   358  		}
   359  
   360  		return nil
   361  	}); err != nil {
   362  		t.Fatal(err.Error())
   363  	}
   364  
   365  	// Dump load data from the store and compare with expected counts.
   366  	loadStore := xdsC.LoadStore()
   367  	if loadStore == nil {
   368  		t.Fatal("loadStore is nil in xdsClient")
   369  	}
   370  
   371  	wantStatsData0 := []*load.Data{{
   372  		Cluster:    testClusterName,
   373  		Service:    testServiceName,
   374  		TotalDrops: uint64(maxRequest),
   375  		LocalityStats: map[string]load.LocalityData{
   376  			assertString(xdsinternal.LocalityID{}.ToString): {RequestStats: load.RequestData{
   377  				Succeeded: uint64(rpcCount - maxRequest),
   378  				Errored:   50,
   379  				Issued:    uint64(rpcCount - maxRequest + 50),
   380  			}},
   381  		},
   382  	}}
   383  
   384  	gotStatsData0 := loadStore.Stats([]string{testClusterName})
   385  	if diff := cmp.Diff(gotStatsData0, wantStatsData0, cmpOpts); diff != "" {
   386  		t.Fatalf("got unexpected drop reports, diff (-got, +want): %v", diff)
   387  	}
   388  }
   389  
   390  // TestPickerUpdateAfterClose covers the case where a child policy sends a
   391  // picker update after the cluster_impl policy is closed. Because picker updates
   392  // are handled in the run() goroutine, which exits before Close() returns, we
   393  // expect the above picker update to be dropped.
   394  func (s) TestPickerUpdateAfterClose(t *testing.T) {
   395  	defer xdsclient.ClearCounterForTesting(testClusterName, testServiceName)
   396  	xdsC := fakeclient.NewClient()
   397  
   398  	builder := balancer.Get(Name)
   399  	cc := testutils.NewBalancerClientConn(t)
   400  	b := builder.Build(cc, balancer.BuildOptions{})
   401  
   402  	// Create a stub balancer which waits for the cluster_impl policy to be
   403  	// closed before sending a picker update (upon receipt of a subConn state
   404  	// change).
   405  	closeCh := make(chan struct{})
   406  	const childPolicyName = "stubBalancer-TestPickerUpdateAfterClose"
   407  	stub.Register(childPolicyName, stub.BalancerFuncs{
   408  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
   409  			// Create a subConn which will be used later on to test the race
   410  			// between StateListener() and Close().
   411  			sc, err := bd.ClientConn.NewSubConn(ccs.ResolverState.Addresses, balancer.NewSubConnOptions{
   412  				StateListener: func(balancer.SubConnState) {
   413  					go func() {
   414  						// Wait for Close() to be called on the parent policy before
   415  						// sending the picker update.
   416  						<-closeCh
   417  						bd.ClientConn.UpdateState(balancer.State{
   418  							Picker: base.NewErrPicker(errors.New("dummy error picker")),
   419  						})
   420  					}()
   421  				},
   422  			})
   423  			if err != nil {
   424  				return err
   425  			}
   426  			sc.Connect()
   427  			return nil
   428  		},
   429  	})
   430  
   431  	var maxRequest uint32 = 50
   432  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   433  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   434  		BalancerConfig: &LBConfig{
   435  			Cluster:               testClusterName,
   436  			EDSServiceName:        testServiceName,
   437  			MaxConcurrentRequests: &maxRequest,
   438  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   439  				Name: childPolicyName,
   440  			},
   441  		},
   442  	}); err != nil {
   443  		b.Close()
   444  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   445  	}
   446  
   447  	// Send a subConn state change to trigger a picker update. The stub balancer
   448  	// that we use as the child policy will not send a picker update until the
   449  	// parent policy is closed.
   450  	sc1 := <-cc.NewSubConnCh
   451  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
   452  	b.Close()
   453  	close(closeCh)
   454  
   455  	select {
   456  	case <-cc.NewPickerCh:
   457  		t.Fatalf("unexpected picker update after balancer is closed")
   458  	case <-time.After(defaultShortTestTimeout):
   459  	}
   460  }
   461  
   462  // TestClusterNameInAddressAttributes covers the case that cluster name is
   463  // attached to the subconn address attributes.
   464  func (s) TestClusterNameInAddressAttributes(t *testing.T) {
   465  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   466  	defer cancel()
   467  
   468  	defer xdsclient.ClearCounterForTesting(testClusterName, testServiceName)
   469  	xdsC := fakeclient.NewClient()
   470  
   471  	builder := balancer.Get(Name)
   472  	cc := testutils.NewBalancerClientConn(t)
   473  	b := builder.Build(cc, balancer.BuildOptions{})
   474  	defer b.Close()
   475  
   476  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   477  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   478  		BalancerConfig: &LBConfig{
   479  			Cluster:        testClusterName,
   480  			EDSServiceName: testServiceName,
   481  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   482  				Name: roundrobin.Name,
   483  			},
   484  		},
   485  	}); err != nil {
   486  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   487  	}
   488  
   489  	sc1 := <-cc.NewSubConnCh
   490  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
   491  	// This should get the connecting picker.
   492  	if err := cc.WaitForPickerWithErr(ctx, balancer.ErrNoSubConnAvailable); err != nil {
   493  		t.Fatal(err.Error())
   494  	}
   495  
   496  	addrs1 := <-cc.NewSubConnAddrsCh
   497  	if got, want := addrs1[0].Addr, testBackendEndpoints[0].Addresses[0].Addr; got != want {
   498  		t.Fatalf("sc is created with addr %v, want %v", got, want)
   499  	}
   500  	cn, ok := xds.GetXDSHandshakeClusterName(addrs1[0].Attributes)
   501  	if !ok || cn != testClusterName {
   502  		t.Fatalf("sc is created with addr with cluster name %v, %v, want cluster name %v", cn, ok, testClusterName)
   503  	}
   504  
   505  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready})
   506  	// Test pick with one backend.
   507  	if err := cc.WaitForRoundRobinPicker(ctx, sc1); err != nil {
   508  		t.Fatal(err.Error())
   509  	}
   510  
   511  	const testClusterName2 = "test-cluster-2"
   512  	var addr2 = resolver.Address{Addr: "2.2.2.2"}
   513  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   514  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: []resolver.Endpoint{{Addresses: []resolver.Address{addr2}}}}, xdsC),
   515  		BalancerConfig: &LBConfig{
   516  			Cluster:        testClusterName2,
   517  			EDSServiceName: testServiceName,
   518  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   519  				Name: roundrobin.Name,
   520  			},
   521  		},
   522  	}); err != nil {
   523  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   524  	}
   525  
   526  	addrs2 := <-cc.NewSubConnAddrsCh
   527  	if got, want := addrs2[0].Addr, addr2.Addr; got != want {
   528  		t.Fatalf("sc is created with addr %v, want %v", got, want)
   529  	}
   530  	// New addresses should have the new cluster name.
   531  	cn2, ok := xds.GetXDSHandshakeClusterName(addrs2[0].Attributes)
   532  	if !ok || cn2 != testClusterName2 {
   533  		t.Fatalf("sc is created with addr with cluster name %v, %v, want cluster name %v", cn2, ok, testClusterName2)
   534  	}
   535  }
   536  
   537  // TestReResolution verifies that when a SubConn turns transient failure,
   538  // re-resolution is triggered.
   539  func (s) TestReResolution(t *testing.T) {
   540  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   541  	defer cancel()
   542  
   543  	defer xdsclient.ClearCounterForTesting(testClusterName, testServiceName)
   544  	xdsC := fakeclient.NewClient()
   545  
   546  	builder := balancer.Get(Name)
   547  	cc := testutils.NewBalancerClientConn(t)
   548  	b := builder.Build(cc, balancer.BuildOptions{})
   549  	defer b.Close()
   550  
   551  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   552  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   553  		BalancerConfig: &LBConfig{
   554  			Cluster:        testClusterName,
   555  			EDSServiceName: testServiceName,
   556  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   557  				Name: roundrobin.Name,
   558  			},
   559  		},
   560  	}); err != nil {
   561  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   562  	}
   563  
   564  	sc1 := <-cc.NewSubConnCh
   565  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
   566  	// This should get the connecting picker.
   567  	if err := cc.WaitForPickerWithErr(ctx, balancer.ErrNoSubConnAvailable); err != nil {
   568  		t.Fatal(err.Error())
   569  	}
   570  
   571  	sc1.UpdateState(balancer.SubConnState{
   572  		ConnectivityState: connectivity.TransientFailure,
   573  		ConnectionError:   errors.New("test error"),
   574  	})
   575  	// This should get the transient failure picker.
   576  	if err := cc.WaitForErrPicker(ctx); err != nil {
   577  		t.Fatal(err.Error())
   578  	}
   579  
   580  	// The transient failure should trigger a re-resolution.
   581  	select {
   582  	case <-cc.ResolveNowCh:
   583  	case <-time.After(defaultTestTimeout):
   584  		t.Fatalf("timeout waiting for ResolveNow()")
   585  	}
   586  
   587  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready})
   588  	// Test pick with one backend.
   589  	if err := cc.WaitForRoundRobinPicker(ctx, sc1); err != nil {
   590  		t.Fatal(err.Error())
   591  	}
   592  
   593  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure})
   594  	// This should get the transient failure picker.
   595  	if err := cc.WaitForErrPicker(ctx); err != nil {
   596  		t.Fatal(err.Error())
   597  	}
   598  
   599  	// The transient failure should trigger a re-resolution.
   600  	select {
   601  	case <-cc.ResolveNowCh:
   602  	case <-time.After(defaultTestTimeout):
   603  		t.Fatalf("timeout waiting for ResolveNow()")
   604  	}
   605  }
   606  
   607  func (s) TestLoadReporting(t *testing.T) {
   608  	var testLocality = xdsinternal.LocalityID{
   609  		Region:  "test-region",
   610  		Zone:    "test-zone",
   611  		SubZone: "test-sub-zone",
   612  	}
   613  
   614  	xdsC := fakeclient.NewClient()
   615  
   616  	builder := balancer.Get(Name)
   617  	cc := testutils.NewBalancerClientConn(t)
   618  	b := builder.Build(cc, balancer.BuildOptions{})
   619  	defer b.Close()
   620  
   621  	endpoints := make([]resolver.Endpoint, len(testBackendEndpoints))
   622  	for i, e := range testBackendEndpoints {
   623  		endpoints[i] = xdsinternal.SetLocalityIDInEndpoint(e, testLocality)
   624  		for j, a := range e.Addresses {
   625  			endpoints[i].Addresses[j] = xdsinternal.SetLocalityID(a, testLocality)
   626  		}
   627  	}
   628  	testLRSServerConfig, err := bootstrap.ServerConfigForTesting(bootstrap.ServerConfigTestingOptions{
   629  		URI:          "trafficdirector.googleapis.com:443",
   630  		ChannelCreds: []bootstrap.ChannelCreds{{Type: "google_default"}},
   631  	})
   632  	if err != nil {
   633  		t.Fatalf("Failed to create LRS server config for testing: %v", err)
   634  	}
   635  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   636  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: endpoints}, xdsC),
   637  		BalancerConfig: &LBConfig{
   638  			Cluster:             testClusterName,
   639  			EDSServiceName:      testServiceName,
   640  			LoadReportingServer: testLRSServerConfig,
   641  			// Locality:                testLocality,
   642  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   643  				Name: roundrobin.Name,
   644  			},
   645  		},
   646  	}); err != nil {
   647  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   648  	}
   649  
   650  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   651  	defer cancel()
   652  
   653  	got, err := xdsC.WaitForReportLoad(ctx)
   654  	if err != nil {
   655  		t.Fatalf("xdsClient.ReportLoad failed with error: %v", err)
   656  	}
   657  	if got.Server != testLRSServerConfig {
   658  		t.Fatalf("xdsClient.ReportLoad called with {%q}: want {%q}", got.Server, testLRSServerConfig)
   659  	}
   660  
   661  	sc1 := <-cc.NewSubConnCh
   662  	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
   663  	// This should get the connecting picker.
   664  	if err := cc.WaitForPickerWithErr(ctx, balancer.ErrNoSubConnAvailable); err != nil {
   665  		t.Fatal(err.Error())
   666  	}
   667  
   668  	scs := balancer.SubConnState{ConnectivityState: connectivity.Ready}
   669  	sca := internal.SetConnectedAddress.(func(*balancer.SubConnState, resolver.Address))
   670  	sca(&scs, endpoints[0].Addresses[0])
   671  	sc1.UpdateState(scs)
   672  	// Test pick with one backend.
   673  	const successCount = 5
   674  	const errorCount = 5
   675  	if err := cc.WaitForPicker(ctx, func(p balancer.Picker) error {
   676  		for i := 0; i < successCount; i++ {
   677  			gotSCSt, err := p.Pick(balancer.PickInfo{})
   678  			if gotSCSt.SubConn != sc1 {
   679  				return fmt.Errorf("picker.Pick, got %v, %v, want SubConn=%v", gotSCSt, err, sc1)
   680  			}
   681  			lr := &v3orcapb.OrcaLoadReport{
   682  				NamedMetrics: map[string]float64{testNamedMetricsKey1: 3.14, testNamedMetricsKey2: 2.718},
   683  			}
   684  			gotSCSt.Done(balancer.DoneInfo{ServerLoad: lr})
   685  		}
   686  		for i := 0; i < errorCount; i++ {
   687  			gotSCSt, err := p.Pick(balancer.PickInfo{})
   688  			if gotSCSt.SubConn != sc1 {
   689  				return fmt.Errorf("picker.Pick, got %v, %v, want SubConn=%v", gotSCSt, err, sc1)
   690  			}
   691  			gotSCSt.Done(balancer.DoneInfo{Err: fmt.Errorf("error")})
   692  		}
   693  		return nil
   694  	}); err != nil {
   695  		t.Fatal(err.Error())
   696  	}
   697  
   698  	// Dump load data from the store and compare with expected counts.
   699  	loadStore := xdsC.LoadStore()
   700  	if loadStore == nil {
   701  		t.Fatal("loadStore is nil in xdsClient")
   702  	}
   703  	sds := loadStore.Stats([]string{testClusterName})
   704  	if len(sds) == 0 {
   705  		t.Fatalf("loads for cluster %v not found in store", testClusterName)
   706  	}
   707  	sd := sds[0]
   708  	if sd.Cluster != testClusterName || sd.Service != testServiceName {
   709  		t.Fatalf("got unexpected load for %q, %q, want %q, %q", sd.Cluster, sd.Service, testClusterName, testServiceName)
   710  	}
   711  	testLocalityJSON, _ := testLocality.ToString()
   712  	localityData, ok := sd.LocalityStats[testLocalityJSON]
   713  	if !ok {
   714  		t.Fatalf("loads for %v not found in store", testLocality)
   715  	}
   716  	reqStats := localityData.RequestStats
   717  	if reqStats.Succeeded != successCount {
   718  		t.Errorf("got succeeded %v, want %v", reqStats.Succeeded, successCount)
   719  	}
   720  	if reqStats.Errored != errorCount {
   721  		t.Errorf("got errord %v, want %v", reqStats.Errored, errorCount)
   722  	}
   723  	if reqStats.InProgress != 0 {
   724  		t.Errorf("got inProgress %v, want %v", reqStats.InProgress, 0)
   725  	}
   726  	wantLoadStats := map[string]load.ServerLoadData{
   727  		testNamedMetricsKey1: {Count: 5, Sum: 15.7},  // aggregation of 5 * 3.14 = 15.7
   728  		testNamedMetricsKey2: {Count: 5, Sum: 13.59}, // aggregation of 5 * 2.718 = 13.59
   729  	}
   730  	if diff := cmp.Diff(wantLoadStats, localityData.LoadStats, toleranceCmpOpt); diff != "" {
   731  		t.Errorf("localityData.LoadStats returned unexpected diff (-want +got):\n%s", diff)
   732  	}
   733  	b.Close()
   734  	if err := xdsC.WaitForCancelReportLoad(ctx); err != nil {
   735  		t.Fatalf("unexpected error waiting form load report to be canceled: %v", err)
   736  	}
   737  }
   738  
   739  // TestUpdateLRSServer covers the cases
   740  // - the init config specifies "" as the LRS server
   741  // - config modifies LRS server to a different string
   742  // - config sets LRS server to nil to stop load reporting
   743  func (s) TestUpdateLRSServer(t *testing.T) {
   744  	var testLocality = xdsinternal.LocalityID{
   745  		Region:  "test-region",
   746  		Zone:    "test-zone",
   747  		SubZone: "test-sub-zone",
   748  	}
   749  
   750  	xdsC := fakeclient.NewClient()
   751  
   752  	builder := balancer.Get(Name)
   753  	cc := testutils.NewBalancerClientConn(t)
   754  	b := builder.Build(cc, balancer.BuildOptions{})
   755  	defer b.Close()
   756  
   757  	endpoints := make([]resolver.Endpoint, len(testBackendEndpoints))
   758  	for i, e := range testBackendEndpoints {
   759  		endpoints[i] = xdsinternal.SetLocalityIDInEndpoint(e, testLocality)
   760  	}
   761  	testLRSServerConfig, err := bootstrap.ServerConfigForTesting(bootstrap.ServerConfigTestingOptions{
   762  		URI:          "trafficdirector.googleapis.com:443",
   763  		ChannelCreds: []bootstrap.ChannelCreds{{Type: "google_default"}},
   764  	})
   765  	if err != nil {
   766  		t.Fatalf("Failed to create LRS server config for testing: %v", err)
   767  	}
   768  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   769  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: endpoints}, xdsC),
   770  		BalancerConfig: &LBConfig{
   771  			Cluster:             testClusterName,
   772  			EDSServiceName:      testServiceName,
   773  			LoadReportingServer: testLRSServerConfig,
   774  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   775  				Name: roundrobin.Name,
   776  			},
   777  		},
   778  	}); err != nil {
   779  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   780  	}
   781  
   782  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   783  	defer cancel()
   784  
   785  	got, err := xdsC.WaitForReportLoad(ctx)
   786  	if err != nil {
   787  		t.Fatalf("xdsClient.ReportLoad failed with error: %v", err)
   788  	}
   789  	if got.Server != testLRSServerConfig {
   790  		t.Fatalf("xdsClient.ReportLoad called with {%q}: want {%q}", got.Server, testLRSServerConfig)
   791  	}
   792  
   793  	testLRSServerConfig2, err := bootstrap.ServerConfigForTesting(bootstrap.ServerConfigTestingOptions{
   794  		URI:          "trafficdirector-another.googleapis.com:443",
   795  		ChannelCreds: []bootstrap.ChannelCreds{{Type: "google_default"}},
   796  	})
   797  	if err != nil {
   798  		t.Fatalf("Failed to create LRS server config for testing: %v", err)
   799  	}
   800  
   801  	// Update LRS server to a different name.
   802  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   803  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: endpoints}, xdsC),
   804  		BalancerConfig: &LBConfig{
   805  			Cluster:             testClusterName,
   806  			EDSServiceName:      testServiceName,
   807  			LoadReportingServer: testLRSServerConfig2,
   808  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   809  				Name: roundrobin.Name,
   810  			},
   811  		},
   812  	}); err != nil {
   813  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   814  	}
   815  	if err := xdsC.WaitForCancelReportLoad(ctx); err != nil {
   816  		t.Fatalf("unexpected error waiting form load report to be canceled: %v", err)
   817  	}
   818  	got2, err2 := xdsC.WaitForReportLoad(ctx)
   819  	if err2 != nil {
   820  		t.Fatalf("xdsClient.ReportLoad failed with error: %v", err2)
   821  	}
   822  	if got2.Server != testLRSServerConfig2 {
   823  		t.Fatalf("xdsClient.ReportLoad called with {%q}: want {%q}", got2.Server, testLRSServerConfig2)
   824  	}
   825  
   826  	// Update LRS server to nil, to disable LRS.
   827  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   828  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: endpoints}, xdsC),
   829  		BalancerConfig: &LBConfig{
   830  			Cluster:        testClusterName,
   831  			EDSServiceName: testServiceName,
   832  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   833  				Name: roundrobin.Name,
   834  			},
   835  		},
   836  	}); err != nil {
   837  		t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
   838  	}
   839  	if err := xdsC.WaitForCancelReportLoad(ctx); err != nil {
   840  		t.Fatalf("unexpected error waiting form load report to be canceled: %v", err)
   841  	}
   842  
   843  	shortCtx, shortCancel := context.WithTimeout(context.Background(), defaultShortTestTimeout)
   844  	defer shortCancel()
   845  	if s, err := xdsC.WaitForReportLoad(shortCtx); err != context.DeadlineExceeded {
   846  		t.Fatalf("unexpected load report to server: %q", s)
   847  	}
   848  }
   849  
   850  // Test verifies that child policies was updated on receipt of
   851  // configuration update.
   852  func (s) TestChildPolicyUpdatedOnConfigUpdate(t *testing.T) {
   853  	xdsC := fakeclient.NewClient()
   854  
   855  	builder := balancer.Get(Name)
   856  	cc := testutils.NewBalancerClientConn(t)
   857  	b := builder.Build(cc, balancer.BuildOptions{})
   858  	defer b.Close()
   859  
   860  	// Keep track of which child policy was updated
   861  	updatedChildPolicy := ""
   862  
   863  	// Create stub balancers to track config updates
   864  	const (
   865  		childPolicyName1 = "stubBalancer1"
   866  		childPolicyName2 = "stubBalancer2"
   867  	)
   868  
   869  	stub.Register(childPolicyName1, stub.BalancerFuncs{
   870  		UpdateClientConnState: func(_ *stub.BalancerData, _ balancer.ClientConnState) error {
   871  			updatedChildPolicy = childPolicyName1
   872  			return nil
   873  		},
   874  	})
   875  
   876  	stub.Register(childPolicyName2, stub.BalancerFuncs{
   877  		UpdateClientConnState: func(_ *stub.BalancerData, _ balancer.ClientConnState) error {
   878  			updatedChildPolicy = childPolicyName2
   879  			return nil
   880  		},
   881  	})
   882  
   883  	// Initial config update with childPolicyName1
   884  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   885  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   886  		BalancerConfig: &LBConfig{
   887  			Cluster: testClusterName,
   888  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   889  				Name: childPolicyName1,
   890  			},
   891  		},
   892  	}); err != nil {
   893  		t.Fatalf("Error updating the config: %v", err)
   894  	}
   895  
   896  	if updatedChildPolicy != childPolicyName1 {
   897  		t.Fatal("Child policy 1 was not updated on initial configuration update.")
   898  	}
   899  
   900  	// Second config update with childPolicyName2
   901  	if err := b.UpdateClientConnState(balancer.ClientConnState{
   902  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   903  		BalancerConfig: &LBConfig{
   904  			Cluster: testClusterName,
   905  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   906  				Name: childPolicyName2,
   907  			},
   908  		},
   909  	}); err != nil {
   910  		t.Fatalf("Error updating the config: %v", err)
   911  	}
   912  
   913  	if updatedChildPolicy != childPolicyName2 {
   914  		t.Fatal("Child policy 2 was not updated after child policy name change.")
   915  	}
   916  }
   917  
   918  // Test verifies that config update fails if child policy config
   919  // failed to parse.
   920  func (s) TestFailedToParseChildPolicyConfig(t *testing.T) {
   921  	xdsC := fakeclient.NewClient()
   922  
   923  	builder := balancer.Get(Name)
   924  	cc := testutils.NewBalancerClientConn(t)
   925  	b := builder.Build(cc, balancer.BuildOptions{})
   926  	defer b.Close()
   927  
   928  	// Create a stub balancer which fails to ParseConfig.
   929  	const parseConfigError = "failed to parse config"
   930  	const childPolicyName = "stubBalancer-FailedToParseChildPolicyConfig"
   931  	stub.Register(childPolicyName, stub.BalancerFuncs{
   932  		ParseConfig: func(_ json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
   933  			return nil, errors.New(parseConfigError)
   934  		},
   935  	})
   936  
   937  	err := b.UpdateClientConnState(balancer.ClientConnState{
   938  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
   939  		BalancerConfig: &LBConfig{
   940  			Cluster: testClusterName,
   941  			ChildPolicy: &internalserviceconfig.BalancerConfig{
   942  				Name: childPolicyName,
   943  			},
   944  		},
   945  	})
   946  
   947  	if err == nil || !strings.Contains(err.Error(), parseConfigError) {
   948  		t.Fatalf("Got error: %v, want error: %s", err, parseConfigError)
   949  	}
   950  }
   951  
   952  // Test verify that the case picker is updated synchronously on receipt of
   953  // configuration update.
   954  func (s) TestPickerUpdatedSynchronouslyOnConfigUpdate(t *testing.T) {
   955  	// Override the pickerUpdateHook to be notified that picker was updated.
   956  	pickerUpdated := make(chan struct{}, 1)
   957  	origNewPickerUpdated := pickerUpdateHook
   958  	pickerUpdateHook = func() {
   959  		pickerUpdated <- struct{}{}
   960  	}
   961  	defer func() { pickerUpdateHook = origNewPickerUpdated }()
   962  
   963  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   964  	defer cancel()
   965  	// Override the clientConnUpdateHook to ensure client conn was updated.
   966  	clientConnUpdateDone := make(chan struct{}, 1)
   967  	origClientConnUpdateHook := clientConnUpdateHook
   968  	clientConnUpdateHook = func() {
   969  		// Verify that picker was updated before the completion of
   970  		// client conn update.
   971  		select {
   972  		case <-pickerUpdated:
   973  		case <-ctx.Done():
   974  			t.Fatal("Client conn update completed before picker update.")
   975  		}
   976  		clientConnUpdateDone <- struct{}{}
   977  	}
   978  	defer func() { clientConnUpdateHook = origClientConnUpdateHook }()
   979  
   980  	defer xdsclient.ClearCounterForTesting(testClusterName, testServiceName)
   981  	xdsC := fakeclient.NewClient()
   982  
   983  	builder := balancer.Get(Name)
   984  	cc := testutils.NewBalancerClientConn(t)
   985  	b := builder.Build(cc, balancer.BuildOptions{})
   986  	defer b.Close()
   987  
   988  	// Create a stub balancer which waits for the cluster_impl policy to be
   989  	// closed before sending a picker update (upon receipt of a resolver
   990  	// update).
   991  	stub.Register(t.Name(), stub.BalancerFuncs{
   992  		UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error {
   993  			bd.ClientConn.UpdateState(balancer.State{
   994  				Picker: base.NewErrPicker(errors.New("dummy error picker")),
   995  			})
   996  			return nil
   997  		},
   998  	})
   999  
  1000  	if err := b.UpdateClientConnState(balancer.ClientConnState{
  1001  		ResolverState: xdsclient.SetClient(resolver.State{Endpoints: testBackendEndpoints}, xdsC),
  1002  		BalancerConfig: &LBConfig{
  1003  			Cluster:        testClusterName,
  1004  			EDSServiceName: testServiceName,
  1005  			ChildPolicy: &internalserviceconfig.BalancerConfig{
  1006  				Name: t.Name(),
  1007  			},
  1008  		},
  1009  	}); err != nil {
  1010  		t.Fatalf("Unexpected error from UpdateClientConnState: %v", err)
  1011  	}
  1012  
  1013  	select {
  1014  	case <-clientConnUpdateDone:
  1015  	case <-ctx.Done():
  1016  		t.Fatal("Timed out waiting for client conn update to be completed.")
  1017  	}
  1018  }
  1019  
  1020  func assertString(f func() (string, error)) string {
  1021  	s, err := f()
  1022  	if err != nil {
  1023  		panic(err.Error())
  1024  	}
  1025  	return s
  1026  }