google.golang.org/grpc@v1.72.2/balancer/rls/balancer_test.go (about)

     1  /*
     2   *
     3   * Copyright 2022 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package rls
    20  
    21  import (
    22  	"context"
    23  	"encoding/json"
    24  	"errors"
    25  	"fmt"
    26  	"sync"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/google/go-cmp/cmp"
    31  	"google.golang.org/grpc"
    32  	"google.golang.org/grpc/balancer"
    33  	"google.golang.org/grpc/balancer/pickfirst"
    34  	"google.golang.org/grpc/balancer/rls/internal/test/e2e"
    35  	"google.golang.org/grpc/codes"
    36  	"google.golang.org/grpc/connectivity"
    37  	"google.golang.org/grpc/credentials"
    38  	"google.golang.org/grpc/credentials/insecure"
    39  	"google.golang.org/grpc/internal"
    40  	"google.golang.org/grpc/internal/balancer/stub"
    41  	internalserviceconfig "google.golang.org/grpc/internal/serviceconfig"
    42  	"google.golang.org/grpc/internal/testutils"
    43  	rlstest "google.golang.org/grpc/internal/testutils/rls"
    44  	"google.golang.org/grpc/metadata"
    45  	"google.golang.org/grpc/resolver"
    46  	"google.golang.org/grpc/resolver/manual"
    47  	"google.golang.org/grpc/serviceconfig"
    48  	"google.golang.org/grpc/testdata"
    49  
    50  	rlspb "google.golang.org/grpc/internal/proto/grpc_lookup_v1"
    51  	"google.golang.org/protobuf/types/known/durationpb"
    52  )
    53  
    54  // TestConfigUpdate_ControlChannel tests the scenario where a config update
    55  // changes the RLS server name. Verifies that the new control channel is created
    56  // and the old one is closed.
    57  func (s) TestConfigUpdate_ControlChannel(t *testing.T) {
    58  	// Start two RLS servers.
    59  	lis1 := testutils.NewListenerWrapper(t, nil)
    60  	rlsServer1, rlsReqCh1 := rlstest.SetupFakeRLSServer(t, lis1)
    61  	lis2 := testutils.NewListenerWrapper(t, nil)
    62  	rlsServer2, rlsReqCh2 := rlstest.SetupFakeRLSServer(t, lis2)
    63  
    64  	// Build RLS service config with the RLS server pointing to the first one.
    65  	// Set a very low value for maxAge to ensure that the entry expires soon.
    66  	rlsConfig := buildBasicRLSConfigWithChildPolicy(t, t.Name(), rlsServer1.Address)
    67  	rlsConfig.RouteLookupConfig.MaxAge = durationpb.New(defaultTestShortTimeout)
    68  
    69  	// Start a couple of test backends, and set up the fake RLS servers to return
    70  	// these as a target in the RLS response.
    71  	backendCh1, backendAddress1 := startBackend(t)
    72  	rlsServer1.SetResponseCallback(func(_ context.Context, _ *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
    73  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress1}}}
    74  	})
    75  	backendCh2, backendAddress2 := startBackend(t)
    76  	rlsServer2.SetResponseCallback(func(context.Context, *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
    77  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress2}}}
    78  	})
    79  
    80  	// Register a manual resolver and push the RLS service config through it.
    81  	r := startManualResolverWithConfig(t, rlsConfig)
    82  
    83  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
    84  	if err != nil {
    85  		t.Fatalf("Failed to create gRPC client: %v", err)
    86  	}
    87  	defer cc.Close()
    88  
    89  	// Make an RPC and ensure it gets routed to the test backend.
    90  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
    91  	defer cancel()
    92  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, backendCh1)
    93  
    94  	// Ensure a connection is established to the first RLS server.
    95  	val, err := lis1.NewConnCh.Receive(ctx)
    96  	if err != nil {
    97  		t.Fatal("Timeout expired when waiting for LB policy to create control channel")
    98  	}
    99  	conn1 := val.(*testutils.ConnWrapper)
   100  
   101  	// Make sure an RLS request is sent out.
   102  	verifyRLSRequest(t, rlsReqCh1, true)
   103  
   104  	// Change lookup_service field of the RLS config to point to the second one.
   105  	rlsConfig.RouteLookupConfig.LookupService = rlsServer2.Address
   106  
   107  	// Push the config update through the manual resolver.
   108  	scJSON, err := rlsConfig.ServiceConfigJSON()
   109  	if err != nil {
   110  		t.Fatal(err)
   111  	}
   112  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON)
   113  	r.UpdateState(resolver.State{ServiceConfig: sc})
   114  
   115  	// Ensure a connection is established to the second RLS server.
   116  	if _, err := lis2.NewConnCh.Receive(ctx); err != nil {
   117  		t.Fatal("Timeout expired when waiting for LB policy to create control channel")
   118  	}
   119  
   120  	// Ensure the connection to the old one is closed.
   121  	if _, err := conn1.CloseCh.Receive(ctx); err != nil {
   122  		t.Fatal("Timeout expired when waiting for LB policy to close control channel")
   123  	}
   124  
   125  	// Make an RPC and expect it to get routed to the second test backend through
   126  	// the second RLS server.
   127  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, backendCh2)
   128  	verifyRLSRequest(t, rlsReqCh2, true)
   129  }
   130  
   131  // TestConfigUpdate_ControlChannelWithCreds tests the scenario where a config
   132  // update specified an RLS server name, and the parent ClientConn specifies
   133  // transport credentials. The RLS server and the test backend are configured to
   134  // accept those transport credentials. This test verifies that the parent
   135  // channel credentials are correctly propagated to the control channel.
   136  func (s) TestConfigUpdate_ControlChannelWithCreds(t *testing.T) {
   137  	serverCreds, err := credentials.NewServerTLSFromFile(testdata.Path("x509/server1_cert.pem"), testdata.Path("x509/server1_key.pem"))
   138  	if err != nil {
   139  		t.Fatalf("credentials.NewServerTLSFromFile(server1.pem, server1.key) = %v", err)
   140  	}
   141  	clientCreds, err := credentials.NewClientTLSFromFile(testdata.Path("x509/server_ca_cert.pem"), "")
   142  	if err != nil {
   143  		t.Fatalf("credentials.NewClientTLSFromFile(ca.pem) = %v", err)
   144  	}
   145  
   146  	// Start an RLS server with the wrapped listener and credentials.
   147  	lis := testutils.NewListenerWrapper(t, nil)
   148  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, lis, grpc.Creds(serverCreds))
   149  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   150  
   151  	// Build RLS service config.
   152  	rlsConfig := buildBasicRLSConfigWithChildPolicy(t, t.Name(), rlsServer.Address)
   153  
   154  	// Start a test backend which uses the same credentials as the RLS server,
   155  	// and set up the fake RLS server to return this as the target in the RLS
   156  	// response.
   157  	backendCh, backendAddress := startBackend(t, grpc.Creds(serverCreds))
   158  	rlsServer.SetResponseCallback(func(_ context.Context, _ *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
   159  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress}}}
   160  	})
   161  
   162  	// Register a manual resolver and push the RLS service config through it.
   163  	r := startManualResolverWithConfig(t, rlsConfig)
   164  
   165  	// Dial with credentials and expect the RLS server to receive the same. The
   166  	// server certificate used for the RLS server and the backend specifies a
   167  	// DNS SAN of "*.test.example.com". Hence we use a dial target which is a
   168  	// subdomain of the same here.
   169  	cc, err := grpc.NewClient(r.Scheme()+":///rls.test.example.com", grpc.WithResolvers(r), grpc.WithTransportCredentials(clientCreds))
   170  	if err != nil {
   171  		t.Fatalf("Failed to create gRPC client: %v", err)
   172  	}
   173  	defer cc.Close()
   174  
   175  	// Make an RPC and ensure it gets routed to the test backend.
   176  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   177  	defer cancel()
   178  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, backendCh)
   179  
   180  	// Make sure an RLS request is sent out.
   181  	verifyRLSRequest(t, rlsReqCh, true)
   182  
   183  	// Ensure a connection is established to the first RLS server.
   184  	if _, err := lis.NewConnCh.Receive(ctx); err != nil {
   185  		t.Fatal("Timeout expired when waiting for LB policy to create control channel")
   186  	}
   187  }
   188  
   189  // TestConfigUpdate_ControlChannelServiceConfig tests the scenario where RLS LB
   190  // policy's configuration specifies the service config for the control channel
   191  // via the `routeLookupChannelServiceConfig` field. This test verifies that the
   192  // provided service config is applied for the control channel.
   193  func (s) TestConfigUpdate_ControlChannelServiceConfig(t *testing.T) {
   194  	// Start an RLS server and set the throttler to never throttle requests.
   195  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, nil)
   196  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   197  
   198  	// Register a balancer to be used for the control channel, and set up a
   199  	// callback to get notified when the balancer receives a clientConn updates.
   200  	ccUpdateCh := testutils.NewChannel()
   201  	bf := &e2e.BalancerFuncs{
   202  		UpdateClientConnState: func(cfg *e2e.RLSChildPolicyConfig) error {
   203  			if cfg.Backend != rlsServer.Address {
   204  				return fmt.Errorf("control channel LB policy received config with backend %q, want %q", cfg.Backend, rlsServer.Address)
   205  			}
   206  			ccUpdateCh.Replace(nil)
   207  			return nil
   208  		},
   209  	}
   210  	controlChannelPolicyName := "test-control-channel-" + t.Name()
   211  	e2e.RegisterRLSChildPolicy(controlChannelPolicyName, bf)
   212  	t.Logf("Registered child policy with name %q", controlChannelPolicyName)
   213  
   214  	// Build RLS service config and set the `routeLookupChannelServiceConfig`
   215  	// field to a service config which uses the above balancer.
   216  	rlsConfig := buildBasicRLSConfigWithChildPolicy(t, t.Name(), rlsServer.Address)
   217  	rlsConfig.RouteLookupChannelServiceConfig = fmt.Sprintf(`{"loadBalancingConfig" : [{%q: {"backend": %q} }]}`, controlChannelPolicyName, rlsServer.Address)
   218  
   219  	// Start a test backend, and set up the fake RLS server to return this as a
   220  	// target in the RLS response.
   221  	backendCh, backendAddress := startBackend(t)
   222  	rlsServer.SetResponseCallback(func(_ context.Context, _ *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
   223  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress}}}
   224  	})
   225  
   226  	// Register a manual resolver and push the RLS service config through it.
   227  	r := startManualResolverWithConfig(t, rlsConfig)
   228  
   229  	cc, err := grpc.NewClient(r.Scheme()+":///rls.test.example.com", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   230  	if err != nil {
   231  		t.Fatalf("Failed to create gRPC client: %v", err)
   232  	}
   233  	defer cc.Close()
   234  
   235  	// Make an RPC and ensure it gets routed to the test backend.
   236  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   237  	defer cancel()
   238  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, backendCh)
   239  
   240  	// Make sure an RLS request is sent out.
   241  	verifyRLSRequest(t, rlsReqCh, true)
   242  
   243  	// Verify that the control channel is using the LB policy we injected via the
   244  	// routeLookupChannelServiceConfig field.
   245  	if _, err := ccUpdateCh.Receive(ctx); err != nil {
   246  		t.Fatalf("timeout when waiting for control channel LB policy to receive a clientConn update")
   247  	}
   248  }
   249  
   250  // TestConfigUpdate_DefaultTarget tests the scenario where a config update
   251  // changes the default target. Verifies that RPCs get routed to the new default
   252  // target after the config has been applied.
   253  func (s) TestConfigUpdate_DefaultTarget(t *testing.T) {
   254  	// Start an RLS server and set the throttler to always throttle requests.
   255  	rlsServer, _ := rlstest.SetupFakeRLSServer(t, nil)
   256  	overrideAdaptiveThrottler(t, alwaysThrottlingThrottler())
   257  
   258  	// Build RLS service config with a default target.
   259  	rlsConfig := buildBasicRLSConfigWithChildPolicy(t, t.Name(), rlsServer.Address)
   260  	backendCh1, backendAddress1 := startBackend(t)
   261  	rlsConfig.RouteLookupConfig.DefaultTarget = backendAddress1
   262  
   263  	// Register a manual resolver and push the RLS service config through it.
   264  	r := startManualResolverWithConfig(t, rlsConfig)
   265  
   266  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   267  	if err != nil {
   268  		t.Fatalf("Failed to create gRPC client: %v", err)
   269  	}
   270  	defer cc.Close()
   271  
   272  	// Make an RPC and ensure it gets routed to the default target.
   273  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   274  	defer cancel()
   275  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, backendCh1)
   276  
   277  	// Change default_target field of the RLS config.
   278  	backendCh2, backendAddress2 := startBackend(t)
   279  	rlsConfig.RouteLookupConfig.DefaultTarget = backendAddress2
   280  
   281  	// Push the config update through the manual resolver.
   282  	scJSON, err := rlsConfig.ServiceConfigJSON()
   283  	if err != nil {
   284  		t.Fatal(err)
   285  	}
   286  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON)
   287  	r.UpdateState(resolver.State{ServiceConfig: sc})
   288  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, backendCh2)
   289  }
   290  
   291  // TestConfigUpdate_ChildPolicyConfigs verifies that config changes which affect
   292  // child policy configuration are propagated correctly.
   293  func (s) TestConfigUpdate_ChildPolicyConfigs(t *testing.T) {
   294  	// Start an RLS server and set the throttler to never throttle requests.
   295  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, nil)
   296  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   297  
   298  	// Start a default backend and a test backend.
   299  	_, defBackendAddress := startBackend(t)
   300  	testBackendCh, testBackendAddress := startBackend(t)
   301  
   302  	// Set up the RLS server to respond with the test backend.
   303  	rlsServer.SetResponseCallback(func(_ context.Context, _ *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
   304  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{testBackendAddress}}}
   305  	})
   306  
   307  	// Set up a test balancer callback to push configs received by child policies.
   308  	defBackendConfigsCh := make(chan *e2e.RLSChildPolicyConfig, 1)
   309  	testBackendConfigsCh := make(chan *e2e.RLSChildPolicyConfig, 1)
   310  	bf := &e2e.BalancerFuncs{
   311  		UpdateClientConnState: func(cfg *e2e.RLSChildPolicyConfig) error {
   312  			switch cfg.Backend {
   313  			case defBackendAddress:
   314  				defBackendConfigsCh <- cfg
   315  			case testBackendAddress:
   316  				testBackendConfigsCh <- cfg
   317  			default:
   318  				t.Errorf("Received child policy configs for unknown target %q", cfg.Backend)
   319  			}
   320  			return nil
   321  		},
   322  	}
   323  
   324  	// Register an LB policy to act as the child policy for RLS LB policy.
   325  	childPolicyName := "test-child-policy" + t.Name()
   326  	e2e.RegisterRLSChildPolicy(childPolicyName, bf)
   327  	t.Logf("Registered child policy with name %q", childPolicyName)
   328  
   329  	// Build RLS service config with default target.
   330  	rlsConfig := buildBasicRLSConfig(childPolicyName, rlsServer.Address)
   331  	rlsConfig.RouteLookupConfig.DefaultTarget = defBackendAddress
   332  
   333  	// Register a manual resolver and push the RLS service config through it.
   334  	r := startManualResolverWithConfig(t, rlsConfig)
   335  
   336  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   337  	if err != nil {
   338  		t.Fatalf("grpc.NewClient() failed: %v", err)
   339  	}
   340  	defer cc.Close()
   341  	cc.Connect()
   342  
   343  	// At this point, the RLS LB policy should have received its config, and
   344  	// should have created a child policy for the default target.
   345  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   346  	defer cancel()
   347  	wantCfg := &e2e.RLSChildPolicyConfig{Backend: defBackendAddress}
   348  	select {
   349  	case <-ctx.Done():
   350  		t.Fatal("Timed out when waiting for the default target child policy to receive its config")
   351  	case gotCfg := <-defBackendConfigsCh:
   352  		if !cmp.Equal(gotCfg, wantCfg) {
   353  			t.Fatalf("Default target child policy received config %+v, want %+v", gotCfg, wantCfg)
   354  		}
   355  	}
   356  
   357  	// Make an RPC and ensure it gets routed to the test backend.
   358  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, testBackendCh)
   359  
   360  	// Make sure an RLS request is sent out.
   361  	verifyRLSRequest(t, rlsReqCh, true)
   362  
   363  	// As part of handling the above RPC, the RLS LB policy should have created
   364  	// a child policy for the test target.
   365  	wantCfg = &e2e.RLSChildPolicyConfig{Backend: testBackendAddress}
   366  	select {
   367  	case <-ctx.Done():
   368  		t.Fatal("Timed out when waiting for the test target child policy to receive its config")
   369  	case gotCfg := <-testBackendConfigsCh:
   370  		if !cmp.Equal(gotCfg, wantCfg) {
   371  			t.Fatalf("Test target child policy received config %+v, want %+v", gotCfg, wantCfg)
   372  		}
   373  	}
   374  
   375  	// Push an RLS config update with a change in the child policy config.
   376  	childPolicyBuilder := balancer.Get(childPolicyName)
   377  	childPolicyParser := childPolicyBuilder.(balancer.ConfigParser)
   378  	lbCfg, err := childPolicyParser.ParseConfig([]byte(`{"Random": "random"}`))
   379  	if err != nil {
   380  		t.Fatal(err)
   381  	}
   382  	rlsConfig.ChildPolicy.Config = lbCfg
   383  	scJSON, err := rlsConfig.ServiceConfigJSON()
   384  	if err != nil {
   385  		t.Fatal(err)
   386  	}
   387  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON)
   388  	r.UpdateState(resolver.State{ServiceConfig: sc})
   389  
   390  	// Expect the child policy for the test backend to receive the update.
   391  	wantCfg = &e2e.RLSChildPolicyConfig{
   392  		Backend: testBackendAddress,
   393  		Random:  "random",
   394  	}
   395  	select {
   396  	case <-ctx.Done():
   397  		t.Fatal("Timed out when waiting for the test target child policy to receive its config")
   398  	case gotCfg := <-testBackendConfigsCh:
   399  		if !cmp.Equal(gotCfg, wantCfg) {
   400  			t.Fatalf("Test target child policy received config %+v, want %+v", gotCfg, wantCfg)
   401  		}
   402  	}
   403  
   404  	// Expect the child policy for the default backend to receive the update.
   405  	wantCfg = &e2e.RLSChildPolicyConfig{
   406  		Backend: defBackendAddress,
   407  		Random:  "random",
   408  	}
   409  	select {
   410  	case <-ctx.Done():
   411  		t.Fatal("Timed out when waiting for the default target child policy to receive its config")
   412  	case gotCfg := <-defBackendConfigsCh:
   413  		if !cmp.Equal(gotCfg, wantCfg) {
   414  			t.Fatalf("Default target child policy received config %+v, want %+v", gotCfg, wantCfg)
   415  		}
   416  	}
   417  }
   418  
   419  // TestConfigUpdate_ChildPolicyChange verifies that a child policy change is
   420  // handled by closing the old balancer and creating a new one.
   421  func (s) TestConfigUpdate_ChildPolicyChange(t *testing.T) {
   422  	// Start an RLS server and set the throttler to never throttle requests.
   423  	rlsServer, _ := rlstest.SetupFakeRLSServer(t, nil)
   424  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   425  
   426  	// Set up balancer callbacks.
   427  	configsCh1 := make(chan *e2e.RLSChildPolicyConfig, 1)
   428  	closeCh1 := make(chan struct{}, 1)
   429  	bf := &e2e.BalancerFuncs{
   430  		UpdateClientConnState: func(cfg *e2e.RLSChildPolicyConfig) error {
   431  			configsCh1 <- cfg
   432  			return nil
   433  		},
   434  		Close: func() {
   435  			closeCh1 <- struct{}{}
   436  		},
   437  	}
   438  
   439  	// Register an LB policy to act as the child policy for RLS LB policy.
   440  	childPolicyName1 := "test-child-policy-1" + t.Name()
   441  	e2e.RegisterRLSChildPolicy(childPolicyName1, bf)
   442  	t.Logf("Registered child policy with name %q", childPolicyName1)
   443  
   444  	// Build RLS service config with a dummy default target.
   445  	const defaultBackend = "default-backend"
   446  	rlsConfig := buildBasicRLSConfig(childPolicyName1, rlsServer.Address)
   447  	rlsConfig.RouteLookupConfig.DefaultTarget = defaultBackend
   448  
   449  	// Register a manual resolver and push the RLS service config through it.
   450  	r := startManualResolverWithConfig(t, rlsConfig)
   451  
   452  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   453  	if err != nil {
   454  		t.Fatalf("grpc.NewClient() failed: %v", err)
   455  	}
   456  	defer cc.Close()
   457  	cc.Connect()
   458  
   459  	// At this point, the RLS LB policy should have received its config, and
   460  	// should have created a child policy for the default target.
   461  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   462  	defer cancel()
   463  	wantCfg := &e2e.RLSChildPolicyConfig{Backend: defaultBackend}
   464  	select {
   465  	case <-ctx.Done():
   466  		t.Fatal("Timed out when waiting for the first child policy to receive its config")
   467  	case gotCfg := <-configsCh1:
   468  		if !cmp.Equal(gotCfg, wantCfg) {
   469  			t.Fatalf("First child policy received config %+v, want %+v", gotCfg, wantCfg)
   470  		}
   471  	}
   472  
   473  	// Set up balancer callbacks for the second policy.
   474  	configsCh2 := make(chan *e2e.RLSChildPolicyConfig, 1)
   475  	bf = &e2e.BalancerFuncs{
   476  		UpdateClientConnState: func(cfg *e2e.RLSChildPolicyConfig) error {
   477  			configsCh2 <- cfg
   478  			return nil
   479  		},
   480  	}
   481  
   482  	// Register a second LB policy to act as the child policy for RLS LB policy.
   483  	childPolicyName2 := "test-child-policy-2" + t.Name()
   484  	e2e.RegisterRLSChildPolicy(childPolicyName2, bf)
   485  	t.Logf("Registered child policy with name %q", childPolicyName2)
   486  
   487  	// Push an RLS config update with a change in the child policy name.
   488  	rlsConfig.ChildPolicy = &internalserviceconfig.BalancerConfig{Name: childPolicyName2}
   489  	scJSON, err := rlsConfig.ServiceConfigJSON()
   490  	if err != nil {
   491  		t.Fatal(err)
   492  	}
   493  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON)
   494  	r.UpdateState(resolver.State{ServiceConfig: sc})
   495  
   496  	// The above update should result in the first LB policy being shutdown and
   497  	// the second LB policy receiving a config update.
   498  	select {
   499  	case <-ctx.Done():
   500  		t.Fatal("Timed out when waiting for the first child policy to be shutdown")
   501  	case <-closeCh1:
   502  	}
   503  
   504  	select {
   505  	case <-ctx.Done():
   506  		t.Fatal("Timed out when waiting for the second child policy to receive its config")
   507  	case gotCfg := <-configsCh2:
   508  		if !cmp.Equal(gotCfg, wantCfg) {
   509  			t.Fatalf("First child policy received config %+v, want %+v", gotCfg, wantCfg)
   510  		}
   511  	}
   512  }
   513  
   514  // TestConfigUpdate_BadChildPolicyConfigs tests the scenario where a config
   515  // update is rejected by the child policy. Verifies that the child policy
   516  // wrapper goes "lame" and the error from the child policy is reported back to
   517  // the caller of the RPC.
   518  func (s) TestConfigUpdate_BadChildPolicyConfigs(t *testing.T) {
   519  	// Start an RLS server and set the throttler to never throttle requests.
   520  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, nil)
   521  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   522  
   523  	// Set up the RLS server to respond with a bad target field which is expected
   524  	// to cause the child policy's ParseTarget to fail and should result in the LB
   525  	// policy creating a lame child policy wrapper.
   526  	rlsServer.SetResponseCallback(func(_ context.Context, _ *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
   527  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{e2e.RLSChildPolicyBadTarget}}}
   528  	})
   529  
   530  	// Build RLS service config with a default target. This default backend is
   531  	// expected to be healthy (even though we don't attempt to route RPCs to it)
   532  	// and ensures that the overall connectivity state of the RLS LB policy is not
   533  	// TRANSIENT_FAILURE. This is required to make sure that the pick for the bad
   534  	// child policy actually gets delegated to the child policy picker.
   535  	rlsConfig := buildBasicRLSConfigWithChildPolicy(t, t.Name(), rlsServer.Address)
   536  	_, addr := startBackend(t)
   537  	rlsConfig.RouteLookupConfig.DefaultTarget = addr
   538  
   539  	// Register a manual resolver and push the RLS service config through it.
   540  	r := startManualResolverWithConfig(t, rlsConfig)
   541  
   542  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   543  	if err != nil {
   544  		t.Fatalf("Failed to create gRPC client: %v", err)
   545  	}
   546  	defer cc.Close()
   547  
   548  	// Make an RPC and ensure that if fails with the expected error.
   549  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   550  	defer cancel()
   551  	makeTestRPCAndVerifyError(ctx, t, cc, codes.Unavailable, e2e.ErrParseConfigBadTarget)
   552  
   553  	// Make sure an RLS request is sent out.
   554  	verifyRLSRequest(t, rlsReqCh, true)
   555  }
   556  
   557  // TestConfigUpdate_DataCacheSizeDecrease tests the scenario where a config
   558  // update decreases the data cache size. Verifies that entries are evicted from
   559  // the cache.
   560  func (s) TestConfigUpdate_DataCacheSizeDecrease(t *testing.T) {
   561  	// Override the clientConn update hook to get notified.
   562  	clientConnUpdateDone := make(chan struct{}, 1)
   563  	origClientConnUpdateHook := clientConnUpdateHook
   564  	clientConnUpdateHook = func() { clientConnUpdateDone <- struct{}{} }
   565  	defer func() { clientConnUpdateHook = origClientConnUpdateHook }()
   566  
   567  	// Override the cache entry size func, and always return 1.
   568  	origEntrySizeFunc := computeDataCacheEntrySize
   569  	computeDataCacheEntrySize = func(cacheKey, *cacheEntry) int64 { return 1 }
   570  	defer func() { computeDataCacheEntrySize = origEntrySizeFunc }()
   571  
   572  	// Override the minEvictionDuration to ensure that when the config update
   573  	// reduces the cache size, the resize operation is not stopped because
   574  	// we find an entry whose minExpiryDuration has not elapsed.
   575  	origMinEvictDuration := minEvictDuration
   576  	minEvictDuration = time.Duration(0)
   577  	defer func() { minEvictDuration = origMinEvictDuration }()
   578  
   579  	// Start an RLS server and set the throttler to never throttle requests.
   580  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, nil)
   581  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   582  
   583  	// Register an LB policy to act as the child policy for RLS LB policy.
   584  	childPolicyName := "test-child-policy" + t.Name()
   585  	e2e.RegisterRLSChildPolicy(childPolicyName, nil)
   586  	t.Logf("Registered child policy with name %q", childPolicyName)
   587  
   588  	// Build RLS service config with header matchers.
   589  	rlsConfig := buildBasicRLSConfig(childPolicyName, rlsServer.Address)
   590  
   591  	// Start a couple of test backends, and set up the fake RLS server to return
   592  	// these as targets in the RLS response, based on request keys.
   593  	backendCh1, backendAddress1 := startBackend(t)
   594  	backendCh2, backendAddress2 := startBackend(t)
   595  	rlsServer.SetResponseCallback(func(_ context.Context, req *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
   596  		if req.KeyMap["k1"] == "v1" {
   597  			return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress1}}}
   598  		}
   599  		if req.KeyMap["k2"] == "v2" {
   600  			return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress2}}}
   601  		}
   602  		return &rlstest.RouteLookupResponse{Err: errors.New("no keys in request metadata")}
   603  	})
   604  
   605  	// Register a manual resolver and push the RLS service config through it.
   606  	r := startManualResolverWithConfig(t, rlsConfig)
   607  
   608  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   609  	if err != nil {
   610  		t.Fatalf("grpc.NewClient() failed: %v", err)
   611  	}
   612  	defer cc.Close()
   613  	cc.Connect()
   614  
   615  	<-clientConnUpdateDone
   616  
   617  	// Make an RPC and ensure it gets routed to the first backend.
   618  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   619  	defer cancel()
   620  	ctxOutgoing := metadata.AppendToOutgoingContext(ctx, "n1", "v1")
   621  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh1)
   622  
   623  	// Make sure an RLS request is sent out.
   624  	verifyRLSRequest(t, rlsReqCh, true)
   625  
   626  	// Make another RPC with a different set of headers. This will force the LB
   627  	// policy to send out a new RLS request, resulting in a new data cache
   628  	// entry.
   629  	ctxOutgoing = metadata.AppendToOutgoingContext(ctx, "n2", "v2")
   630  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh2)
   631  
   632  	// Make sure an RLS request is sent out.
   633  	verifyRLSRequest(t, rlsReqCh, true)
   634  
   635  	// We currently have two cache entries. Setting the size to 1, will cause
   636  	// the entry corresponding to backend1 to be evicted.
   637  	rlsConfig.RouteLookupConfig.CacheSizeBytes = 1
   638  
   639  	// Push the config update through the manual resolver.
   640  	scJSON, err := rlsConfig.ServiceConfigJSON()
   641  	if err != nil {
   642  		t.Fatal(err)
   643  	}
   644  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON)
   645  	r.UpdateState(resolver.State{ServiceConfig: sc})
   646  
   647  	<-clientConnUpdateDone
   648  
   649  	// Make an RPC to match the cache entry which got evicted above, and expect
   650  	// an RLS request to be made to fetch the targets.
   651  	ctxOutgoing = metadata.AppendToOutgoingContext(ctx, "n1", "v1")
   652  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh1)
   653  
   654  	// Make sure an RLS request is sent out.
   655  	verifyRLSRequest(t, rlsReqCh, true)
   656  }
   657  
   658  // Test that when a data cache entry is evicted due to config change
   659  // in cache size, the picker is updated accordingly.
   660  func (s) TestPickerUpdateOnDataCacheSizeDecrease(t *testing.T) {
   661  	// Override the clientConn update hook to get notified.
   662  	clientConnUpdateDone := make(chan struct{}, 1)
   663  	origClientConnUpdateHook := clientConnUpdateHook
   664  	clientConnUpdateHook = func() { clientConnUpdateDone <- struct{}{} }
   665  	defer func() { clientConnUpdateHook = origClientConnUpdateHook }()
   666  
   667  	// Override the cache entry size func, and always return 1.
   668  	origEntrySizeFunc := computeDataCacheEntrySize
   669  	computeDataCacheEntrySize = func(cacheKey, *cacheEntry) int64 { return 1 }
   670  	defer func() { computeDataCacheEntrySize = origEntrySizeFunc }()
   671  
   672  	// Override the backoff strategy to return a large backoff which
   673  	// will make sure the date cache entry remains in backoff for the
   674  	// duration of the test.
   675  	origBackoffStrategy := defaultBackoffStrategy
   676  	defaultBackoffStrategy = &fakeBackoffStrategy{backoff: defaultTestTimeout}
   677  	defer func() { defaultBackoffStrategy = origBackoffStrategy }()
   678  
   679  	// Override the minEvictionDuration to ensure that when the config update
   680  	// reduces the cache size, the resize operation is not stopped because
   681  	// we find an entry whose minExpiryDuration has not elapsed.
   682  	origMinEvictDuration := minEvictDuration
   683  	minEvictDuration = time.Duration(0)
   684  	defer func() { minEvictDuration = origMinEvictDuration }()
   685  
   686  	// Register the top-level wrapping balancer which forwards calls to RLS.
   687  	topLevelBalancerName := t.Name() + "top-level"
   688  	var ccWrapper *testCCWrapper
   689  	stub.Register(topLevelBalancerName, stub.BalancerFuncs{
   690  		Init: func(bd *stub.BalancerData) {
   691  			ccWrapper = &testCCWrapper{ClientConn: bd.ClientConn}
   692  			bd.Data = balancer.Get(Name).Build(ccWrapper, bd.BuildOptions)
   693  		},
   694  		ParseConfig: func(sc json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
   695  			parser := balancer.Get(Name).(balancer.ConfigParser)
   696  			return parser.ParseConfig(sc)
   697  		},
   698  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
   699  			bal := bd.Data.(balancer.Balancer)
   700  			return bal.UpdateClientConnState(ccs)
   701  		},
   702  		Close: func(bd *stub.BalancerData) {
   703  			bal := bd.Data.(balancer.Balancer)
   704  			bal.Close()
   705  		},
   706  	})
   707  
   708  	// Start an RLS server and set the throttler to never throttle requests.
   709  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, nil)
   710  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   711  
   712  	// Register an LB policy to act as the child policy for RLS LB policy.
   713  	childPolicyName := "test-child-policy" + t.Name()
   714  	e2e.RegisterRLSChildPolicy(childPolicyName, nil)
   715  	t.Logf("Registered child policy with name %q", childPolicyName)
   716  
   717  	// Start a couple of test backends, and set up the fake RLS server to return
   718  	// these as targets in the RLS response, based on request keys.
   719  	// Start a couple of test backends, and set up the fake RLS server to return
   720  	// these as targets in the RLS response, based on request keys.
   721  	backendCh1, backendAddress1 := startBackend(t)
   722  	backendCh2, backendAddress2 := startBackend(t)
   723  	rlsServer.SetResponseCallback(func(_ context.Context, req *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
   724  		if req.KeyMap["k1"] == "v1" {
   725  			return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress1}}}
   726  		}
   727  		if req.KeyMap["k2"] == "v2" {
   728  			return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress2}}}
   729  		}
   730  		return &rlstest.RouteLookupResponse{Err: errors.New("no keys in request metadata")}
   731  	})
   732  
   733  	// Register a manual resolver and push the RLS service config through it.
   734  	r := manual.NewBuilderWithScheme("rls-e2e")
   735  	headers := `
   736      [
   737          {
   738              "key": "k1",
   739              "names": [
   740                  "n1"
   741              ]
   742          },
   743          {
   744              "key": "k2",
   745              "names": [
   746                  "n2"
   747              ]
   748          }
   749      ]
   750      `
   751  
   752  	configJSON := `
   753  	{
   754  	  "loadBalancingConfig": [
   755  		{
   756  		  "%s": {
   757  			"routeLookupConfig": {
   758  				"grpcKeybuilders": [{
   759  					"names": [{"service": "grpc.testing.TestService"}],
   760  					"headers": %s
   761  				}],
   762  				"lookupService": "%s",
   763  				"cacheSizeBytes": %d
   764  			},
   765  			"childPolicy": [{"%s": {}}],
   766  			"childPolicyConfigTargetFieldName": "Backend"
   767  		  }
   768  		}
   769  	  ]
   770  	}`
   771  	scJSON := fmt.Sprintf(configJSON, topLevelBalancerName, headers, rlsServer.Address, 1000, childPolicyName)
   772  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON)
   773  	r.InitialState(resolver.State{ServiceConfig: sc})
   774  
   775  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   776  	if err != nil {
   777  		t.Fatalf("create grpc.NewClient() failed: %v", err)
   778  	}
   779  	defer cc.Close()
   780  	cc.Connect()
   781  
   782  	<-clientConnUpdateDone
   783  
   784  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   785  	defer cancel()
   786  	// Make an RPC call with empty metadata, which will eventually throw
   787  	// the error as no metadata will match from rlsServer response
   788  	// callback defined above. This will cause the control channel to
   789  	// throw the error and cause the item to get into backoff.
   790  	makeTestRPCAndVerifyError(ctx, t, cc, codes.Unavailable, nil)
   791  
   792  	ctxOutgoing := metadata.AppendToOutgoingContext(ctx, "n1", "v1")
   793  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh1)
   794  	verifyRLSRequest(t, rlsReqCh, true)
   795  
   796  	ctxOutgoing = metadata.AppendToOutgoingContext(ctx, "n2", "v2")
   797  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh2)
   798  	verifyRLSRequest(t, rlsReqCh, true)
   799  
   800  	initialStateCnt := len(ccWrapper.getStates())
   801  	// Setting the size to 1 will cause the entries to be
   802  	// evicted.
   803  	scJSON1 := fmt.Sprintf(`
   804  {
   805    "loadBalancingConfig": [
   806      {
   807        "%s": {
   808  		"routeLookupConfig": {
   809  			"grpcKeybuilders": [{
   810  				"names": [{"service": "grpc.testing.TestService"}],
   811  				"headers": %s
   812  			}],
   813  			"lookupService": "%s",
   814  			"cacheSizeBytes": 2
   815  		},
   816  		"childPolicy": [{"%s": {}}],
   817  		"childPolicyConfigTargetFieldName": "Backend"
   818        }
   819      }
   820    ]
   821  }`, topLevelBalancerName, headers, rlsServer.Address, childPolicyName)
   822  	sc1 := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON1)
   823  	r.UpdateState(resolver.State{ServiceConfig: sc1})
   824  	<-clientConnUpdateDone
   825  	finalStateCnt := len(ccWrapper.getStates())
   826  
   827  	if finalStateCnt != initialStateCnt+1 {
   828  		t.Errorf("Unexpected balancer state count: got %v, want %v", finalStateCnt, initialStateCnt)
   829  	}
   830  }
   831  
   832  // TestDataCachePurging verifies that the LB policy periodically evicts expired
   833  // entries from the data cache.
   834  func (s) TestDataCachePurging(t *testing.T) {
   835  	// Override the frequency of the data cache purger to a small one.
   836  	origDataCachePurgeTicker := dataCachePurgeTicker
   837  	ticker := time.NewTicker(defaultTestShortTimeout)
   838  	defer ticker.Stop()
   839  	dataCachePurgeTicker = func() *time.Ticker { return ticker }
   840  	defer func() { dataCachePurgeTicker = origDataCachePurgeTicker }()
   841  
   842  	// Override the data cache purge hook to get notified.
   843  	dataCachePurgeDone := make(chan struct{}, 1)
   844  	origDataCachePurgeHook := dataCachePurgeHook
   845  	dataCachePurgeHook = func() { dataCachePurgeDone <- struct{}{} }
   846  	defer func() { dataCachePurgeHook = origDataCachePurgeHook }()
   847  
   848  	// Start an RLS server and set the throttler to never throttle requests.
   849  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, nil)
   850  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   851  
   852  	// Register an LB policy to act as the child policy for RLS LB policy.
   853  	childPolicyName := "test-child-policy" + t.Name()
   854  	e2e.RegisterRLSChildPolicy(childPolicyName, nil)
   855  	t.Logf("Registered child policy with name %q", childPolicyName)
   856  
   857  	// Build RLS service config with header matchers and lookupService pointing to
   858  	// the fake RLS server created above. Set a very low value for maxAge to
   859  	// ensure that the entry expires soon.
   860  	rlsConfig := buildBasicRLSConfig(childPolicyName, rlsServer.Address)
   861  	rlsConfig.RouteLookupConfig.MaxAge = durationpb.New(time.Millisecond)
   862  
   863  	// Start a test backend, and set up the fake RLS server to return this as a
   864  	// target in the RLS response.
   865  	backendCh, backendAddress := startBackend(t)
   866  	rlsServer.SetResponseCallback(func(_ context.Context, _ *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
   867  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress}}}
   868  	})
   869  
   870  	// Register a manual resolver and push the RLS service config through it.
   871  	r := startManualResolverWithConfig(t, rlsConfig)
   872  
   873  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   874  	if err != nil {
   875  		t.Fatalf("Failed to create gRPC client: %v", err)
   876  	}
   877  	defer cc.Close()
   878  
   879  	// Make an RPC and ensure it gets routed to the test backend.
   880  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   881  	defer cancel()
   882  	ctxOutgoing := metadata.AppendToOutgoingContext(ctx, "n1", "v1")
   883  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh)
   884  
   885  	// Make sure an RLS request is sent out.
   886  	verifyRLSRequest(t, rlsReqCh, true)
   887  
   888  	// Make another RPC with different headers. This will force the LB policy to
   889  	// send out a new RLS request, resulting in a new data cache entry.
   890  	ctxOutgoing = metadata.AppendToOutgoingContext(ctx, "n2", "v2")
   891  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh)
   892  
   893  	// Make sure an RLS request is sent out.
   894  	verifyRLSRequest(t, rlsReqCh, true)
   895  
   896  	// Wait for the data cache purging to happen before proceeding.
   897  	<-dataCachePurgeDone
   898  
   899  	// Perform the same RPCs again and verify that they result in RLS requests.
   900  	ctxOutgoing = metadata.AppendToOutgoingContext(ctx, "n1", "v1")
   901  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh)
   902  
   903  	// Make sure an RLS request is sent out.
   904  	verifyRLSRequest(t, rlsReqCh, true)
   905  
   906  	// Make another RPC with different headers. This will force the LB policy to
   907  	// send out a new RLS request, resulting in a new data cache entry.
   908  	ctxOutgoing = metadata.AppendToOutgoingContext(ctx, "n2", "v2")
   909  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh)
   910  
   911  	// Make sure an RLS request is sent out.
   912  	verifyRLSRequest(t, rlsReqCh, true)
   913  }
   914  
   915  // TestControlChannelConnectivityStateMonitoring tests the scenario where the
   916  // control channel goes down and comes back up again and verifies that backoff
   917  // state is reset for cache entries in this scenario.
   918  func (s) TestControlChannelConnectivityStateMonitoring(t *testing.T) {
   919  	// Create a restartable listener which can close existing connections.
   920  	l, err := testutils.LocalTCPListener()
   921  	if err != nil {
   922  		t.Fatalf("net.Listen() failed: %v", err)
   923  	}
   924  	lis := testutils.NewRestartableListener(l)
   925  
   926  	// Start an RLS server with the restartable listener and set the throttler to
   927  	// never throttle requests.
   928  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, lis)
   929  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
   930  
   931  	// Override the reset backoff hook to get notified.
   932  	resetBackoffDone := make(chan struct{}, 1)
   933  	origResetBackoffHook := resetBackoffHook
   934  	resetBackoffHook = func() { resetBackoffDone <- struct{}{} }
   935  	defer func() { resetBackoffHook = origResetBackoffHook }()
   936  
   937  	// Override the backoff strategy to return a large backoff which
   938  	// will make sure the date cache entry remains in backoff for the
   939  	// duration of the test.
   940  	origBackoffStrategy := defaultBackoffStrategy
   941  	defaultBackoffStrategy = &fakeBackoffStrategy{backoff: defaultTestTimeout}
   942  	defer func() { defaultBackoffStrategy = origBackoffStrategy }()
   943  
   944  	// Register an LB policy to act as the child policy for RLS LB policy.
   945  	childPolicyName := "test-child-policy" + t.Name()
   946  	e2e.RegisterRLSChildPolicy(childPolicyName, nil)
   947  	t.Logf("Registered child policy with name %q", childPolicyName)
   948  
   949  	// Build RLS service config with header matchers, and a very low value for
   950  	// maxAge to ensure that cache entries become invalid very soon.
   951  	rlsConfig := buildBasicRLSConfig(childPolicyName, rlsServer.Address)
   952  	rlsConfig.RouteLookupConfig.MaxAge = durationpb.New(defaultTestShortTimeout)
   953  
   954  	// Start a test backend, and set up the fake RLS server to return this as a
   955  	// target in the RLS response.
   956  	backendCh, backendAddress := startBackend(t)
   957  	rlsServer.SetResponseCallback(func(_ context.Context, _ *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
   958  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backendAddress}}}
   959  	})
   960  
   961  	// Register a manual resolver and push the RLS service config through it.
   962  	r := startManualResolverWithConfig(t, rlsConfig)
   963  
   964  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
   965  	if err != nil {
   966  		t.Fatalf("Failed to create gRPC client: %v", err)
   967  	}
   968  	defer cc.Close()
   969  
   970  	// Make an RPC and ensure it gets routed to the test backend.
   971  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   972  	defer cancel()
   973  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, backendCh)
   974  
   975  	// Make sure an RLS request is sent out.
   976  	verifyRLSRequest(t, rlsReqCh, true)
   977  
   978  	// Stop the RLS server.
   979  	lis.Stop()
   980  
   981  	// Make another RPC similar to the first one. Since the above cache entry
   982  	// would have expired by now, this should trigger another RLS request. And
   983  	// since the RLS server is down, RLS request will fail and the cache entry
   984  	// will enter backoff, and we have overridden the default backoff strategy to
   985  	// return a value which will keep this entry in backoff for the whole duration
   986  	// of the test.
   987  	makeTestRPCAndVerifyError(ctx, t, cc, codes.Unavailable, nil)
   988  
   989  	// Restart the RLS server.
   990  	lis.Restart()
   991  
   992  	// When we closed the RLS server earlier, the existing transport to the RLS
   993  	// server would have closed, and the RLS control channel would have moved to
   994  	// TRANSIENT_FAILURE with a subConn backoff before moving to IDLE. This
   995  	// backoff will last for about a second. We need to keep retrying RPCs for the
   996  	// subConn to eventually come out of backoff and attempt to reconnect.
   997  	//
   998  	// Make this RPC with a different set of headers leading to the creation of
   999  	// a new cache entry and a new RLS request. This RLS request will also fail
  1000  	// till the control channel comes moves back to READY. So, override the
  1001  	// backoff strategy to perform a small backoff on this entry.
  1002  	defaultBackoffStrategy = &fakeBackoffStrategy{backoff: defaultTestShortTimeout}
  1003  	ctxOutgoing := metadata.AppendToOutgoingContext(ctx, "n1", "v1")
  1004  	makeTestRPCAndExpectItToReachBackend(ctxOutgoing, t, cc, backendCh)
  1005  
  1006  	select {
  1007  	case <-ctx.Done():
  1008  		t.Fatalf("Timed out waiting for resetBackoffDone")
  1009  	case <-resetBackoffDone:
  1010  	}
  1011  
  1012  	// The fact that the above RPC succeeded indicates that the control channel
  1013  	// has moved back to READY. The connectivity state monitoring code should have
  1014  	// realized this and should have reset all backoff timers (which in this case
  1015  	// is the cache entry corresponding to the first RPC). Retrying that RPC now
  1016  	// should succeed with an RLS request being sent out.
  1017  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, backendCh)
  1018  	verifyRLSRequest(t, rlsReqCh, true)
  1019  }
  1020  
// testCCWrapper wraps a balancer.ClientConn and overrides UpdateState to
// store all state updates pushed by the RLS LB policy. Calls to methods other
// than UpdateState go straight to the embedded ClientConn.
type testCCWrapper struct {
	balancer.ClientConn

	// mu guards states, which holds every balancer.State passed to
	// UpdateState, in the order in which they were received.
	mu     sync.Mutex
	states []balancer.State
}
  1029  
  1030  func (t *testCCWrapper) UpdateState(bs balancer.State) {
  1031  	t.mu.Lock()
  1032  	t.states = append(t.states, bs)
  1033  	t.mu.Unlock()
  1034  	t.ClientConn.UpdateState(bs)
  1035  }
  1036  
  1037  func (t *testCCWrapper) getStates() []balancer.State {
  1038  	t.mu.Lock()
  1039  	defer t.mu.Unlock()
  1040  
  1041  	states := make([]balancer.State, len(t.states))
  1042  	copy(states, t.states)
  1043  	return states
  1044  }
  1045  
  1046  // TestUpdateStatePauses tests the scenario where a config update received by
  1047  // the RLS LB policy results in multiple UpdateState calls from the child
  1048  // policies. This test verifies that picker updates are paused when the config
  1049  // update is being processed by RLS LB policy and its child policies.
  1050  //
  1051  // The test uses a wrapping balancer as the top-level LB policy on the channel.
  1052  // The wrapping balancer wraps an RLS LB policy as a child policy and forwards
  1053  // all calls to it. It also records the UpdateState() calls from the RLS LB
  1054  // policy and makes it available for inspection by the test.
  1055  //
  1056  // The test uses another wrapped balancer (which wraps a pickfirst balancer) as
  1057  // the child policy of the RLS LB policy. This balancer makes multiple
  1058  // UpdateState calls when handling an update from its parent in
  1059  // UpdateClientConnState.
  1060  func (s) TestUpdateStatePauses(t *testing.T) {
  1061  	// Override the hook to get notified when UpdateClientConnState is done.
  1062  	clientConnUpdateDone := make(chan struct{}, 1)
  1063  	origClientConnUpdateHook := clientConnUpdateHook
  1064  	clientConnUpdateHook = func() { clientConnUpdateDone <- struct{}{} }
  1065  	defer func() { clientConnUpdateHook = origClientConnUpdateHook }()
  1066  
  1067  	// Register the top-level wrapping balancer which forwards calls to RLS.
  1068  	topLevelBalancerName := t.Name() + "top-level"
  1069  	var ccWrapper *testCCWrapper
  1070  	stub.Register(topLevelBalancerName, stub.BalancerFuncs{
  1071  		Init: func(bd *stub.BalancerData) {
  1072  			ccWrapper = &testCCWrapper{ClientConn: bd.ClientConn}
  1073  			bd.Data = balancer.Get(Name).Build(ccWrapper, bd.BuildOptions)
  1074  		},
  1075  		ParseConfig: func(sc json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
  1076  			parser := balancer.Get(Name).(balancer.ConfigParser)
  1077  			return parser.ParseConfig(sc)
  1078  		},
  1079  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
  1080  			bal := bd.Data.(balancer.Balancer)
  1081  			return bal.UpdateClientConnState(ccs)
  1082  		},
  1083  		Close: func(bd *stub.BalancerData) {
  1084  			bal := bd.Data.(balancer.Balancer)
  1085  			bal.Close()
  1086  		},
  1087  	})
  1088  
  1089  	// Register a child policy that wraps a pickfirst balancer and makes multiple calls
  1090  	// to UpdateState when handling a config update in UpdateClientConnState. When
  1091  	// this policy is used as a child policy of the RLS LB policy, it is expected
  1092  	// that the latter suppress these updates and push a single picker update on the
  1093  	// channel (after the config has been processed by all child policies).
  1094  	childPolicyName := t.Name() + "child"
  1095  	type childPolicyConfig struct {
  1096  		serviceconfig.LoadBalancingConfig
  1097  		Backend string // `json:"backend,omitempty"`
  1098  	}
  1099  	stub.Register(childPolicyName, stub.BalancerFuncs{
  1100  		Init: func(bd *stub.BalancerData) {
  1101  			bd.Data = balancer.Get(pickfirst.Name).Build(bd.ClientConn, bd.BuildOptions)
  1102  		},
  1103  		Close: func(bd *stub.BalancerData) {
  1104  			bd.Data.(balancer.Balancer).Close()
  1105  		},
  1106  		ParseConfig: func(sc json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
  1107  			cfg := &childPolicyConfig{}
  1108  			if err := json.Unmarshal(sc, cfg); err != nil {
  1109  				return nil, err
  1110  			}
  1111  			return cfg, nil
  1112  		},
  1113  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
  1114  			bal := bd.Data.(balancer.Balancer)
  1115  			bd.ClientConn.UpdateState(balancer.State{ConnectivityState: connectivity.Idle, Picker: &testutils.TestConstPicker{Err: balancer.ErrNoSubConnAvailable}})
  1116  			bd.ClientConn.UpdateState(balancer.State{ConnectivityState: connectivity.Connecting, Picker: &testutils.TestConstPicker{Err: balancer.ErrNoSubConnAvailable}})
  1117  
  1118  			cfg := ccs.BalancerConfig.(*childPolicyConfig)
  1119  			return bal.UpdateClientConnState(balancer.ClientConnState{
  1120  				ResolverState: resolver.State{Addresses: []resolver.Address{{Addr: cfg.Backend}}},
  1121  			})
  1122  		},
  1123  	})
  1124  
  1125  	// Start an RLS server and set the throttler to never throttle requests.
  1126  	rlsServer, rlsReqCh := rlstest.SetupFakeRLSServer(t, nil)
  1127  	overrideAdaptiveThrottler(t, neverThrottlingThrottler())
  1128  
  1129  	// Start a test backend and set the RLS server to respond with it.
  1130  	testBackendCh, testBackendAddress := startBackend(t)
  1131  	rlsServer.SetResponseCallback(func(_ context.Context, _ *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
  1132  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{testBackendAddress}}}
  1133  	})
  1134  
  1135  	// Register a manual resolver and push the RLS service config through it.
  1136  	r := manual.NewBuilderWithScheme("rls-e2e")
  1137  	scJSON := fmt.Sprintf(`
  1138  {
  1139    "loadBalancingConfig": [
  1140      {
  1141        "%s": {
  1142  		"routeLookupConfig": {
  1143  			"grpcKeybuilders": [{
  1144  				"names": [{"service": "grpc.testing.TestService"}]
  1145  			}],
  1146  			"lookupService": "%s",
  1147  			"cacheSizeBytes": 1000
  1148  		},
  1149  		"childPolicy": [{"%s": {}}],
  1150  		"childPolicyConfigTargetFieldName": "Backend"
  1151        }
  1152      }
  1153    ]
  1154  }`, topLevelBalancerName, rlsServer.Address, childPolicyName)
  1155  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON)
  1156  	r.InitialState(resolver.State{ServiceConfig: sc})
  1157  
  1158  	cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()))
  1159  	if err != nil {
  1160  		t.Fatalf("grpc.NewClient() failed: %v", err)
  1161  	}
  1162  	defer cc.Close()
  1163  	cc.Connect()
  1164  
  1165  	// Wait for the clientconn update to be processed by the RLS LB policy.
  1166  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1167  	defer cancel()
  1168  	select {
  1169  	case <-ctx.Done():
  1170  	case <-clientConnUpdateDone:
  1171  	}
  1172  
  1173  	// It is important to note that at this point no child policies have been
  1174  	// created because we have not attempted any RPC so far. When we attempt an
  1175  	// RPC (below), child policies will be created and their configs will be
  1176  	// pushed to them. But this config update will not happen in the context of
  1177  	// a config update on the parent.
  1178  
  1179  	// Make an RPC and ensure it gets routed to the test backend.
  1180  	makeTestRPCAndExpectItToReachBackend(ctx, t, cc, testBackendCh)
  1181  
  1182  	// Make sure an RLS request is sent out.
  1183  	verifyRLSRequest(t, rlsReqCh, true)
  1184  
  1185  	// Wait for the control channel to become READY, before reading the states
  1186  	// out of the wrapping top-level balancer.
  1187  	//
  1188  	// makeTestRPCAndExpectItToReachBackend repeatedly sends RPCs with short
  1189  	// deadlines until one succeeds. See its docstring for details.
  1190  	//
  1191  	// The following sequence of events is possible:
  1192  	// 1. When the first RPC is attempted above, a pending cache entry is
  1193  	//    created, an RLS request is sent out, and the pick is queued. The
  1194  	//    channel is in CONNECTING state.
  1195  	// 2. When the RLS response arrives, the pending cache entry is moved to the
  1196  	//    data cache, a child policy is created for the target specified in the
  1197  	//    response and a new picker is returned. The channel is still in
  1198  	//    CONNECTING, and retried pick is again queued.
  1199  	// 3. The child policy moves through the standard set of states, IDLE -->
  1200  	//    CONNECTING --> READY. And for each of these state changes, a new
  1201  	//    picker is sent on the channel. But the overall connectivity state of
  1202  	//    the channel is still CONNECTING.
  1203  	// 4. Right around the time when the child policy becomes READY, the
  1204  	//    deadline associated with the first RPC made by
  1205  	//    makeTestRPCAndExpectItToReachBackend() could expire, and it could send
  1206  	//    a new one. And because the internal state of the LB policy now
  1207  	//    contains a child policy which is READY, this RPC will succeed. But the
  1208  	//    RLS LB policy has yet to push a new picker on the channel.
  1209  	// 5. If we read the states seen by the top-level wrapping LB policy without
  1210  	//    waiting for the channel to become READY, there is a possibility that we
  1211  	//    might not see the READY state in there. And if that happens, we will
  1212  	//    see two extra states in the last check made in the test, and thereby
  1213  	//    the test would fail. Waiting for the channel to become READY here
  1214  	//    ensures that the test does not flake because of this rare sequence of
  1215  	//    events.
  1216  	testutils.AwaitState(ctx, t, cc, connectivity.Ready)
  1217  
  1218  	// Cache the state changes seen up to this point.
  1219  	states0 := ccWrapper.getStates()
  1220  
  1221  	// Push an updated service config. As mentioned earlier, the previous config
  1222  	// updates on the child policies did not happen in the context of a config
  1223  	// update on the parent. Hence, this update is required to force the
  1224  	// scenario which we are interesting in testing here, i.e child policies get
  1225  	// config updates as part of the parent policy getting its config update.
  1226  	scJSON = fmt.Sprintf(`
  1227  {
  1228    "loadBalancingConfig": [
  1229      {
  1230        "%s": {
  1231  		"routeLookupConfig": {
  1232  			"grpcKeybuilders": [{
  1233  				"names": [
  1234  					{"service": "grpc.testing.TestService"},
  1235  					{"service": "grpc.health.v1.Health"}
  1236  				]
  1237  			}],
  1238  			"lookupService": "%s",
  1239  			"cacheSizeBytes": 1000
  1240  		},
  1241  		"childPolicy": [{"%s": {}}],
  1242  		"childPolicyConfigTargetFieldName": "Backend"
  1243        }
  1244      }
  1245    ]
  1246  }`, topLevelBalancerName, rlsServer.Address, childPolicyName)
  1247  	sc = internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(scJSON)
  1248  	r.UpdateState(resolver.State{ServiceConfig: sc})
  1249  
  1250  	// Wait for the clientconn update to be processed by the RLS LB policy.
  1251  	select {
  1252  	case <-ctx.Done():
  1253  	case <-clientConnUpdateDone:
  1254  	}
  1255  
  1256  	// Even though the child policies used in this test make multiple calls to
  1257  	// UpdateState as part of handling their configs, we expect the RLS policy
  1258  	// to inhibit picker updates during this time frame, and send a single
  1259  	// picker once the config update is completely handled.
  1260  	states1 := ccWrapper.getStates()
  1261  	if len(states1) != len(states0)+1 {
  1262  		t.Fatalf("more than one state update seen. before %v, after %v", states0, states1)
  1263  	}
  1264  }