google.golang.org/grpc@v1.72.2/xds/internal/balancer/ringhash/e2e/ringhash_balancer_test.go (about) 1 /* 2 * 3 * Copyright 2022 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package ringhash_test 20 21 import ( 22 "context" 23 "errors" 24 "fmt" 25 "math" 26 rand "math/rand/v2" 27 "net" 28 "slices" 29 "strconv" 30 "sync" 31 "testing" 32 "time" 33 34 "github.com/google/go-cmp/cmp" 35 "github.com/google/go-cmp/cmp/cmpopts" 36 "github.com/google/uuid" 37 "google.golang.org/grpc" 38 "google.golang.org/grpc/backoff" 39 "google.golang.org/grpc/codes" 40 "google.golang.org/grpc/connectivity" 41 "google.golang.org/grpc/credentials/insecure" 42 "google.golang.org/grpc/internal" 43 "google.golang.org/grpc/internal/envconfig" 44 "google.golang.org/grpc/internal/grpctest" 45 "google.golang.org/grpc/internal/stubserver" 46 "google.golang.org/grpc/internal/testutils" 47 "google.golang.org/grpc/internal/testutils/xds/e2e" 48 "google.golang.org/grpc/metadata" 49 "google.golang.org/grpc/peer" 50 "google.golang.org/grpc/resolver" 51 "google.golang.org/grpc/resolver/manual" 52 "google.golang.org/grpc/status" 53 "google.golang.org/grpc/xds/internal/balancer/ringhash" 54 55 v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" 56 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 57 v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" 58 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 59 v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" 60 v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" 61 v3matcherpb "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3" 62 testgrpc "google.golang.org/grpc/interop/grpc_testing" 63 testpb "google.golang.org/grpc/interop/grpc_testing" 64 "google.golang.org/protobuf/types/known/wrapperspb" 65 66 _ "google.golang.org/grpc/xds" 67 ) 68 69 type s struct { 70 grpctest.Tester 71 } 72 73 func Test(t *testing.T) { 74 grpctest.RunSubTests(t, s{}) 75 } 76 77 const ( 78 defaultTestTimeout = 10 * time.Second 79 defaultTestShortTimeout = 10 * time.Millisecond 80 81 errorTolerance = .05 // For tests that rely on statistical significance. 82 83 virtualHostName = "test.server" 84 ) 85 86 // fastConnectParams disables connection attempts backoffs and lowers delays. 87 // This speeds up tests that rely on subchannel to move to transient failure. 88 var fastConnectParams = grpc.ConnectParams{ 89 Backoff: backoff.Config{ 90 BaseDelay: 10 * time.Millisecond, 91 }, 92 MinConnectTimeout: 100 * time.Millisecond, 93 } 94 95 // Tests the case where the ring contains a single subConn, and verifies that 96 // when the server goes down, the LB policy on the client automatically 97 // reconnects until the subChannel moves out of TRANSIENT_FAILURE. 
98 func (s) TestRingHash_ReconnectToMoveOutOfTransientFailure(t *testing.T) { 99 // Create a restartable listener to simulate server being down. 100 l, err := testutils.LocalTCPListener() 101 if err != nil { 102 t.Fatalf("testutils.LocalTCPListener() failed: %v", err) 103 } 104 lis := testutils.NewRestartableListener(l) 105 srv := stubserver.StartTestService(t, &stubserver.StubServer{ 106 Listener: lis, 107 EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return &testpb.Empty{}, nil }, 108 }) 109 defer srv.Stop() 110 111 // Create a clientConn with a manual resolver (which is used to push the 112 // address of the test backend), and a default service config pointing to 113 // the use of the ring_hash_experimental LB policy. 114 const ringHashServiceConfig = `{"loadBalancingConfig": [{"ring_hash_experimental":{}}]}` 115 r := manual.NewBuilderWithScheme("whatever") 116 dopts := []grpc.DialOption{ 117 grpc.WithTransportCredentials(insecure.NewCredentials()), 118 grpc.WithResolvers(r), 119 grpc.WithDefaultServiceConfig(ringHashServiceConfig), 120 grpc.WithConnectParams(fastConnectParams), 121 } 122 cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...) 123 if err != nil { 124 t.Fatalf("Failed to dial local test server: %v", err) 125 } 126 defer cc.Close() 127 128 // Push the address of the test backend through the manual resolver. 129 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: lis.Addr().String()}}}) 130 131 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 132 ctx = ringhash.SetXDSRequestHash(ctx, 0) 133 defer cancel() 134 client := testgrpc.NewTestServiceClient(cc) 135 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 136 t.Fatalf("rpc EmptyCall() failed: %v", err) 137 } 138 139 // Stopping the server listener will close the transport on the client, 140 // which will lead to the channel eventually moving to IDLE. The ring_hash 141 // LB policy is not expected to reconnect by itself at this point. 142 lis.Stop() 143 144 testutils.AwaitState(ctx, t, cc, connectivity.Idle) 145 146 // Make an RPC to get the ring_hash LB policy to reconnect and thereby move 147 // to TRANSIENT_FAILURE upon connection failure. 148 client.EmptyCall(ctx, &testpb.Empty{}) 149 150 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 151 152 // An RPC at this point is expected to fail. 153 if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil { 154 t.Fatal("EmptyCall RPC succeeded when the channel is in TRANSIENT_FAILURE") 155 } 156 157 // Restart the server listener. The ring_hash LB policy is expected to 158 // attempt to reconnect on its own and come out of TRANSIENT_FAILURE, even 159 // without an RPC attempt. 160 lis.Restart() 161 testutils.AwaitState(ctx, t, cc, connectivity.Ready) 162 163 // An RPC at this point is expected to succeed. 164 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 165 t.Fatalf("rpc EmptyCall() failed: %v", err) 166 } 167 } 168 169 // startTestServiceBackends starts num stub servers. It returns the list of 170 // stubservers. Servers are closed when the test is stopped. 
171 func startTestServiceBackends(t *testing.T, num int) []*stubserver.StubServer { 172 t.Helper() 173 174 servers := make([]*stubserver.StubServer, 0, num) 175 for i := 0; i < num; i++ { 176 server := stubserver.StartTestService(t, nil) 177 t.Cleanup(server.Stop) 178 servers = append(servers, server) 179 } 180 return servers 181 } 182 183 // backendAddrs returns a list of address strings for the given stubservers. 184 func backendAddrs(servers []*stubserver.StubServer) []string { 185 addrs := make([]string, 0, len(servers)) 186 for _, s := range servers { 187 addrs = append(addrs, s.Address) 188 } 189 return addrs 190 } 191 192 // backendOptions returns a slice of e2e.BackendOptions for the given server 193 // addresses. 194 func backendOptions(t *testing.T, serverAddrs []string) []e2e.BackendOptions { 195 t.Helper() 196 backendAddrs := [][]string{} 197 for _, addr := range serverAddrs { 198 backendAddrs = append(backendAddrs, []string{addr}) 199 } 200 return backendOptionsForEndpointsWithMultipleAddrs(t, backendAddrs) 201 } 202 203 // backendOptions returns a slice of e2e.BackendOptions for the given server 204 // addresses. Each endpoint can have multiple addresses. 205 func backendOptionsForEndpointsWithMultipleAddrs(t *testing.T, backendAddrs [][]string) []e2e.BackendOptions { 206 t.Helper() 207 208 var backendOpts []e2e.BackendOptions 209 for _, backend := range backendAddrs { 210 ports := []uint32{} 211 for _, addr := range backend { 212 ports = append(ports, testutils.ParsePort(t, addr)) 213 } 214 backendOpts = append(backendOpts, e2e.BackendOptions{Ports: ports}) 215 } 216 return backendOpts 217 } 218 219 // channelIDHashRoute returns a RouteConfiguration with a hash policy that 220 // hashes based on the channel ID. 221 func channelIDHashRoute(routeName, virtualHostDomain, clusterName string) *v3routepb.RouteConfiguration { 222 route := e2e.DefaultRouteConfig(routeName, virtualHostDomain, clusterName) 223 hashPolicy := v3routepb.RouteAction_HashPolicy{ 224 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_FilterState_{ 225 FilterState: &v3routepb.RouteAction_HashPolicy_FilterState{ 226 Key: "io.grpc.channel_id", 227 }, 228 }, 229 } 230 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 231 action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy} 232 return route 233 } 234 235 // checkRPCSendOK sends num RPCs to the client. It returns a map of backend 236 // addresses as keys and number of RPCs sent to this address as value. Abort the 237 // test if any RPC fails. 238 func checkRPCSendOK(ctx context.Context, t *testing.T, client testgrpc.TestServiceClient, num int) map[string]int { 239 t.Helper() 240 241 backendCount := make(map[string]int) 242 for i := 0; i < num; i++ { 243 var remote peer.Peer 244 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil { 245 t.Fatalf("rpc EmptyCall() failed: %v", err) 246 } 247 backendCount[remote.Addr.String()]++ 248 } 249 return backendCount 250 } 251 252 // makeUnreachableBackends returns a slice of addresses of backends that close 253 // connections as soon as they are established. Useful to simulate servers that 254 // are unreachable. 
255 func makeUnreachableBackends(t *testing.T, num int) []string { 256 t.Helper() 257 258 addrs := make([]string, 0, num) 259 for i := 0; i < num; i++ { 260 l, err := testutils.LocalTCPListener() 261 if err != nil { 262 t.Fatalf("testutils.LocalTCPListener() failed: %v", err) 263 } 264 lis := testutils.NewRestartableListener(l) 265 addrs = append(addrs, lis.Addr().String()) 266 267 // It is enough to fail the first connection attempt to put the subchannel 268 // in TRANSIENT_FAILURE. 269 go func() { lis.Accept() }() 270 271 // We don't close these listeners here, to make sure ports are 272 // not reused across them, and across tests. 273 lis.Stop() 274 t.Cleanup(func() { lis.Close() }) 275 } 276 return addrs 277 } 278 279 // setupManagementServerAndResolver sets up an xDS management server, creates 280 // bootstrap configuration pointing to that server and creates an xDS resolver 281 // using that configuration. 282 // 283 // Registers a cleanup function on t to stop the management server. 284 // 285 // Returns the management server, node ID and the xDS resolver builder. 286 func setupManagementServerAndResolver(t *testing.T) (*e2e.ManagementServer, string, resolver.Builder) { 287 t.Helper() 288 289 // Start an xDS management server. 290 xdsServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true}) 291 292 // Create bootstrap configuration pointing to the above management server. 293 nodeID := uuid.New().String() 294 bc := e2e.DefaultBootstrapContents(t, nodeID, xdsServer.Address) 295 296 // Create an xDS resolver with the above bootstrap configuration. 297 if internal.NewXDSResolverWithConfigForTesting == nil { 298 t.Fatalf("internal.NewXDSResolverWithConfigForTesting is nil") 299 } 300 r, err := internal.NewXDSResolverWithConfigForTesting.(func([]byte) (resolver.Builder, error))(bc) 301 if err != nil { 302 t.Fatalf("Failed to create xDS resolver for testing: %v", err) 303 } 304 305 return xdsServer, nodeID, r 306 } 307 308 // xdsUpdateOpts returns an e2e.UpdateOptions for the given node ID with the given xDS resources. 309 func xdsUpdateOpts(nodeID string, endpoints *v3endpointpb.ClusterLoadAssignment, cluster *v3clusterpb.Cluster, route *v3routepb.RouteConfiguration, listener *v3listenerpb.Listener) e2e.UpdateOptions { 310 return e2e.UpdateOptions{ 311 NodeID: nodeID, 312 Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints}, 313 Clusters: []*v3clusterpb.Cluster{cluster}, 314 Routes: []*v3routepb.RouteConfiguration{route}, 315 Listeners: []*v3listenerpb.Listener{listener}, 316 } 317 } 318 319 // Tests that when an aggregate cluster is configured with ring hash policy, and 320 // the first cluster is in transient failure, all RPCs are sent to the second 321 // cluster using the ring hash policy. 
322 func (s) TestRingHash_AggregateClusterFallBackFromRingHashAtStartup(t *testing.T) { 323 addrs := backendAddrs(startTestServiceBackends(t, 2)) 324 325 const primaryClusterName = "new_cluster_1" 326 const primaryServiceName = "new_eds_service_1" 327 const secondaryClusterName = "new_cluster_2" 328 const secondaryServiceName = "new_eds_service_2" 329 const clusterName = "aggregate_cluster" 330 331 ep1 := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 332 ClusterName: primaryServiceName, 333 Localities: []e2e.LocalityOptions{{ 334 Name: "locality0", 335 Weight: 1, 336 Backends: backendOptions(t, makeUnreachableBackends(t, 2)), 337 }}, 338 }) 339 ep2 := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 340 ClusterName: secondaryServiceName, 341 Localities: []e2e.LocalityOptions{{ 342 Name: "locality0", 343 Weight: 1, 344 Backends: backendOptions(t, addrs), 345 }}, 346 }) 347 primaryCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 348 ClusterName: primaryClusterName, 349 ServiceName: primaryServiceName, 350 }) 351 secondaryCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 352 ClusterName: secondaryClusterName, 353 ServiceName: secondaryServiceName, 354 }) 355 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 356 ClusterName: clusterName, 357 Type: e2e.ClusterTypeAggregate, 358 // TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is implemented, the 359 // policy will have to be set on the child clusters. 360 Policy: e2e.LoadBalancingPolicyRingHash, 361 ChildNames: []string{primaryClusterName, secondaryClusterName}, 362 }) 363 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 364 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 365 366 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 367 defer cancel() 368 369 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 370 updateOpts := e2e.UpdateOptions{ 371 NodeID: nodeID, 372 Endpoints: []*v3endpointpb.ClusterLoadAssignment{ep1, ep2}, 373 Clusters: []*v3clusterpb.Cluster{cluster, primaryCluster, secondaryCluster}, 374 Routes: []*v3routepb.RouteConfiguration{route}, 375 Listeners: []*v3listenerpb.Listener{listener}, 376 } 377 if err := xdsServer.Update(ctx, updateOpts); err != nil { 378 t.Fatalf("Failed to update xDS resources: %v", err) 379 } 380 381 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 382 if err != nil { 383 t.Fatalf("Failed to create client: %s", err) 384 } 385 defer conn.Close() 386 client := testgrpc.NewTestServiceClient(conn) 387 388 const numRPCs = 100 389 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 390 391 // Since this is using ring hash with the channel ID as the key, all RPCs 392 // are routed to the same backend of the secondary locality. 
393 if len(gotPerBackend) != 1 { 394 t.Errorf("Got RPCs routed to %v backends, want %v", len(gotPerBackend), 1) 395 } 396 397 var backend string 398 var got int 399 for backend, got = range gotPerBackend { 400 } 401 if !slices.Contains(addrs, backend) { 402 t.Errorf("Got RPCs routed to an unexpected backend: %v, want one of %v", backend, addrs) 403 } 404 if got != numRPCs { 405 t.Errorf("Got %v RPCs routed to a backend, want %v", got, 100) 406 } 407 } 408 409 func replaceDNSResolver(t *testing.T) *manual.Resolver { 410 mr := manual.NewBuilderWithScheme("dns") 411 412 dnsResolverBuilder := resolver.Get("dns") 413 resolver.Register(mr) 414 415 t.Cleanup(func() { resolver.Register(dnsResolverBuilder) }) 416 return mr 417 } 418 419 // Tests that when an aggregate cluster is configured with ring hash policy, and 420 // the first is an EDS cluster in transient failure, and the fallback is a 421 // logical DNS cluster, all RPCs are sent to the second cluster using the ring 422 // hash policy. 423 func (s) TestRingHash_AggregateClusterFallBackFromRingHashToLogicalDnsAtStartup(t *testing.T) { 424 const edsClusterName = "eds_cluster" 425 const logicalDNSClusterName = "logical_dns_cluster" 426 const clusterName = "aggregate_cluster" 427 428 backends := backendAddrs(startTestServiceBackends(t, 1)) 429 430 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 431 ClusterName: edsClusterName, 432 Localities: []e2e.LocalityOptions{{ 433 Name: "locality0", 434 Weight: 1, 435 Backends: backendOptions(t, makeUnreachableBackends(t, 1)), 436 Priority: 0, 437 }}, 438 }) 439 edsCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 440 ClusterName: edsClusterName, 441 ServiceName: edsClusterName, 442 }) 443 444 logicalDNSCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 445 Type: e2e.ClusterTypeLogicalDNS, 446 ClusterName: logicalDNSClusterName, 447 // The DNS values are not used because we fake DNS later on, but they 448 // are required to be present for the resource to be valid. 449 DNSHostName: "server.example.com", 450 DNSPort: 443, 451 }) 452 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 453 ClusterName: clusterName, 454 Type: e2e.ClusterTypeAggregate, 455 // TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is merged, the 456 // policy will have to be set on the child clusters. 
457 Policy: e2e.LoadBalancingPolicyRingHash, 458 ChildNames: []string{edsClusterName, logicalDNSClusterName}, 459 }) 460 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 461 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 462 463 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 464 defer cancel() 465 466 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 467 updateOpts := e2e.UpdateOptions{ 468 NodeID: nodeID, 469 Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints}, 470 Clusters: []*v3clusterpb.Cluster{cluster, edsCluster, logicalDNSCluster}, 471 Routes: []*v3routepb.RouteConfiguration{route}, 472 Listeners: []*v3listenerpb.Listener{listener}, 473 } 474 475 dnsR := replaceDNSResolver(t) 476 dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0]}}}) 477 478 if err := xdsServer.Update(ctx, updateOpts); err != nil { 479 t.Fatalf("Failed to update xDS resources: %v", err) 480 } 481 482 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 483 if err != nil { 484 t.Fatalf("Failed to create client: %s", err) 485 } 486 defer conn.Close() 487 client := testgrpc.NewTestServiceClient(conn) 488 489 gotPerBackend := checkRPCSendOK(ctx, t, client, 1) 490 var got string 491 for got = range gotPerBackend { 492 } 493 if want := backends[0]; got != want { 494 t.Errorf("Got RPCs routed to an unexpected got: %v, want %v", got, want) 495 } 496 } 497 498 // Tests that when an aggregate cluster is configured with ring hash policy, and 499 // it's first child is in transient failure, and the fallback is a logical DNS, 500 // the later recovers from transient failure when its backend becomes available. 501 func (s) TestRingHash_AggregateClusterFallBackFromRingHashToLogicalDnsAtStartupNoFailedRPCs(t *testing.T) { 502 const edsClusterName = "eds_cluster" 503 const logicalDNSClusterName = "logical_dns_cluster" 504 const clusterName = "aggregate_cluster" 505 506 backends := backendAddrs(startTestServiceBackends(t, 1)) 507 508 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 509 ClusterName: edsClusterName, 510 Localities: []e2e.LocalityOptions{{ 511 Name: "locality0", 512 Weight: 1, 513 Backends: backendOptions(t, makeUnreachableBackends(t, 1)), 514 Priority: 0, 515 }}, 516 }) 517 edsCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 518 ClusterName: edsClusterName, 519 ServiceName: edsClusterName, 520 }) 521 522 logicalDNSCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 523 Type: e2e.ClusterTypeLogicalDNS, 524 ClusterName: logicalDNSClusterName, 525 // The DNS values are not used because we fake DNS later on, but they 526 // are required to be present for the resource to be valid. 527 DNSHostName: "server.example.com", 528 DNSPort: 443, 529 }) 530 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 531 ClusterName: clusterName, 532 Type: e2e.ClusterTypeAggregate, 533 // TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is merged, the 534 // policy will have to be set on the child clusters. 
535 Policy: e2e.LoadBalancingPolicyRingHash, 536 ChildNames: []string{edsClusterName, logicalDNSClusterName}, 537 }) 538 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 539 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 540 541 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 542 defer cancel() 543 544 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 545 updateOpts := e2e.UpdateOptions{ 546 NodeID: nodeID, 547 Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints}, 548 Clusters: []*v3clusterpb.Cluster{cluster, edsCluster, logicalDNSCluster}, 549 Routes: []*v3routepb.RouteConfiguration{route}, 550 Listeners: []*v3listenerpb.Listener{listener}, 551 } 552 553 dnsR := replaceDNSResolver(t) 554 dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0]}}}) 555 556 if err := xdsServer.Update(ctx, updateOpts); err != nil { 557 t.Fatalf("Failed to update xDS resources: %v", err) 558 } 559 560 dialer := testutils.NewBlockingDialer() 561 cp := grpc.ConnectParams{ 562 // Increase backoff time, so that subconns stay in TRANSIENT_FAILURE 563 // for long enough to trigger potential problems. 564 Backoff: backoff.Config{ 565 BaseDelay: defaultTestTimeout, 566 }, 567 MinConnectTimeout: 0, 568 } 569 dopts := []grpc.DialOption{ 570 grpc.WithResolvers(xdsResolver), 571 grpc.WithTransportCredentials(insecure.NewCredentials()), 572 grpc.WithContextDialer(dialer.DialContext), 573 grpc.WithConnectParams(cp)} 574 conn, err := grpc.NewClient("xds:///test.server", dopts...) 575 if err != nil { 576 t.Fatalf("Failed to create client: %s", err) 577 } 578 defer conn.Close() 579 client := testgrpc.NewTestServiceClient(conn) 580 581 hold := dialer.Hold(backends[0]) 582 583 errCh := make(chan error, 2) 584 go func() { 585 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 586 errCh <- fmt.Errorf("first rpc UnaryCall() failed: %v", err) 587 return 588 } 589 errCh <- nil 590 }() 591 592 testutils.AwaitState(ctx, t, conn, connectivity.Connecting) 593 594 go func() { 595 // Start a second RPC at this point, which should be queued as well. 596 // This will fail if the priority policy fails to update the picker to 597 // point to the LOGICAL_DNS child; if it leaves it pointing to the EDS 598 // priority 1, then the RPC will fail, because all subchannels are in 599 // transient failure. 600 // 601 // Note that sending only the first RPC does not catch this case, 602 // because if the priority policy fails to update the picker, then the 603 // pick for the first RPC will not be retried. 604 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 605 errCh <- fmt.Errorf("second UnaryCall() failed: %v", err) 606 return 607 } 608 errCh <- nil 609 }() 610 611 // Wait for a connection attempt to backends[0]. 612 if !hold.Wait(ctx) { 613 t.Fatalf("Timeout while waiting for a connection attempt to %s", backends[0]) 614 } 615 // Allow the connection attempts to complete. 616 hold.Resume() 617 618 // RPCs should complete successfully. 619 for range []int{0, 1} { 620 select { 621 case err := <-errCh: 622 if err != nil { 623 t.Errorf("Expected 2 rpc to succeed, but at least one failed: %v", err) 624 } 625 case <-ctx.Done(): 626 t.Fatalf("Timed out waiting for RPCs to complete") 627 } 628 } 629 } 630 631 // endpointResource creates a ClusterLoadAssignment containing a single locality 632 // with the given addresses. 
633 func endpointResource(t *testing.T, clusterName string, addrs []string) *v3endpointpb.ClusterLoadAssignment { 634 t.Helper() 635 backendAddrs := [][]string{} 636 for _, addr := range addrs { 637 backendAddrs = append(backendAddrs, []string{addr}) 638 } 639 return endpointResourceForBackendsWithMultipleAddrs(t, clusterName, backendAddrs) 640 } 641 642 // endpointResourceForBackendsWithMultipleAddrs creates a ClusterLoadAssignment 643 // containing a single locality with the given addresses. 644 func endpointResourceForBackendsWithMultipleAddrs(t *testing.T, clusterName string, addrs [][]string) *v3endpointpb.ClusterLoadAssignment { 645 t.Helper() 646 647 // We must set the host name socket address in EDS, as the ring hash policy 648 // uses it to construct the ring. 649 host, _, err := net.SplitHostPort(addrs[0][0]) 650 if err != nil { 651 t.Fatalf("Failed to split host and port from stubserver: %v", err) 652 } 653 654 return e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 655 ClusterName: clusterName, 656 Host: host, 657 Localities: []e2e.LocalityOptions{{ 658 Backends: backendOptionsForEndpointsWithMultipleAddrs(t, addrs), 659 Weight: 1, 660 }}, 661 }) 662 } 663 664 // Tests that ring hash policy that hashes using channel id ensures all RPCs to 665 // go 1 particular backend. 666 func (s) TestRingHash_ChannelIdHashing(t *testing.T) { 667 backends := backendAddrs(startTestServiceBackends(t, 4)) 668 669 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 670 671 const clusterName = "cluster" 672 endpoints := endpointResource(t, clusterName, backends) 673 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 674 ClusterName: clusterName, 675 ServiceName: clusterName, 676 Policy: e2e.LoadBalancingPolicyRingHash, 677 }) 678 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 679 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 680 681 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 682 defer cancel() 683 684 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 685 t.Fatalf("Failed to update xDS resources: %v", err) 686 } 687 688 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 689 if err != nil { 690 t.Fatalf("Failed to create client: %s", err) 691 } 692 defer conn.Close() 693 client := testgrpc.NewTestServiceClient(conn) 694 695 const numRPCs = 100 696 received := checkRPCSendOK(ctx, t, client, numRPCs) 697 if len(received) != 1 { 698 t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1) 699 } 700 var got int 701 for _, got = range received { 702 } 703 if got != numRPCs { 704 t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs) 705 } 706 } 707 708 // headerHashRoute creates a RouteConfiguration with a hash policy that uses the 709 // provided header. 
710 func headerHashRoute(routeName, virtualHostName, clusterName, header string) *v3routepb.RouteConfiguration {
711 	route := e2e.DefaultRouteConfig(routeName, virtualHostName, clusterName)
712 	hashPolicy := v3routepb.RouteAction_HashPolicy{
713 		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
714 			Header: &v3routepb.RouteAction_HashPolicy_Header{
715 				HeaderName: header,
716 			},
717 		},
718 	}
719 	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
720 	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy}
721 	return route
722 }
723 
724 // Tests that a ring hash policy that hashes using a header value can send RPCs
725 // to specific backends based on their hash.
726 func (s) TestRingHash_HeaderHashing(t *testing.T) {
727 	backends := backendAddrs(startTestServiceBackends(t, 4))
728 
729 	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
730 
731 	const clusterName = "cluster"
732 	endpoints := endpointResource(t, clusterName, backends)
733 	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
734 		ClusterName: clusterName,
735 		ServiceName: clusterName,
736 		Policy:      e2e.LoadBalancingPolicyRingHash,
737 	})
738 	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
739 	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
740 
741 	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
742 	defer cancel()
743 
744 	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
745 		t.Fatalf("Failed to update xDS resources: %v", err)
746 	}
747 
748 	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
749 	if err != nil {
750 		t.Fatalf("Failed to create client: %s", err)
751 	}
752 	defer conn.Close()
753 	client := testgrpc.NewTestServiceClient(conn)
754 
755 	// Note each type of RPC contains a header value that will always be hashed
756 	// to a specific backend as the header value matches the value used to
757 	// create the entry in the ring.
758 	for _, backend := range backends {
759 		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0"))
760 		numRPCs := 10
761 		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
762 		if reqPerBackend[backend] != numRPCs {
763 			t.Errorf("Got RPCs routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
764 		}
765 	}
766 }
767 
768 // Tests that a ring hash policy that hashes using a header value with a regex
769 // rewrite aggregates all RPCs onto one backend.
770 func (s) TestRingHash_HeaderHashingWithRegexRewrite(t *testing.T) { 771 backends := backendAddrs(startTestServiceBackends(t, 4)) 772 773 clusterName := "cluster" 774 endpoints := endpointResource(t, clusterName, backends) 775 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 776 ClusterName: clusterName, 777 ServiceName: clusterName, 778 Policy: e2e.LoadBalancingPolicyRingHash, 779 }) 780 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 781 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 782 action.Route.HashPolicy[0].GetHeader().RegexRewrite = &v3matcherpb.RegexMatchAndSubstitute{ 783 Pattern: &v3matcherpb.RegexMatcher{ 784 EngineType: &v3matcherpb.RegexMatcher_GoogleRe2{}, 785 Regex: "[0-9]+", 786 }, 787 Substitution: "foo", 788 } 789 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 790 791 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 792 defer cancel() 793 794 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 795 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 796 t.Fatalf("Failed to update xDS resources: %v", err) 797 } 798 799 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 800 if err != nil { 801 t.Fatalf("Failed to create client: %s", err) 802 } 803 defer conn.Close() 804 client := testgrpc.NewTestServiceClient(conn) 805 806 // Note each type of RPC contains a header value that would always be hashed 807 // to a specific backend as the header value matches the value used to 808 // create the entry in the ring. However, the regex rewrites all numbers to 809 // "foo", and header values only differ by numbers, so they all end up 810 // hashing to the same value. 811 gotPerBackend := make(map[string]int) 812 for _, backend := range backends { 813 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0")) 814 res := checkRPCSendOK(ctx, t, client, 100) 815 for addr, count := range res { 816 gotPerBackend[addr] += count 817 } 818 } 819 if want := 1; len(gotPerBackend) != want { 820 t.Errorf("Got RPCs routed to %v backends, want %v", len(gotPerBackend), want) 821 } 822 var got int 823 for _, got = range gotPerBackend { 824 } 825 if want := 400; got != want { 826 t.Errorf("Got %v RPCs routed to a backend, want %v", got, want) 827 } 828 } 829 830 // computeIdealNumberOfRPCs computes the ideal number of RPCs to send so that 831 // we can observe an event happening with probability p, and the result will 832 // have value p with the given error tolerance. 833 // 834 // See https://github.com/grpc/grpc/blob/4f6e13bdda9e8c26d6027af97db4b368ca2b3069/test/cpp/end2end/xds/xds_end2end_test_lib.h#L941 835 // for an explanation of the formula. 836 func computeIdealNumberOfRPCs(t *testing.T, p, errorTolerance float64) int { 837 if p < 0 || p > 1 { 838 t.Fatal("p must be in (0, 1)") 839 } 840 numRPCs := math.Ceil(p * (1 - p) * 5. * 5. / errorTolerance / errorTolerance) 841 return int(numRPCs + 1000.) // add 1k as a buffer to avoid flakiness. 842 } 843 844 // setRingHashLBPolicyWithHighMinRingSize sets the ring hash policy with a high 845 // minimum ring size to ensure that the ring is large enough to distribute 846 // requests more uniformly across endpoints when a random hash is used. 
847 func setRingHashLBPolicyWithHighMinRingSize(t *testing.T, cluster *v3clusterpb.Cluster) {
848 	const minRingSize = 100000
849 	oldVal := envconfig.RingHashCap
850 	envconfig.RingHashCap = minRingSize
851 	t.Cleanup(func() {
852 		envconfig.RingHashCap = oldVal
853 	})
854 	// Increasing min ring size for random distribution.
855 	config := testutils.MarshalAny(t, &v3ringhashpb.RingHash{
856 		HashFunction:    v3ringhashpb.RingHash_XX_HASH,
857 		MinimumRingSize: &wrapperspb.UInt64Value{Value: minRingSize},
858 	})
859 	cluster.LoadBalancingPolicy = &v3clusterpb.LoadBalancingPolicy{
860 		Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{{
861 			TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
862 				Name:        "envoy.load_balancing_policies.ring_hash",
863 				TypedConfig: config,
864 			},
865 		}},
866 	}
867 }
868 
869 // Tests that the ring hash policy hashes using a random value when no hash policy is configured on the route.
870 func (s) TestRingHash_NoHashPolicy(t *testing.T) {
871 	backends := backendAddrs(startTestServiceBackends(t, 2))
872 	numRPCs := computeIdealNumberOfRPCs(t, .5, errorTolerance)
873 
874 	const clusterName = "cluster"
875 	endpoints := endpointResource(t, clusterName, backends)
876 	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
877 		ClusterName: clusterName,
878 		ServiceName: clusterName,
879 	})
880 	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
881 	route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName)
882 	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
883 
884 	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
885 	defer cancel()
886 
887 	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
888 	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
889 		t.Fatalf("Failed to update xDS resources: %v", err)
890 	}
891 
892 	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
893 	if err != nil {
894 		t.Fatalf("Failed to create client: %s", err)
895 	}
896 	defer conn.Close()
897 	client := testgrpc.NewTestServiceClient(conn)
898 
899 	// Send a large number of RPCs and check that they are distributed randomly.
900 	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
901 	for _, backend := range backends {
902 		got := float64(gotPerBackend[backend]) / float64(numRPCs)
903 		want := .5
904 		if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
905 			t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, want, errorTolerance)
906 		}
907 	}
908 }
909 
910 // Tests that we observe endpoint weights.
911 func (s) TestRingHash_EndpointWeights(t *testing.T) { 912 backends := backendAddrs(startTestServiceBackends(t, 3)) 913 914 const clusterName = "cluster" 915 backendOpts := []e2e.BackendOptions{ 916 {Ports: []uint32{testutils.ParsePort(t, backends[0])}}, 917 {Ports: []uint32{testutils.ParsePort(t, backends[1])}}, 918 {Ports: []uint32{testutils.ParsePort(t, backends[2])}, Weight: 2}, 919 } 920 921 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 922 ClusterName: clusterName, 923 Localities: []e2e.LocalityOptions{{ 924 Backends: backendOpts, 925 Weight: 1, 926 }}, 927 }) 928 endpoints.Endpoints[0].LbEndpoints[0].LoadBalancingWeight = wrapperspb.UInt32(uint32(1)) 929 endpoints.Endpoints[0].LbEndpoints[1].LoadBalancingWeight = wrapperspb.UInt32(uint32(1)) 930 endpoints.Endpoints[0].LbEndpoints[2].LoadBalancingWeight = wrapperspb.UInt32(uint32(2)) 931 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 932 ClusterName: clusterName, 933 ServiceName: clusterName, 934 }) 935 // Increasing min ring size for random distribution. 936 setRingHashLBPolicyWithHighMinRingSize(t, cluster) 937 route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName) 938 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 939 940 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 941 defer cancel() 942 943 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 944 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 945 t.Fatalf("Failed to update xDS resources: %v", err) 946 } 947 948 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 949 if err != nil { 950 t.Fatalf("Failed to create client: %s", err) 951 } 952 defer conn.Close() 953 client := testgrpc.NewTestServiceClient(conn) 954 955 // Send a large number of RPCs and check that they are distributed randomly. 956 numRPCs := computeIdealNumberOfRPCs(t, .25, errorTolerance) 957 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 958 959 got := float64(gotPerBackend[backends[0]]) / float64(numRPCs) 960 want := .25 961 if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) { 962 t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[0], got, want, errorTolerance) 963 } 964 got = float64(gotPerBackend[backends[1]]) / float64(numRPCs) 965 if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) { 966 t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[1], got, want, errorTolerance) 967 } 968 got = float64(gotPerBackend[backends[2]]) / float64(numRPCs) 969 want = .50 970 if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) { 971 t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[2], got, want, errorTolerance) 972 } 973 } 974 975 // Tests that ring hash policy evaluation will continue past the terminal hash 976 // policy if no results are produced yet. 
977 func (s) TestRingHash_ContinuesPastTerminalPolicyThatDoesNotProduceResult(t *testing.T) { 978 backends := backendAddrs(startTestServiceBackends(t, 2)) 979 980 const clusterName = "cluster" 981 endpoints := endpointResource(t, clusterName, backends) 982 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 983 ClusterName: clusterName, 984 ServiceName: clusterName, 985 Policy: e2e.LoadBalancingPolicyRingHash, 986 }) 987 988 route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName) 989 990 // Even though this hash policy is terminal, since it produces no result, we 991 // continue past it to find a policy that produces results. 992 hashPolicy := v3routepb.RouteAction_HashPolicy{ 993 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{ 994 Header: &v3routepb.RouteAction_HashPolicy_Header{ 995 HeaderName: "header_not_present", 996 }, 997 }, 998 Terminal: true, 999 } 1000 hashPolicy2 := v3routepb.RouteAction_HashPolicy{ 1001 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{ 1002 Header: &v3routepb.RouteAction_HashPolicy_Header{ 1003 HeaderName: "address_hash", 1004 }, 1005 }, 1006 } 1007 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 1008 action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy, &hashPolicy2} 1009 1010 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1011 1012 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1013 defer cancel() 1014 1015 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1016 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1017 t.Fatalf("Failed to update xDS resources: %v", err) 1018 } 1019 1020 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1021 if err != nil { 1022 t.Fatalf("Failed to create client: %s", err) 1023 } 1024 defer conn.Close() 1025 client := testgrpc.NewTestServiceClient(conn) 1026 1027 // - The first hash policy does not match because the header is not present. 1028 // If this hash policy was applied, it would spread the load across 1029 // backend 0 and 1, since a random hash would be used. 1030 // - In the second hash policy, each type of RPC contains a header 1031 // value that always hashes to backend 0, as the header value 1032 // matches the value used to create the entry in the ring. 1033 // We verify that the second hash policy is used by checking that all RPCs 1034 // are being routed to backend 0. 1035 wantBackend := backends[0] 1036 ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", wantBackend+"_0")) 1037 const numRPCs = 100 1038 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 1039 if got := gotPerBackend[wantBackend]; got != numRPCs { 1040 t.Errorf("Got %v RPCs routed to backend %v, want %v", got, wantBackend, numRPCs) 1041 } 1042 } 1043 1044 // Tests that a random hash is used when header hashing policy specified a 1045 // header field that the RPC did not have. 
1046 func (s) TestRingHash_HashOnHeaderThatIsNotPresent(t *testing.T) { 1047 backends := backendAddrs(startTestServiceBackends(t, 2)) 1048 wantFractionPerBackend := .5 1049 numRPCs := computeIdealNumberOfRPCs(t, wantFractionPerBackend, errorTolerance) 1050 1051 const clusterName = "cluster" 1052 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 1053 ClusterName: clusterName, 1054 Localities: []e2e.LocalityOptions{{ 1055 Backends: backendOptions(t, backends), 1056 Weight: 1, 1057 }}, 1058 }) 1059 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1060 ClusterName: clusterName, 1061 ServiceName: clusterName, 1062 }) 1063 setRingHashLBPolicyWithHighMinRingSize(t, cluster) 1064 route := headerHashRoute("new_route", virtualHostName, clusterName, "header_not_present") 1065 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1066 1067 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1068 defer cancel() 1069 1070 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1071 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1072 t.Fatalf("Failed to update xDS resources: %v", err) 1073 } 1074 1075 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1076 if err != nil { 1077 t.Fatalf("Failed to create client: %s", err) 1078 } 1079 defer conn.Close() 1080 client := testgrpc.NewTestServiceClient(conn) 1081 1082 // The first hash policy does not apply because the header is not present in 1083 // the RPCs that we are about to send. As a result, a random hash should be 1084 // used instead, resulting in a random request distribution. 1085 // We verify this by checking that the RPCs are distributed randomly. 1086 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 1087 for _, backend := range backends { 1088 got := float64(gotPerBackend[backend]) / float64(numRPCs) 1089 if !cmp.Equal(got, wantFractionPerBackend, cmpopts.EquateApprox(0, errorTolerance)) { 1090 t.Errorf("fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, wantFractionPerBackend, errorTolerance) 1091 } 1092 } 1093 } 1094 1095 // Tests that a random hash is used when only unsupported hash policies are 1096 // configured. 
1097 func (s) TestRingHash_UnsupportedHashPolicyDefaultToRandomHashing(t *testing.T) { 1098 backends := backendAddrs(startTestServiceBackends(t, 2)) 1099 wantFractionPerBackend := .5 1100 numRPCs := computeIdealNumberOfRPCs(t, wantFractionPerBackend, errorTolerance) 1101 1102 const clusterName = "cluster" 1103 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 1104 ClusterName: clusterName, 1105 Localities: []e2e.LocalityOptions{{ 1106 Backends: backendOptions(t, backends), 1107 Weight: 1, 1108 }}, 1109 }) 1110 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1111 ClusterName: clusterName, 1112 ServiceName: clusterName, 1113 }) 1114 setRingHashLBPolicyWithHighMinRingSize(t, cluster) 1115 route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName) 1116 unsupportedHashPolicy1 := v3routepb.RouteAction_HashPolicy{ 1117 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Cookie_{ 1118 Cookie: &v3routepb.RouteAction_HashPolicy_Cookie{Name: "cookie"}, 1119 }, 1120 } 1121 unsupportedHashPolicy2 := v3routepb.RouteAction_HashPolicy{ 1122 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_ConnectionProperties_{ 1123 ConnectionProperties: &v3routepb.RouteAction_HashPolicy_ConnectionProperties{SourceIp: true}, 1124 }, 1125 } 1126 unsupportedHashPolicy3 := v3routepb.RouteAction_HashPolicy{ 1127 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_QueryParameter_{ 1128 QueryParameter: &v3routepb.RouteAction_HashPolicy_QueryParameter{Name: "query_parameter"}, 1129 }, 1130 } 1131 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 1132 action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&unsupportedHashPolicy1, &unsupportedHashPolicy2, &unsupportedHashPolicy3} 1133 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1134 1135 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1136 defer cancel() 1137 1138 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1139 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1140 t.Fatalf("Failed to update xDS resources: %v", err) 1141 } 1142 1143 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1144 if err != nil { 1145 t.Fatalf("Failed to create client: %s", err) 1146 } 1147 defer conn.Close() 1148 client := testgrpc.NewTestServiceClient(conn) 1149 1150 // Since none of the hash policy are supported, a random hash should be 1151 // generated for every request. 1152 // We verify this by checking that the RPCs are distributed randomly. 1153 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 1154 for _, backend := range backends { 1155 got := float64(gotPerBackend[backend]) / float64(numRPCs) 1156 if !cmp.Equal(got, wantFractionPerBackend, cmpopts.EquateApprox(0, errorTolerance)) { 1157 t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, wantFractionPerBackend, errorTolerance) 1158 } 1159 } 1160 } 1161 1162 // Tests that unsupported hash policy types are all ignored before a supported 1163 // hash policy. 
1164 func (s) TestRingHash_UnsupportedHashPolicyUntilChannelIdHashing(t *testing.T) { 1165 backends := backendAddrs(startTestServiceBackends(t, 2)) 1166 1167 const clusterName = "cluster" 1168 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 1169 ClusterName: clusterName, 1170 Localities: []e2e.LocalityOptions{{ 1171 Backends: backendOptions(t, backends), 1172 Weight: 1, 1173 }}, 1174 }) 1175 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1176 ClusterName: clusterName, 1177 ServiceName: clusterName, 1178 }) 1179 setRingHashLBPolicyWithHighMinRingSize(t, cluster) 1180 route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName) 1181 unsupportedHashPolicy1 := v3routepb.RouteAction_HashPolicy{ 1182 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Cookie_{ 1183 Cookie: &v3routepb.RouteAction_HashPolicy_Cookie{Name: "cookie"}, 1184 }, 1185 } 1186 unsupportedHashPolicy2 := v3routepb.RouteAction_HashPolicy{ 1187 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_ConnectionProperties_{ 1188 ConnectionProperties: &v3routepb.RouteAction_HashPolicy_ConnectionProperties{SourceIp: true}, 1189 }, 1190 } 1191 unsupportedHashPolicy3 := v3routepb.RouteAction_HashPolicy{ 1192 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_QueryParameter_{ 1193 QueryParameter: &v3routepb.RouteAction_HashPolicy_QueryParameter{Name: "query_parameter"}, 1194 }, 1195 } 1196 channelIDhashPolicy := v3routepb.RouteAction_HashPolicy{ 1197 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_FilterState_{ 1198 FilterState: &v3routepb.RouteAction_HashPolicy_FilterState{ 1199 Key: "io.grpc.channel_id", 1200 }, 1201 }, 1202 } 1203 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 1204 action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&unsupportedHashPolicy1, &unsupportedHashPolicy2, &unsupportedHashPolicy3, &channelIDhashPolicy} 1205 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1206 1207 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1208 defer cancel() 1209 1210 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1211 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1212 t.Fatalf("Failed to update xDS resources: %v", err) 1213 } 1214 1215 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1216 if err != nil { 1217 t.Fatalf("Failed to create client: %s", err) 1218 } 1219 defer conn.Close() 1220 client := testgrpc.NewTestServiceClient(conn) 1221 1222 // Since only unsupported policies are present except for the last one 1223 // which is using the channel ID hashing policy, all requests should be 1224 // routed to the same backend. 1225 const numRPCs = 100 1226 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 1227 if len(gotPerBackend) != 1 { 1228 t.Errorf("Got RPCs routed to %v backends, want 1", len(gotPerBackend)) 1229 } 1230 var got int 1231 for _, got = range gotPerBackend { 1232 } 1233 if got != numRPCs { 1234 t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs) 1235 } 1236 } 1237 1238 // Tests that ring hash policy that hashes using a random value can spread RPCs 1239 // across all the backends according to locality weight. 
1240 func (s) TestRingHash_RandomHashingDistributionAccordingToLocalityAndEndpointWeight(t *testing.T) {
1241 	backends := backendAddrs(startTestServiceBackends(t, 2))
1242 
1243 	const clusterName = "cluster"
1244 	const locality1Weight = uint32(1)
1245 	const endpoint1Weight = uint32(1)
1246 	const locality2Weight = uint32(2)
1247 	const endpoint2Weight = uint32(2)
1248 	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
1249 		ClusterName: clusterName,
1250 		Localities: []e2e.LocalityOptions{
1251 			{
1252 				Backends: []e2e.BackendOptions{{
1253 					Ports:  []uint32{testutils.ParsePort(t, backends[0])},
1254 					Weight: endpoint1Weight,
1255 				}},
1256 				Weight: locality1Weight,
1257 			},
1258 			{
1259 				Backends: []e2e.BackendOptions{{
1260 					Ports:  []uint32{testutils.ParsePort(t, backends[1])},
1261 					Weight: endpoint2Weight,
1262 				}},
1263 				Weight: locality2Weight,
1264 			},
1265 		},
1266 	})
1267 	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
1268 		ClusterName: clusterName,
1269 		ServiceName: clusterName,
1270 	})
1271 	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
1272 	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)
1273 	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
1274 
1275 	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
1276 	defer cancel()
1277 
1278 	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
1279 	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
1280 		t.Fatalf("Failed to update xDS resources: %v", err)
1281 	}
1282 
1283 	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
1284 	if err != nil {
1285 		t.Fatalf("Failed to create client: %s", err)
1286 	}
1287 	defer conn.Close()
1288 	client := testgrpc.NewTestServiceClient(conn)
1289 
1290 	const weight1 = endpoint1Weight * locality1Weight
1291 	const weight2 = endpoint2Weight * locality2Weight
1292 	const wantRPCs1 = float64(weight1) / float64(weight1+weight2)
1293 	const wantRPCs2 = float64(weight2) / float64(weight1+weight2)
1294 	numRPCs := computeIdealNumberOfRPCs(t, math.Min(wantRPCs1, wantRPCs2), errorTolerance)
1295 
1296 	// Send a large number of RPCs and check that they are distributed according to the weights.
1297 	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
1298 	got := float64(gotPerBackend[backends[0]]) / float64(numRPCs)
1299 	if !cmp.Equal(got, wantRPCs1, cmpopts.EquateApprox(0, errorTolerance)) {
1300 		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[0], got, wantRPCs1, errorTolerance)
1301 	}
1302 	got = float64(gotPerBackend[backends[1]]) / float64(numRPCs)
1303 	if !cmp.Equal(got, wantRPCs2, cmpopts.EquateApprox(0, errorTolerance)) {
1304 		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[1], got, wantRPCs2, errorTolerance)
1305 	}
1306 }
1307 
1308 // Tests that a ring hash policy that hashes using a fixed string ensures all RPCs
1309 // go to one particular backend, and that subsequent hash policies are ignored
1310 // because the first hash policy is marked terminal.
1311 func (s) TestRingHash_FixedHashingTerminalPolicy(t *testing.T) { 1312 backends := backendAddrs(startTestServiceBackends(t, 2)) 1313 const clusterName = "cluster" 1314 endpoints := endpointResource(t, clusterName, backends) 1315 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1316 ClusterName: clusterName, 1317 ServiceName: clusterName, 1318 Policy: e2e.LoadBalancingPolicyRingHash, 1319 }) 1320 1321 route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName) 1322 1323 hashPolicy := v3routepb.RouteAction_HashPolicy{ 1324 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{ 1325 Header: &v3routepb.RouteAction_HashPolicy_Header{ 1326 HeaderName: "fixed_string", 1327 }, 1328 }, 1329 Terminal: true, 1330 } 1331 hashPolicy2 := v3routepb.RouteAction_HashPolicy{ 1332 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{ 1333 Header: &v3routepb.RouteAction_HashPolicy_Header{ 1334 HeaderName: "random_string", 1335 }, 1336 }, 1337 } 1338 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 1339 action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy, &hashPolicy2} 1340 1341 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1342 1343 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1344 defer cancel() 1345 1346 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1347 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1348 t.Fatalf("Failed to update xDS resources: %v", err) 1349 } 1350 1351 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1352 if err != nil { 1353 t.Fatalf("Failed to create client: %s", err) 1354 } 1355 defer conn.Close() 1356 client := testgrpc.NewTestServiceClient(conn) 1357 1358 // Check that despite the matching random string header, since the fixed 1359 // string hash policy is terminal, only the fixed string hash policy applies 1360 // and requests all get routed to the same host. 1361 gotPerBackend := make(map[string]int) 1362 const numRPCs = 100 1363 for i := 0; i < numRPCs; i++ { 1364 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs( 1365 "fixed_string", backends[0]+"_0", 1366 "random_string", fmt.Sprintf("%d", rand.Int())), 1367 ) 1368 var remote peer.Peer 1369 _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)) 1370 if err != nil { 1371 t.Fatalf("rpc EmptyCall() failed: %v", err) 1372 } 1373 gotPerBackend[remote.Addr.String()]++ 1374 } 1375 1376 if len(gotPerBackend) != 1 { 1377 t.Error("Got RPCs routed to multiple backends, want a single backend") 1378 } 1379 if got := gotPerBackend[backends[0]]; got != numRPCs { 1380 t.Errorf("Got %v RPCs routed to %v, want %v", got, backends[0], numRPCs) 1381 } 1382 } 1383 1384 // TestRingHash_IdleToReady tests that the channel will go from idle to ready 1385 // via connecting; (though it is not possible to catch the connecting state 1386 // before moving to ready via the public API). 1387 // TODO: we should be able to catch all state transitions by using the internal.SubscribeToConnectivityStateChanges API. 
1388 func (s) TestRingHash_IdleToReady(t *testing.T) { 1389 backends := backendAddrs(startTestServiceBackends(t, 1)) 1390 1391 const clusterName = "cluster" 1392 endpoints := endpointResource(t, clusterName, backends) 1393 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1394 ClusterName: clusterName, 1395 ServiceName: clusterName, 1396 Policy: e2e.LoadBalancingPolicyRingHash, 1397 }) 1398 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 1399 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1400 1401 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1402 defer cancel() 1403 1404 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1405 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1406 t.Fatalf("Failed to update xDS resources: %v", err) 1407 } 1408 1409 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1410 if err != nil { 1411 t.Fatalf("Failed to create client: %s", err) 1412 } 1413 defer conn.Close() 1414 testutils.AwaitState(ctx, t, conn, connectivity.Idle) 1415 1416 client := testgrpc.NewTestServiceClient(conn) 1417 checkRPCSendOK(ctx, t, client, 1) 1418 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 1419 } 1420 1421 // Test that the channel will transition to READY once it starts 1422 // connecting even if there are no RPCs being sent to the picker. 1423 func (s) TestRingHash_ContinuesConnectingWithoutPicks(t *testing.T) { 1424 backend := stubserver.StartTestService(t, &stubserver.StubServer{ 1425 // We expect the server EmptyCall to not be call here because the 1426 // aggregated channel state is never READY when the call is pending. 1427 EmptyCallF: func(ctx context.Context, _ *testpb.Empty) (*testpb.Empty, error) { 1428 t.Errorf("EmptyCall() should not have been called") 1429 return &testpb.Empty{}, nil 1430 }, 1431 }) 1432 defer backend.Stop() 1433 1434 unReachableServerAddr := makeUnreachableBackends(t, 1)[0] 1435 1436 const clusterName = "cluster" 1437 endpoints := endpointResource(t, clusterName, []string{backend.Address, unReachableServerAddr}) 1438 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1439 ClusterName: clusterName, 1440 ServiceName: clusterName, 1441 Policy: e2e.LoadBalancingPolicyRingHash, 1442 }) 1443 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1444 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1445 1446 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1447 defer cancel() 1448 1449 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1450 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1451 t.Fatalf("Failed to update xDS resources: %v", err) 1452 } 1453 1454 dialer := testutils.NewBlockingDialer() 1455 dopts := []grpc.DialOption{ 1456 grpc.WithResolvers(xdsResolver), 1457 grpc.WithTransportCredentials(insecure.NewCredentials()), 1458 grpc.WithContextDialer(dialer.DialContext), 1459 } 1460 conn, err := grpc.NewClient("xds:///test.server", dopts...) 
1461 if err != nil { 1462 t.Fatalf("Failed to create client: %s", err) 1463 } 1464 defer conn.Close() 1465 client := testgrpc.NewTestServiceClient(conn) 1466 1467 hold := dialer.Hold(backend.Address) 1468 1469 rpcCtx, rpcCancel := context.WithCancel(ctx) 1470 go func() { 1471 rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", unReachableServerAddr+"_0")) 1472 _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}) 1473 if status.Code(err) != codes.Canceled { 1474 t.Errorf("Expected RPC to be canceled, got error: %v", err) 1475 } 1476 }() 1477 1478 // Wait for the connection attempt to the real backend. 1479 if !hold.Wait(ctx) { 1480 t.Fatalf("Timeout waiting for connection attempt to backend %v.", backend.Address) 1481 } 1482 // Now cancel the RPC while we are still connecting. 1483 rpcCancel() 1484 1485 // This allows the connection attempts to continue. The RPC was cancelled 1486 // before the backend was connected, but the backend is up. The conn 1487 // becomes Ready due to the connection attempt to the existing backend 1488 // succeeding, despite no new RPC being sent. 1489 hold.Resume() 1490 1491 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 1492 } 1493 1494 // Tests that when the first pick is down leading to a transient failure, we 1495 // will move on to the next ring hash entry. 1496 func (s) TestRingHash_TransientFailureCheckNextOne(t *testing.T) { 1497 backends := backendAddrs(startTestServiceBackends(t, 1)) 1498 unReachableBackends := makeUnreachableBackends(t, 1) 1499 1500 const clusterName = "cluster" 1501 endpoints := endpointResource(t, clusterName, append(unReachableBackends, backends...)) 1502 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1503 ClusterName: clusterName, 1504 ServiceName: clusterName, 1505 Policy: e2e.LoadBalancingPolicyRingHash, 1506 }) 1507 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1508 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1509 1510 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1511 defer cancel() 1512 1513 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1514 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1515 t.Fatalf("Failed to update xDS resources: %v", err) 1516 } 1517 1518 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1519 if err != nil { 1520 t.Fatalf("Failed to create client: %s", err) 1521 } 1522 defer conn.Close() 1523 client := testgrpc.NewTestServiceClient(conn) 1524 1525 // Note each type of RPC contains a header value that will always be hashed 1526 // the value that was used to place the non-existent endpoint on the ring, 1527 // but it still gets routed to the backend that is up. 1528 ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", unReachableBackends[0]+"_0")) 1529 reqPerBackend := checkRPCSendOK(ctx, t, client, 1) 1530 var got string 1531 for got = range reqPerBackend { 1532 } 1533 if want := backends[0]; got != want { 1534 t.Errorf("Got RPC routed to addr %v, want %v", got, want) 1535 } 1536 } 1537 1538 // Tests for a bug seen in the wild in c-core, where ring_hash started with no 1539 // endpoints and reported TRANSIENT_FAILURE, then got an update with endpoints 1540 // and reported IDLE, but the picker update was squelched, so it failed to ever 1541 // get reconnected. 
1542 func (s) TestRingHash_ReattemptWhenGoingFromTransientFailureToIdle(t *testing.T) { 1543 const clusterName = "cluster" 1544 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 1545 ClusterName: clusterName, 1546 Localities: []e2e.LocalityOptions{{}}, // note the empty locality (no endpoint). 1547 }) 1548 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1549 ClusterName: clusterName, 1550 ServiceName: clusterName, 1551 Policy: e2e.LoadBalancingPolicyRingHash, 1552 }) 1553 route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName) 1554 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1555 1556 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1557 defer cancel() 1558 1559 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1560 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1561 t.Fatalf("Failed to update xDS resources: %v", err) 1562 } 1563 1564 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1565 if err != nil { 1566 t.Fatalf("Failed to create client: %s", err) 1567 } 1568 defer conn.Close() 1569 testutils.AwaitState(ctx, t, conn, connectivity.Idle) 1570 1571 // There are no endpoints in EDS. RPCs should fail and the channel should 1572 // transition to transient failure. 1573 client := testgrpc.NewTestServiceClient(conn) 1574 if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil { 1575 t.Errorf("rpc EmptyCall() succeeded, want error") 1576 } 1577 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 1578 1579 t.Log("Updating EDS with a new backend endpoint.") 1580 backends := backendAddrs(startTestServiceBackends(t, 1)) 1581 endpoints = e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 1582 ClusterName: clusterName, 1583 Localities: []e2e.LocalityOptions{{ 1584 Backends: backendOptions(t, backends), 1585 Weight: 1, 1586 }}, 1587 }) 1588 if err = xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1589 t.Fatalf("Failed to update xDS resources: %v", err) 1590 } 1591 1592 // A WaitForReady RPC should succeed, and the channel should report READY. 1593 if _, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil { 1594 t.Errorf("rpc EmptyCall() failed: %v", err) 1595 } 1596 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 1597 } 1598 1599 // Tests that when all backends are down and then up, we may pick a TF backend 1600 // and we will then jump to ready backend. 
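// This relies on the ring hash pick behavior from gRFC A42: the picker starts
// at the ring entry the request hash maps to and, roughly speaking, walks the
// ring past entries that are not READY (triggering connection attempts as
// needed) until it finds one that is.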
1601 func (s) TestRingHash_TransientFailureSkipToAvailableReady(t *testing.T) { 1602 emptyCallF := func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { 1603 return &testpb.Empty{}, nil 1604 } 1605 lis, err := testutils.LocalTCPListener() 1606 if err != nil { 1607 t.Fatalf("Failed to create listener: %v", err) 1608 } 1609 restartableListener1 := testutils.NewRestartableListener(lis) 1610 restartableServer1 := stubserver.StartTestService(t, &stubserver.StubServer{ 1611 Listener: restartableListener1, 1612 EmptyCallF: emptyCallF, 1613 }) 1614 defer restartableServer1.Stop() 1615 1616 lis, err = testutils.LocalTCPListener() 1617 if err != nil { 1618 t.Fatalf("Failed to create listener: %v", err) 1619 } 1620 restartableListener2 := testutils.NewRestartableListener(lis) 1621 restartableServer2 := stubserver.StartTestService(t, &stubserver.StubServer{ 1622 Listener: restartableListener2, 1623 EmptyCallF: emptyCallF, 1624 }) 1625 defer restartableServer2.Stop() 1626 1627 unReachableBackends := makeUnreachableBackends(t, 2) 1628 1629 const clusterName = "cluster" 1630 backends := []string{restartableServer1.Address, restartableServer2.Address} 1631 backends = append(backends, unReachableBackends...) 1632 endpoints := endpointResource(t, clusterName, backends) 1633 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1634 ClusterName: clusterName, 1635 ServiceName: clusterName, 1636 Policy: e2e.LoadBalancingPolicyRingHash, 1637 }) 1638 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1639 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1640 1641 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1642 defer cancel() 1643 1644 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1645 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1646 t.Fatalf("Failed to update xDS resources: %v", err) 1647 } 1648 opts := []grpc.DialOption{ 1649 grpc.WithConnectParams(grpc.ConnectParams{ 1650 // Disable backoff to speed up the test. 1651 MinConnectTimeout: 100 * time.Millisecond, 1652 }), 1653 grpc.WithResolvers(xdsResolver), 1654 grpc.WithTransportCredentials(insecure.NewCredentials()), 1655 } 1656 conn, err := grpc.NewClient("xds:///test.server", opts...) 1657 if err != nil { 1658 t.Fatalf("Failed to create client: %s", err) 1659 } 1660 defer conn.Close() 1661 client := testgrpc.NewTestServiceClient(conn) 1662 1663 testutils.AwaitState(ctx, t, conn, connectivity.Idle) 1664 1665 // Test starts with backends not listening. 1666 restartableListener1.Stop() 1667 restartableListener2.Stop() 1668 1669 // Send a request with a hash that should go to restartableServer1. 1670 // Because it is not accepting connections, and no other backend is 1671 // listening, the RPC fails. 1672 ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", restartableServer1.Address+"_0")) 1673 if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil { 1674 t.Fatalf("rpc EmptyCall() succeeded, want error") 1675 } 1676 1677 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 1678 1679 // Bring up first backend. The channel should become Ready without any 1680 // picks, because in TF, we are always trying to connect to at least one 1681 // backend at all times. 1682 restartableListener1.Restart() 1683 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 1684 1685 // Bring down backend 1 and bring up backend 2. 
1686 // Note the RPC contains a header value that will always be hashed to 1687 // backend 1. So by purposely bringing down backend 1 and bringing up 1688 // another backend, this will ensure Picker's first choice of backend 1 1689 // fails and it will go through the remaining subchannels to find one in 1690 // READY. Since the entries in the ring are pretty distributed and we have 1691 // unused ports to fill the ring, it is almost guaranteed that the Picker 1692 // will go through some non-READY entries and skip them as per design. 1693 t.Logf("bringing down backend 1") 1694 restartableListener1.Stop() 1695 1696 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 1697 if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil { 1698 t.Fatalf("rpc EmptyCall() succeeded, want error") 1699 } 1700 1701 t.Logf("bringing up backend 2") 1702 restartableListener2.Restart() 1703 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 1704 1705 wantPeerAddr := "" 1706 for wantPeerAddr != restartableServer2.Address { 1707 p := peer.Peer{} 1708 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p)); errors.Is(err, context.DeadlineExceeded) { 1709 t.Fatalf("Timed out waiting for rpc EmptyCall() to be routed to the expected backend") 1710 } 1711 wantPeerAddr = p.Addr.String() 1712 } 1713 } 1714 1715 // Tests that when all backends are down, we keep reattempting. 1716 func (s) TestRingHash_ReattemptWhenAllEndpointsUnreachable(t *testing.T) { 1717 lis, err := testutils.LocalTCPListener() 1718 if err != nil { 1719 t.Fatalf("Failed to create listener: %v", err) 1720 } 1721 restartableListener := testutils.NewRestartableListener(lis) 1722 restartableServer := stubserver.StartTestService(t, &stubserver.StubServer{ 1723 Listener: restartableListener, 1724 EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { 1725 return &testpb.Empty{}, nil 1726 }, 1727 }) 1728 defer restartableServer.Stop() 1729 1730 const clusterName = "cluster" 1731 endpoints := endpointResource(t, clusterName, []string{restartableServer.Address}) 1732 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1733 ClusterName: clusterName, 1734 ServiceName: clusterName, 1735 Policy: e2e.LoadBalancingPolicyRingHash, 1736 }) 1737 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1738 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1739 1740 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1741 defer cancel() 1742 1743 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1744 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1745 t.Fatalf("Failed to update xDS resources: %v", err) 1746 } 1747 1748 dopts := []grpc.DialOption{ 1749 grpc.WithResolvers(xdsResolver), 1750 grpc.WithTransportCredentials(insecure.NewCredentials()), 1751 grpc.WithConnectParams(fastConnectParams), 1752 } 1753 conn, err := grpc.NewClient("xds:///test.server", dopts...) 
1754 if err != nil { 1755 t.Fatalf("Failed to create client: %s", err) 1756 } 1757 defer conn.Close() 1758 client := testgrpc.NewTestServiceClient(conn) 1759 1760 testutils.AwaitState(ctx, t, conn, connectivity.Idle) 1761 1762 t.Log("Stopping the backend server") 1763 restartableListener.Stop() 1764 1765 if _, err = client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) != codes.Unavailable { 1766 t.Fatalf("rpc EmptyCall() succeeded, want Unavailable error") 1767 } 1768 1769 // Wait for channel to fail. 1770 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 1771 1772 t.Log("Restarting the backend server") 1773 restartableListener.Restart() 1774 1775 // Wait for channel to become READY without any pending RPC. 1776 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 1777 } 1778 1779 // Tests that when a backend goes down, we will move on to the next subchannel 1780 // (with a lower priority). When the backend comes back up, traffic will move 1781 // back. 1782 func (s) TestRingHash_SwitchToLowerPriorityAndThenBack(t *testing.T) { 1783 lis, err := testutils.LocalTCPListener() 1784 if err != nil { 1785 t.Fatalf("Failed to create listener: %v", err) 1786 } 1787 restartableListener := testutils.NewRestartableListener(lis) 1788 restartableServer := stubserver.StartTestService(t, &stubserver.StubServer{ 1789 Listener: restartableListener, 1790 EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { 1791 return &testpb.Empty{}, nil 1792 }, 1793 }) 1794 defer restartableServer.Stop() 1795 1796 otherBackend := backendAddrs(startTestServiceBackends(t, 1))[0] 1797 1798 // We must set the host name socket address in EDS, as the ring hash policy 1799 // uses it to construct the ring. 1800 host, _, err := net.SplitHostPort(otherBackend) 1801 if err != nil { 1802 t.Fatalf("Failed to split host and port from stubserver: %v", err) 1803 } 1804 1805 const clusterName = "cluster" 1806 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 1807 ClusterName: clusterName, 1808 Host: host, 1809 Localities: []e2e.LocalityOptions{{ 1810 Backends: backendOptions(t, []string{restartableServer.Address}), 1811 Weight: 1, 1812 }, { 1813 Backends: backendOptions(t, []string{otherBackend}), 1814 Weight: 1, 1815 Priority: 1, 1816 }}}) 1817 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1818 ClusterName: clusterName, 1819 ServiceName: clusterName, 1820 Policy: e2e.LoadBalancingPolicyRingHash, 1821 }) 1822 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1823 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1824 1825 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1826 defer cancel() 1827 1828 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1829 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1830 t.Fatalf("Failed to update xDS resources: %v", err) 1831 } 1832 1833 dopts := []grpc.DialOption{ 1834 grpc.WithResolvers(xdsResolver), 1835 grpc.WithTransportCredentials(insecure.NewCredentials()), 1836 grpc.WithConnectParams(fastConnectParams), 1837 } 1838 conn, err := grpc.NewClient("xds:///test.server", dopts...) 
1839 if err != nil { 1840 t.Fatalf("Failed to create client: %s", err) 1841 } 1842 defer conn.Close() 1843 client := testgrpc.NewTestServiceClient(conn) 1844 1845 // Note each type of RPC contains a header value that will always be hashed 1846 // to the value that was used to place the non-existent endpoint on the ring. 1847 ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", restartableServer.Address+"_0")) 1848 var got string 1849 for got = range checkRPCSendOK(ctx, t, client, 1) { 1850 } 1851 if want := restartableServer.Address; got != want { 1852 t.Fatalf("Got RPC routed to addr %v, want %v", got, want) 1853 } 1854 1855 // Trigger failure with the existing backend, which should cause the 1856 // balancer to go in transient failure and the priority balancer to move 1857 // to the lower priority. 1858 restartableListener.Stop() 1859 1860 for { 1861 p := peer.Peer{} 1862 _, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true), grpc.Peer(&p)) 1863 1864 // Ignore errors: we may need to attempt to send an RPC to detect the 1865 // failure (the next write on connection fails). 1866 if err == nil { 1867 if got, want := p.Addr.String(), otherBackend; got != want { 1868 t.Fatalf("Got RPC routed to addr %v, want %v", got, want) 1869 } 1870 break 1871 } 1872 } 1873 1874 // Now we start the backend with the address hash that is used in the 1875 // metadata, so eventually RPCs should be routed to it, since it is in a 1876 // locality with higher priority. 1877 peerAddr := "" 1878 restartableListener.Restart() 1879 for peerAddr != restartableServer.Address { 1880 p := peer.Peer{} 1881 _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p)) 1882 if errors.Is(err, context.DeadlineExceeded) { 1883 t.Fatalf("Timed out waiting for rpc EmptyCall() to be routed to the expected backend") 1884 } 1885 peerAddr = p.Addr.String() 1886 } 1887 } 1888 1889 // Tests that when we trigger internal connection attempts without picks, we 1890 // keep retrying all the SubConns that have reported TF previously. 1891 func (s) TestRingHash_ContinuesConnectingWithoutPicksToMultipleSubConnsConcurrently(t *testing.T) { 1892 const backendsCount = 4 1893 backends := backendAddrs(startTestServiceBackends(t, backendsCount)) 1894 1895 const clusterName = "cluster" 1896 1897 endpoints := endpointResource(t, clusterName, backends) 1898 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1899 ClusterName: clusterName, 1900 ServiceName: clusterName, 1901 Policy: e2e.LoadBalancingPolicyRingHash, 1902 }) 1903 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1904 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1905 1906 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1907 defer cancel() 1908 1909 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1910 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1911 t.Fatalf("Failed to update xDS resources: %v", err) 1912 } 1913 1914 dialer := testutils.NewBlockingDialer() 1915 dialOpts := []grpc.DialOption{ 1916 grpc.WithResolvers(xdsResolver), 1917 grpc.WithTransportCredentials(insecure.NewCredentials()), 1918 grpc.WithContextDialer(dialer.DialContext), 1919 grpc.WithConnectParams(fastConnectParams), 1920 } 1921 conn, err := grpc.NewClient("xds:///test.server", dialOpts...) 
1922 if err != nil { 1923 t.Fatalf("Failed to create client: %s", err) 1924 } 1925 defer conn.Close() 1926 1927 // Create holds for each backend address to delay a successful connection 1928 // until the end of the test. 1929 holds := make([]*testutils.Hold, backendsCount) 1930 for i := 0; i < len(backends); i++ { 1931 holds[i] = dialer.Hold(backends[i]) 1932 } 1933 1934 client := testgrpc.NewTestServiceClient(conn) 1935 1936 rpcCtx, rpcCancel := context.WithCancel(ctx) 1937 errCh := make(chan error, 1) 1938 go func() { 1939 rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backends[0]+"_0")) 1940 _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}) 1941 if status.Code(err) == codes.Canceled { 1942 errCh <- nil 1943 return 1944 } 1945 errCh <- err 1946 }() 1947 1948 // Wait for the RPC to trigger a connection attempt to the first address, 1949 // then cancel the RPC. No other connection attempts should be started yet. 1950 if !holds[0].Wait(ctx) { 1951 t.Fatalf("Timeout waiting for connection attempt to backend 0") 1952 } 1953 rpcCancel() 1954 if err := <-errCh; err != nil { 1955 t.Fatalf("Expected RPC to fail be canceled, got %v", err) 1956 } 1957 1958 // In every iteration of the following loop, we count the number of backends 1959 // that are dialed. After counting, we fail all the connection attempts. 1960 // This should cause the number of dialed backends to increase by 1 in every 1961 // iteration of the loop as ringhash tries to exit TRANSIENT_FAILURE. 1962 activeAddrs := map[string]bool{} 1963 for wantBackendCount := 1; wantBackendCount <= backendsCount; wantBackendCount++ { 1964 newAddrIdx := -1 1965 for ; ctx.Err() == nil; <-time.After(time.Millisecond) { 1966 for i, hold := range holds { 1967 if !hold.IsStarted() { 1968 continue 1969 } 1970 if _, ok := activeAddrs[backends[i]]; ok { 1971 continue 1972 } 1973 activeAddrs[backends[i]] = true 1974 newAddrIdx = i 1975 } 1976 if len(activeAddrs) > wantBackendCount { 1977 t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantBackendCount) 1978 } 1979 if len(activeAddrs) == wantBackendCount { 1980 break 1981 } 1982 } 1983 1984 // Wait for a short time and verify no more backends are contacted. 1985 <-time.After(defaultTestShortTimeout) 1986 for i, hold := range holds { 1987 if !hold.IsStarted() { 1988 continue 1989 } 1990 activeAddrs[backends[i]] = true 1991 } 1992 if len(activeAddrs) != wantBackendCount { 1993 t.Fatalf("Unexpected number of backends dialed: got %d, want %d", len(activeAddrs), wantBackendCount) 1994 } 1995 1996 // Create a new hold for the address dialed in this iteration and fail 1997 // the existing hold. 1998 hold := holds[newAddrIdx] 1999 holds[newAddrIdx] = dialer.Hold(backends[newAddrIdx]) 2000 hold.Fail(errors.New("Test error")) 2001 } 2002 2003 // Allow the request to a backend to succeed. 2004 if !holds[1].Wait(ctx) { 2005 t.Fatalf("Context timed out waiting %q to be dialed again.", backends[1]) 2006 } 2007 holds[1].Resume() 2008 2009 // Wait for channel to become READY without any pending RPC. 2010 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2011 } 2012 2013 // Tests that first address of an endpoint is used to generate the ring. The 2014 // test sends a request to a random endpoint. The test then reverses the 2015 // addresses of every endpoint and verifies that an RPC with header pointing to 2016 // the second address of the endpoint is sent to the initial address. 
The test 2017 // then swaps the second and third address of the endpoint and verifies that an 2018 // RPC with the header used earlier still reaches the same backend. 2019 func (s) TestRingHash_ReorderAddressessWithinEndpoint(t *testing.T) { 2020 origDualstackEndpointsEnabled := envconfig.XDSDualstackEndpointsEnabled 2021 defer func() { 2022 envconfig.XDSDualstackEndpointsEnabled = origDualstackEndpointsEnabled 2023 }() 2024 envconfig.XDSDualstackEndpointsEnabled = true 2025 backends := backendAddrs(startTestServiceBackends(t, 6)) 2026 2027 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 2028 2029 const clusterName = "cluster" 2030 addrGroups := [][]string{ 2031 {backends[0], backends[1], backends[2]}, 2032 {backends[3], backends[4], backends[5]}, 2033 } 2034 endpoints := endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups) 2035 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 2036 ClusterName: clusterName, 2037 ServiceName: clusterName, 2038 Policy: e2e.LoadBalancingPolicyRingHash, 2039 }) 2040 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 2041 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 2042 2043 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2044 defer cancel() 2045 2046 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2047 t.Fatalf("Failed to update xDS resources: %v", err) 2048 } 2049 2050 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 2051 if err != nil { 2052 t.Fatalf("Failed to create client: %s", err) 2053 } 2054 defer conn.Close() 2055 client := testgrpc.NewTestServiceClient(conn) 2056 2057 rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs( 2058 "address_hash", fmt.Sprintf("%d", rand.Int()), 2059 )) 2060 var remote peer.Peer 2061 if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil { 2062 t.Fatalf("rpc EmptyCall() failed: %v", err) 2063 } 2064 2065 initialFirstAddr := "" 2066 newFirstAddr := "" 2067 switch remote.Addr.String() { 2068 case addrGroups[0][0]: 2069 initialFirstAddr = addrGroups[0][0] 2070 newFirstAddr = addrGroups[0][2] 2071 case addrGroups[1][0]: 2072 initialFirstAddr = addrGroups[1][0] 2073 newFirstAddr = addrGroups[1][2] 2074 default: 2075 t.Fatalf("Request went to unexpected address: %q", remote.Addr) 2076 } 2077 2078 t.Log("Reversing addresses within each endpoint.") 2079 addrGroups1 := [][]string{ 2080 {addrGroups[0][2], addrGroups[0][1], addrGroups[0][0]}, 2081 {addrGroups[1][2], addrGroups[1][1], addrGroups[1][0]}, 2082 } 2083 endpoints = endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups1) 2084 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2085 t.Fatalf("Failed to update xDS resources: %v", err) 2086 } 2087 2088 // The first address of an endpoint is used to create the ring. This means 2089 // that requests should continue to go to the first address, but the hash 2090 // should be computed based on the last address in the original list. 
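	// In other words, RPCs whose header hashes to newFirstAddr should pick this
	// endpoint's ring entry and be delivered over the existing connection to
	// initialFirstAddr.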
2091     for ; ctx.Err() == nil; <-time.After(time.Millisecond) {
2092         rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
2093             "address_hash", newFirstAddr+"_0",
2094         ))
2095         if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
2096             t.Fatalf("rpc EmptyCall() failed: %v", err)
2097         }
2098         if remote.Addr.String() == initialFirstAddr {
2099             break
2100         }
2101     }
2102 
2103     if ctx.Err() != nil {
2104         t.Fatalf("Context timed out waiting for request to be sent to %q, last request went to %q", initialFirstAddr, remote.Addr)
2105     }
2106 
2107     t.Log("Swapping the second and third addresses within each endpoint.")
2108     // This should not affect the ring, since only the first address is used
2109     // by the ring.
2110     addrGroups2 := [][]string{
2111         {addrGroups1[0][0], addrGroups1[0][2], addrGroups1[0][1]},
2112         {addrGroups1[1][0], addrGroups1[1][2], addrGroups1[1][1]},
2113     }
2114     endpoints = endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups2)
2115     if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
2116         t.Fatalf("Failed to update xDS resources: %v", err)
2117     }
2118 
2119     // Verify that requests hashed to newFirstAddr continue to reach
2120     // initialFirstAddr, the originally-first (and still connected) address.
2121     shortCtx, cancel := context.WithTimeout(ctx, defaultTestShortTimeout)
2122     defer cancel()
2123     for ; shortCtx.Err() == nil; <-time.After(time.Millisecond) {
2124         rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
2125             "address_hash", newFirstAddr+"_0",
2126         ))
2127         if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
2128             t.Fatalf("rpc EmptyCall() failed: %v", err)
2129         }
2130         if remote.Addr.String() == initialFirstAddr {
2131             continue
2132         }
2133         t.Fatalf("Request went to unexpected backend %q, want backend %q", remote.Addr, initialFirstAddr)
2134     }
2135 }
2136 
// Tests that requests are sent to the next address within the same endpoint
// after the first address becomes unreachable.
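// Each endpoint below has two addresses (dual-stack endpoint support); the
// addresses within an endpoint are tried in order, so when the first one is
// stopped the endpoint is expected to reconnect on its second address while
// keeping the same position on the ring.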
2139 func (s) TestRingHash_FallBackWithinEndpoint(t *testing.T) { 2140 origDualstackEndpointsEnabled := envconfig.XDSDualstackEndpointsEnabled 2141 defer func() { 2142 envconfig.XDSDualstackEndpointsEnabled = origDualstackEndpointsEnabled 2143 }() 2144 envconfig.XDSDualstackEndpointsEnabled = true 2145 backends := startTestServiceBackends(t, 4) 2146 backendAddrs := backendAddrs(backends) 2147 2148 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 2149 2150 const clusterName = "cluster" 2151 endpoints := endpointResourceForBackendsWithMultipleAddrs(t, clusterName, [][]string{{backendAddrs[0], backendAddrs[1]}, {backendAddrs[2], backendAddrs[3]}}) 2152 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 2153 ClusterName: clusterName, 2154 ServiceName: clusterName, 2155 Policy: e2e.LoadBalancingPolicyRingHash, 2156 }) 2157 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 2158 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 2159 2160 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2161 defer cancel() 2162 2163 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2164 t.Fatalf("Failed to update xDS resources: %v", err) 2165 } 2166 2167 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 2168 if err != nil { 2169 t.Fatalf("Failed to create client: %s", err) 2170 } 2171 defer conn.Close() 2172 client := testgrpc.NewTestServiceClient(conn) 2173 2174 const numRPCs = 5 2175 received := checkRPCSendOK(ctx, t, client, numRPCs) 2176 if len(received) != 1 { 2177 t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1) 2178 } 2179 var got int 2180 var initialAddr string 2181 for initialAddr, got = range received { 2182 } 2183 if got != numRPCs { 2184 t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs) 2185 } 2186 2187 // Due to the channel ID hashing policy, the request could go to the first 2188 // address of either endpoint. 2189 var backendIdx int 2190 switch initialAddr { 2191 case backendAddrs[0]: 2192 backendIdx = 0 2193 case backendAddrs[2]: 2194 backendIdx = 2 2195 default: 2196 t.Fatalf("Request sent to unexpected backend: %q", initialAddr) 2197 } 2198 otherEndpointAddr := backendAddrs[backendIdx+1] 2199 2200 // Shut down the previously used backend. 2201 backends[backendIdx].Stop() 2202 testutils.AwaitState(ctx, t, conn, connectivity.Idle) 2203 2204 // Verify that the requests go to the remaining address in the same 2205 // endpoint. 2206 received = checkRPCSendOK(ctx, t, client, numRPCs) 2207 if len(received) != 1 { 2208 t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1) 2209 } 2210 var newAddr string 2211 for newAddr, got = range received { 2212 } 2213 if got != numRPCs { 2214 t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs) 2215 } 2216 2217 if newAddr != otherEndpointAddr { 2218 t.Errorf("Requests went to unexpected address, got=%q, want=%q", newAddr, otherEndpointAddr) 2219 } 2220 } 2221 2222 // Tests that ringhash is able to recover automatically in situations when a 2223 // READY endpoint enters IDLE making the aggregated state TRANSIENT_FAILURE. The 2224 // test creates 4 endpoints in the following connectivity states: [TF, TF, 2225 // READY, IDLE]. The test fails the READY backend and verifies that the last 2226 // IDLE endopint is dialed and the channel enters READY. 
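// The blocking dialer below is what sets these states up: the first two
// connection attempts are failed (TF), the third is allowed to complete
// (READY), and the fourth endpoint is never dialed (IDLE).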
2227 func (s) TestRingHash_RecoverWhenEndpointEntersIdle(t *testing.T) { 2228 const backendsCount = 4 2229 backends := startTestServiceBackends(t, backendsCount) 2230 backendAddrs := backendAddrs(backends) 2231 2232 const clusterName = "cluster" 2233 2234 endpoints := endpointResource(t, clusterName, backendAddrs) 2235 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 2236 ClusterName: clusterName, 2237 ServiceName: clusterName, 2238 Policy: e2e.LoadBalancingPolicyRingHash, 2239 }) 2240 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 2241 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 2242 2243 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2244 defer cancel() 2245 2246 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 2247 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2248 t.Fatalf("Failed to update xDS resources: %v", err) 2249 } 2250 2251 dialer := testutils.NewBlockingDialer() 2252 dialOpts := []grpc.DialOption{ 2253 grpc.WithResolvers(xdsResolver), 2254 grpc.WithTransportCredentials(insecure.NewCredentials()), 2255 grpc.WithContextDialer(dialer.DialContext), 2256 grpc.WithConnectParams(fastConnectParams), 2257 } 2258 conn, err := grpc.NewClient("xds:///test.server", dialOpts...) 2259 if err != nil { 2260 t.Fatalf("Failed to create client: %s", err) 2261 } 2262 defer conn.Close() 2263 2264 // Create holds for each backend address to delay a successful connection 2265 // until the end of the test. 2266 holds := make([]*testutils.Hold, backendsCount) 2267 for i := 0; i < len(backendAddrs); i++ { 2268 holds[i] = dialer.Hold(backendAddrs[i]) 2269 } 2270 2271 client := testgrpc.NewTestServiceClient(conn) 2272 2273 rpcCtx, rpcCancel := context.WithCancel(ctx) 2274 errCh := make(chan error, 1) 2275 go func() { 2276 rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backendAddrs[0]+"_0")) 2277 _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}) 2278 if status.Code(err) == codes.Canceled { 2279 errCh <- nil 2280 return 2281 } 2282 errCh <- err 2283 }() 2284 2285 // Wait for the RPC to trigger a connection attempt to the first address, 2286 // then cancel the RPC. No other connection attempts should be started yet. 2287 if !holds[0].Wait(ctx) { 2288 t.Fatalf("Timeout waiting for connection attempt to backend 0") 2289 } 2290 rpcCancel() 2291 if err := <-errCh; err != nil { 2292 t.Fatalf("Expected RPC to fail be canceled, got %v", err) 2293 } 2294 2295 // The number of dialed backends increases by 1 in every iteration of the 2296 // loop as ringhash tries to exit TRANSIENT_FAILURE. Run the loop twice to 2297 // get two endpoints in TRANSIENT_FAILURE. 
2298 activeAddrs := map[string]bool{} 2299 for wantFailingBackendCount := 1; wantFailingBackendCount <= 2; wantFailingBackendCount++ { 2300 newAddrIdx := -1 2301 for ; ctx.Err() == nil && len(activeAddrs) < wantFailingBackendCount; <-time.After(time.Millisecond) { 2302 for i, hold := range holds { 2303 if !hold.IsStarted() { 2304 continue 2305 } 2306 if _, ok := activeAddrs[backendAddrs[i]]; ok { 2307 continue 2308 } 2309 activeAddrs[backendAddrs[i]] = true 2310 newAddrIdx = i 2311 } 2312 } 2313 2314 if ctx.Err() != nil { 2315 t.Fatal("Context timed out waiting for new backneds to be dialed.") 2316 } 2317 if len(activeAddrs) > wantFailingBackendCount { 2318 t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantFailingBackendCount) 2319 } 2320 2321 // Create a new hold for the address dialed in this iteration and fail 2322 // the existing hold. 2323 hold := holds[newAddrIdx] 2324 holds[newAddrIdx] = dialer.Hold(backendAddrs[newAddrIdx]) 2325 hold.Fail(errors.New("Test error")) 2326 } 2327 2328 // Current state of endpoints: [TF, TF, READY, IDLE]. 2329 // Two endpoints failing should cause the channel to enter 2330 // TRANSIENT_FAILURE. 2331 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 2332 2333 // Allow the request to the backend dialed next to succeed. 2334 readyBackendIdx := -1 2335 for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) { 2336 for i, addr := range backendAddrs { 2337 if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() { 2338 continue 2339 } 2340 readyBackendIdx = i 2341 activeAddrs[addr] = true 2342 holds[i].Resume() 2343 break 2344 } 2345 } 2346 2347 if ctx.Err() != nil { 2348 t.Fatal("Context timed out waiting for the next backend to be contacted.") 2349 } 2350 2351 // Wait for channel to become READY without any pending RPC. 2352 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2353 2354 // Current state of endpoints: [TF, TF, READY, IDLE]. 2355 // Stopping the READY backend should cause the channel to re-enter 2356 // TRANSIENT_FAILURE. 2357 backends[readyBackendIdx].Stop() 2358 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 2359 2360 // To recover from TRANSIENT_FAILURE, ringhash should automatically try to 2361 // connect to the final endpoint. 2362 readyBackendIdx = -1 2363 for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) { 2364 for i, addr := range backendAddrs { 2365 if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() { 2366 continue 2367 } 2368 readyBackendIdx = i 2369 activeAddrs[addr] = true 2370 holds[i].Resume() 2371 break 2372 } 2373 } 2374 2375 if ctx.Err() != nil { 2376 t.Fatal("Context timed out waiting for next backend to be contacted.") 2377 } 2378 2379 // Wait for channel to become READY without any pending RPC. 2380 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2381 } 2382 2383 // Tests that ringhash is able to recover automatically in situations when a 2384 // READY endpoint is removed by the resolver making the aggregated state 2385 // TRANSIENT_FAILURE. The test creates 4 endpoints in the following 2386 // connectivity states: [TF, TF, READY, IDLE]. The test removes the 2387 // READY endpoint and verifies that the last IDLE endopint is dialed and the 2388 // channel enters READY. 
2389 func (s) TestRingHash_RecoverWhenResolverRemovesEndpoint(t *testing.T) { 2390 const backendsCount = 4 2391 backends := startTestServiceBackends(t, backendsCount) 2392 backendAddrs := backendAddrs(backends) 2393 2394 const clusterName = "cluster" 2395 2396 endpoints := endpointResource(t, clusterName, backendAddrs) 2397 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 2398 ClusterName: clusterName, 2399 ServiceName: clusterName, 2400 Policy: e2e.LoadBalancingPolicyRingHash, 2401 }) 2402 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 2403 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 2404 2405 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2406 defer cancel() 2407 2408 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 2409 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2410 t.Fatalf("Failed to update xDS resources: %v", err) 2411 } 2412 2413 dialer := testutils.NewBlockingDialer() 2414 dialOpts := []grpc.DialOption{ 2415 grpc.WithResolvers(xdsResolver), 2416 grpc.WithTransportCredentials(insecure.NewCredentials()), 2417 grpc.WithContextDialer(dialer.DialContext), 2418 grpc.WithConnectParams(fastConnectParams), 2419 } 2420 conn, err := grpc.NewClient("xds:///test.server", dialOpts...) 2421 if err != nil { 2422 t.Fatalf("Failed to create client: %s", err) 2423 } 2424 defer conn.Close() 2425 2426 // Create holds for each backend address to delay a successful connection 2427 // until the end of the test. 2428 holds := make([]*testutils.Hold, backendsCount) 2429 for i := 0; i < len(backendAddrs); i++ { 2430 holds[i] = dialer.Hold(backendAddrs[i]) 2431 } 2432 2433 client := testgrpc.NewTestServiceClient(conn) 2434 2435 rpcCtx, rpcCancel := context.WithCancel(ctx) 2436 errCh := make(chan error, 1) 2437 go func() { 2438 rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backendAddrs[0]+"_0")) 2439 _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}) 2440 if status.Code(err) == codes.Canceled { 2441 errCh <- nil 2442 return 2443 } 2444 errCh <- err 2445 }() 2446 2447 // Wait for the RPC to trigger a connection attempt to the first address, 2448 // then cancel the RPC. No other connection attempts should be started yet. 2449 if !holds[0].Wait(ctx) { 2450 t.Fatalf("Timeout waiting for connection attempt to backend 0") 2451 } 2452 rpcCancel() 2453 if err := <-errCh; err != nil { 2454 t.Fatalf("Expected RPC to fail be canceled, got %v", err) 2455 } 2456 2457 // The number of dialed backends increases by 1 in every iteration of the 2458 // loop as ringhash tries to exit TRANSIENT_FAILURE. Run the loop twice to 2459 // get two endpoints in TRANSIENT_FAILURE. 
2460 activeAddrs := map[string]bool{} 2461 for wantFailingBackendCount := 1; wantFailingBackendCount <= 2; wantFailingBackendCount++ { 2462 newAddrIdx := -1 2463 for ; ctx.Err() == nil && len(activeAddrs) < wantFailingBackendCount; <-time.After(time.Millisecond) { 2464 for i, hold := range holds { 2465 if !hold.IsStarted() { 2466 continue 2467 } 2468 if _, ok := activeAddrs[backendAddrs[i]]; ok { 2469 continue 2470 } 2471 activeAddrs[backendAddrs[i]] = true 2472 newAddrIdx = i 2473 } 2474 } 2475 2476 if ctx.Err() != nil { 2477 t.Fatal("Context timed out waiting for new backneds to be dialed.") 2478 } 2479 if len(activeAddrs) > wantFailingBackendCount { 2480 t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantFailingBackendCount) 2481 } 2482 2483 // Create a new hold for the address dialed in this iteration and fail 2484 // the existing hold. 2485 hold := holds[newAddrIdx] 2486 holds[newAddrIdx] = dialer.Hold(backendAddrs[newAddrIdx]) 2487 hold.Fail(errors.New("Test error")) 2488 } 2489 2490 // Current state of endpoints: [TF, TF, READY, IDLE]. 2491 // Two endpoints failing should cause the channel to enter 2492 // TRANSIENT_FAILURE. 2493 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 2494 2495 // Allow the request to the backend dialed next to succeed. 2496 readyBackendIdx := -1 2497 for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) { 2498 for i, addr := range backendAddrs { 2499 if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() { 2500 continue 2501 } 2502 readyBackendIdx = i 2503 activeAddrs[addr] = true 2504 holds[i].Resume() 2505 break 2506 } 2507 } 2508 2509 if ctx.Err() != nil { 2510 t.Fatal("Context timed out waiting for the next backend to be contacted.") 2511 } 2512 2513 // Wait for channel to become READY without any pending RPC. 2514 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2515 2516 // Current state of endpoints: [TF, TF, READY, IDLE]. 2517 // Removing the READY backend should cause the channel to re-enter 2518 // TRANSIENT_FAILURE. 2519 updatedAddrs := append([]string{}, backendAddrs[:readyBackendIdx]...) 2520 updatedAddrs = append(updatedAddrs, backendAddrs[readyBackendIdx+1:]...) 2521 updatedEndpoints := endpointResource(t, clusterName, updatedAddrs) 2522 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, updatedEndpoints, cluster, route, listener)); err != nil { 2523 t.Fatalf("Failed to update xDS resources: %v", err) 2524 } 2525 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 2526 2527 // To recover from TRANSIENT_FAILURE, ringhash should automatically try to 2528 // connect to the final endpoint. 2529 readyBackendIdx = -1 2530 for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) { 2531 for i, addr := range backendAddrs { 2532 if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() { 2533 continue 2534 } 2535 readyBackendIdx = i 2536 activeAddrs[addr] = true 2537 holds[i].Resume() 2538 break 2539 } 2540 } 2541 2542 if ctx.Err() != nil { 2543 t.Fatal("Context timed out waiting for next backend to be contacted.") 2544 } 2545 2546 // Wait for channel to become READY without any pending RPC. 2547 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2548 } 2549 2550 // Tests that RPCs are routed according to endpoint hash key rather than 2551 // endpoint first address if it is set in EDS endpoint metadata. 
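// The hash key is read from the EDS endpoint metadata. A rough sketch of the
// LbEndpoint shape this test assumes (the e2e helper is expected to nest
// BackendOptions.Metadata under the "envoy.lb" filter metadata):
//
//	metadata:
//	  filter_metadata:
//	    envoy.lb:
//	      hash_key: "0"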
2552 func (s) TestRingHash_EndpointHashKey(t *testing.T) { 2553 testutils.SetEnvConfig(t, &envconfig.XDSEndpointHashKeyBackwardCompat, false) 2554 2555 backends := backendAddrs(startTestServiceBackends(t, 4)) 2556 2557 const clusterName = "cluster" 2558 var backendOpts []e2e.BackendOptions 2559 for i, addr := range backends { 2560 var ports []uint32 2561 ports = append(ports, testutils.ParsePort(t, addr)) 2562 backendOpts = append(backendOpts, e2e.BackendOptions{ 2563 Ports: ports, 2564 Metadata: map[string]any{"hash_key": strconv.Itoa(i)}, 2565 }) 2566 } 2567 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 2568 ClusterName: clusterName, 2569 Host: "localhost", 2570 Localities: []e2e.LocalityOptions{{ 2571 Backends: backendOpts, 2572 Weight: 1, 2573 }}, 2574 }) 2575 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 2576 ClusterName: clusterName, 2577 ServiceName: clusterName, 2578 Policy: e2e.LoadBalancingPolicyRingHash, 2579 }) 2580 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 2581 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 2582 2583 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2584 defer cancel() 2585 2586 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 2587 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2588 t.Fatalf("Failed to update xDS resources: %v", err) 2589 } 2590 2591 opts := []grpc.DialOption{ 2592 grpc.WithResolvers(xdsResolver), 2593 grpc.WithTransportCredentials(insecure.NewCredentials()), 2594 } 2595 conn, err := grpc.NewClient("xds:///test.server", opts...) 2596 if err != nil { 2597 t.Fatalf("Failed to create client: %s", err) 2598 } 2599 defer conn.Close() 2600 client := testgrpc.NewTestServiceClient(conn) 2601 2602 // Make sure RPCs are routed to backends according to the endpoint metadata 2603 // rather than their address. Note each type of RPC contains a header value 2604 // that will always be hashed to a specific backend as the header value 2605 // matches the endpoint metadata hash key. 2606 for i, backend := range backends { 2607 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", strconv.Itoa(i)+"_0")) 2608 numRPCs := 10 2609 reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 2610 if reqPerBackend[backend] != numRPCs { 2611 t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend) 2612 } 2613 } 2614 2615 // Update the endpoints to swap the metadata hash key. 2616 for i := range backendOpts { 2617 backendOpts[i].Metadata = map[string]any{"hash_key": strconv.Itoa(len(backends) - i - 1)} 2618 } 2619 endpoints = e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 2620 ClusterName: clusterName, 2621 Host: "localhost", 2622 Localities: []e2e.LocalityOptions{{ 2623 Backends: backendOpts, 2624 Weight: 1, 2625 }}, 2626 }) 2627 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2628 t.Fatalf("Failed to update xDS resources: %v", err) 2629 } 2630 2631 // Wait for the resolver update to make it to the balancer. This RPC should 2632 // be routed to backend 3 with the reverse numbering of the hash_key 2633 // attribute delivered above. 
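	// With 4 backends, backends[3] is the endpoint whose new hash_key is
	// strconv.Itoa(4-3-1) == "0", so a request hashed with "0_0" should land
	// on it once the update has propagated.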
2634 for { 2635 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", "0_0")) 2636 var remote peer.Peer 2637 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil { 2638 t.Fatalf("Unexpected RPC error waiting for EDS update propagation: %s", err) 2639 } 2640 if remote.Addr.String() == backends[3] { 2641 break 2642 } 2643 } 2644 2645 // Now that the balancer has the new endpoint attributes, make sure RPCs are 2646 // routed to backends according to the new endpoint metadata. 2647 for i, backend := range backends { 2648 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", strconv.Itoa(len(backends)-i-1)+"_0")) 2649 numRPCs := 10 2650 reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 2651 if reqPerBackend[backend] != numRPCs { 2652 t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend) 2653 } 2654 } 2655 } 2656 2657 // Tests that when a request hash key is set in the balancer configuration via 2658 // service config, this header is used to route to a specific backend. 2659 func (s) TestRingHash_RequestHashKey(t *testing.T) { 2660 testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true) 2661 2662 backends := backendAddrs(startTestServiceBackends(t, 4)) 2663 2664 // Create a clientConn with a manual resolver (which is used to push the 2665 // address of the test backend), and a default service config pointing to 2666 // the use of the ring_hash_experimental LB policy with an explicit hash 2667 // header. 2668 const ringHashServiceConfig = `{"loadBalancingConfig": [{"ring_hash_experimental":{"requestHashHeader":"address_hash"}}]}` 2669 r := manual.NewBuilderWithScheme("whatever") 2670 dopts := []grpc.DialOption{ 2671 grpc.WithTransportCredentials(insecure.NewCredentials()), 2672 grpc.WithResolvers(r), 2673 grpc.WithDefaultServiceConfig(ringHashServiceConfig), 2674 grpc.WithConnectParams(fastConnectParams), 2675 } 2676 cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...) 2677 if err != nil { 2678 t.Fatalf("Failed to dial local test server: %v", err) 2679 } 2680 defer cc.Close() 2681 var endpoints []resolver.Endpoint 2682 for _, backend := range backends { 2683 endpoints = append(endpoints, resolver.Endpoint{ 2684 Addresses: []resolver.Address{{Addr: backend}}, 2685 }) 2686 } 2687 r.UpdateState(resolver.State{ 2688 Endpoints: endpoints, 2689 }) 2690 client := testgrpc.NewTestServiceClient(cc) 2691 2692 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2693 defer cancel() 2694 2695 // Note each type of RPC contains a header value that will always be hashed 2696 // to a specific backend as the header value matches the value used to 2697 // create the entry in the ring. 2698 for _, backend := range backends { 2699 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0")) 2700 numRPCs := 10 2701 reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 2702 if reqPerBackend[backend] != numRPCs { 2703 t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend) 2704 } 2705 } 2706 2707 const ringHashServiceConfigUpdate = `{"loadBalancingConfig": [{"ring_hash_experimental":{"requestHashHeader":"other_header"}}]}` 2708 r.UpdateState(resolver.State{ 2709 Endpoints: endpoints, 2710 ServiceConfig: (&testutils.ResolverClientConn{}).ParseServiceConfig(ringHashServiceConfigUpdate), 2711 }) 2712 2713 // Make sure that requests with the new hash are sent to the right backend. 
2714 for _, backend := range backends { 2715 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("other_header", backend+"_0")) 2716 numRPCs := 10 2717 reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 2718 if reqPerBackend[backend] != numRPCs { 2719 t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend) 2720 } 2721 } 2722 } 2723 2724 // Tests that when a request hash key is set in the balancer configuration via 2725 // service config, and the header is not set in the outgoing request, then it 2726 // is sent to a random backend. 2727 func (s) TestRingHash_RequestHashKeyRandom(t *testing.T) { 2728 testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true) 2729 2730 backends := backendAddrs(startTestServiceBackends(t, 4)) 2731 2732 // Create a clientConn with a manual resolver (which is used to push the 2733 // address of the test backend), and a default service config pointing to 2734 // the use of the ring_hash_experimental LB policy with an explicit hash 2735 // header. 2736 const ringHashServiceConfig = `{"loadBalancingConfig": [{"ring_hash_experimental":{"requestHashHeader":"address_hash"}}]}` 2737 r := manual.NewBuilderWithScheme("whatever") 2738 dopts := []grpc.DialOption{ 2739 grpc.WithTransportCredentials(insecure.NewCredentials()), 2740 grpc.WithResolvers(r), 2741 grpc.WithDefaultServiceConfig(ringHashServiceConfig), 2742 grpc.WithConnectParams(fastConnectParams), 2743 } 2744 cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...) 2745 if err != nil { 2746 t.Fatalf("Failed to dial local test server: %v", err) 2747 } 2748 defer cc.Close() 2749 var endpoints []resolver.Endpoint 2750 for _, backend := range backends { 2751 endpoints = append(endpoints, resolver.Endpoint{ 2752 Addresses: []resolver.Address{{Addr: backend}}, 2753 }) 2754 } 2755 r.UpdateState(resolver.State{ 2756 Endpoints: endpoints, 2757 }) 2758 client := testgrpc.NewTestServiceClient(cc) 2759 2760 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2761 defer cancel() 2762 2763 // Due to the way that ring hash lazily establishes connections when using a 2764 // random hash, request distribution is skewed towards the order in which we 2765 // connected. The test send RPCs until we are connected to all backends, so 2766 // we can later assert that the distribution is uniform. 2767 seen := make(map[string]bool) 2768 for len(seen) != 4 { 2769 var remote peer.Peer 2770 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil { 2771 t.Fatalf("rpc EmptyCall() failed: %v", err) 2772 } 2773 seen[remote.String()] = true 2774 } 2775 2776 // Make sure that requests with the old hash are sent to random backends. 2777 numRPCs := computeIdealNumberOfRPCs(t, .25, errorTolerance) 2778 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 2779 for _, backend := range backends { 2780 got := float64(gotPerBackend[backend]) / float64(numRPCs) 2781 want := .25 2782 if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) { 2783 t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, want, errorTolerance) 2784 } 2785 } 2786 } 2787 2788 // Tests that when a request hash key is set in the balancer configuration via 2789 // service config, and the header is not set in the outgoing request (random 2790 // behavior), then each RPC wakes up at most one SubChannel, and, if there are 2791 // SubChannels in Ready state, RPCs are routed to them. 
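// A blocking dialer is used below so the test can hold every connection
// attempt and count exactly how many subchannels each pick wakes up.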
2792 func (s) TestRingHash_RequestHashKeyConnecting(t *testing.T) {
2793 	testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true)
2794 
2795 	backends := backendAddrs(startTestServiceBackends(t, 20))
2796 
2797 	// Create a clientConn with a manual resolver (which is used to push the
2798 	// addresses of the test backends), and a default service config pointing to
2799 	// the use of the ring_hash_experimental LB policy with an explicit hash
2800 	// header. Use a blocking dialer to control connection attempts.
2801 	const ringHashServiceConfig = `{"loadBalancingConfig": [
2802 		{"ring_hash_experimental":{"requestHashHeader":"address_hash"}}
2803 	]}`
2804 	r := manual.NewBuilderWithScheme("whatever")
2805 	blockingDialer := testutils.NewBlockingDialer()
2806 	dopts := []grpc.DialOption{
2807 		grpc.WithTransportCredentials(insecure.NewCredentials()),
2808 		grpc.WithResolvers(r),
2809 		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
2810 		grpc.WithConnectParams(fastConnectParams),
2811 		grpc.WithContextDialer(blockingDialer.DialContext),
2812 	}
2813 	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
2814 	if err != nil {
2815 		t.Fatalf("Failed to dial local test server: %v", err)
2816 	}
2817 	defer cc.Close()
2818 	var endpoints []resolver.Endpoint
2819 	for _, backend := range backends {
2820 		endpoints = append(endpoints, resolver.Endpoint{
2821 			Addresses: []resolver.Address{{Addr: backend}},
2822 		})
2823 	}
2824 	r.UpdateState(resolver.State{
2825 		Endpoints: endpoints,
2826 	})
2827 	client := testgrpc.NewTestServiceClient(cc)
2828 
2829 	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
2830 	defer cancel()
2831 
2832 	// Intercept all connection attempts to the backends.
2833 	var holds []*testutils.Hold
2834 	for i := 0; i < len(backends); i++ {
2835 		holds = append(holds, blockingDialer.Hold(backends[i]))
2836 	}
2837 
2838 	wg := sync.WaitGroup{}
2839 	wg.Add(1)
2840 	go func() {
2841 		// Send 1 RPC and make sure this triggers at most 1 connection attempt.
2842 		_, err := client.EmptyCall(ctx, &testpb.Empty{})
2843 		if err != nil {
2844 			t.Errorf("EmptyCall(): got %v, want success", err)
2845 		}
2846 		wg.Done()
2847 	}()
2848 	testutils.AwaitState(ctx, t, cc, connectivity.Connecting)
2849 
2850 	// Check that at most one connection attempt was started.
2851 	nConn := 0
2852 	for _, hold := range holds {
2853 		if hold.IsStarted() {
2854 			nConn++
2855 		}
2856 	}
2857 	if wantMaxConn := 1; nConn > wantMaxConn {
2858 		t.Fatalf("Got %d connection attempts, want at most %d", nConn, wantMaxConn)
2859 	}
2860 
2861 	// Do a second RPC. Since there should already be a SubChannel in
2862 	// Connecting state, this should not trigger a connection attempt.
2863 	wg.Add(1)
2864 	go func() {
2865 		_, err := client.EmptyCall(ctx, &testpb.Empty{})
2866 		if err != nil {
2867 			t.Errorf("EmptyCall(): got %v, want success", err)
2868 		}
2869 		wg.Done()
2870 	}()
2871 
2872 	// Give extra time for more connections to be attempted.
2873 	time.Sleep(defaultTestShortTimeout)
2874 
2875 	var firstConnectedBackend string
2876 	nConn = 0
2877 	for i, hold := range holds {
2878 		if hold.IsStarted() {
2879 			// Unblock the connection attempt. The SubChannel (and hence the
2880 			// channel) should transition to Ready. RPCs should succeed and
2881 			// be routed to this backend.
2882 			hold.Resume()
2883 			holds[i] = nil
2884 			firstConnectedBackend = backends[i]
2885 			nConn++
2886 		}
2887 	}
2888 	if wantMaxConn := 1; nConn > wantMaxConn {
2889 		t.Fatalf("Got %d connection attempts, want at most %d", nConn, wantMaxConn)
2890 	}
2891 	testutils.AwaitState(ctx, t, cc, connectivity.Ready)
2892 	wg.Wait() // Make sure we're done with the 2 previous RPCs.
2893 
2894 	// Now send RPCs until we have at least one more connection attempt, that
2895 	// is, the random hash did not land on the same backend on every pick (the
2896 	// chances are low, but we don't want this to be flaky). Make sure no RPC
2897 	// fails and that we route all of them to the only subchannel in ready
2898 	// state.
2899 	nConn = 0
2900 	for nConn == 0 {
2901 		p := peer.Peer{}
2902 		_, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p))
2903 		if status.Code(err) == codes.DeadlineExceeded {
2904 			t.Fatal("EmptyCall(): test timed out while waiting for more connection attempts")
2905 		}
2906 		if err != nil {
2907 			t.Fatalf("EmptyCall(): got %v, want success", err)
2908 		}
2909 		if p.Addr.String() != firstConnectedBackend {
2910 			t.Errorf("RPC sent to backend %q, want %q", p.Addr.String(), firstConnectedBackend)
2911 		}
2912 		for _, hold := range holds {
2913 			if hold != nil && hold.IsStarted() {
2914 				nConn++
2915 			}
2916 		}
2917 	}
2918 }
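
The three tests above exercise the requestHashHeader ring_hash option end to end through a manual resolver. For reference, the following is a minimal sketch of how a client could use the same option outside the test harness. It is illustrative only and not part of the test file: the target address, the "session-id" header name, and the session key are hypothetical, and the experimental request-hash-key behavior must be enabled, as the tests do via envconfig.RingHashSetRequestHashKey.

// Illustrative sketch only (not part of the test file above).
package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/metadata"

	testgrpc "google.golang.org/grpc/interop/grpc_testing"
	testpb "google.golang.org/grpc/interop/grpc_testing"

	_ "google.golang.org/grpc/xds" // Registers the ring_hash_experimental LB policy.
)

func main() {
	// RPCs carrying the same "session-id" header value hash to the same ring
	// entry and therefore reach the same backend; RPCs without the header get
	// a random hash.
	const sc = `{"loadBalancingConfig": [{"ring_hash_experimental":{"requestHashHeader":"session-id"}}]}`
	cc, err := grpc.NewClient(
		"dns:///example.test:50051", // Hypothetical target.
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithDefaultServiceConfig(sc),
	)
	if err != nil {
		log.Fatalf("grpc.NewClient() failed: %v", err)
	}
	defer cc.Close()

	client := testgrpc.NewTestServiceClient(cc)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Pin this RPC to whichever backend the hash of "user-1234" lands on.
	ctx = metadata.AppendToOutgoingContext(ctx, "session-id", "user-1234")
	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
		log.Fatalf("EmptyCall() failed: %v", err)
	}
}

With this configuration, RPCs that carry the same session key are routed to the same backend, while RPCs that omit the header are spread across backends, matching the behavior asserted by TestRingHash_RequestHashKey and TestRingHash_RequestHashKeyRandom above.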