google.golang.org/grpc@v1.74.2/balancer/ringhash/ringhash_e2e_test.go (about)

     1  /*
     2   *
     3   * Copyright 2022 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package ringhash_test
    20  
    21  import (
    22  	"context"
    23  	"errors"
    24  	"fmt"
    25  	"math"
    26  	rand "math/rand/v2"
    27  	"net"
    28  	"slices"
    29  	"strconv"
    30  	"sync"
    31  	"testing"
    32  	"time"
    33  
    34  	"github.com/google/go-cmp/cmp"
    35  	"github.com/google/go-cmp/cmp/cmpopts"
    36  	"github.com/google/uuid"
    37  	"google.golang.org/grpc"
    38  	"google.golang.org/grpc/backoff"
    39  	"google.golang.org/grpc/codes"
    40  	"google.golang.org/grpc/connectivity"
    41  	"google.golang.org/grpc/credentials/insecure"
    42  	"google.golang.org/grpc/internal"
    43  	"google.golang.org/grpc/internal/envconfig"
    44  	"google.golang.org/grpc/internal/grpctest"
    45  	iringhash "google.golang.org/grpc/internal/ringhash"
    46  	"google.golang.org/grpc/internal/stubserver"
    47  	"google.golang.org/grpc/internal/testutils"
    48  	"google.golang.org/grpc/internal/testutils/xds/e2e"
    49  	"google.golang.org/grpc/metadata"
    50  	"google.golang.org/grpc/peer"
    51  	"google.golang.org/grpc/resolver"
    52  	"google.golang.org/grpc/resolver/manual"
    53  	"google.golang.org/grpc/status"
    54  
    55  	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
    56  	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
    57  	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
    58  	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
    59  	v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
    60  	v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3"
    61  	v3matcherpb "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3"
    62  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    63  	testpb "google.golang.org/grpc/interop/grpc_testing"
    64  	"google.golang.org/protobuf/types/known/wrapperspb"
    65  
    66  	_ "google.golang.org/grpc/xds"
    67  )
    68  
// s wraps grpctest.Tester so that every test method defined on it gets the
// standard grpctest setup/teardown (e.g. leak checking) when run via
// grpctest.RunSubTests.
type s struct {
	grpctest.Tester
}
    72  
// Test is the single entry point for this file; it dispatches all (s) methods
// as subtests through the grpctest harness.
func Test(t *testing.T) {
	grpctest.RunSubTests(t, s{})
}
    76  
const (
	// defaultTestTimeout bounds the overall context used by each test.
	defaultTestTimeout = 10 * time.Second
	// defaultTestShortTimeout is a much shorter window — presumably used where
	// an event is expected NOT to occur; confirm against usages elsewhere in
	// the file.
	defaultTestShortTimeout = 10 * time.Millisecond

	errorTolerance = .05 // For tests that rely on statistical significance.

	// virtualHostName is the authority used in the xDS listener/route
	// resources created by these tests.
	virtualHostName = "test.server"

	// minRingSize is the minimum ring size to use when testing randomly a
	// backend for each request. It lowers the skew that may occur from
	// an imbalanced ring.
	minRingSize = 10000
)
    90  
// fastConnectParams disables connection attempts backoffs and lowers delays.
// This speeds up tests that rely on subchannel to move to transient failure.
var fastConnectParams = grpc.ConnectParams{
	Backoff: backoff.Config{
		// Retry almost immediately after a failed connection attempt.
		BaseDelay: 10 * time.Millisecond,
	},
	// Cap how long a single connection attempt may take before failing.
	MinConnectTimeout: 100 * time.Millisecond,
}
    99  
// Tests the case where the ring contains a single subConn, and verifies that
// when the server goes down, the LB policy on the client automatically
// reconnects until the subChannel moves out of TRANSIENT_FAILURE.
func (s) TestRingHash_ReconnectToMoveOutOfTransientFailure(t *testing.T) {
	// Create a restartable listener to simulate server being down.
	l, err := testutils.LocalTCPListener()
	if err != nil {
		t.Fatalf("testutils.LocalTCPListener() failed: %v", err)
	}
	lis := testutils.NewRestartableListener(l)
	srv := stubserver.StartTestService(t, &stubserver.StubServer{
		Listener:   lis,
		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return &testpb.Empty{}, nil },
	})
	defer srv.Stop()

	// Create a clientConn with a manual resolver (which is used to push the
	// address of the test backend), and a default service config pointing to
	// the use of the ring_hash_experimental LB policy.
	const ringHashServiceConfig = `{"loadBalancingConfig": [{"ring_hash_experimental":{}}]}`
	r := manual.NewBuilderWithScheme("whatever")
	dopts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithResolvers(r),
		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
		grpc.WithConnectParams(fastConnectParams),
	}
	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to dial local test server: %v", err)
	}
	defer cc.Close()

	// Push the address of the test backend through the manual resolver.
	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: lis.Addr().String()}}})

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	// Attach a fixed request hash to the context; the ring_hash policy uses
	// this hash to pick a backend on the ring.
	ctx = iringhash.SetXDSRequestHash(ctx, 0)
	defer cancel()
	client := testgrpc.NewTestServiceClient(cc)
	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
		t.Fatalf("rpc EmptyCall() failed: %v", err)
	}

	// Stopping the server listener will close the transport on the client,
	// which will lead to the channel eventually moving to IDLE. The ring_hash
	// LB policy is not expected to reconnect by itself at this point.
	lis.Stop()

	testutils.AwaitState(ctx, t, cc, connectivity.Idle)

	// Make an RPC to get the ring_hash LB policy to reconnect and thereby move
	// to TRANSIENT_FAILURE upon connection failure. The returned error is
	// deliberately ignored: the RPC is expected to fail while the server is
	// down; its only purpose is to trigger a connection attempt.
	client.EmptyCall(ctx, &testpb.Empty{})

	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)

	// An RPC at this point is expected to fail.
	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
		t.Fatal("EmptyCall RPC succeeded when the channel is in TRANSIENT_FAILURE")
	}

	// Restart the server listener. The ring_hash LB policy is expected to
	// attempt to reconnect on its own and come out of TRANSIENT_FAILURE, even
	// without an RPC attempt.
	lis.Restart()
	testutils.AwaitState(ctx, t, cc, connectivity.Ready)

	// An RPC at this point is expected to succeed.
	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
		t.Fatalf("rpc EmptyCall() failed: %v", err)
	}
}
   173  
   174  // startTestServiceBackends starts num stub servers. It returns the list of
   175  // stubservers. Servers are closed when the test is stopped.
   176  func startTestServiceBackends(t *testing.T, num int) []*stubserver.StubServer {
   177  	t.Helper()
   178  
   179  	servers := make([]*stubserver.StubServer, 0, num)
   180  	for i := 0; i < num; i++ {
   181  		server := stubserver.StartTestService(t, nil)
   182  		t.Cleanup(server.Stop)
   183  		servers = append(servers, server)
   184  	}
   185  	return servers
   186  }
   187  
   188  // backendAddrs returns a list of address strings for the given stubservers.
   189  func backendAddrs(servers []*stubserver.StubServer) []string {
   190  	addrs := make([]string, 0, len(servers))
   191  	for _, s := range servers {
   192  		addrs = append(addrs, s.Address)
   193  	}
   194  	return addrs
   195  }
   196  
   197  // backendOptions returns a slice of e2e.BackendOptions for the given server
   198  // addresses.
   199  func backendOptions(t *testing.T, serverAddrs []string) []e2e.BackendOptions {
   200  	t.Helper()
   201  	backendAddrs := [][]string{}
   202  	for _, addr := range serverAddrs {
   203  		backendAddrs = append(backendAddrs, []string{addr})
   204  	}
   205  	return backendOptionsForEndpointsWithMultipleAddrs(t, backendAddrs)
   206  }
   207  
// backendOptionsForEndpointsWithMultipleAddrs returns a slice of
// e2e.BackendOptions for the given server addresses. Each endpoint can have
// multiple addresses.
func backendOptionsForEndpointsWithMultipleAddrs(t *testing.T, backendAddrs [][]string) []e2e.BackendOptions {
	t.Helper()

	var backendOpts []e2e.BackendOptions
	for _, backend := range backendAddrs {
		// Each endpoint is described by the set of ports of its addresses;
		// the host is supplied separately in the EDS resource.
		ports := []uint32{}
		for _, addr := range backend {
			ports = append(ports, testutils.ParsePort(t, addr))
		}
		backendOpts = append(backendOpts, e2e.BackendOptions{Ports: ports})
	}
	return backendOpts
}
   223  
   224  // channelIDHashRoute returns a RouteConfiguration with a hash policy that
   225  // hashes based on the channel ID.
   226  func channelIDHashRoute(routeName, virtualHostDomain, clusterName string) *v3routepb.RouteConfiguration {
   227  	route := e2e.DefaultRouteConfig(routeName, virtualHostDomain, clusterName)
   228  	hashPolicy := v3routepb.RouteAction_HashPolicy{
   229  		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_FilterState_{
   230  			FilterState: &v3routepb.RouteAction_HashPolicy_FilterState{
   231  				Key: "io.grpc.channel_id",
   232  			},
   233  		},
   234  	}
   235  	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
   236  	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy}
   237  	return route
   238  }
   239  
   240  // checkRPCSendOK sends num RPCs to the client. It returns a map of backend
   241  // addresses as keys and number of RPCs sent to this address as value. Abort the
   242  // test if any RPC fails.
   243  func checkRPCSendOK(ctx context.Context, t *testing.T, client testgrpc.TestServiceClient, num int) map[string]int {
   244  	t.Helper()
   245  
   246  	backendCount := make(map[string]int)
   247  	for i := 0; i < num; i++ {
   248  		var remote peer.Peer
   249  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
   250  			t.Fatalf("rpc EmptyCall() failed: %v", err)
   251  		}
   252  		backendCount[remote.Addr.String()]++
   253  	}
   254  	return backendCount
   255  }
   256  
// makeUnreachableBackends returns a slice of addresses of backends that close
// connections as soon as they are established. Useful to simulate servers that
// are unreachable.
func makeUnreachableBackends(t *testing.T, num int) []string {
	t.Helper()

	addrs := make([]string, 0, num)
	for i := 0; i < num; i++ {
		l, err := testutils.LocalTCPListener()
		if err != nil {
			t.Fatalf("testutils.LocalTCPListener() failed: %v", err)
		}
		lis := testutils.NewRestartableListener(l)
		addrs = append(addrs, lis.Addr().String())

		// It is enough to fail the first connection attempt to put the subchannel
		// in TRANSIENT_FAILURE.
		// NOTE(review): the goroutine accepts at most one connection and never
		// serves it; combined with Stop() below, clients see their connection
		// closed immediately — confirm against RestartableListener semantics.
		go func() { lis.Accept() }()

		// We don't close these listeners here, to make sure ports are
		// not reused across them, and across tests.
		lis.Stop()
		t.Cleanup(func() { lis.Close() })
	}
	return addrs
}
   283  
   284  // setupManagementServerAndResolver sets up an xDS management server, creates
   285  // bootstrap configuration pointing to that server and creates an xDS resolver
   286  // using that configuration.
   287  //
   288  // Registers a cleanup function on t to stop the management server.
   289  //
   290  // Returns the management server, node ID and the xDS resolver builder.
   291  func setupManagementServerAndResolver(t *testing.T) (*e2e.ManagementServer, string, resolver.Builder) {
   292  	t.Helper()
   293  
   294  	// Start an xDS management server.
   295  	xdsServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   296  
   297  	// Create bootstrap configuration pointing to the above management server.
   298  	nodeID := uuid.New().String()
   299  	bc := e2e.DefaultBootstrapContents(t, nodeID, xdsServer.Address)
   300  
   301  	// Create an xDS resolver with the above bootstrap configuration.
   302  	if internal.NewXDSResolverWithConfigForTesting == nil {
   303  		t.Fatalf("internal.NewXDSResolverWithConfigForTesting is nil")
   304  	}
   305  	r, err := internal.NewXDSResolverWithConfigForTesting.(func([]byte) (resolver.Builder, error))(bc)
   306  	if err != nil {
   307  		t.Fatalf("Failed to create xDS resolver for testing: %v", err)
   308  	}
   309  
   310  	return xdsServer, nodeID, r
   311  }
   312  
   313  // xdsUpdateOpts returns an e2e.UpdateOptions for the given node ID with the given xDS resources.
   314  func xdsUpdateOpts(nodeID string, endpoints *v3endpointpb.ClusterLoadAssignment, cluster *v3clusterpb.Cluster, route *v3routepb.RouteConfiguration, listener *v3listenerpb.Listener) e2e.UpdateOptions {
   315  	return e2e.UpdateOptions{
   316  		NodeID:    nodeID,
   317  		Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints},
   318  		Clusters:  []*v3clusterpb.Cluster{cluster},
   319  		Routes:    []*v3routepb.RouteConfiguration{route},
   320  		Listeners: []*v3listenerpb.Listener{listener},
   321  	}
   322  }
   323  
   324  // Tests that when an aggregate cluster is configured with ring hash policy, and
   325  // the first cluster is in transient failure, all RPCs are sent to the second
   326  // cluster using the ring hash policy.
   327  func (s) TestRingHash_AggregateClusterFallBackFromRingHashAtStartup(t *testing.T) {
   328  	addrs := backendAddrs(startTestServiceBackends(t, 2))
   329  
   330  	const primaryClusterName = "new_cluster_1"
   331  	const primaryServiceName = "new_eds_service_1"
   332  	const secondaryClusterName = "new_cluster_2"
   333  	const secondaryServiceName = "new_eds_service_2"
   334  	const clusterName = "aggregate_cluster"
   335  
   336  	ep1 := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
   337  		ClusterName: primaryServiceName,
   338  		Localities: []e2e.LocalityOptions{{
   339  			Name:     "locality0",
   340  			Weight:   1,
   341  			Backends: backendOptions(t, makeUnreachableBackends(t, 2)),
   342  		}},
   343  	})
   344  	ep2 := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
   345  		ClusterName: secondaryServiceName,
   346  		Localities: []e2e.LocalityOptions{{
   347  			Name:     "locality0",
   348  			Weight:   1,
   349  			Backends: backendOptions(t, addrs),
   350  		}},
   351  	})
   352  	primaryCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
   353  		ClusterName: primaryClusterName,
   354  		ServiceName: primaryServiceName,
   355  	})
   356  	secondaryCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
   357  		ClusterName: secondaryClusterName,
   358  		ServiceName: secondaryServiceName,
   359  	})
   360  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
   361  		ClusterName: clusterName,
   362  		Type:        e2e.ClusterTypeAggregate,
   363  		// TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is implemented, the
   364  		// policy will have to be set on the child clusters.
   365  		Policy:     e2e.LoadBalancingPolicyRingHash,
   366  		ChildNames: []string{primaryClusterName, secondaryClusterName},
   367  	})
   368  	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
   369  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
   370  
   371  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   372  	defer cancel()
   373  
   374  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
   375  	updateOpts := e2e.UpdateOptions{
   376  		NodeID:    nodeID,
   377  		Endpoints: []*v3endpointpb.ClusterLoadAssignment{ep1, ep2},
   378  		Clusters:  []*v3clusterpb.Cluster{cluster, primaryCluster, secondaryCluster},
   379  		Routes:    []*v3routepb.RouteConfiguration{route},
   380  		Listeners: []*v3listenerpb.Listener{listener},
   381  	}
   382  	if err := xdsServer.Update(ctx, updateOpts); err != nil {
   383  		t.Fatalf("Failed to update xDS resources: %v", err)
   384  	}
   385  
   386  	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
   387  	if err != nil {
   388  		t.Fatalf("Failed to create client: %s", err)
   389  	}
   390  	defer conn.Close()
   391  	client := testgrpc.NewTestServiceClient(conn)
   392  
   393  	const numRPCs = 100
   394  	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
   395  
   396  	// Since this is using ring hash with the channel ID as the key, all RPCs
   397  	// are routed to the same backend of the secondary locality.
   398  	if len(gotPerBackend) != 1 {
   399  		t.Errorf("Got RPCs routed to %v backends, want %v", len(gotPerBackend), 1)
   400  	}
   401  
   402  	var backend string
   403  	var got int
   404  	for backend, got = range gotPerBackend {
   405  	}
   406  	if !slices.Contains(addrs, backend) {
   407  		t.Errorf("Got RPCs routed to an unexpected backend: %v, want one of %v", backend, addrs)
   408  	}
   409  	if got != numRPCs {
   410  		t.Errorf("Got %v RPCs routed to a backend, want %v", got, 100)
   411  	}
   412  }
   413  
   414  func replaceDNSResolver(t *testing.T) *manual.Resolver {
   415  	mr := manual.NewBuilderWithScheme("dns")
   416  
   417  	dnsResolverBuilder := resolver.Get("dns")
   418  	resolver.Register(mr)
   419  
   420  	t.Cleanup(func() { resolver.Register(dnsResolverBuilder) })
   421  	return mr
   422  }
   423  
   424  // Tests that when an aggregate cluster is configured with ring hash policy, and
   425  // the first is an EDS cluster in transient failure, and the fallback is a
   426  // logical DNS cluster, all RPCs are sent to the second cluster using the ring
   427  // hash policy.
   428  func (s) TestRingHash_AggregateClusterFallBackFromRingHashToLogicalDnsAtStartup(t *testing.T) {
   429  	const edsClusterName = "eds_cluster"
   430  	const logicalDNSClusterName = "logical_dns_cluster"
   431  	const clusterName = "aggregate_cluster"
   432  
   433  	backends := backendAddrs(startTestServiceBackends(t, 1))
   434  
   435  	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
   436  		ClusterName: edsClusterName,
   437  		Localities: []e2e.LocalityOptions{{
   438  			Name:     "locality0",
   439  			Weight:   1,
   440  			Backends: backendOptions(t, makeUnreachableBackends(t, 1)),
   441  			Priority: 0,
   442  		}},
   443  	})
   444  	edsCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
   445  		ClusterName: edsClusterName,
   446  		ServiceName: edsClusterName,
   447  	})
   448  
   449  	logicalDNSCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
   450  		Type:        e2e.ClusterTypeLogicalDNS,
   451  		ClusterName: logicalDNSClusterName,
   452  		// The DNS values are not used because we fake DNS later on, but they
   453  		// are required to be present for the resource to be valid.
   454  		DNSHostName: "server.example.com",
   455  		DNSPort:     443,
   456  	})
   457  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
   458  		ClusterName: clusterName,
   459  		Type:        e2e.ClusterTypeAggregate,
   460  		// TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is merged, the
   461  		// policy will have to be set on the child clusters.
   462  		Policy:     e2e.LoadBalancingPolicyRingHash,
   463  		ChildNames: []string{edsClusterName, logicalDNSClusterName},
   464  	})
   465  	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
   466  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
   467  
   468  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   469  	defer cancel()
   470  
   471  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
   472  	updateOpts := e2e.UpdateOptions{
   473  		NodeID:    nodeID,
   474  		Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints},
   475  		Clusters:  []*v3clusterpb.Cluster{cluster, edsCluster, logicalDNSCluster},
   476  		Routes:    []*v3routepb.RouteConfiguration{route},
   477  		Listeners: []*v3listenerpb.Listener{listener},
   478  	}
   479  
   480  	dnsR := replaceDNSResolver(t)
   481  	dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0]}}})
   482  
   483  	if err := xdsServer.Update(ctx, updateOpts); err != nil {
   484  		t.Fatalf("Failed to update xDS resources: %v", err)
   485  	}
   486  
   487  	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
   488  	if err != nil {
   489  		t.Fatalf("Failed to create client: %s", err)
   490  	}
   491  	defer conn.Close()
   492  	client := testgrpc.NewTestServiceClient(conn)
   493  
   494  	gotPerBackend := checkRPCSendOK(ctx, t, client, 1)
   495  	var got string
   496  	for got = range gotPerBackend {
   497  	}
   498  	if want := backends[0]; got != want {
   499  		t.Errorf("Got RPCs routed to an unexpected got: %v, want %v", got, want)
   500  	}
   501  }
   502  
// Tests that when an aggregate cluster is configured with ring hash policy, and
// it's first child is in transient failure, and the fallback is a logical DNS,
// the later recovers from transient failure when its backend becomes available.
func (s) TestRingHash_AggregateClusterFallBackFromRingHashToLogicalDnsAtStartupNoFailedRPCs(t *testing.T) {
	const edsClusterName = "eds_cluster"
	const logicalDNSClusterName = "logical_dns_cluster"
	const clusterName = "aggregate_cluster"

	backends := backendAddrs(startTestServiceBackends(t, 1))

	// The EDS child only contains an unreachable backend, so the aggregate
	// cluster must fall back to the logical DNS child.
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: edsClusterName,
		Localities: []e2e.LocalityOptions{{
			Name:     "locality0",
			Weight:   1,
			Backends: backendOptions(t, makeUnreachableBackends(t, 1)),
			Priority: 0,
		}},
	})
	edsCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: edsClusterName,
		ServiceName: edsClusterName,
	})

	logicalDNSCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		Type:        e2e.ClusterTypeLogicalDNS,
		ClusterName: logicalDNSClusterName,
		// The DNS values are not used because we fake DNS later on, but they
		// are required to be present for the resource to be valid.
		DNSHostName: "server.example.com",
		DNSPort:     443,
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		Type:        e2e.ClusterTypeAggregate,
		// TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is merged, the
		// policy will have to be set on the child clusters.
		Policy:     e2e.LoadBalancingPolicyRingHash,
		ChildNames: []string{edsClusterName, logicalDNSClusterName},
	})
	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	updateOpts := e2e.UpdateOptions{
		NodeID:    nodeID,
		Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints},
		Clusters:  []*v3clusterpb.Cluster{cluster, edsCluster, logicalDNSCluster},
		Routes:    []*v3routepb.RouteConfiguration{route},
		Listeners: []*v3listenerpb.Listener{listener},
	}

	// Point the fake DNS resolver at the single reachable backend.
	dnsR := replaceDNSResolver(t)
	dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0]}}})

	if err := xdsServer.Update(ctx, updateOpts); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	// A blocking dialer lets the test pause connection establishment to
	// backends[0] so both RPCs are queued before the connection completes.
	dialer := testutils.NewBlockingDialer()
	cp := grpc.ConnectParams{
		// Increase backoff time, so that subconns stay in TRANSIENT_FAILURE
		// for long enough to trigger potential problems.
		Backoff: backoff.Config{
			BaseDelay: defaultTestTimeout,
		},
		MinConnectTimeout: 0,
	}
	dopts := []grpc.DialOption{
		grpc.WithResolvers(xdsResolver),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithContextDialer(dialer.DialContext),
		grpc.WithConnectParams(cp)}
	conn, err := grpc.NewClient("xds:///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Hold connection attempts to the reachable backend until released below.
	hold := dialer.Hold(backends[0])

	errCh := make(chan error, 2)
	go func() {
		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
			errCh <- fmt.Errorf("first rpc UnaryCall() failed: %v", err)
			return
		}
		errCh <- nil
	}()

	testutils.AwaitState(ctx, t, conn, connectivity.Connecting)

	go func() {
		// Start a second RPC at this point, which should be queued as well.
		// This will fail if the priority policy fails to update the picker to
		// point to the LOGICAL_DNS child; if it leaves it pointing to the EDS
		// priority 1, then the RPC will fail, because all subchannels are in
		// transient failure.
		//
		// Note that sending only the first RPC does not catch this case,
		// because if the priority policy fails to update the picker, then the
		// pick for the first RPC will not be retried.
		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
			errCh <- fmt.Errorf("second UnaryCall() failed: %v", err)
			return
		}
		errCh <- nil
	}()

	// Wait for a connection attempt to backends[0].
	if !hold.Wait(ctx) {
		t.Fatalf("Timeout while waiting for a connection attempt to %s", backends[0])
	}
	// Allow the connection attempts to complete.
	hold.Resume()

	// RPCs should complete successfully. Collect both goroutines' results.
	for range []int{0, 1} {
		select {
		case err := <-errCh:
			if err != nil {
				t.Errorf("Expected 2 rpc to succeed, but at least one failed: %v", err)
			}
		case <-ctx.Done():
			t.Fatalf("Timed out waiting for RPCs to complete")
		}
	}
}
   635  
   636  // endpointResource creates a ClusterLoadAssignment containing a single locality
   637  // with the given addresses.
   638  func endpointResource(t *testing.T, clusterName string, addrs []string) *v3endpointpb.ClusterLoadAssignment {
   639  	t.Helper()
   640  	backendAddrs := [][]string{}
   641  	for _, addr := range addrs {
   642  		backendAddrs = append(backendAddrs, []string{addr})
   643  	}
   644  	return endpointResourceForBackendsWithMultipleAddrs(t, clusterName, backendAddrs)
   645  }
   646  
   647  // endpointResourceForBackendsWithMultipleAddrs creates a ClusterLoadAssignment
   648  // containing a single locality with the given addresses.
   649  func endpointResourceForBackendsWithMultipleAddrs(t *testing.T, clusterName string, addrs [][]string) *v3endpointpb.ClusterLoadAssignment {
   650  	t.Helper()
   651  
   652  	// We must set the host name socket address in EDS, as the ring hash policy
   653  	// uses it to construct the ring.
   654  	host, _, err := net.SplitHostPort(addrs[0][0])
   655  	if err != nil {
   656  		t.Fatalf("Failed to split host and port from stubserver: %v", err)
   657  	}
   658  
   659  	return e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
   660  		ClusterName: clusterName,
   661  		Host:        host,
   662  		Localities: []e2e.LocalityOptions{{
   663  			Backends: backendOptionsForEndpointsWithMultipleAddrs(t, addrs),
   664  			Weight:   1,
   665  		}},
   666  	})
   667  }
   668  
// Tests that ring hash policy that hashes using channel id ensures all RPCs to
// go 1 particular backend.
func (s) TestRingHash_ChannelIdHashing(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 4))

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	// Hash on the channel ID, so every RPC from this one channel maps to the
	// same point on the ring.
	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	const numRPCs = 100
	received := checkRPCSendOK(ctx, t, client, numRPCs)
	// All RPCs must land on exactly one backend, and that backend must have
	// received every one of them.
	if len(received) != 1 {
		t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1)
	}
	var got int
	for _, got = range received {
	}
	if got != numRPCs {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs)
	}
}
   712  
   713  // headerHashRoute creates a RouteConfiguration with a hash policy that uses the
   714  // provided header.
   715  func headerHashRoute(routeName, virtualHostName, clusterName, header string) *v3routepb.RouteConfiguration {
   716  	route := e2e.DefaultRouteConfig(routeName, virtualHostName, clusterName)
   717  	hashPolicy := v3routepb.RouteAction_HashPolicy{
   718  		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
   719  			Header: &v3routepb.RouteAction_HashPolicy_Header{
   720  				HeaderName: header,
   721  			},
   722  		},
   723  	}
   724  	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
   725  	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy}
   726  	return route
   727  }
   728  
   729  // Tests that ring hash policy that hashes using a header value can send RPCs
   730  // to specific backends based on their hash.
   731  func (s) TestRingHash_HeaderHashing(t *testing.T) {
   732  	backends := backendAddrs(startTestServiceBackends(t, 4))
   733  
   734  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
   735  
   736  	const clusterName = "cluster"
   737  	endpoints := endpointResource(t, clusterName, backends)
   738  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
   739  		ClusterName: clusterName,
   740  		ServiceName: clusterName,
   741  		Policy:      e2e.LoadBalancingPolicyRingHash,
   742  	})
   743  	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
   744  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
   745  
   746  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   747  	defer cancel()
   748  
   749  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
   750  		t.Fatalf("Failed to update xDS resources: %v", err)
   751  	}
   752  
   753  	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
   754  	if err != nil {
   755  		t.Fatalf("Failed to create client: %s", err)
   756  	}
   757  	defer conn.Close()
   758  	client := testgrpc.NewTestServiceClient(conn)
   759  
   760  	// Note each type of RPC contains a header value that will always be hashed
   761  	// to a specific backend as the header value matches the value used to
   762  	// create the entry in the ring.
   763  	for _, backend := range backends {
   764  		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0"))
   765  		numRPCs := 10
   766  		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
   767  		if reqPerBackend[backend] != numRPCs {
   768  			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
   769  		}
   770  	}
   771  }
   772  
// Tests that ring hash policy that hashes using a header value and regex
// rewrite to aggregate RPCs to 1 backend.
func (s) TestRingHash_HeaderHashingWithRegexRewrite(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 4))

	clusterName := "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	// Add a regex rewrite to the header hash policy: every run of digits in
	// the "address_hash" header value is replaced with "foo" before hashing.
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy[0].GetHeader().RegexRewrite = &v3matcherpb.RegexMatchAndSubstitute{
		Pattern: &v3matcherpb.RegexMatcher{
			EngineType: &v3matcherpb.RegexMatcher_GoogleRe2{},
			Regex:      "[0-9]+",
		},
		Substitution: "foo",
	}
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Note each type of RPC contains a header value that would always be hashed
	// to a specific backend as the header value matches the value used to
	// create the entry in the ring. However, the regex rewrites all numbers to
	// "foo", and header values only differ by numbers, so they all end up
	// hashing to the same value.
	gotPerBackend := make(map[string]int)
	for _, backend := range backends {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0"))
		res := checkRPCSendOK(ctx, t, client, 100)
		for addr, count := range res {
			gotPerBackend[addr] += count
		}
	}
	// All 4x100 RPCs must have landed on one single backend.
	if want := 1; len(gotPerBackend) != want {
		t.Errorf("Got RPCs routed to %v backends, want %v", len(gotPerBackend), want)
	}
	// Pull out the count for the sole entry in the map.
	var got int
	for _, got = range gotPerBackend {
	}
	if want := 400; got != want {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, want)
	}
}
   834  
   835  // computeIdealNumberOfRPCs computes the ideal number of RPCs to send so that
   836  // we can observe an event happening with probability p, and the result will
   837  // have value p with the given error tolerance.
   838  //
   839  // See https://github.com/grpc/grpc/blob/4f6e13bdda9e8c26d6027af97db4b368ca2b3069/test/cpp/end2end/xds/xds_end2end_test_lib.h#L941
   840  // for an explanation of the formula.
   841  func computeIdealNumberOfRPCs(t *testing.T, p, errorTolerance float64) int {
   842  	if p < 0 || p > 1 {
   843  		t.Fatal("p must be in (0, 1)")
   844  	}
   845  	numRPCs := math.Ceil(p * (1 - p) * 5. * 5. / errorTolerance / errorTolerance)
   846  	return int(numRPCs + 1000.) // add 1k as a buffer to avoid flakiness.
   847  }
   848  
   849  // setRingHashLBPolicyWithHighMinRingSize sets the ring hash policy with a high
   850  // minimum ring size to ensure that the ring is large enough to distribute
   851  // requests more uniformly across endpoints when a random hash is used.
   852  func setRingHashLBPolicyWithHighMinRingSize(t *testing.T, cluster *v3clusterpb.Cluster) {
   853  	testutils.SetEnvConfig(t, &envconfig.RingHashCap, minRingSize)
   854  
   855  	// Increasing min ring size for random distribution.
   856  	config := testutils.MarshalAny(t, &v3ringhashpb.RingHash{
   857  		HashFunction:    v3ringhashpb.RingHash_XX_HASH,
   858  		MinimumRingSize: &wrapperspb.UInt64Value{Value: minRingSize},
   859  	})
   860  	cluster.LoadBalancingPolicy = &v3clusterpb.LoadBalancingPolicy{
   861  		Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{{
   862  			TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
   863  				Name:        "envoy.load_balancing_policies.ring_hash",
   864  				TypedConfig: config,
   865  			},
   866  		}},
   867  	}
   868  }
   869  
// Tests that ring hash policy that hashes using a random value.
//
// With no hash policy on the route, RPCs should spread roughly evenly (50/50)
// across the two backends.
func (s) TestRingHash_NoHashPolicy(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))
	// Enough RPCs to observe a 50% split within errorTolerance.
	numRPCs := computeIdealNumberOfRPCs(t, .5, errorTolerance)

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	// Large minimum ring size gives a near-uniform spread for random hashes.
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	// No hash policy is set on the route.
	route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Send a large number of RPCs and check that they are distributed randomly.
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	for _, backend := range backends {
		got := float64(gotPerBackend[backend]) / float64(numRPCs)
		want := .5
		if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
			t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, want, errorTolerance)
		}
	}
}
   910  
// Tests that we observe endpoint weights.
//
// Three backends are weighted 1:1:2, so with random hashing the third backend
// should receive about half of the requests and the other two a quarter each.
func (s) TestRingHash_EndpointWeights(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 3))

	const clusterName = "cluster"
	// Third backend gets twice the weight of the other two.
	backendOpts := []e2e.BackendOptions{
		{Ports: []uint32{testutils.ParsePort(t, backends[0])}},
		{Ports: []uint32{testutils.ParsePort(t, backends[1])}},
		{Ports: []uint32{testutils.ParsePort(t, backends[2])}, Weight: 2},
	}

	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOpts,
			Weight:   1,
		}},
	})
	// Set the 1:1:2 load balancing weights explicitly on the EDS resource's
	// endpoints as well.
	endpoints.Endpoints[0].LbEndpoints[0].LoadBalancingWeight = wrapperspb.UInt32(uint32(1))
	endpoints.Endpoints[0].LbEndpoints[1].LoadBalancingWeight = wrapperspb.UInt32(uint32(1))
	endpoints.Endpoints[0].LbEndpoints[2].LoadBalancingWeight = wrapperspb.UInt32(uint32(2))
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	// Increasing min ring size for random distribution.
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	// No hash policy on the route: a random hash is used per RPC.
	route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Send a large number of RPCs and check that they are distributed randomly.
	numRPCs := computeIdealNumberOfRPCs(t, .25, errorTolerance)
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)

	// Expected fractions: backends[0] and backends[1] get 1/4 each,
	// backends[2] (weight 2) gets 1/2.
	got := float64(gotPerBackend[backends[0]]) / float64(numRPCs)
	want := .25
	if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[0], got, want, errorTolerance)
	}
	got = float64(gotPerBackend[backends[1]]) / float64(numRPCs)
	if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[1], got, want, errorTolerance)
	}
	got = float64(gotPerBackend[backends[2]]) / float64(numRPCs)
	want = .50
	if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[2], got, want, errorTolerance)
	}
}
   975  
// Tests that ring hash policy evaluation will continue past the terminal hash
// policy if no results are produced yet.
func (s) TestRingHash_ContinuesPastTerminalPolicyThatDoesNotProduceResult(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})

	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)

	// Even though this hash policy is terminal, since it produces no result, we
	// continue past it to find a policy that produces results.
	hashPolicy := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
			Header: &v3routepb.RouteAction_HashPolicy_Header{
				HeaderName: "header_not_present",
			},
		},
		Terminal: true,
	}
	// Second, non-terminal policy hashes on a header that the RPCs below set.
	hashPolicy2 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
			Header: &v3routepb.RouteAction_HashPolicy_Header{
				HeaderName: "address_hash",
			},
		},
	}
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy, &hashPolicy2}

	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// - The first hash policy does not match because the header is not present.
	//   If this hash policy was applied, it would spread the load across
	//   backend 0 and 1, since a random hash would be used.
	// - In the second hash policy, each type of RPC contains a header
	//   value that always hashes to backend 0, as the header value
	//   matches the value used to create the entry in the ring.
	// We verify that the second hash policy is used by checking that all RPCs
	// are being routed to backend 0.
	wantBackend := backends[0]
	ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", wantBackend+"_0"))
	const numRPCs = 100
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	if got := gotPerBackend[wantBackend]; got != numRPCs {
		t.Errorf("Got %v RPCs routed to backend %v, want %v", got, wantBackend, numRPCs)
	}
}
  1044  
// Tests that a random hash is used when header hashing policy specified a
// header field that the RPC did not have.
func (s) TestRingHash_HashOnHeaderThatIsNotPresent(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))
	wantFractionPerBackend := .5
	// Enough RPCs to observe an even split within errorTolerance.
	numRPCs := computeIdealNumberOfRPCs(t, wantFractionPerBackend, errorTolerance)

	const clusterName = "cluster"
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOptions(t, backends),
			Weight:   1,
		}},
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	// Large minimum ring size gives a near-uniform spread for random hashes.
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	// Hash on a header that the RPCs below never carry.
	route := headerHashRoute("new_route", virtualHostName, clusterName, "header_not_present")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// The first hash policy does not apply because the header is not present in
	// the RPCs that we are about to send. As a result, a random hash should be
	// used instead, resulting in a random request distribution.
	// We verify this by checking that the RPCs are distributed randomly.
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	for _, backend := range backends {
		got := float64(gotPerBackend[backend]) / float64(numRPCs)
		if !cmp.Equal(got, wantFractionPerBackend, cmpopts.EquateApprox(0, errorTolerance)) {
			t.Errorf("fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, wantFractionPerBackend, errorTolerance)
		}
	}
}
  1095  
// Tests that a random hash is used when only unsupported hash policies are
// configured.
func (s) TestRingHash_UnsupportedHashPolicyDefaultToRandomHashing(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))
	wantFractionPerBackend := .5
	// Enough RPCs to observe an even split within errorTolerance.
	numRPCs := computeIdealNumberOfRPCs(t, wantFractionPerBackend, errorTolerance)

	const clusterName = "cluster"
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOptions(t, backends),
			Weight:   1,
		}},
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	// Large minimum ring size gives a near-uniform spread for random hashes.
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)
	// Three hash policy types that gRPC does not support: cookie, connection
	// properties (source IP), and query parameter.
	unsupportedHashPolicy1 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Cookie_{
			Cookie: &v3routepb.RouteAction_HashPolicy_Cookie{Name: "cookie"},
		},
	}
	unsupportedHashPolicy2 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_ConnectionProperties_{
			ConnectionProperties: &v3routepb.RouteAction_HashPolicy_ConnectionProperties{SourceIp: true},
		},
	}
	unsupportedHashPolicy3 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_QueryParameter_{
			QueryParameter: &v3routepb.RouteAction_HashPolicy_QueryParameter{Name: "query_parameter"},
		},
	}
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&unsupportedHashPolicy1, &unsupportedHashPolicy2, &unsupportedHashPolicy3}
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Since none of the hash policy are supported, a random hash should be
	// generated for every request.
	// We verify this by checking that the RPCs are distributed randomly.
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	for _, backend := range backends {
		got := float64(gotPerBackend[backend]) / float64(numRPCs)
		if !cmp.Equal(got, wantFractionPerBackend, cmpopts.EquateApprox(0, errorTolerance)) {
			t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, wantFractionPerBackend, errorTolerance)
		}
	}
}
  1162  
// Tests that unsupported hash policy types are all ignored before a supported
// hash policy.
func (s) TestRingHash_UnsupportedHashPolicyUntilChannelIdHashing(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))

	const clusterName = "cluster"
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOptions(t, backends),
			Weight:   1,
		}},
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)
	// Three hash policy types that gRPC does not support: cookie, connection
	// properties (source IP), and query parameter.
	unsupportedHashPolicy1 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Cookie_{
			Cookie: &v3routepb.RouteAction_HashPolicy_Cookie{Name: "cookie"},
		},
	}
	unsupportedHashPolicy2 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_ConnectionProperties_{
			ConnectionProperties: &v3routepb.RouteAction_HashPolicy_ConnectionProperties{SourceIp: true},
		},
	}
	unsupportedHashPolicy3 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_QueryParameter_{
			QueryParameter: &v3routepb.RouteAction_HashPolicy_QueryParameter{Name: "query_parameter"},
		},
	}
	// A supported policy: filter state keyed on the channel ID yields the
	// same hash for every RPC on a given channel.
	channelIDhashPolicy := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_FilterState_{
			FilterState: &v3routepb.RouteAction_HashPolicy_FilterState{
				Key: "io.grpc.channel_id",
			},
		},
	}
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&unsupportedHashPolicy1, &unsupportedHashPolicy2, &unsupportedHashPolicy3, &channelIDhashPolicy}
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Since only unsupported policies are present except for the last one
	// which is using the channel ID hashing policy, all requests should be
	// routed to the same backend.
	const numRPCs = 100
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	if len(gotPerBackend) != 1 {
		t.Errorf("Got RPCs routed to %v backends, want 1", len(gotPerBackend))
	}
	// Pull out the count for the sole entry in the map.
	var got int
	for _, got = range gotPerBackend {
	}
	if got != numRPCs {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs)
	}
}
  1238  
  1239  // Tests that ring hash policy that hashes using a random value can spread RPCs
  1240  // across all the backends according to locality weight.
  1241  func (s) TestRingHash_RandomHashingDistributionAccordingToLocalityAndEndpointWeight(t *testing.T) {
  1242  	backends := backendAddrs(startTestServiceBackends(t, 2))
  1243  
  1244  	const clusterName = "cluster"
  1245  	const locality1Weight = uint32(1)
  1246  	const endpoint1Weight = uint32(1)
  1247  	const locality2Weight = uint32(2)
  1248  	const endpoint2Weight = uint32(2)
  1249  	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
  1250  		ClusterName: clusterName,
  1251  		Localities: []e2e.LocalityOptions{
  1252  			{
  1253  				Backends: []e2e.BackendOptions{{
  1254  					Ports:  []uint32{testutils.ParsePort(t, backends[0])},
  1255  					Weight: endpoint1Weight,
  1256  				}},
  1257  				Weight: locality1Weight,
  1258  			},
  1259  			{
  1260  				Backends: []e2e.BackendOptions{{
  1261  					Ports:  []uint32{testutils.ParsePort(t, backends[1])},
  1262  					Weight: endpoint2Weight,
  1263  				}},
  1264  				Weight: locality2Weight,
  1265  			},
  1266  		},
  1267  	})
  1268  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  1269  		ClusterName: clusterName,
  1270  		ServiceName: clusterName,
  1271  	})
  1272  	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
  1273  	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)
  1274  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  1275  
  1276  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1277  	defer cancel()
  1278  
  1279  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  1280  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  1281  		t.Fatalf("Failed to update xDS resources: %v", err)
  1282  	}
  1283  
  1284  	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
  1285  	if err != nil {
  1286  		t.Fatalf("Failed to create client: %s", err)
  1287  	}
  1288  	defer conn.Close()
  1289  	client := testgrpc.NewTestServiceClient(conn)
  1290  
  1291  	const weight1 = endpoint1Weight * locality1Weight
  1292  	const weight2 = endpoint2Weight * locality2Weight
  1293  	const wantRPCs1 = float64(weight1) / float64(weight1+weight2)
  1294  	const wantRPCs2 = float64(weight2) / float64(weight1+weight2)
  1295  	numRPCs := computeIdealNumberOfRPCs(t, math.Min(wantRPCs1, wantRPCs2), errorTolerance)
  1296  
  1297  	// Send a large number of RPCs and check that they are distributed randomly.
  1298  	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
  1299  	got := float64(gotPerBackend[backends[0]]) / float64(numRPCs)
  1300  	if !cmp.Equal(got, wantRPCs1, cmpopts.EquateApprox(0, errorTolerance)) {
  1301  		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[2], got, wantRPCs1, errorTolerance)
  1302  	}
  1303  	got = float64(gotPerBackend[backends[1]]) / float64(numRPCs)
  1304  	if !cmp.Equal(got, wantRPCs2, cmpopts.EquateApprox(0, errorTolerance)) {
  1305  		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[2], got, wantRPCs2, errorTolerance)
  1306  	}
  1307  }
  1308  
// Tests that ring hash policy that hashes using a fixed string ensures all RPCs
// to go 1 particular backend; and that subsequent hashing policies are ignored
// due to the setting of terminal.
func (s) TestRingHash_FixedHashingTerminalPolicy(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))
	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})

	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)

	// Terminal policy hashing on the "fixed_string" header: when it produces
	// a result, evaluation stops here.
	hashPolicy := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
			Header: &v3routepb.RouteAction_HashPolicy_Header{
				HeaderName: "fixed_string",
			},
		},
		Terminal: true,
	}
	// Non-terminal policy on "random_string"; it should never be evaluated
	// because the terminal policy above always produces a result.
	hashPolicy2 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
			Header: &v3routepb.RouteAction_HashPolicy_Header{
				HeaderName: "random_string",
			},
		},
	}
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy, &hashPolicy2}

	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Check that despite the matching random string header, since the fixed
	// string hash policy is terminal, only the fixed string hash policy applies
	// and requests all get routed to the same host.
	gotPerBackend := make(map[string]int)
	const numRPCs = 100
	for i := 0; i < numRPCs; i++ {
		// "fixed_string" always hashes to backends[0]'s ring entry, while
		// "random_string" carries a fresh random value per RPC.
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
			"fixed_string", backends[0]+"_0",
			"random_string", fmt.Sprintf("%d", rand.Int())),
		)
		var remote peer.Peer
		_, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote))
		if err != nil {
			t.Fatalf("rpc EmptyCall() failed: %v", err)
		}
		gotPerBackend[remote.Addr.String()]++
	}

	if len(gotPerBackend) != 1 {
		t.Error("Got RPCs routed to multiple backends, want a single backend")
	}
	if got := gotPerBackend[backends[0]]; got != numRPCs {
		t.Errorf("Got %v RPCs routed to %v, want %v", got, backends[0], numRPCs)
	}
}
  1384  
  1385  // TestRingHash_IdleToReady tests that the channel will go from idle to ready
  1386  // via connecting; (though it is not possible to catch the connecting state
  1387  // before moving to ready via the public API).
  1388  // TODO: we should be able to catch all state transitions by using the internal.SubscribeToConnectivityStateChanges API.
  1389  func (s) TestRingHash_IdleToReady(t *testing.T) {
  1390  	backends := backendAddrs(startTestServiceBackends(t, 1))
  1391  
  1392  	const clusterName = "cluster"
  1393  	endpoints := endpointResource(t, clusterName, backends)
  1394  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  1395  		ClusterName: clusterName,
  1396  		ServiceName: clusterName,
  1397  		Policy:      e2e.LoadBalancingPolicyRingHash,
  1398  	})
  1399  	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
  1400  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  1401  
  1402  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1403  	defer cancel()
  1404  
  1405  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  1406  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  1407  		t.Fatalf("Failed to update xDS resources: %v", err)
  1408  	}
  1409  
  1410  	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
  1411  	if err != nil {
  1412  		t.Fatalf("Failed to create client: %s", err)
  1413  	}
  1414  	defer conn.Close()
  1415  	testutils.AwaitState(ctx, t, conn, connectivity.Idle)
  1416  
  1417  	client := testgrpc.NewTestServiceClient(conn)
  1418  	checkRPCSendOK(ctx, t, client, 1)
  1419  	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
  1420  }
  1421  
// Test that the channel will transition to READY once it starts
// connecting even if there are no RPCs being sent to the picker.
func (s) TestRingHash_ContinuesConnectingWithoutPicks(t *testing.T) {
	backend := stubserver.StartTestService(t, &stubserver.StubServer{
		// We expect the server EmptyCall not to be called here because the
		// aggregated channel state is never READY when the call is pending.
		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
			t.Errorf("EmptyCall() should not have been called")
			return &testpb.Empty{}, nil
		},
	})
	defer backend.Stop()

	// An address with no server listening on it; the RPC's hash header below
	// is crafted to map to this endpoint on the ring.
	unReachableServerAddr := makeUnreachableBackends(t, 1)[0]

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, []string{backend.Address, unReachableServerAddr})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	// The blocking dialer lets the test observe, and gate, connection
	// attempts to the reachable backend.
	dialer := testutils.NewBlockingDialer()
	dopts := []grpc.DialOption{
		grpc.WithResolvers(xdsResolver),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithContextDialer(dialer.DialContext),
	}
	conn, err := grpc.NewClient("xds:///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	hold := dialer.Hold(backend.Address)

	// Issue an RPC whose hash maps to the unreachable endpoint, which forces
	// the channel to start connecting. The RPC itself is canceled below,
	// before any connection can complete.
	rpcCtx, rpcCancel := context.WithCancel(ctx)
	go func() {
		rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", unReachableServerAddr+"_0"))
		_, err := client.EmptyCall(rpcCtx, &testpb.Empty{})
		if status.Code(err) != codes.Canceled {
			t.Errorf("Expected RPC to be canceled, got error: %v", err)
		}
	}()

	// Wait for the connection attempt to the real backend.
	if !hold.Wait(ctx) {
		t.Fatalf("Timeout waiting for connection attempt to backend %v.", backend.Address)
	}
	// Now cancel the RPC while we are still connecting.
	rpcCancel()

	// This allows the connection attempts to continue. The RPC was cancelled
	// before the backend was connected, but the backend is up. The conn
	// becomes Ready due to the connection attempt to the existing backend
	// succeeding, despite no new RPC being sent.
	hold.Resume()

	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
}
  1494  
  1495  // Tests that when the first pick is down leading to a transient failure, we
  1496  // will move on to the next ring hash entry.
  1497  func (s) TestRingHash_TransientFailureCheckNextOne(t *testing.T) {
  1498  	backends := backendAddrs(startTestServiceBackends(t, 1))
  1499  	unReachableBackends := makeUnreachableBackends(t, 1)
  1500  
  1501  	const clusterName = "cluster"
  1502  	endpoints := endpointResource(t, clusterName, append(unReachableBackends, backends...))
  1503  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  1504  		ClusterName: clusterName,
  1505  		ServiceName: clusterName,
  1506  		Policy:      e2e.LoadBalancingPolicyRingHash,
  1507  	})
  1508  	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
  1509  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  1510  
  1511  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1512  	defer cancel()
  1513  
  1514  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  1515  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  1516  		t.Fatalf("Failed to update xDS resources: %v", err)
  1517  	}
  1518  
  1519  	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
  1520  	if err != nil {
  1521  		t.Fatalf("Failed to create client: %s", err)
  1522  	}
  1523  	defer conn.Close()
  1524  	client := testgrpc.NewTestServiceClient(conn)
  1525  
  1526  	// Note each type of RPC contains a header value that will always be hashed
  1527  	// the value that was used to place the non-existent endpoint on the ring,
  1528  	// but it still gets routed to the backend that is up.
  1529  	ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", unReachableBackends[0]+"_0"))
  1530  	reqPerBackend := checkRPCSendOK(ctx, t, client, 1)
  1531  	var got string
  1532  	for got = range reqPerBackend {
  1533  	}
  1534  	if want := backends[0]; got != want {
  1535  		t.Errorf("Got RPC routed to addr %v, want %v", got, want)
  1536  	}
  1537  }
  1538  
  1539  // Tests for a bug seen in the wild in c-core, where ring_hash started with no
  1540  // endpoints and reported TRANSIENT_FAILURE, then got an update with endpoints
  1541  // and reported IDLE, but the picker update was squelched, so it failed to ever
  1542  // get reconnected.
  1543  func (s) TestRingHash_ReattemptWhenGoingFromTransientFailureToIdle(t *testing.T) {
  1544  	const clusterName = "cluster"
  1545  	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
  1546  		ClusterName: clusterName,
  1547  		Localities:  []e2e.LocalityOptions{{}}, // note the empty locality (no endpoint).
  1548  	})
  1549  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  1550  		ClusterName: clusterName,
  1551  		ServiceName: clusterName,
  1552  		Policy:      e2e.LoadBalancingPolicyRingHash,
  1553  	})
  1554  	route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName)
  1555  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  1556  
  1557  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1558  	defer cancel()
  1559  
  1560  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  1561  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  1562  		t.Fatalf("Failed to update xDS resources: %v", err)
  1563  	}
  1564  
  1565  	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
  1566  	if err != nil {
  1567  		t.Fatalf("Failed to create client: %s", err)
  1568  	}
  1569  	defer conn.Close()
  1570  	testutils.AwaitState(ctx, t, conn, connectivity.Idle)
  1571  
  1572  	// There are no endpoints in EDS. RPCs should fail and the channel should
  1573  	// transition to transient failure.
  1574  	client := testgrpc.NewTestServiceClient(conn)
  1575  	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
  1576  		t.Errorf("rpc EmptyCall() succeeded, want error")
  1577  	}
  1578  	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)
  1579  
  1580  	t.Log("Updating EDS with a new backend endpoint.")
  1581  	backends := backendAddrs(startTestServiceBackends(t, 1))
  1582  	endpoints = e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
  1583  		ClusterName: clusterName,
  1584  		Localities: []e2e.LocalityOptions{{
  1585  			Backends: backendOptions(t, backends),
  1586  			Weight:   1,
  1587  		}},
  1588  	})
  1589  	if err = xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  1590  		t.Fatalf("Failed to update xDS resources: %v", err)
  1591  	}
  1592  
  1593  	// A WaitForReady RPC should succeed, and the channel should report READY.
  1594  	if _, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
  1595  		t.Errorf("rpc EmptyCall() failed: %v", err)
  1596  	}
  1597  	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
  1598  }
  1599  
// Tests that when all backends are down and then up, we may pick a TF backend
// and we will then jump to ready backend.
func (s) TestRingHash_TransientFailureSkipToAvailableReady(t *testing.T) {
	emptyCallF := func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
		return &testpb.Empty{}, nil
	}
	// Two backends behind restartable listeners, so the test can take them
	// down and bring them back up without changing their addresses.
	lis, err := testutils.LocalTCPListener()
	if err != nil {
		t.Fatalf("Failed to create listener: %v", err)
	}
	restartableListener1 := testutils.NewRestartableListener(lis)
	restartableServer1 := stubserver.StartTestService(t, &stubserver.StubServer{
		Listener:   restartableListener1,
		EmptyCallF: emptyCallF,
	})
	defer restartableServer1.Stop()

	lis, err = testutils.LocalTCPListener()
	if err != nil {
		t.Fatalf("Failed to create listener: %v", err)
	}
	restartableListener2 := testutils.NewRestartableListener(lis)
	restartableServer2 := stubserver.StartTestService(t, &stubserver.StubServer{
		Listener:   restartableListener2,
		EmptyCallF: emptyCallF,
	})
	defer restartableServer2.Stop()

	// Extra addresses with no server behind them, used to fill the ring with
	// entries that will never become READY.
	unReachableBackends := makeUnreachableBackends(t, 2)

	const clusterName = "cluster"
	backends := []string{restartableServer1.Address, restartableServer2.Address}
	backends = append(backends, unReachableBackends...)
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}
	opts := []grpc.DialOption{
		grpc.WithConnectParams(grpc.ConnectParams{
			// Disable backoff to speed up the test.
			MinConnectTimeout: 100 * time.Millisecond,
		}),
		grpc.WithResolvers(xdsResolver),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	}
	conn, err := grpc.NewClient("xds:///test.server", opts...)
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	testutils.AwaitState(ctx, t, conn, connectivity.Idle)

	// Test starts with backends not listening.
	restartableListener1.Stop()
	restartableListener2.Stop()

	// Send a request with a hash that should go to restartableServer1.
	// Because it is not accepting connections, and no other backend is
	// listening, the RPC fails.
	ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", restartableServer1.Address+"_0"))
	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
		t.Fatalf("rpc EmptyCall() succeeded, want error")
	}

	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)

	// Bring up first backend. The channel should become Ready without any
	// picks, because in TF, we are always trying to connect to at least one
	// backend at all times.
	restartableListener1.Restart()
	testutils.AwaitState(ctx, t, conn, connectivity.Ready)

	// Bring down backend 1 and bring up backend 2.
	// Note the RPC contains a header value that will always be hashed to
	// backend 1. So by purposely bringing down backend 1 and bringing up
	// another backend, this will ensure Picker's first choice of backend 1
	// fails and it will go through the remaining subchannels to find one in
	// READY. Since the entries in the ring are pretty distributed and we have
	// unused ports to fill the ring, it is almost guaranteed that the Picker
	// will go through some non-READY entries and skip them as per design.
	t.Logf("bringing down backend 1")
	restartableListener1.Stop()

	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)
	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
		t.Fatalf("rpc EmptyCall() succeeded, want error")
	}

	t.Logf("bringing up backend 2")
	restartableListener2.Restart()
	testutils.AwaitState(ctx, t, conn, connectivity.Ready)

	// Keep sending RPCs until one lands on backend 2, proving the picker
	// skipped the non-READY ring entries; only a context deadline is fatal.
	wantPeerAddr := ""
	for wantPeerAddr != restartableServer2.Address {
		p := peer.Peer{}
		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p)); errors.Is(err, context.DeadlineExceeded) {
			t.Fatalf("Timed out waiting for rpc EmptyCall() to be routed to the expected backend")
		}
		wantPeerAddr = p.Addr.String()
	}
}
  1715  
  1716  // Tests that when all backends are down, we keep reattempting.
  1717  func (s) TestRingHash_ReattemptWhenAllEndpointsUnreachable(t *testing.T) {
  1718  	lis, err := testutils.LocalTCPListener()
  1719  	if err != nil {
  1720  		t.Fatalf("Failed to create listener: %v", err)
  1721  	}
  1722  	restartableListener := testutils.NewRestartableListener(lis)
  1723  	restartableServer := stubserver.StartTestService(t, &stubserver.StubServer{
  1724  		Listener: restartableListener,
  1725  		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
  1726  			return &testpb.Empty{}, nil
  1727  		},
  1728  	})
  1729  	defer restartableServer.Stop()
  1730  
  1731  	const clusterName = "cluster"
  1732  	endpoints := endpointResource(t, clusterName, []string{restartableServer.Address})
  1733  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  1734  		ClusterName: clusterName,
  1735  		ServiceName: clusterName,
  1736  		Policy:      e2e.LoadBalancingPolicyRingHash,
  1737  	})
  1738  	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
  1739  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  1740  
  1741  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1742  	defer cancel()
  1743  
  1744  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  1745  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  1746  		t.Fatalf("Failed to update xDS resources: %v", err)
  1747  	}
  1748  
  1749  	dopts := []grpc.DialOption{
  1750  		grpc.WithResolvers(xdsResolver),
  1751  		grpc.WithTransportCredentials(insecure.NewCredentials()),
  1752  		grpc.WithConnectParams(fastConnectParams),
  1753  	}
  1754  	conn, err := grpc.NewClient("xds:///test.server", dopts...)
  1755  	if err != nil {
  1756  		t.Fatalf("Failed to create client: %s", err)
  1757  	}
  1758  	defer conn.Close()
  1759  	client := testgrpc.NewTestServiceClient(conn)
  1760  
  1761  	testutils.AwaitState(ctx, t, conn, connectivity.Idle)
  1762  
  1763  	t.Log("Stopping the backend server")
  1764  	restartableListener.Stop()
  1765  
  1766  	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) != codes.Unavailable {
  1767  		t.Fatalf("rpc EmptyCall() succeeded, want Unavailable error")
  1768  	}
  1769  
  1770  	// Wait for channel to fail.
  1771  	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)
  1772  
  1773  	t.Log("Restarting the backend server")
  1774  	restartableListener.Restart()
  1775  
  1776  	// Wait for channel to become READY without any pending RPC.
  1777  	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
  1778  }
  1779  
// Tests that when a backend goes down, we will move on to the next subchannel
// (with a lower priority).  When the backend comes back up, traffic will move
// back.
func (s) TestRingHash_SwitchToLowerPriorityAndThenBack(t *testing.T) {
	lis, err := testutils.LocalTCPListener()
	if err != nil {
		t.Fatalf("Failed to create listener: %v", err)
	}
	// The priority-0 backend sits behind a restartable listener so the test
	// can take it down and bring it back up without changing its address.
	restartableListener := testutils.NewRestartableListener(lis)
	restartableServer := stubserver.StartTestService(t, &stubserver.StubServer{
		Listener: restartableListener,
		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
			return &testpb.Empty{}, nil
		},
	})
	defer restartableServer.Stop()

	// The priority-1 backend, which stays up for the whole test.
	otherBackend := backendAddrs(startTestServiceBackends(t, 1))[0]

	// We must set the host name socket address in EDS, as the ring hash policy
	// uses it to construct the ring.
	host, _, err := net.SplitHostPort(otherBackend)
	if err != nil {
		t.Fatalf("Failed to split host and port from stubserver: %v", err)
	}

	const clusterName = "cluster"
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Host:        host,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOptions(t, []string{restartableServer.Address}),
			Weight:   1,
		}, {
			Backends: backendOptions(t, []string{otherBackend}),
			Weight:   1,
			Priority: 1,
		}}})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	dopts := []grpc.DialOption{
		grpc.WithResolvers(xdsResolver),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithConnectParams(fastConnectParams),
	}
	conn, err := grpc.NewClient("xds:///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Note each type of RPC contains a header value that will always be hashed
	// to the value that was used to place the non-existent endpoint on the ring.
	// The first RPC must land on the priority-0 backend.
	ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", restartableServer.Address+"_0"))
	var got string
	for got = range checkRPCSendOK(ctx, t, client, 1) {
	}
	if want := restartableServer.Address; got != want {
		t.Fatalf("Got RPC routed to addr %v, want %v", got, want)
	}

	// Trigger failure with the existing backend, which should cause the
	// balancer to go in transient failure and the priority balancer to move
	// to the lower priority.
	restartableListener.Stop()

	for {
		p := peer.Peer{}
		_, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true), grpc.Peer(&p))

		// Ignore errors: we may need to attempt to send an RPC to detect the
		// failure (the next write on connection fails).
		if err == nil {
			if got, want := p.Addr.String(), otherBackend; got != want {
				t.Fatalf("Got RPC routed to addr %v, want %v", got, want)
			}
			break
		}
	}

	// Now we start the backend with the address hash that is used in the
	// metadata, so eventually RPCs should be routed to it, since it is in a
	// locality with higher priority. Only a context deadline is fatal here;
	// other transient errors are expected while the switch-back happens.
	peerAddr := ""
	restartableListener.Restart()
	for peerAddr != restartableServer.Address {
		p := peer.Peer{}
		_, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p))
		if errors.Is(err, context.DeadlineExceeded) {
			t.Fatalf("Timed out waiting for rpc EmptyCall() to be routed to the expected backend")
		}
		peerAddr = p.Addr.String()
	}
}
  1889  
  1890  // Tests that when we trigger internal connection attempts without picks, we
  1891  // keep retrying all the SubConns that have reported TF previously.
  1892  func (s) TestRingHash_ContinuesConnectingWithoutPicksToMultipleSubConnsConcurrently(t *testing.T) {
  1893  	const backendsCount = 4
  1894  	backends := backendAddrs(startTestServiceBackends(t, backendsCount))
  1895  
  1896  	const clusterName = "cluster"
  1897  
  1898  	endpoints := endpointResource(t, clusterName, backends)
  1899  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  1900  		ClusterName: clusterName,
  1901  		ServiceName: clusterName,
  1902  		Policy:      e2e.LoadBalancingPolicyRingHash,
  1903  	})
  1904  	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
  1905  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  1906  
  1907  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1908  	defer cancel()
  1909  
  1910  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  1911  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  1912  		t.Fatalf("Failed to update xDS resources: %v", err)
  1913  	}
  1914  
  1915  	dialer := testutils.NewBlockingDialer()
  1916  	dialOpts := []grpc.DialOption{
  1917  		grpc.WithResolvers(xdsResolver),
  1918  		grpc.WithTransportCredentials(insecure.NewCredentials()),
  1919  		grpc.WithContextDialer(dialer.DialContext),
  1920  		grpc.WithConnectParams(fastConnectParams),
  1921  	}
  1922  	conn, err := grpc.NewClient("xds:///test.server", dialOpts...)
  1923  	if err != nil {
  1924  		t.Fatalf("Failed to create client: %s", err)
  1925  	}
  1926  	defer conn.Close()
  1927  
  1928  	// Create holds for each backend address to delay a successful connection
  1929  	// until the end of the test.
  1930  	holds := make([]*testutils.Hold, backendsCount)
  1931  	for i := 0; i < len(backends); i++ {
  1932  		holds[i] = dialer.Hold(backends[i])
  1933  	}
  1934  
  1935  	client := testgrpc.NewTestServiceClient(conn)
  1936  
  1937  	rpcCtx, rpcCancel := context.WithCancel(ctx)
  1938  	errCh := make(chan error, 1)
  1939  	go func() {
  1940  		rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backends[0]+"_0"))
  1941  		_, err := client.EmptyCall(rpcCtx, &testpb.Empty{})
  1942  		if status.Code(err) == codes.Canceled {
  1943  			errCh <- nil
  1944  			return
  1945  		}
  1946  		errCh <- err
  1947  	}()
  1948  
  1949  	// Wait for the RPC to trigger a connection attempt to the first address,
  1950  	// then cancel the RPC.  No other connection attempts should be started yet.
  1951  	if !holds[0].Wait(ctx) {
  1952  		t.Fatalf("Timeout waiting for connection attempt to backend 0")
  1953  	}
  1954  	rpcCancel()
  1955  	if err := <-errCh; err != nil {
  1956  		t.Fatalf("Expected RPC to fail be canceled, got %v", err)
  1957  	}
  1958  
  1959  	// In every iteration of the following loop, we count the number of backends
  1960  	// that are dialed. After counting, we fail all the connection attempts.
  1961  	// This should cause the number of dialed backends to increase by 1 in every
  1962  	// iteration of the loop as ringhash tries to exit TRANSIENT_FAILURE.
  1963  	activeAddrs := map[string]bool{}
  1964  	for wantBackendCount := 1; wantBackendCount <= backendsCount; wantBackendCount++ {
  1965  		newAddrIdx := -1
  1966  		for ; ctx.Err() == nil; <-time.After(time.Millisecond) {
  1967  			for i, hold := range holds {
  1968  				if !hold.IsStarted() {
  1969  					continue
  1970  				}
  1971  				if _, ok := activeAddrs[backends[i]]; ok {
  1972  					continue
  1973  				}
  1974  				activeAddrs[backends[i]] = true
  1975  				newAddrIdx = i
  1976  			}
  1977  			if len(activeAddrs) > wantBackendCount {
  1978  				t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantBackendCount)
  1979  			}
  1980  			if len(activeAddrs) == wantBackendCount {
  1981  				break
  1982  			}
  1983  		}
  1984  
  1985  		// Wait for a short time and verify no more backends are contacted.
  1986  		<-time.After(defaultTestShortTimeout)
  1987  		for i, hold := range holds {
  1988  			if !hold.IsStarted() {
  1989  				continue
  1990  			}
  1991  			activeAddrs[backends[i]] = true
  1992  		}
  1993  		if len(activeAddrs) != wantBackendCount {
  1994  			t.Fatalf("Unexpected number of backends dialed: got %d, want %d", len(activeAddrs), wantBackendCount)
  1995  		}
  1996  
  1997  		// Create a new hold for the address dialed in this iteration and fail
  1998  		// the existing hold.
  1999  		hold := holds[newAddrIdx]
  2000  		holds[newAddrIdx] = dialer.Hold(backends[newAddrIdx])
  2001  		hold.Fail(errors.New("Test error"))
  2002  	}
  2003  
  2004  	// Allow the request to a backend to succeed.
  2005  	if !holds[1].Wait(ctx) {
  2006  		t.Fatalf("Context timed out waiting %q to be dialed again.", backends[1])
  2007  	}
  2008  	holds[1].Resume()
  2009  
  2010  	// Wait for channel to become READY without any pending RPC.
  2011  	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
  2012  }
  2013  
  2014  // Tests that first address of an endpoint is used to generate the ring. The
  2015  // test sends a request to a random endpoint. The test then reverses the
  2016  // addresses of every endpoint and verifies that an RPC with header pointing to
  2017  // the second address of the endpoint is sent to the initial address. The test
  2018  // then swaps the second and third address of the endpoint and verifies that an
  2019  // RPC with the header used earlier still reaches the same backend.
  2020  func (s) TestRingHash_ReorderAddressessWithinEndpoint(t *testing.T) {
  2021  	origDualstackEndpointsEnabled := envconfig.XDSDualstackEndpointsEnabled
  2022  	defer func() {
  2023  		envconfig.XDSDualstackEndpointsEnabled = origDualstackEndpointsEnabled
  2024  	}()
  2025  	envconfig.XDSDualstackEndpointsEnabled = true
  2026  	backends := backendAddrs(startTestServiceBackends(t, 6))
  2027  
  2028  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  2029  
  2030  	const clusterName = "cluster"
  2031  	addrGroups := [][]string{
  2032  		{backends[0], backends[1], backends[2]},
  2033  		{backends[3], backends[4], backends[5]},
  2034  	}
  2035  	endpoints := endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups)
  2036  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  2037  		ClusterName: clusterName,
  2038  		ServiceName: clusterName,
  2039  		Policy:      e2e.LoadBalancingPolicyRingHash,
  2040  	})
  2041  	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
  2042  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  2043  
  2044  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  2045  	defer cancel()
  2046  
  2047  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  2048  		t.Fatalf("Failed to update xDS resources: %v", err)
  2049  	}
  2050  
  2051  	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
  2052  	if err != nil {
  2053  		t.Fatalf("Failed to create client: %s", err)
  2054  	}
  2055  	defer conn.Close()
  2056  	client := testgrpc.NewTestServiceClient(conn)
  2057  
  2058  	rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
  2059  		"address_hash", fmt.Sprintf("%d", rand.Int()),
  2060  	))
  2061  	var remote peer.Peer
  2062  	if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
  2063  		t.Fatalf("rpc EmptyCall() failed: %v", err)
  2064  	}
  2065  
  2066  	initialFirstAddr := ""
  2067  	newFirstAddr := ""
  2068  	switch remote.Addr.String() {
  2069  	case addrGroups[0][0]:
  2070  		initialFirstAddr = addrGroups[0][0]
  2071  		newFirstAddr = addrGroups[0][2]
  2072  	case addrGroups[1][0]:
  2073  		initialFirstAddr = addrGroups[1][0]
  2074  		newFirstAddr = addrGroups[1][2]
  2075  	default:
  2076  		t.Fatalf("Request went to unexpected address: %q", remote.Addr)
  2077  	}
  2078  
  2079  	t.Log("Reversing addresses within each endpoint.")
  2080  	addrGroups1 := [][]string{
  2081  		{addrGroups[0][2], addrGroups[0][1], addrGroups[0][0]},
  2082  		{addrGroups[1][2], addrGroups[1][1], addrGroups[1][0]},
  2083  	}
  2084  	endpoints = endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups1)
  2085  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  2086  		t.Fatalf("Failed to update xDS resources: %v", err)
  2087  	}
  2088  
  2089  	// The first address of an endpoint is used to create the ring. This means
  2090  	// that requests should continue to go to the first address, but the hash
  2091  	// should be computed based on the last address in the original list.
  2092  	for ; ctx.Err() == nil; <-time.After(time.Millisecond) {
  2093  		rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
  2094  			"address_hash", newFirstAddr+"_0",
  2095  		))
  2096  		if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
  2097  			t.Fatalf("rpc EmptyCall() failed: %v", err)
  2098  		}
  2099  		if remote.Addr.String() == initialFirstAddr {
  2100  			break
  2101  		}
  2102  	}
  2103  
  2104  	if ctx.Err() != nil {
  2105  		t.Fatalf("Context timed out waiting for request to be sent to %q, last request went to %q", initialFirstAddr, remote.Addr)
  2106  	}
  2107  
  2108  	t.Log("Swapping the second and third addresses within each endpoint.")
  2109  	// This should not effect the ring, since only the first address is used
  2110  	// by the ring.
  2111  	addrGroups2 := [][]string{
  2112  		{addrGroups1[0][0], addrGroups[0][2], addrGroups[0][1]},
  2113  		{addrGroups1[1][0], addrGroups[1][2], addrGroups[1][1]},
  2114  	}
  2115  	endpoints = endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups2)
  2116  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  2117  		t.Fatalf("Failed to update xDS resources: %v", err)
  2118  	}
  2119  
  2120  	// Verify that requests with the hash of the last address in chosenAddrGroup
  2121  	// continue reaching the first address in chosenAddrGroup.
  2122  	shortCtx, cancel := context.WithTimeout(ctx, defaultTestShortTimeout)
  2123  	defer cancel()
  2124  	for ; shortCtx.Err() == nil; <-time.After(time.Millisecond) {
  2125  		rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
  2126  			"address_hash", newFirstAddr+"_0",
  2127  		))
  2128  		if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
  2129  			t.Fatalf("rpc EmptyCall() failed: %v", err)
  2130  		}
  2131  		if remote.Addr.String() == initialFirstAddr {
  2132  			continue
  2133  		}
  2134  		t.Fatalf("Request went to unexpected backend %q, want backend %q", remote.Addr, initialFirstAddr)
  2135  	}
  2136  }
  2137  
// Tests that requests are sent to the next address within the same endpoint
// after the first address becomes unreachable.
func (s) TestRingHash_FallBackWithinEndpoint(t *testing.T) {
	// Multi-address endpoints require the dualstack flag; restore it on exit.
	origDualstackEndpointsEnabled := envconfig.XDSDualstackEndpointsEnabled
	defer func() {
		envconfig.XDSDualstackEndpointsEnabled = origDualstackEndpointsEnabled
	}()
	envconfig.XDSDualstackEndpointsEnabled = true
	backends := startTestServiceBackends(t, 4)
	backendAddrs := backendAddrs(backends)

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)

	// Two endpoints with two addresses each: {0, 1} and {2, 3}.
	const clusterName = "cluster"
	endpoints := endpointResourceForBackendsWithMultipleAddrs(t, clusterName, [][]string{{backendAddrs[0], backendAddrs[1]}, {backendAddrs[2], backendAddrs[3]}})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	// Channel-ID hashing pins all RPCs on this channel to a single endpoint.
	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// All RPCs should land on exactly one backend.
	const numRPCs = 5
	received := checkRPCSendOK(ctx, t, client, numRPCs)
	if len(received) != 1 {
		t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1)
	}
	// Extract the lone address and its RPC count from the single-entry map.
	var got int
	var initialAddr string
	for initialAddr, got = range received {
	}
	if got != numRPCs {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs)
	}

	// Due to the channel ID hashing policy, the request could go to the first
	// address of either endpoint.
	var backendIdx int
	switch initialAddr {
	case backendAddrs[0]:
		backendIdx = 0
	case backendAddrs[2]:
		backendIdx = 2
	default:
		t.Fatalf("Request sent to unexpected backend: %q", initialAddr)
	}
	// The fallback target: the second address of the same endpoint.
	otherEndpointAddr := backendAddrs[backendIdx+1]

	// Shut down the previously used backend.
	backends[backendIdx].Stop()
	testutils.AwaitState(ctx, t, conn, connectivity.Idle)

	// Verify that the requests go to the remaining address in the same
	// endpoint.
	received = checkRPCSendOK(ctx, t, client, numRPCs)
	if len(received) != 1 {
		t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1)
	}
	// Extract the lone address and its RPC count, as above.
	var newAddr string
	for newAddr, got = range received {
	}
	if got != numRPCs {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs)
	}

	if newAddr != otherEndpointAddr {
		t.Errorf("Requests went to unexpected address, got=%q, want=%q", newAddr, otherEndpointAddr)
	}
}
  2222  
  2223  // Tests that ringhash is able to recover automatically in situations when a
  2224  // READY endpoint enters IDLE making the aggregated state TRANSIENT_FAILURE. The
  2225  // test creates 4 endpoints in the following connectivity states: [TF, TF,
  2226  // READY, IDLE]. The test fails the READY backend and verifies that the last
  2227  // IDLE endopint is dialed and the channel enters READY.
  2228  func (s) TestRingHash_RecoverWhenEndpointEntersIdle(t *testing.T) {
  2229  	const backendsCount = 4
  2230  	backends := startTestServiceBackends(t, backendsCount)
  2231  	backendAddrs := backendAddrs(backends)
  2232  
  2233  	const clusterName = "cluster"
  2234  
  2235  	endpoints := endpointResource(t, clusterName, backendAddrs)
  2236  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  2237  		ClusterName: clusterName,
  2238  		ServiceName: clusterName,
  2239  		Policy:      e2e.LoadBalancingPolicyRingHash,
  2240  	})
  2241  	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
  2242  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  2243  
  2244  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  2245  	defer cancel()
  2246  
  2247  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  2248  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  2249  		t.Fatalf("Failed to update xDS resources: %v", err)
  2250  	}
  2251  
  2252  	dialer := testutils.NewBlockingDialer()
  2253  	dialOpts := []grpc.DialOption{
  2254  		grpc.WithResolvers(xdsResolver),
  2255  		grpc.WithTransportCredentials(insecure.NewCredentials()),
  2256  		grpc.WithContextDialer(dialer.DialContext),
  2257  		grpc.WithConnectParams(fastConnectParams),
  2258  	}
  2259  	conn, err := grpc.NewClient("xds:///test.server", dialOpts...)
  2260  	if err != nil {
  2261  		t.Fatalf("Failed to create client: %s", err)
  2262  	}
  2263  	defer conn.Close()
  2264  
  2265  	// Create holds for each backend address to delay a successful connection
  2266  	// until the end of the test.
  2267  	holds := make([]*testutils.Hold, backendsCount)
  2268  	for i := 0; i < len(backendAddrs); i++ {
  2269  		holds[i] = dialer.Hold(backendAddrs[i])
  2270  	}
  2271  
  2272  	client := testgrpc.NewTestServiceClient(conn)
  2273  
  2274  	rpcCtx, rpcCancel := context.WithCancel(ctx)
  2275  	errCh := make(chan error, 1)
  2276  	go func() {
  2277  		rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backendAddrs[0]+"_0"))
  2278  		_, err := client.EmptyCall(rpcCtx, &testpb.Empty{})
  2279  		if status.Code(err) == codes.Canceled {
  2280  			errCh <- nil
  2281  			return
  2282  		}
  2283  		errCh <- err
  2284  	}()
  2285  
  2286  	// Wait for the RPC to trigger a connection attempt to the first address,
  2287  	// then cancel the RPC.  No other connection attempts should be started yet.
  2288  	if !holds[0].Wait(ctx) {
  2289  		t.Fatalf("Timeout waiting for connection attempt to backend 0")
  2290  	}
  2291  	rpcCancel()
  2292  	if err := <-errCh; err != nil {
  2293  		t.Fatalf("Expected RPC to fail be canceled, got %v", err)
  2294  	}
  2295  
  2296  	// The number of dialed backends increases by 1 in every iteration of the
  2297  	// loop as ringhash tries to exit TRANSIENT_FAILURE. Run the loop twice to
  2298  	// get two endpoints in TRANSIENT_FAILURE.
  2299  	activeAddrs := map[string]bool{}
  2300  	for wantFailingBackendCount := 1; wantFailingBackendCount <= 2; wantFailingBackendCount++ {
  2301  		newAddrIdx := -1
  2302  		for ; ctx.Err() == nil && len(activeAddrs) < wantFailingBackendCount; <-time.After(time.Millisecond) {
  2303  			for i, hold := range holds {
  2304  				if !hold.IsStarted() {
  2305  					continue
  2306  				}
  2307  				if _, ok := activeAddrs[backendAddrs[i]]; ok {
  2308  					continue
  2309  				}
  2310  				activeAddrs[backendAddrs[i]] = true
  2311  				newAddrIdx = i
  2312  			}
  2313  		}
  2314  
  2315  		if ctx.Err() != nil {
  2316  			t.Fatal("Context timed out waiting for new backneds to be dialed.")
  2317  		}
  2318  		if len(activeAddrs) > wantFailingBackendCount {
  2319  			t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantFailingBackendCount)
  2320  		}
  2321  
  2322  		// Create a new hold for the address dialed in this iteration and fail
  2323  		// the existing hold.
  2324  		hold := holds[newAddrIdx]
  2325  		holds[newAddrIdx] = dialer.Hold(backendAddrs[newAddrIdx])
  2326  		hold.Fail(errors.New("Test error"))
  2327  	}
  2328  
  2329  	// Current state of endpoints: [TF, TF, READY, IDLE].
  2330  	// Two endpoints failing should cause the channel to enter
  2331  	// TRANSIENT_FAILURE.
  2332  	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)
  2333  
  2334  	// Allow the request to the backend dialed next to succeed.
  2335  	readyBackendIdx := -1
  2336  	for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) {
  2337  		for i, addr := range backendAddrs {
  2338  			if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() {
  2339  				continue
  2340  			}
  2341  			readyBackendIdx = i
  2342  			activeAddrs[addr] = true
  2343  			holds[i].Resume()
  2344  			break
  2345  		}
  2346  	}
  2347  
  2348  	if ctx.Err() != nil {
  2349  		t.Fatal("Context timed out waiting for the next backend to be contacted.")
  2350  	}
  2351  
  2352  	// Wait for channel to become READY without any pending RPC.
  2353  	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
  2354  
  2355  	// Current state of endpoints: [TF, TF, READY, IDLE].
  2356  	// Stopping the READY backend should cause the channel to re-enter
  2357  	// TRANSIENT_FAILURE.
  2358  	backends[readyBackendIdx].Stop()
  2359  	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)
  2360  
  2361  	// To recover from TRANSIENT_FAILURE, ringhash should automatically try to
  2362  	// connect to the final endpoint.
  2363  	readyBackendIdx = -1
  2364  	for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) {
  2365  		for i, addr := range backendAddrs {
  2366  			if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() {
  2367  				continue
  2368  			}
  2369  			readyBackendIdx = i
  2370  			activeAddrs[addr] = true
  2371  			holds[i].Resume()
  2372  			break
  2373  		}
  2374  	}
  2375  
  2376  	if ctx.Err() != nil {
  2377  		t.Fatal("Context timed out waiting for next backend to be contacted.")
  2378  	}
  2379  
  2380  	// Wait for channel to become READY without any pending RPC.
  2381  	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
  2382  }
  2383  
  2384  // Tests that ringhash is able to recover automatically in situations when a
  2385  // READY endpoint is removed by the resolver making the aggregated state
  2386  // TRANSIENT_FAILURE. The test creates 4 endpoints in the following
  2387  // connectivity states: [TF, TF, READY, IDLE]. The test removes the
  2388  // READY endpoint and verifies that the last IDLE endopint is dialed and the
  2389  // channel enters READY.
  2390  func (s) TestRingHash_RecoverWhenResolverRemovesEndpoint(t *testing.T) {
  2391  	const backendsCount = 4
  2392  	backends := startTestServiceBackends(t, backendsCount)
  2393  	backendAddrs := backendAddrs(backends)
  2394  
  2395  	const clusterName = "cluster"
  2396  
  2397  	endpoints := endpointResource(t, clusterName, backendAddrs)
  2398  	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
  2399  		ClusterName: clusterName,
  2400  		ServiceName: clusterName,
  2401  		Policy:      e2e.LoadBalancingPolicyRingHash,
  2402  	})
  2403  	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
  2404  	listener := e2e.DefaultClientListener(virtualHostName, route.Name)
  2405  
  2406  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  2407  	defer cancel()
  2408  
  2409  	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
  2410  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
  2411  		t.Fatalf("Failed to update xDS resources: %v", err)
  2412  	}
  2413  
  2414  	dialer := testutils.NewBlockingDialer()
  2415  	dialOpts := []grpc.DialOption{
  2416  		grpc.WithResolvers(xdsResolver),
  2417  		grpc.WithTransportCredentials(insecure.NewCredentials()),
  2418  		grpc.WithContextDialer(dialer.DialContext),
  2419  		grpc.WithConnectParams(fastConnectParams),
  2420  	}
  2421  	conn, err := grpc.NewClient("xds:///test.server", dialOpts...)
  2422  	if err != nil {
  2423  		t.Fatalf("Failed to create client: %s", err)
  2424  	}
  2425  	defer conn.Close()
  2426  
  2427  	// Create holds for each backend address to delay a successful connection
  2428  	// until the end of the test.
  2429  	holds := make([]*testutils.Hold, backendsCount)
  2430  	for i := 0; i < len(backendAddrs); i++ {
  2431  		holds[i] = dialer.Hold(backendAddrs[i])
  2432  	}
  2433  
  2434  	client := testgrpc.NewTestServiceClient(conn)
  2435  
  2436  	rpcCtx, rpcCancel := context.WithCancel(ctx)
  2437  	errCh := make(chan error, 1)
  2438  	go func() {
  2439  		rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backendAddrs[0]+"_0"))
  2440  		_, err := client.EmptyCall(rpcCtx, &testpb.Empty{})
  2441  		if status.Code(err) == codes.Canceled {
  2442  			errCh <- nil
  2443  			return
  2444  		}
  2445  		errCh <- err
  2446  	}()
  2447  
  2448  	// Wait for the RPC to trigger a connection attempt to the first address,
  2449  	// then cancel the RPC.  No other connection attempts should be started yet.
  2450  	if !holds[0].Wait(ctx) {
  2451  		t.Fatalf("Timeout waiting for connection attempt to backend 0")
  2452  	}
  2453  	rpcCancel()
  2454  	if err := <-errCh; err != nil {
  2455  		t.Fatalf("Expected RPC to fail be canceled, got %v", err)
  2456  	}
  2457  
  2458  	// The number of dialed backends increases by 1 in every iteration of the
  2459  	// loop as ringhash tries to exit TRANSIENT_FAILURE. Run the loop twice to
  2460  	// get two endpoints in TRANSIENT_FAILURE.
  2461  	activeAddrs := map[string]bool{}
  2462  	for wantFailingBackendCount := 1; wantFailingBackendCount <= 2; wantFailingBackendCount++ {
  2463  		newAddrIdx := -1
  2464  		for ; ctx.Err() == nil && len(activeAddrs) < wantFailingBackendCount; <-time.After(time.Millisecond) {
  2465  			for i, hold := range holds {
  2466  				if !hold.IsStarted() {
  2467  					continue
  2468  				}
  2469  				if _, ok := activeAddrs[backendAddrs[i]]; ok {
  2470  					continue
  2471  				}
  2472  				activeAddrs[backendAddrs[i]] = true
  2473  				newAddrIdx = i
  2474  			}
  2475  		}
  2476  
  2477  		if ctx.Err() != nil {
  2478  			t.Fatal("Context timed out waiting for new backneds to be dialed.")
  2479  		}
  2480  		if len(activeAddrs) > wantFailingBackendCount {
  2481  			t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantFailingBackendCount)
  2482  		}
  2483  
  2484  		// Create a new hold for the address dialed in this iteration and fail
  2485  		// the existing hold.
  2486  		hold := holds[newAddrIdx]
  2487  		holds[newAddrIdx] = dialer.Hold(backendAddrs[newAddrIdx])
  2488  		hold.Fail(errors.New("Test error"))
  2489  	}
  2490  
  2491  	// Current state of endpoints: [TF, TF, READY, IDLE].
  2492  	// Two endpoints failing should cause the channel to enter
  2493  	// TRANSIENT_FAILURE.
  2494  	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)
  2495  
  2496  	// Allow the request to the backend dialed next to succeed.
  2497  	readyBackendIdx := -1
  2498  	for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) {
  2499  		for i, addr := range backendAddrs {
  2500  			if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() {
  2501  				continue
  2502  			}
  2503  			readyBackendIdx = i
  2504  			activeAddrs[addr] = true
  2505  			holds[i].Resume()
  2506  			break
  2507  		}
  2508  	}
  2509  
  2510  	if ctx.Err() != nil {
  2511  		t.Fatal("Context timed out waiting for the next backend to be contacted.")
  2512  	}
  2513  
  2514  	// Wait for channel to become READY without any pending RPC.
  2515  	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
  2516  
  2517  	// Current state of endpoints: [TF, TF, READY, IDLE].
  2518  	// Removing the READY backend should cause the channel to re-enter
  2519  	// TRANSIENT_FAILURE.
  2520  	updatedAddrs := append([]string{}, backendAddrs[:readyBackendIdx]...)
  2521  	updatedAddrs = append(updatedAddrs, backendAddrs[readyBackendIdx+1:]...)
  2522  	updatedEndpoints := endpointResource(t, clusterName, updatedAddrs)
  2523  	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, updatedEndpoints, cluster, route, listener)); err != nil {
  2524  		t.Fatalf("Failed to update xDS resources: %v", err)
  2525  	}
  2526  	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)
  2527  
  2528  	// To recover from TRANSIENT_FAILURE, ringhash should automatically try to
  2529  	// connect to the final endpoint.
  2530  	readyBackendIdx = -1
  2531  	for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) {
  2532  		for i, addr := range backendAddrs {
  2533  			if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() {
  2534  				continue
  2535  			}
  2536  			readyBackendIdx = i
  2537  			activeAddrs[addr] = true
  2538  			holds[i].Resume()
  2539  			break
  2540  		}
  2541  	}
  2542  
  2543  	if ctx.Err() != nil {
  2544  		t.Fatal("Context timed out waiting for next backend to be contacted.")
  2545  	}
  2546  
  2547  	// Wait for channel to become READY without any pending RPC.
  2548  	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
  2549  }
  2550  
// Tests that RPCs are routed according to endpoint hash key rather than
// endpoint first address if it is set in EDS endpoint metadata.
func (s) TestRingHash_EndpointHashKey(t *testing.T) {
	// Disable the backward-compat flag so the endpoint "hash_key" metadata is
	// honored when building the ring.
	testutils.SetEnvConfig(t, &envconfig.XDSEndpointHashKeyBackwardCompat, false)

	backends := backendAddrs(startTestServiceBackends(t, 4))

	// Give each endpoint a "hash_key" metadata entry equal to its index, so
	// ring placement is driven by the index rather than the address.
	const clusterName = "cluster"
	var backendOpts []e2e.BackendOptions
	for i, addr := range backends {
		var ports []uint32
		ports = append(ports, testutils.ParsePort(t, addr))
		backendOpts = append(backendOpts, e2e.BackendOptions{
			Ports:    ports,
			Metadata: map[string]any{"hash_key": strconv.Itoa(i)},
		})
	}
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Host:        "localhost",
		Localities: []e2e.LocalityOptions{{
			Backends: backendOpts,
			Weight:   1,
		}},
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	opts := []grpc.DialOption{
		grpc.WithResolvers(xdsResolver),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	}
	conn, err := grpc.NewClient("xds:///test.server", opts...)
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Make sure RPCs are routed to backends according to the endpoint metadata
	// rather than their address. Note each type of RPC contains a header value
	// that will always be hashed to a specific backend as the header value
	// matches the endpoint metadata hash key.
	for i, backend := range backends {
		// The "_0" suffix matches how ring entries are named for the first
		// (and only) virtual node of each hash key.
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", strconv.Itoa(i)+"_0"))
		numRPCs := 10
		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
		if reqPerBackend[backend] != numRPCs {
			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
		}
	}

	// Update the endpoints to swap the metadata hash key.
	for i := range backendOpts {
		backendOpts[i].Metadata = map[string]any{"hash_key": strconv.Itoa(len(backends) - i - 1)}
	}
	endpoints = e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Host:        "localhost",
		Localities: []e2e.LocalityOptions{{
			Backends: backendOpts,
			Weight:   1,
		}},
	})
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	// Wait for the resolver update to make it to the balancer. This RPC should
	// be routed to backend 3 with the reverse numbering of the hash_key
	// attribute delivered above. The loop terminates via t.Fatalf once ctx
	// expires and the RPC fails.
	for {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", "0_0"))
		var remote peer.Peer
		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
			t.Fatalf("Unexpected RPC error waiting for EDS update propagation: %s", err)
		}
		if remote.Addr.String() == backends[3] {
			break
		}
	}

	// Now that the balancer has the new endpoint attributes, make sure RPCs are
	// routed to backends according to the new endpoint metadata.
	for i, backend := range backends {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", strconv.Itoa(len(backends)-i-1)+"_0"))
		numRPCs := 10
		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
		if reqPerBackend[backend] != numRPCs {
			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
		}
	}
}
  2657  
// Tests that when a request hash key is set in the balancer configuration via
// service config, this header is used to route to a specific backend.
func (s) TestRingHash_RequestHashKey(t *testing.T) {
	// Enable support for the requestHashHeader field in service config.
	testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true)

	backends := backendAddrs(startTestServiceBackends(t, 4))

	// Create a clientConn with a manual resolver (which is used to push the
	// address of the test backend), and a default service config pointing to
	// the use of the ring_hash_experimental LB policy with an explicit hash
	// header.
	const ringHashServiceConfig = `{"loadBalancingConfig": [{"ring_hash_experimental":{"requestHashHeader":"address_hash"}}]}`
	r := manual.NewBuilderWithScheme("whatever")
	dopts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithResolvers(r),
		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
		grpc.WithConnectParams(fastConnectParams),
	}
	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to dial local test server: %v", err)
	}
	defer cc.Close()
	// One single-address endpoint per backend.
	var endpoints []resolver.Endpoint
	for _, backend := range backends {
		endpoints = append(endpoints, resolver.Endpoint{
			Addresses: []resolver.Address{{Addr: backend}},
		})
	}
	r.UpdateState(resolver.State{
		Endpoints: endpoints,
	})
	client := testgrpc.NewTestServiceClient(cc)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Note each type of RPC contains a header value that will always be hashed
	// to a specific backend as the header value matches the value used to
	// create the entry in the ring.
	for _, backend := range backends {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0"))
		numRPCs := 10
		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
		if reqPerBackend[backend] != numRPCs {
			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
		}
	}

	// Push a service config that switches the request hash header to
	// "other_header".
	const ringHashServiceConfigUpdate = `{"loadBalancingConfig": [{"ring_hash_experimental":{"requestHashHeader":"other_header"}}]}`
	r.UpdateState(resolver.State{
		Endpoints:     endpoints,
		ServiceConfig: (&testutils.ResolverClientConn{}).ParseServiceConfig(ringHashServiceConfigUpdate),
	})

	// Make sure that requests with the new hash are sent to the right backend.
	for _, backend := range backends {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("other_header", backend+"_0"))
		numRPCs := 10
		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
		if reqPerBackend[backend] != numRPCs {
			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
		}
	}
}
  2724  
  2725  func highRingSizeServiceConfig(t *testing.T) string {
  2726  	t.Helper()
  2727  	testutils.SetEnvConfig(t, &envconfig.RingHashCap, minRingSize)
  2728  
  2729  	return fmt.Sprintf(`{
  2730    "loadBalancingConfig": [{"ring_hash_experimental":{
  2731      "requestHashHeader": "address_hash",
  2732      "minRingSize": %d,
  2733      "maxRingSize": %d
  2734    }
  2735  }]}`, minRingSize, minRingSize)
  2736  }
  2737  
  2738  // Tests that when a request hash key is set in the balancer configuration via
  2739  // service config, and the header is not set in the outgoing request, then it
  2740  // is sent to a random backend.
  2741  func (s) TestRingHash_RequestHashKeyRandom(t *testing.T) {
  2742  	testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true)
  2743  
  2744  	backends := backendAddrs(startTestServiceBackends(t, 4))
  2745  
  2746  	// Create a clientConn with a manual resolver (which is used to push the
  2747  	// address of the test backend), and a default service config pointing to
  2748  	// the use of the ring_hash_experimental LB policy with an explicit hash
  2749  	// header.
  2750  	ringHashServiceConfig := highRingSizeServiceConfig(t)
  2751  	r := manual.NewBuilderWithScheme("whatever")
  2752  	dopts := []grpc.DialOption{
  2753  		grpc.WithTransportCredentials(insecure.NewCredentials()),
  2754  		grpc.WithResolvers(r),
  2755  		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
  2756  		grpc.WithConnectParams(fastConnectParams),
  2757  	}
  2758  	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
  2759  	if err != nil {
  2760  		t.Fatalf("Failed to dial local test server: %v", err)
  2761  	}
  2762  	defer cc.Close()
  2763  	var endpoints []resolver.Endpoint
  2764  	for _, backend := range backends {
  2765  		endpoints = append(endpoints, resolver.Endpoint{
  2766  			Addresses: []resolver.Address{{Addr: backend}},
  2767  		})
  2768  	}
  2769  	r.UpdateState(resolver.State{
  2770  		Endpoints: endpoints,
  2771  	})
  2772  	client := testgrpc.NewTestServiceClient(cc)
  2773  
  2774  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  2775  	defer cancel()
  2776  
  2777  	// Due to the way that ring hash lazily establishes connections when using a
  2778  	// random hash, request distribution is skewed towards the order in which we
  2779  	// connected. The test send RPCs until we are connected to all backends, so
  2780  	// we can later assert that the distribution is uniform.
  2781  	seen := make(map[string]bool)
  2782  	for len(seen) != 4 {
  2783  		var remote peer.Peer
  2784  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
  2785  			t.Fatalf("rpc EmptyCall() failed: %v", err)
  2786  		}
  2787  		seen[remote.String()] = true
  2788  	}
  2789  
  2790  	// Make sure that requests with the old hash are sent to random backends.
  2791  	const want = 1.0 / 4
  2792  	numRPCs := computeIdealNumberOfRPCs(t, want, errorTolerance)
  2793  	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
  2794  	for _, backend := range backends {
  2795  		got := float64(gotPerBackend[backend]) / float64(numRPCs)
  2796  		if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
  2797  			t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, want, errorTolerance)
  2798  		}
  2799  	}
  2800  }
  2801  
// Tests that when a request hash key is set in the balancer configuration via
// service config, and the header is not set in the outgoing request (random
// behavior), then each RPC wakes up at most one SubChannel, and, if there are
// SubChannels in Ready state, RPCs are routed to them.
func (s) TestRingHash_RequestHashKeyConnecting(t *testing.T) {
	testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true)

	// Use a large number of backends so that the chance of a random hash
	// repeatedly landing on the same ring entry is low.
	backends := backendAddrs(startTestServiceBackends(t, 20))

	// Create a clientConn with a manual resolver (which is used to push the
	// address of the test backend), and a default service config pointing to
	// the use of the ring_hash_experimental LB policy with an explicit hash
	// header. Use a blocking dialer to control connection attempts.
	const ringHashServiceConfig = `{"loadBalancingConfig": [
	  {"ring_hash_experimental":{"requestHashHeader":"address_hash"}}
	]}`
	r := manual.NewBuilderWithScheme("whatever")
	blockingDialer := testutils.NewBlockingDialer()
	dopts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithResolvers(r),
		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
		grpc.WithConnectParams(fastConnectParams),
		grpc.WithContextDialer(blockingDialer.DialContext),
	}
	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to dial local test server: %v", err)
	}
	defer cc.Close()
	var endpoints []resolver.Endpoint
	for _, backend := range backends {
		endpoints = append(endpoints, resolver.Endpoint{
			Addresses: []resolver.Address{{Addr: backend}},
		})
	}
	r.UpdateState(resolver.State{
		Endpoints: endpoints,
	})
	client := testgrpc.NewTestServiceClient(cc)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Intercept all connection attempts to the backends, so connections stay
	// in Connecting state until the corresponding hold is resumed.
	var holds []*testutils.Hold
	for i := 0; i < len(backends); i++ {
		holds = append(holds, blockingDialer.Hold(backends[i]))
	}

	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		// Send 1 RPC and make sure this triggers at most 1 connection attempt.
		// Use t.Errorf (not t.Fatalf) since Fatal must not be called from a
		// goroutine other than the one running the test.
		_, err := client.EmptyCall(ctx, &testpb.Empty{})
		if err != nil {
			t.Errorf("EmptyCall(): got %v, want success", err)
		}
		wg.Done()
	}()

	// Wait for at least one connection attempt. Poll all holds since we don't
	// know which backend the random hash picked.
	nConn := 0
	for nConn == 0 {
		if ctx.Err() != nil {
			t.Fatal("Test timed out waiting for a connection attempt")
		}
		time.Sleep(1 * time.Millisecond)
		for _, hold := range holds {
			if hold.IsStarted() {
				nConn++
			}
		}
	}
	if wantMaxConn := 1; nConn > wantMaxConn {
		t.Fatalf("Got %d connection attempts, want at most %d", nConn, wantMaxConn)
	}

	// Do a second RPC. Since there should already be a SubChannel in
	// Connecting state, this should not trigger a connection attempt.
	wg.Add(1)
	go func() {
		_, err := client.EmptyCall(ctx, &testpb.Empty{})
		if err != nil {
			t.Errorf("EmptyCall(): got %v, want success", err)
		}
		wg.Done()
	}()

	// Give extra time for more connections to be attempted.
	time.Sleep(defaultTestShortTimeout)

	// Re-count started connection attempts to verify the second RPC did not
	// add one, and release the single held connection so it can become Ready.
	var firstConnectedBackend string
	nConn = 0
	for i, hold := range holds {
		if hold.IsStarted() {
			// Unblock the connection attempt. The SubChannel (and hence the
			// channel) should transition to Ready. RPCs should succeed and
			// be routed to this backend. Nil out the hold so the loop below
			// skips it when looking for additional connection attempts.
			hold.Resume()
			holds[i] = nil
			firstConnectedBackend = backends[i]
			nConn++
		}
	}
	if wantMaxConn := 1; nConn > wantMaxConn {
		t.Fatalf("Got %d connection attempts, want at most %d", nConn, wantMaxConn)
	}
	testutils.AwaitState(ctx, t, cc, connectivity.Ready)
	wg.Wait() // Make sure we're done with the 2 previous RPCs.

	// Now send RPCs until we have at least one more connection attempt, that
	// is, the random hash did not land on the same backend on every pick (the
	// chances are low, but we don't want this to be flaky). Make sure no RPC
	// fails and that we route all of them to the only subchannel in ready
	// state.
	nConn = 0
	for nConn == 0 {
		p := peer.Peer{}
		_, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p))
		if status.Code(err) == codes.DeadlineExceeded {
			t.Fatal("EmptyCall(): test timed out while waiting for more connection attempts")
		}
		if err != nil {
			t.Fatalf("EmptyCall(): got %v, want success", err)
		}
		if p.Addr.String() != firstConnectedBackend {
			t.Errorf("RPC sent to backend %q, want %q", p.Addr.String(), firstConnectedBackend)
		}
		// The hold for the first connected backend was nilled out above; only
		// count attempts on other, still-held backends.
		for _, hold := range holds {
			if hold != nil && hold.IsStarted() {
				nConn++
			}
		}
	}
}