google.golang.org/grpc@v1.72.2/balancer/pickfirst/pickfirst_ext_test.go (about)

     1  /*
     2   *
     3   * Copyright 2022 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package pickfirst_test
    20  
    21  import (
    22  	"context"
    23  	"errors"
    24  	"fmt"
    25  	"strings"
    26  	"testing"
    27  	"time"
    28  
    29  	"google.golang.org/grpc"
    30  	"google.golang.org/grpc/backoff"
    31  	pfinternal "google.golang.org/grpc/balancer/pickfirst/internal"
    32  	"google.golang.org/grpc/codes"
    33  	"google.golang.org/grpc/connectivity"
    34  	"google.golang.org/grpc/credentials/insecure"
    35  	"google.golang.org/grpc/internal"
    36  	"google.golang.org/grpc/internal/channelz"
    37  	"google.golang.org/grpc/internal/grpctest"
    38  	"google.golang.org/grpc/internal/stubserver"
    39  	"google.golang.org/grpc/internal/testutils"
    40  	"google.golang.org/grpc/internal/testutils/pickfirst"
    41  	"google.golang.org/grpc/resolver"
    42  	"google.golang.org/grpc/resolver/manual"
    43  	"google.golang.org/grpc/serviceconfig"
    44  	"google.golang.org/grpc/status"
    45  
    46  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    47  	testpb "google.golang.org/grpc/interop/grpc_testing"
    48  )
    49  
    const (
	// defaultTestTimeout is the overall deadline applied to the contexts
	// created by the tests in this file.
	defaultTestTimeout = 10 * time.Second
	// defaultTestShortTimeout is a brief wait used when checking that an event
	// does NOT occur, and as the sleep interval of polling loops.
	defaultTestShortTimeout = 100 * time.Millisecond
	// pickFirstServiceConfig is a service config JSON which selects the
	// pick_first LB policy with an empty policy configuration.
	pickFirstServiceConfig = `{"loadBalancingConfig": [{"pick_first":{}}]}`
)
    58  
    59  func init() {
    60  	channelz.TurnOn()
    61  }
    62  
    63  type s struct {
    64  	grpctest.Tester
    65  }
    66  
    67  func Test(t *testing.T) {
    68  	grpctest.RunSubTests(t, s{})
    69  }
    70  
    71  // parseServiceConfig is a test helper which uses the manual resolver to parse
    72  // the given service config. It calls t.Fatal() if service config parsing fails.
    73  func parseServiceConfig(t *testing.T, r *manual.Resolver, sc string) *serviceconfig.ParseResult {
    74  	t.Helper()
    75  
    76  	scpr := r.CC().ParseServiceConfig(sc)
    77  	if scpr.Err != nil {
    78  		t.Fatalf("Failed to parse service config %q: %v", sc, scpr.Err)
    79  	}
    80  	return scpr
    81  }
    82  
    83  // setupPickFirst performs steps required for pick_first tests. It starts a
    84  // bunch of backends exporting the TestService, creates a ClientConn to them
    85  // with service config specifying the use of the pick_first LB policy.
    86  func setupPickFirst(t *testing.T, backendCount int, opts ...grpc.DialOption) (*grpc.ClientConn, *manual.Resolver, []*stubserver.StubServer) {
    87  	t.Helper()
    88  
    89  	r := manual.NewBuilderWithScheme("whatever")
    90  
    91  	backends := make([]*stubserver.StubServer, backendCount)
    92  	addrs := make([]resolver.Address, backendCount)
    93  	for i := 0; i < backendCount; i++ {
    94  		backend := &stubserver.StubServer{
    95  			EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
    96  				return &testpb.Empty{}, nil
    97  			},
    98  		}
    99  		if err := backend.StartServer(); err != nil {
   100  			t.Fatalf("Failed to start backend: %v", err)
   101  		}
   102  		t.Logf("Started TestService backend at: %q", backend.Address)
   103  		t.Cleanup(func() { backend.Stop() })
   104  
   105  		backends[i] = backend
   106  		addrs[i] = resolver.Address{Addr: backend.Address}
   107  	}
   108  
   109  	dopts := []grpc.DialOption{
   110  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   111  		grpc.WithResolvers(r),
   112  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   113  	}
   114  	dopts = append(dopts, opts...)
   115  	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
   116  	if err != nil {
   117  		t.Fatalf("grpc.NewClient() failed: %v", err)
   118  	}
   119  	t.Cleanup(func() { cc.Close() })
   120  
   121  	// At this point, the resolver has not returned any addresses to the channel.
   122  	// This RPC must block until the context expires.
   123  	sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
   124  	defer sCancel()
   125  	client := testgrpc.NewTestServiceClient(cc)
   126  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded {
   127  		t.Fatalf("EmptyCall() = %s, want %s", status.Code(err), codes.DeadlineExceeded)
   128  	}
   129  	return cc, r, backends
   130  }
   131  
   132  // stubBackendsToResolverAddrs converts from a set of stub server backends to
   133  // resolver addresses. Useful when pushing addresses to the manual resolver.
   134  func stubBackendsToResolverAddrs(backends []*stubserver.StubServer) []resolver.Address {
   135  	addrs := make([]resolver.Address, len(backends))
   136  	for i, backend := range backends {
   137  		addrs[i] = resolver.Address{Addr: backend.Address}
   138  	}
   139  	return addrs
   140  }
   141  
   142  // TestPickFirst_OneBackend tests the most basic scenario for pick_first. It
   143  // brings up a single backend and verifies that all RPCs get routed to it.
   144  func (s) TestPickFirst_OneBackend(t *testing.T) {
   145  	cc, r, backends := setupPickFirst(t, 1)
   146  
   147  	addrs := stubBackendsToResolverAddrs(backends)
   148  	r.UpdateState(resolver.State{Addresses: addrs})
   149  
   150  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   151  	defer cancel()
   152  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   153  		t.Fatal(err)
   154  	}
   155  }
   156  
   157  // TestPickFirst_MultipleBackends tests the scenario with multiple backends and
   158  // verifies that all RPCs get routed to the first one.
   159  func (s) TestPickFirst_MultipleBackends(t *testing.T) {
   160  	cc, r, backends := setupPickFirst(t, 2)
   161  
   162  	addrs := stubBackendsToResolverAddrs(backends)
   163  	r.UpdateState(resolver.State{Addresses: addrs})
   164  
   165  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   166  	defer cancel()
   167  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   168  		t.Fatal(err)
   169  	}
   170  }
   171  
   172  // TestPickFirst_OneServerDown tests the scenario where we have multiple
   173  // backends and pick_first is working as expected. Verifies that RPCs get routed
   174  // to the next backend in the list when the first one goes down.
   175  func (s) TestPickFirst_OneServerDown(t *testing.T) {
   176  	cc, r, backends := setupPickFirst(t, 2)
   177  
   178  	addrs := stubBackendsToResolverAddrs(backends)
   179  	r.UpdateState(resolver.State{Addresses: addrs})
   180  
   181  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   182  	defer cancel()
   183  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   184  		t.Fatal(err)
   185  	}
   186  
   187  	// Stop the backend which is currently being used. RPCs should get routed to
   188  	// the next backend in the list.
   189  	backends[0].Stop()
   190  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   191  		t.Fatal(err)
   192  	}
   193  }
   194  
   195  // TestPickFirst_AllServersDown tests the scenario where we have multiple
   196  // backends and pick_first is working as expected. When all backends go down,
   197  // the test verifies that RPCs fail with appropriate status code.
   198  func (s) TestPickFirst_AllServersDown(t *testing.T) {
   199  	cc, r, backends := setupPickFirst(t, 2)
   200  
   201  	addrs := stubBackendsToResolverAddrs(backends)
   202  	r.UpdateState(resolver.State{Addresses: addrs})
   203  
   204  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   205  	defer cancel()
   206  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   207  		t.Fatal(err)
   208  	}
   209  
   210  	for _, b := range backends {
   211  		b.Stop()
   212  	}
   213  
   214  	client := testgrpc.NewTestServiceClient(cc)
   215  	for {
   216  		if ctx.Err() != nil {
   217  			t.Fatalf("channel failed to move to Unavailable after all backends were stopped: %v", ctx.Err())
   218  		}
   219  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) == codes.Unavailable {
   220  			return
   221  		}
   222  		time.Sleep(defaultTestShortTimeout)
   223  	}
   224  }
   225  
   226  // TestPickFirst_AddressesRemoved tests the scenario where we have multiple
   227  // backends and pick_first is working as expected. It then verifies that when
   228  // addresses are removed by the name resolver, RPCs get routed appropriately.
   229  func (s) TestPickFirst_AddressesRemoved(t *testing.T) {
   230  	cc, r, backends := setupPickFirst(t, 3)
   231  
   232  	addrs := stubBackendsToResolverAddrs(backends)
   233  	r.UpdateState(resolver.State{Addresses: addrs})
   234  
   235  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   236  	defer cancel()
   237  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   238  		t.Fatal(err)
   239  	}
   240  
   241  	// Remove the first backend from the list of addresses originally pushed.
   242  	// RPCs should get routed to the first backend in the new list.
   243  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[1], addrs[2]}})
   244  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   245  		t.Fatal(err)
   246  	}
   247  
   248  	// Append the backend that we just removed to the end of the list.
   249  	// Nothing should change.
   250  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[1], addrs[2], addrs[0]}})
   251  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   252  		t.Fatal(err)
   253  	}
   254  
   255  	// Remove the first backend from the existing list of addresses.
   256  	// RPCs should get routed to the first backend in the new list.
   257  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[2], addrs[0]}})
   258  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[2]); err != nil {
   259  		t.Fatal(err)
   260  	}
   261  
   262  	// Remove the first backend from the existing list of addresses.
   263  	// RPCs should get routed to the first backend in the new list.
   264  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0]}})
   265  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   266  		t.Fatal(err)
   267  	}
   268  }
   269  
   270  // TestPickFirst_NewAddressWhileBlocking tests the case where pick_first is
   271  // configured on a channel, things are working as expected and then a resolver
   272  // updates removes all addresses. An RPC attempted at this point in time will be
   273  // blocked because there are no valid backends. This test verifies that when new
   274  // backends are added, the RPC is able to complete.
   275  func (s) TestPickFirst_NewAddressWhileBlocking(t *testing.T) {
   276  	cc, r, backends := setupPickFirst(t, 2)
   277  	addrs := stubBackendsToResolverAddrs(backends)
   278  	r.UpdateState(resolver.State{Addresses: addrs})
   279  
   280  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   281  	defer cancel()
   282  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   283  		t.Fatal(err)
   284  	}
   285  
   286  	// Send a resolver update with no addresses. This should push the channel into
   287  	// TransientFailure.
   288  	r.UpdateState(resolver.State{})
   289  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   290  
   291  	doneCh := make(chan struct{})
   292  	client := testgrpc.NewTestServiceClient(cc)
   293  	go func() {
   294  		// The channel is currently in TransientFailure and this RPC will block
   295  		// until the channel becomes Ready, which will only happen when we push a
   296  		// resolver update with a valid backend address.
   297  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   298  			t.Errorf("EmptyCall() = %v, want <nil>", err)
   299  		}
   300  		close(doneCh)
   301  	}()
   302  
   303  	// Make sure that there is one pending RPC on the ClientConn before attempting
   304  	// to push new addresses through the name resolver. If we don't do this, the
   305  	// resolver update can happen before the above goroutine gets to make the RPC.
   306  	for {
   307  		if err := ctx.Err(); err != nil {
   308  			t.Fatal(err)
   309  		}
   310  		tcs, _ := channelz.GetTopChannels(0, 0)
   311  		if len(tcs) != 1 {
   312  			t.Fatalf("there should only be one top channel, not %d", len(tcs))
   313  		}
   314  		started := tcs[0].ChannelMetrics.CallsStarted.Load()
   315  		completed := tcs[0].ChannelMetrics.CallsSucceeded.Load() + tcs[0].ChannelMetrics.CallsFailed.Load()
   316  		if (started - completed) == 1 {
   317  			break
   318  		}
   319  		time.Sleep(defaultTestShortTimeout)
   320  	}
   321  
   322  	// Send a resolver update with a valid backend to push the channel to Ready
   323  	// and unblock the above RPC.
   324  	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0].Address}}})
   325  
   326  	select {
   327  	case <-ctx.Done():
   328  		t.Fatal("Timeout when waiting for blocked RPC to complete")
   329  	case <-doneCh:
   330  	}
   331  }
   332  
   333  // TestPickFirst_StickyTransientFailure tests the case where pick_first is
   334  // configured on a channel, and the backend is configured to close incoming
   335  // connections as soon as they are accepted. The test verifies that the channel
   336  // enters TransientFailure and stays there. The test also verifies that the
   337  // pick_first LB policy is constantly trying to reconnect to the backend.
   338  func (s) TestPickFirst_StickyTransientFailure(t *testing.T) {
   339  	// Spin up a local server which closes the connection as soon as it receives
   340  	// one. It also sends a signal on a channel whenever it received a connection.
   341  	lis, err := testutils.LocalTCPListener()
   342  	if err != nil {
   343  		t.Fatalf("Failed to create listener: %v", err)
   344  	}
   345  	t.Cleanup(func() { lis.Close() })
   346  
   347  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   348  	defer cancel()
   349  	connCh := make(chan struct{}, 1)
   350  	go func() {
   351  		for {
   352  			conn, err := lis.Accept()
   353  			if err != nil {
   354  				return
   355  			}
   356  			select {
   357  			case connCh <- struct{}{}:
   358  				conn.Close()
   359  			case <-ctx.Done():
   360  				return
   361  			}
   362  		}
   363  	}()
   364  
   365  	// Dial the above server with a ConnectParams that does a constant backoff
   366  	// of defaultTestShortTimeout duration.
   367  	dopts := []grpc.DialOption{
   368  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   369  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   370  		grpc.WithConnectParams(grpc.ConnectParams{
   371  			Backoff: backoff.Config{
   372  				BaseDelay:  defaultTestShortTimeout,
   373  				Multiplier: float64(0),
   374  				Jitter:     float64(0),
   375  				MaxDelay:   defaultTestShortTimeout,
   376  			},
   377  		}),
   378  	}
   379  	cc, err := grpc.NewClient(lis.Addr().String(), dopts...)
   380  	if err != nil {
   381  		t.Fatalf("Failed to create new client: %v", err)
   382  	}
   383  	t.Cleanup(func() { cc.Close() })
   384  	cc.Connect()
   385  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   386  
   387  	// Spawn a goroutine to ensure that the channel stays in TransientFailure.
   388  	// The call to cc.WaitForStateChange will return false when the main
   389  	// goroutine exits and the context is cancelled.
   390  	go func() {
   391  		if cc.WaitForStateChange(ctx, connectivity.TransientFailure) {
   392  			if state := cc.GetState(); state != connectivity.Shutdown {
   393  				t.Errorf("Unexpected state change from TransientFailure to %s", cc.GetState())
   394  			}
   395  		}
   396  	}()
   397  
   398  	// Ensures that the pick_first LB policy is constantly trying to reconnect.
   399  	for i := 0; i < 10; i++ {
   400  		select {
   401  		case <-connCh:
   402  		case <-time.After(2 * defaultTestShortTimeout):
   403  			t.Error("Timeout when waiting for pick_first to reconnect")
   404  		}
   405  	}
   406  }
   407  
   408  // Tests the PF LB policy with shuffling enabled.
   409  func (s) TestPickFirst_ShuffleAddressList(t *testing.T) {
   410  	const serviceConfig = `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": true }}]}`
   411  
   412  	// Install a shuffler that always reverses two entries.
   413  	origShuf := pfinternal.RandShuffle
   414  	defer func() { pfinternal.RandShuffle = origShuf }()
   415  	pfinternal.RandShuffle = func(n int, f func(int, int)) {
   416  		if n != 2 {
   417  			t.Errorf("Shuffle called with n=%v; want 2", n)
   418  			return
   419  		}
   420  		f(0, 1) // reverse the two addresses
   421  	}
   422  	// Set up our backends.
   423  	cc, r, backends := setupPickFirst(t, 2)
   424  	addrs := stubBackendsToResolverAddrs(backends)
   425  
   426  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   427  	defer cancel()
   428  
   429  	// Push an update with both addresses and shuffling disabled.  We should
   430  	// connect to backend 0.
   431  	r.UpdateState(resolver.State{Endpoints: []resolver.Endpoint{
   432  		{Addresses: []resolver.Address{addrs[0]}},
   433  		{Addresses: []resolver.Address{addrs[1]}},
   434  	}})
   435  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   436  		t.Fatal(err)
   437  	}
   438  
   439  	// Send a config with shuffling enabled.  This will reverse the addresses,
   440  	// but the channel should still be connected to backend 0.
   441  	shufState := resolver.State{
   442  		ServiceConfig: parseServiceConfig(t, r, serviceConfig),
   443  		Endpoints: []resolver.Endpoint{
   444  			{Addresses: []resolver.Address{addrs[0]}},
   445  			{Addresses: []resolver.Address{addrs[1]}},
   446  		},
   447  	}
   448  	r.UpdateState(shufState)
   449  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   450  		t.Fatal(err)
   451  	}
   452  
   453  	// Send a resolver update with no addresses. This should push the channel
   454  	// into TransientFailure.
   455  	r.UpdateState(resolver.State{})
   456  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   457  
   458  	// Send the same config as last time with shuffling enabled.  Since we are
   459  	// not connected to backend 0, we should connect to backend 1.
   460  	r.UpdateState(shufState)
   461  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   462  		t.Fatal(err)
   463  	}
   464  }
   465  
   466  // Test config parsing with the env var turned on and off for various scenarios.
   467  func (s) TestPickFirst_ParseConfig_Success(t *testing.T) {
   468  	// Install a shuffler that always reverses two entries.
   469  	origShuf := pfinternal.RandShuffle
   470  	defer func() { pfinternal.RandShuffle = origShuf }()
   471  	pfinternal.RandShuffle = func(n int, f func(int, int)) {
   472  		if n != 2 {
   473  			t.Errorf("Shuffle called with n=%v; want 2", n)
   474  			return
   475  		}
   476  		f(0, 1) // reverse the two addresses
   477  	}
   478  
   479  	tests := []struct {
   480  		name          string
   481  		serviceConfig string
   482  		wantFirstAddr bool
   483  	}{
   484  		{
   485  			name:          "empty pickfirst config",
   486  			serviceConfig: `{"loadBalancingConfig": [{"pick_first":{}}]}`,
   487  			wantFirstAddr: true,
   488  		},
   489  		{
   490  			name:          "empty good pickfirst config",
   491  			serviceConfig: `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": true }}]}`,
   492  			wantFirstAddr: false,
   493  		},
   494  	}
   495  
   496  	for _, test := range tests {
   497  		t.Run(test.name, func(t *testing.T) {
   498  			// Set up our backends.
   499  			cc, r, backends := setupPickFirst(t, 2)
   500  			addrs := stubBackendsToResolverAddrs(backends)
   501  
   502  			r.UpdateState(resolver.State{
   503  				ServiceConfig: parseServiceConfig(t, r, test.serviceConfig),
   504  				Addresses:     addrs,
   505  			})
   506  
   507  			// Some tests expect address shuffling to happen, and indicate that
   508  			// by setting wantFirstAddr to false (since our shuffling function
   509  			// defined at the top of this test, simply reverses the list of
   510  			// addresses provided to it).
   511  			wantAddr := addrs[0]
   512  			if !test.wantFirstAddr {
   513  				wantAddr = addrs[1]
   514  			}
   515  
   516  			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   517  			defer cancel()
   518  			if err := pickfirst.CheckRPCsToBackend(ctx, cc, wantAddr); err != nil {
   519  				t.Fatal(err)
   520  			}
   521  		})
   522  	}
   523  }
   524  
   525  // Test config parsing for a bad service config.
   526  func (s) TestPickFirst_ParseConfig_Failure(t *testing.T) {
   527  	// Service config should fail with the below config. Name resolvers are
   528  	// expected to perform this parsing before they push the parsed service
   529  	// config to the channel.
   530  	const sc = `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": 666 }}]}`
   531  	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(sc)
   532  	if scpr.Err == nil {
   533  		t.Fatalf("ParseConfig() succeeded and returned %+v, when expected to fail", scpr)
   534  	}
   535  }
   536  
   537  // setupPickFirstWithListenerWrapper is very similar to setupPickFirst, but uses
   538  // a wrapped listener that the test can use to track accepted connections.
   539  func setupPickFirstWithListenerWrapper(t *testing.T, backendCount int, opts ...grpc.DialOption) (*grpc.ClientConn, *manual.Resolver, []*stubserver.StubServer, []*testutils.ListenerWrapper) {
   540  	t.Helper()
   541  
   542  	backends := make([]*stubserver.StubServer, backendCount)
   543  	addrs := make([]resolver.Address, backendCount)
   544  	listeners := make([]*testutils.ListenerWrapper, backendCount)
   545  	for i := 0; i < backendCount; i++ {
   546  		lis := testutils.NewListenerWrapper(t, nil)
   547  		backend := &stubserver.StubServer{
   548  			Listener: lis,
   549  			EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
   550  				return &testpb.Empty{}, nil
   551  			},
   552  		}
   553  		if err := backend.StartServer(); err != nil {
   554  			t.Fatalf("Failed to start backend: %v", err)
   555  		}
   556  		t.Logf("Started TestService backend at: %q", backend.Address)
   557  		t.Cleanup(func() { backend.Stop() })
   558  
   559  		backends[i] = backend
   560  		addrs[i] = resolver.Address{Addr: backend.Address}
   561  		listeners[i] = lis
   562  	}
   563  
   564  	r := manual.NewBuilderWithScheme("whatever")
   565  	dopts := []grpc.DialOption{
   566  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   567  		grpc.WithResolvers(r),
   568  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   569  	}
   570  	dopts = append(dopts, opts...)
   571  	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
   572  	if err != nil {
   573  		t.Fatalf("grpc.NewClient() failed: %v", err)
   574  	}
   575  	t.Cleanup(func() { cc.Close() })
   576  
   577  	// At this point, the resolver has not returned any addresses to the channel.
   578  	// This RPC must block until the context expires.
   579  	sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
   580  	defer sCancel()
   581  	client := testgrpc.NewTestServiceClient(cc)
   582  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded {
   583  		t.Fatalf("EmptyCall() = %s, want %s", status.Code(err), codes.DeadlineExceeded)
   584  	}
   585  	return cc, r, backends, listeners
   586  }
   587  
   588  // TestPickFirst_AddressUpdateWithAttributes tests the case where an address
   589  // update received by the pick_first LB policy differs in attributes. Addresses
   590  // which differ in attributes are considered different from the perspective of
   591  // subconn creation and connection establishment and the test verifies that new
   592  // connections are created when attributes change.
   593  func (s) TestPickFirst_AddressUpdateWithAttributes(t *testing.T) {
   594  	cc, r, backends, listeners := setupPickFirstWithListenerWrapper(t, 2)
   595  
   596  	// Add a set of attributes to the addresses before pushing them to the
   597  	// pick_first LB policy through the manual resolver.
   598  	addrs := stubBackendsToResolverAddrs(backends)
   599  	for i := range addrs {
   600  		addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-1", fmt.Sprintf("%d", i))
   601  	}
   602  	r.UpdateState(resolver.State{Addresses: addrs})
   603  
   604  	// Ensure that RPCs succeed to the first backend in the list.
   605  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   606  	defer cancel()
   607  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   608  		t.Fatal(err)
   609  	}
   610  
   611  	// Grab the wrapped connection from the listener wrapper. This will be used
   612  	// to verify the connection is closed.
   613  	val, err := listeners[0].NewConnCh.Receive(ctx)
   614  	if err != nil {
   615  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   616  	}
   617  	conn := val.(*testutils.ConnWrapper)
   618  
   619  	// Add another set of attributes to the addresses, and push them to the
   620  	// pick_first LB policy through the manual resolver. Leave the order of the
   621  	// addresses unchanged.
   622  	for i := range addrs {
   623  		addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-2", fmt.Sprintf("%d", i))
   624  	}
   625  	r.UpdateState(resolver.State{Addresses: addrs})
   626  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   627  		t.Fatal(err)
   628  	}
   629  
   630  	// A change in the address attributes results in the new address being
   631  	// considered different to the current address. This will result in the old
   632  	// connection being closed and a new connection to the same backend (since
   633  	// address order is not modified).
   634  	if _, err := conn.CloseCh.Receive(ctx); err != nil {
   635  		t.Fatalf("Timeout when expecting existing connection to be closed: %v", err)
   636  	}
   637  	val, err = listeners[0].NewConnCh.Receive(ctx)
   638  	if err != nil {
   639  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   640  	}
   641  	conn = val.(*testutils.ConnWrapper)
   642  
   643  	// Add another set of attributes to the addresses, and push them to the
   644  	// pick_first LB policy through the manual resolver.  Reverse of the order
   645  	// of addresses.
   646  	for i := range addrs {
   647  		addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-3", fmt.Sprintf("%d", i))
   648  	}
   649  	addrs[0], addrs[1] = addrs[1], addrs[0]
   650  	r.UpdateState(resolver.State{Addresses: addrs})
   651  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   652  		t.Fatal(err)
   653  	}
   654  
   655  	// Ensure that the old connection is closed and a new connection is
   656  	// established to the first address in the new list.
   657  	if _, err := conn.CloseCh.Receive(ctx); err != nil {
   658  		t.Fatalf("Timeout when expecting existing connection to be closed: %v", err)
   659  	}
   660  	_, err = listeners[1].NewConnCh.Receive(ctx)
   661  	if err != nil {
   662  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   663  	}
   664  }
   665  
   666  // TestPickFirst_AddressUpdateWithBalancerAttributes tests the case where an
   667  // address update received by the pick_first LB policy differs in balancer
   668  // attributes, which are meant only for consumption by LB policies. In this
   669  // case, the test verifies that new connections are not created when the address
   670  // update only changes the balancer attributes.
   671  func (s) TestPickFirst_AddressUpdateWithBalancerAttributes(t *testing.T) {
   672  	cc, r, backends, listeners := setupPickFirstWithListenerWrapper(t, 2)
   673  
   674  	// Add a set of balancer attributes to the addresses before pushing them to
   675  	// the pick_first LB policy through the manual resolver.
   676  	addrs := stubBackendsToResolverAddrs(backends)
   677  	for i := range addrs {
   678  		addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-1", fmt.Sprintf("%d", i))
   679  	}
   680  	r.UpdateState(resolver.State{Addresses: addrs})
   681  
   682  	// Ensure that RPCs succeed to the expected backend.
   683  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   684  	defer cancel()
   685  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   686  		t.Fatal(err)
   687  	}
   688  
   689  	// Grab the wrapped connection from the listener wrapper. This will be used
   690  	// to verify the connection is not closed.
   691  	val, err := listeners[0].NewConnCh.Receive(ctx)
   692  	if err != nil {
   693  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   694  	}
   695  	conn := val.(*testutils.ConnWrapper)
   696  
   697  	// Add a set of balancer attributes to the addresses before pushing them to
   698  	// the pick_first LB policy through the manual resolver. Leave the order of
   699  	// the addresses unchanged.
   700  	for i := range addrs {
   701  		addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-2", fmt.Sprintf("%d", i))
   702  	}
   703  	r.UpdateState(resolver.State{Addresses: addrs})
   704  
   705  	// Ensure that no new connection is established, and ensure that the old
   706  	// connection is not closed.
   707  	for i := range listeners {
   708  		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   709  		defer sCancel()
   710  		if _, err := listeners[i].NewConnCh.Receive(sCtx); err != context.DeadlineExceeded {
   711  			t.Fatalf("Unexpected error when expecting no new connection: %v", err)
   712  		}
   713  	}
   714  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   715  	defer sCancel()
   716  	if _, err := conn.CloseCh.Receive(sCtx); err != context.DeadlineExceeded {
   717  		t.Fatalf("Unexpected error when expecting existing connection to stay active: %v", err)
   718  	}
   719  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   720  		t.Fatal(err)
   721  	}
   722  
   723  	// Add a set of balancer attributes to the addresses before pushing them to
   724  	// the pick_first LB policy through the manual resolver. Reverse of the
   725  	// order of addresses.
   726  	for i := range addrs {
   727  		addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-3", fmt.Sprintf("%d", i))
   728  	}
   729  	addrs[0], addrs[1] = addrs[1], addrs[0]
   730  	r.UpdateState(resolver.State{Addresses: addrs})
   731  
   732  	// Ensure that no new connection is established, and ensure that the old
   733  	// connection is not closed.
   734  	for i := range listeners {
   735  		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   736  		defer sCancel()
   737  		if _, err := listeners[i].NewConnCh.Receive(sCtx); err != context.DeadlineExceeded {
   738  			t.Fatalf("Unexpected error when expecting no new connection: %v", err)
   739  		}
   740  	}
   741  	sCtx, sCancel = context.WithTimeout(ctx, defaultTestShortTimeout)
   742  	defer sCancel()
   743  	if _, err := conn.CloseCh.Receive(sCtx); err != context.DeadlineExceeded {
   744  		t.Fatalf("Unexpected error when expecting existing connection to stay active: %v", err)
   745  	}
   746  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   747  		t.Fatal(err)
   748  	}
   749  }
   750  
   751  // Tests the case where the pick_first LB policy receives an error from the name
   752  // resolver without previously receiving a good update. Verifies that the
   753  // channel moves to TRANSIENT_FAILURE and that error received from the name
   754  // resolver is propagated to the caller of an RPC.
   755  func (s) TestPickFirst_ResolverError_NoPreviousUpdate(t *testing.T) {
   756  	cc, r, _ := setupPickFirst(t, 0)
   757  
   758  	nrErr := errors.New("error from name resolver")
   759  	r.CC().ReportError(nrErr)
   760  
   761  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   762  	defer cancel()
   763  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   764  
   765  	client := testgrpc.NewTestServiceClient(cc)
   766  	_, err := client.EmptyCall(ctx, &testpb.Empty{})
   767  	if err == nil {
   768  		t.Fatalf("EmptyCall() succeeded when expected to fail with error: %v", nrErr)
   769  	}
   770  	if !strings.Contains(err.Error(), nrErr.Error()) {
   771  		t.Fatalf("EmptyCall() failed with error: %v, want error: %v", err, nrErr)
   772  	}
   773  }
   774  
   775  // Tests the case where the pick_first LB policy receives an error from the name
   776  // resolver after receiving a good update (and the channel is currently READY).
   777  // The test verifies that the channel continues to use the previously received
   778  // good update.
   779  func (s) TestPickFirst_ResolverError_WithPreviousUpdate_Ready(t *testing.T) {
   780  	cc, r, backends := setupPickFirst(t, 1)
   781  
   782  	addrs := stubBackendsToResolverAddrs(backends)
   783  	r.UpdateState(resolver.State{Addresses: addrs})
   784  
   785  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   786  	defer cancel()
   787  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   788  		t.Fatal(err)
   789  	}
   790  
   791  	nrErr := errors.New("error from name resolver")
   792  	r.CC().ReportError(nrErr)
   793  
   794  	// Ensure that RPCs continue to succeed for the next second.
   795  	client := testgrpc.NewTestServiceClient(cc)
   796  	for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) {
   797  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   798  			t.Fatalf("EmptyCall() failed: %v", err)
   799  		}
   800  	}
   801  }
   802  
   803  // Tests the case where the pick_first LB policy receives an error from the name
   804  // resolver after receiving a good update (and the channel is currently in
   805  // CONNECTING state). The test verifies that the channel continues to use the
   806  // previously received good update, and that RPCs don't fail with the error
   807  // received from the name resolver.
   808  func (s) TestPickFirst_ResolverError_WithPreviousUpdate_Connecting(t *testing.T) {
   809  	lis, err := testutils.LocalTCPListener()
   810  	if err != nil {
   811  		t.Fatalf("net.Listen() failed: %v", err)
   812  	}
   813  
   814  	// Listen on a local port and act like a server that blocks until the
   815  	// channel reaches CONNECTING and closes the connection without sending a
   816  	// server preface.
   817  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   818  	defer cancel()
   819  	waitForConnecting := make(chan struct{})
   820  	go func() {
   821  		conn, err := lis.Accept()
   822  		if err != nil {
   823  			t.Errorf("Unexpected error when accepting a connection: %v", err)
   824  		}
   825  		defer conn.Close()
   826  
   827  		select {
   828  		case <-waitForConnecting:
   829  		case <-ctx.Done():
   830  			t.Error("Timeout when waiting for channel to move to CONNECTING state")
   831  		}
   832  	}()
   833  
   834  	r := manual.NewBuilderWithScheme("whatever")
   835  	dopts := []grpc.DialOption{
   836  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   837  		grpc.WithResolvers(r),
   838  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   839  	}
   840  	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
   841  	if err != nil {
   842  		t.Fatalf("grpc.NewClient() failed: %v", err)
   843  	}
   844  	t.Cleanup(func() { cc.Close() })
   845  	cc.Connect()
   846  	addrs := []resolver.Address{{Addr: lis.Addr().String()}}
   847  	r.UpdateState(resolver.State{Addresses: addrs})
   848  	testutils.AwaitState(ctx, t, cc, connectivity.Connecting)
   849  
   850  	nrErr := errors.New("error from name resolver")
   851  	r.CC().ReportError(nrErr)
   852  
   853  	// RPCs should fail with deadline exceed error as long as they are in
   854  	// CONNECTING and not the error returned by the name resolver.
   855  	client := testgrpc.NewTestServiceClient(cc)
   856  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   857  	defer sCancel()
   858  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); !strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
   859  		t.Fatalf("EmptyCall() failed with error: %v, want error: %v", err, context.DeadlineExceeded)
   860  	}
   861  
   862  	// Closing this channel leads to closing of the connection by our listener.
   863  	// gRPC should see this as a connection error.
   864  	close(waitForConnecting)
   865  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   866  	checkForConnectionError(ctx, t, cc)
   867  }
   868  
   869  // Tests the case where the pick_first LB policy receives an error from the name
   870  // resolver after receiving a good update. The previous good update though has
   871  // seen the channel move to TRANSIENT_FAILURE.  The test verifies that the
   872  // channel fails RPCs with the new error from the resolver.
   873  func (s) TestPickFirst_ResolverError_WithPreviousUpdate_TransientFailure(t *testing.T) {
   874  	lis, err := testutils.LocalTCPListener()
   875  	if err != nil {
   876  		t.Fatalf("net.Listen() failed: %v", err)
   877  	}
   878  
   879  	// Listen on a local port and act like a server that closes the connection
   880  	// without sending a server preface.
   881  	go func() {
   882  		conn, err := lis.Accept()
   883  		if err != nil {
   884  			t.Errorf("Unexpected error when accepting a connection: %v", err)
   885  		}
   886  		conn.Close()
   887  	}()
   888  
   889  	r := manual.NewBuilderWithScheme("whatever")
   890  	dopts := []grpc.DialOption{
   891  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   892  		grpc.WithResolvers(r),
   893  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   894  	}
   895  	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
   896  	if err != nil {
   897  		t.Fatalf("grpc.NewClient() failed: %v", err)
   898  	}
   899  	t.Cleanup(func() { cc.Close() })
   900  	cc.Connect()
   901  	addrs := []resolver.Address{{Addr: lis.Addr().String()}}
   902  	r.UpdateState(resolver.State{Addresses: addrs})
   903  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   904  	defer cancel()
   905  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   906  	checkForConnectionError(ctx, t, cc)
   907  
   908  	// An error from the name resolver should result in RPCs failing with that
   909  	// error instead of the old error that caused the channel to move to
   910  	// TRANSIENT_FAILURE in the first place.
   911  	nrErr := errors.New("error from name resolver")
   912  	r.CC().ReportError(nrErr)
   913  	client := testgrpc.NewTestServiceClient(cc)
   914  	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   915  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); strings.Contains(err.Error(), nrErr.Error()) {
   916  			break
   917  		}
   918  	}
   919  	if ctx.Err() != nil {
   920  		t.Fatal("Timeout when waiting for RPCs to fail with error returned by the name resolver")
   921  	}
   922  }
   923  
   924  func checkForConnectionError(ctx context.Context, t *testing.T, cc *grpc.ClientConn) {
   925  	t.Helper()
   926  
   927  	// RPCs may fail on the client side in two ways, once the fake server closes
   928  	// the accepted connection:
   929  	// - writing the client preface succeeds, but not reading the server preface
   930  	// - writing the client preface fails
   931  	// In either case, we should see it fail with UNAVAILABLE.
   932  	client := testgrpc.NewTestServiceClient(cc)
   933  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) != codes.Unavailable {
   934  		t.Fatalf("EmptyCall() failed with error: %v, want code %v", err, codes.Unavailable)
   935  	}
   936  }
   937  
   938  // Tests the case where the pick_first LB policy receives an update from the
   939  // name resolver with no addresses after receiving a good update. The test
   940  // verifies that the channel fails RPCs with an error indicating the fact that
   941  // the name resolver returned no addresses.
   942  func (s) TestPickFirst_ResolverError_ZeroAddresses_WithPreviousUpdate(t *testing.T) {
   943  	cc, r, backends := setupPickFirst(t, 1)
   944  
   945  	addrs := stubBackendsToResolverAddrs(backends)
   946  	r.UpdateState(resolver.State{Addresses: addrs})
   947  
   948  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   949  	defer cancel()
   950  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   951  		t.Fatal(err)
   952  	}
   953  
   954  	r.UpdateState(resolver.State{})
   955  	wantErr := "produced zero addresses"
   956  	client := testgrpc.NewTestServiceClient(cc)
   957  	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   958  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); strings.Contains(err.Error(), wantErr) {
   959  			break
   960  		}
   961  	}
   962  	if ctx.Err() != nil {
   963  		t.Fatal("Timeout when waiting for RPCs to fail with error returned by the name resolver")
   964  	}
   965  }