google.golang.org/grpc@v1.62.1/test/pickfirst_test.go (about)

     1  /*
     2   *
     3   * Copyright 2022 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package test
    20  
    21  import (
    22  	"context"
    23  	"errors"
    24  	"fmt"
    25  	"strings"
    26  	"testing"
    27  	"time"
    28  
    29  	"google.golang.org/grpc"
    30  	"google.golang.org/grpc/backoff"
    31  	"google.golang.org/grpc/codes"
    32  	"google.golang.org/grpc/connectivity"
    33  	"google.golang.org/grpc/credentials/insecure"
    34  	"google.golang.org/grpc/internal"
    35  	"google.golang.org/grpc/internal/channelz"
    36  	"google.golang.org/grpc/internal/grpcrand"
    37  	"google.golang.org/grpc/internal/stubserver"
    38  	"google.golang.org/grpc/internal/testutils"
    39  	"google.golang.org/grpc/internal/testutils/pickfirst"
    40  	"google.golang.org/grpc/resolver"
    41  	"google.golang.org/grpc/resolver/manual"
    42  	"google.golang.org/grpc/serviceconfig"
    43  	"google.golang.org/grpc/status"
    44  
    45  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    46  	testpb "google.golang.org/grpc/interop/grpc_testing"
    47  )
    48  
// pickFirstServiceConfig is a JSON service config that selects the pick_first
// LB policy with an empty configuration. The setup helpers in this file pass it
// to grpc.WithDefaultServiceConfig.
const pickFirstServiceConfig = `{"loadBalancingConfig": [{"pick_first":{}}]}`
    50  
    51  // setupPickFirst performs steps required for pick_first tests. It starts a
    52  // bunch of backends exporting the TestService, creates a ClientConn to them
    53  // with service config specifying the use of the pick_first LB policy.
    54  func setupPickFirst(t *testing.T, backendCount int, opts ...grpc.DialOption) (*grpc.ClientConn, *manual.Resolver, []*stubserver.StubServer) {
    55  	t.Helper()
    56  
    57  	r := manual.NewBuilderWithScheme("whatever")
    58  
    59  	backends := make([]*stubserver.StubServer, backendCount)
    60  	addrs := make([]resolver.Address, backendCount)
    61  	for i := 0; i < backendCount; i++ {
    62  		backend := &stubserver.StubServer{
    63  			EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) {
    64  				return &testpb.Empty{}, nil
    65  			},
    66  		}
    67  		if err := backend.StartServer(); err != nil {
    68  			t.Fatalf("Failed to start backend: %v", err)
    69  		}
    70  		t.Logf("Started TestService backend at: %q", backend.Address)
    71  		t.Cleanup(func() { backend.Stop() })
    72  
    73  		backends[i] = backend
    74  		addrs[i] = resolver.Address{Addr: backend.Address}
    75  	}
    76  
    77  	dopts := []grpc.DialOption{
    78  		grpc.WithTransportCredentials(insecure.NewCredentials()),
    79  		grpc.WithResolvers(r),
    80  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
    81  	}
    82  	dopts = append(dopts, opts...)
    83  	cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
    84  	if err != nil {
    85  		t.Fatalf("grpc.Dial() failed: %v", err)
    86  	}
    87  	t.Cleanup(func() { cc.Close() })
    88  
    89  	// At this point, the resolver has not returned any addresses to the channel.
    90  	// This RPC must block until the context expires.
    91  	sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
    92  	defer sCancel()
    93  	client := testgrpc.NewTestServiceClient(cc)
    94  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded {
    95  		t.Fatalf("EmptyCall() = %s, want %s", status.Code(err), codes.DeadlineExceeded)
    96  	}
    97  	return cc, r, backends
    98  }
    99  
   100  // stubBackendsToResolverAddrs converts from a set of stub server backends to
   101  // resolver addresses. Useful when pushing addresses to the manual resolver.
   102  func stubBackendsToResolverAddrs(backends []*stubserver.StubServer) []resolver.Address {
   103  	addrs := make([]resolver.Address, len(backends))
   104  	for i, backend := range backends {
   105  		addrs[i] = resolver.Address{Addr: backend.Address}
   106  	}
   107  	return addrs
   108  }
   109  
   110  // TestPickFirst_OneBackend tests the most basic scenario for pick_first. It
   111  // brings up a single backend and verifies that all RPCs get routed to it.
   112  func (s) TestPickFirst_OneBackend(t *testing.T) {
   113  	cc, r, backends := setupPickFirst(t, 1)
   114  
   115  	addrs := stubBackendsToResolverAddrs(backends)
   116  	r.UpdateState(resolver.State{Addresses: addrs})
   117  
   118  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   119  	defer cancel()
   120  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   121  		t.Fatal(err)
   122  	}
   123  }
   124  
   125  // TestPickFirst_MultipleBackends tests the scenario with multiple backends and
   126  // verifies that all RPCs get routed to the first one.
   127  func (s) TestPickFirst_MultipleBackends(t *testing.T) {
   128  	cc, r, backends := setupPickFirst(t, 2)
   129  
   130  	addrs := stubBackendsToResolverAddrs(backends)
   131  	r.UpdateState(resolver.State{Addresses: addrs})
   132  
   133  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   134  	defer cancel()
   135  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   136  		t.Fatal(err)
   137  	}
   138  }
   139  
   140  // TestPickFirst_OneServerDown tests the scenario where we have multiple
   141  // backends and pick_first is working as expected. Verifies that RPCs get routed
   142  // to the next backend in the list when the first one goes down.
   143  func (s) TestPickFirst_OneServerDown(t *testing.T) {
   144  	cc, r, backends := setupPickFirst(t, 2)
   145  
   146  	addrs := stubBackendsToResolverAddrs(backends)
   147  	r.UpdateState(resolver.State{Addresses: addrs})
   148  
   149  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   150  	defer cancel()
   151  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   152  		t.Fatal(err)
   153  	}
   154  
   155  	// Stop the backend which is currently being used. RPCs should get routed to
   156  	// the next backend in the list.
   157  	backends[0].Stop()
   158  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   159  		t.Fatal(err)
   160  	}
   161  }
   162  
   163  // TestPickFirst_AllServersDown tests the scenario where we have multiple
   164  // backends and pick_first is working as expected. When all backends go down,
   165  // the test verifies that RPCs fail with appropriate status code.
   166  func (s) TestPickFirst_AllServersDown(t *testing.T) {
   167  	cc, r, backends := setupPickFirst(t, 2)
   168  
   169  	addrs := stubBackendsToResolverAddrs(backends)
   170  	r.UpdateState(resolver.State{Addresses: addrs})
   171  
   172  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   173  	defer cancel()
   174  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   175  		t.Fatal(err)
   176  	}
   177  
   178  	for _, b := range backends {
   179  		b.Stop()
   180  	}
   181  
   182  	client := testgrpc.NewTestServiceClient(cc)
   183  	for {
   184  		if ctx.Err() != nil {
   185  			t.Fatalf("channel failed to move to Unavailable after all backends were stopped: %v", ctx.Err())
   186  		}
   187  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) == codes.Unavailable {
   188  			return
   189  		}
   190  		time.Sleep(defaultTestShortTimeout)
   191  	}
   192  }
   193  
   194  // TestPickFirst_AddressesRemoved tests the scenario where we have multiple
   195  // backends and pick_first is working as expected. It then verifies that when
   196  // addresses are removed by the name resolver, RPCs get routed appropriately.
   197  func (s) TestPickFirst_AddressesRemoved(t *testing.T) {
   198  	cc, r, backends := setupPickFirst(t, 3)
   199  
   200  	addrs := stubBackendsToResolverAddrs(backends)
   201  	r.UpdateState(resolver.State{Addresses: addrs})
   202  
   203  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   204  	defer cancel()
   205  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   206  		t.Fatal(err)
   207  	}
   208  
   209  	// Remove the first backend from the list of addresses originally pushed.
   210  	// RPCs should get routed to the first backend in the new list.
   211  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[1], addrs[2]}})
   212  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   213  		t.Fatal(err)
   214  	}
   215  
   216  	// Append the backend that we just removed to the end of the list.
   217  	// Nothing should change.
   218  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[1], addrs[2], addrs[0]}})
   219  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   220  		t.Fatal(err)
   221  	}
   222  
   223  	// Remove the first backend from the existing list of addresses.
   224  	// RPCs should get routed to the first backend in the new list.
   225  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[2], addrs[0]}})
   226  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[2]); err != nil {
   227  		t.Fatal(err)
   228  	}
   229  
   230  	// Remove the first backend from the existing list of addresses.
   231  	// RPCs should get routed to the first backend in the new list.
   232  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0]}})
   233  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   234  		t.Fatal(err)
   235  	}
   236  }
   237  
   238  // TestPickFirst_NewAddressWhileBlocking tests the case where pick_first is
   239  // configured on a channel, things are working as expected and then a resolver
   240  // updates removes all addresses. An RPC attempted at this point in time will be
   241  // blocked because there are no valid backends. This test verifies that when new
   242  // backends are added, the RPC is able to complete.
   243  func (s) TestPickFirst_NewAddressWhileBlocking(t *testing.T) {
   244  	cc, r, backends := setupPickFirst(t, 2)
   245  	addrs := stubBackendsToResolverAddrs(backends)
   246  	r.UpdateState(resolver.State{Addresses: addrs})
   247  
   248  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   249  	defer cancel()
   250  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   251  		t.Fatal(err)
   252  	}
   253  
   254  	// Send a resolver update with no addresses. This should push the channel into
   255  	// TransientFailure.
   256  	r.UpdateState(resolver.State{})
   257  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   258  
   259  	doneCh := make(chan struct{})
   260  	client := testgrpc.NewTestServiceClient(cc)
   261  	go func() {
   262  		// The channel is currently in TransientFailure and this RPC will block
   263  		// until the channel becomes Ready, which will only happen when we push a
   264  		// resolver update with a valid backend address.
   265  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
   266  			t.Errorf("EmptyCall() = %v, want <nil>", err)
   267  		}
   268  		close(doneCh)
   269  	}()
   270  
   271  	// Make sure that there is one pending RPC on the ClientConn before attempting
   272  	// to push new addresses through the name resolver. If we don't do this, the
   273  	// resolver update can happen before the above goroutine gets to make the RPC.
   274  	for {
   275  		if err := ctx.Err(); err != nil {
   276  			t.Fatal(err)
   277  		}
   278  		tcs, _ := channelz.GetTopChannels(0, 0)
   279  		if len(tcs) != 1 {
   280  			t.Fatalf("there should only be one top channel, not %d", len(tcs))
   281  		}
   282  		started := tcs[0].ChannelData.CallsStarted
   283  		completed := tcs[0].ChannelData.CallsSucceeded + tcs[0].ChannelData.CallsFailed
   284  		if (started - completed) == 1 {
   285  			break
   286  		}
   287  		time.Sleep(defaultTestShortTimeout)
   288  	}
   289  
   290  	// Send a resolver update with a valid backend to push the channel to Ready
   291  	// and unblock the above RPC.
   292  	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0].Address}}})
   293  
   294  	select {
   295  	case <-ctx.Done():
   296  		t.Fatal("Timeout when waiting for blocked RPC to complete")
   297  	case <-doneCh:
   298  	}
   299  }
   300  
   301  // TestPickFirst_StickyTransientFailure tests the case where pick_first is
   302  // configured on a channel, and the backend is configured to close incoming
   303  // connections as soon as they are accepted. The test verifies that the channel
   304  // enters TransientFailure and stays there. The test also verifies that the
   305  // pick_first LB policy is constantly trying to reconnect to the backend.
   306  func (s) TestPickFirst_StickyTransientFailure(t *testing.T) {
   307  	// Spin up a local server which closes the connection as soon as it receives
   308  	// one. It also sends a signal on a channel whenver it received a connection.
   309  	lis, err := testutils.LocalTCPListener()
   310  	if err != nil {
   311  		t.Fatalf("Failed to create listener: %v", err)
   312  	}
   313  	t.Cleanup(func() { lis.Close() })
   314  
   315  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   316  	defer cancel()
   317  	connCh := make(chan struct{}, 1)
   318  	go func() {
   319  		for {
   320  			conn, err := lis.Accept()
   321  			if err != nil {
   322  				return
   323  			}
   324  			select {
   325  			case connCh <- struct{}{}:
   326  				conn.Close()
   327  			case <-ctx.Done():
   328  				return
   329  			}
   330  		}
   331  	}()
   332  
   333  	// Dial the above server with a ConnectParams that does a constant backoff
   334  	// of defaultTestShortTimeout duration.
   335  	dopts := []grpc.DialOption{
   336  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   337  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   338  		grpc.WithConnectParams(grpc.ConnectParams{
   339  			Backoff: backoff.Config{
   340  				BaseDelay:  defaultTestShortTimeout,
   341  				Multiplier: float64(0),
   342  				Jitter:     float64(0),
   343  				MaxDelay:   defaultTestShortTimeout,
   344  			},
   345  		}),
   346  	}
   347  	cc, err := grpc.Dial(lis.Addr().String(), dopts...)
   348  	if err != nil {
   349  		t.Fatalf("Failed to dial server at %q: %v", lis.Addr(), err)
   350  	}
   351  	t.Cleanup(func() { cc.Close() })
   352  
   353  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   354  
   355  	// Spawn a goroutine to ensure that the channel stays in TransientFailure.
   356  	// The call to cc.WaitForStateChange will return false when the main
   357  	// goroutine exits and the context is cancelled.
   358  	go func() {
   359  		if cc.WaitForStateChange(ctx, connectivity.TransientFailure) {
   360  			if state := cc.GetState(); state != connectivity.Shutdown {
   361  				t.Errorf("Unexpected state change from TransientFailure to %s", cc.GetState())
   362  			}
   363  		}
   364  	}()
   365  
   366  	// Ensures that the pick_first LB policy is constantly trying to reconnect.
   367  	for i := 0; i < 10; i++ {
   368  		select {
   369  		case <-connCh:
   370  		case <-time.After(2 * defaultTestShortTimeout):
   371  			t.Error("Timeout when waiting for pick_first to reconnect")
   372  		}
   373  	}
   374  }
   375  
   376  // Tests the PF LB policy with shuffling enabled.
   377  func (s) TestPickFirst_ShuffleAddressList(t *testing.T) {
   378  	const serviceConfig = `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": true }}]}`
   379  
   380  	// Install a shuffler that always reverses two entries.
   381  	origShuf := grpcrand.Shuffle
   382  	defer func() { grpcrand.Shuffle = origShuf }()
   383  	grpcrand.Shuffle = func(n int, f func(int, int)) {
   384  		if n != 2 {
   385  			t.Errorf("Shuffle called with n=%v; want 2", n)
   386  			return
   387  		}
   388  		f(0, 1) // reverse the two addresses
   389  	}
   390  
   391  	// Set up our backends.
   392  	cc, r, backends := setupPickFirst(t, 2)
   393  	addrs := stubBackendsToResolverAddrs(backends)
   394  
   395  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   396  	defer cancel()
   397  
   398  	// Push an update with both addresses and shuffling disabled.  We should
   399  	// connect to backend 0.
   400  	r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}})
   401  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   402  		t.Fatal(err)
   403  	}
   404  
   405  	// Send a config with shuffling enabled.  This will reverse the addresses,
   406  	// but the channel should still be connected to backend 0.
   407  	shufState := resolver.State{
   408  		ServiceConfig: parseServiceConfig(t, r, serviceConfig),
   409  		Addresses:     []resolver.Address{addrs[0], addrs[1]},
   410  	}
   411  	r.UpdateState(shufState)
   412  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   413  		t.Fatal(err)
   414  	}
   415  
   416  	// Send a resolver update with no addresses. This should push the channel
   417  	// into TransientFailure.
   418  	r.UpdateState(resolver.State{})
   419  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   420  
   421  	// Send the same config as last time with shuffling enabled.  Since we are
   422  	// not connected to backend 0, we should connect to backend 1.
   423  	r.UpdateState(shufState)
   424  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   425  		t.Fatal(err)
   426  	}
   427  }
   428  
   429  // Test config parsing with the env var turned on and off for various scenarios.
   430  func (s) TestPickFirst_ParseConfig_Success(t *testing.T) {
   431  	// Install a shuffler that always reverses two entries.
   432  	origShuf := grpcrand.Shuffle
   433  	defer func() { grpcrand.Shuffle = origShuf }()
   434  	grpcrand.Shuffle = func(n int, f func(int, int)) {
   435  		if n != 2 {
   436  			t.Errorf("Shuffle called with n=%v; want 2", n)
   437  			return
   438  		}
   439  		f(0, 1) // reverse the two addresses
   440  	}
   441  
   442  	tests := []struct {
   443  		name          string
   444  		serviceConfig string
   445  		wantFirstAddr bool
   446  	}{
   447  		{
   448  			name:          "empty pickfirst config",
   449  			serviceConfig: `{"loadBalancingConfig": [{"pick_first":{}}]}`,
   450  			wantFirstAddr: true,
   451  		},
   452  		{
   453  			name:          "empty good pickfirst config",
   454  			serviceConfig: `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": true }}]}`,
   455  			wantFirstAddr: false,
   456  		},
   457  	}
   458  
   459  	for _, test := range tests {
   460  		t.Run(test.name, func(t *testing.T) {
   461  			// Set up our backends.
   462  			cc, r, backends := setupPickFirst(t, 2)
   463  			addrs := stubBackendsToResolverAddrs(backends)
   464  
   465  			r.UpdateState(resolver.State{
   466  				ServiceConfig: parseServiceConfig(t, r, test.serviceConfig),
   467  				Addresses:     addrs,
   468  			})
   469  
   470  			// Some tests expect address shuffling to happen, and indicate that
   471  			// by setting wantFirstAddr to false (since our shuffling function
   472  			// defined at the top of this test, simply reverses the list of
   473  			// addresses provided to it).
   474  			wantAddr := addrs[0]
   475  			if !test.wantFirstAddr {
   476  				wantAddr = addrs[1]
   477  			}
   478  
   479  			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   480  			defer cancel()
   481  			if err := pickfirst.CheckRPCsToBackend(ctx, cc, wantAddr); err != nil {
   482  				t.Fatal(err)
   483  			}
   484  		})
   485  	}
   486  }
   487  
   488  // Test config parsing for a bad service config.
   489  func (s) TestPickFirst_ParseConfig_Failure(t *testing.T) {
   490  	// Service config should fail with the below config. Name resolvers are
   491  	// expected to perform this parsing before they push the parsed service
   492  	// config to the channel.
   493  	const sc = `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": 666 }}]}`
   494  	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(sc)
   495  	if scpr.Err == nil {
   496  		t.Fatalf("ParseConfig() succeeded and returned %+v, when expected to fail", scpr)
   497  	}
   498  }
   499  
   500  // setupPickFirstWithListenerWrapper is very similar to setupPickFirst, but uses
   501  // a wrapped listener that the test can use to track accepted connections.
   502  func setupPickFirstWithListenerWrapper(t *testing.T, backendCount int, opts ...grpc.DialOption) (*grpc.ClientConn, *manual.Resolver, []*stubserver.StubServer, []*testutils.ListenerWrapper) {
   503  	t.Helper()
   504  
   505  	backends := make([]*stubserver.StubServer, backendCount)
   506  	addrs := make([]resolver.Address, backendCount)
   507  	listeners := make([]*testutils.ListenerWrapper, backendCount)
   508  	for i := 0; i < backendCount; i++ {
   509  		lis := testutils.NewListenerWrapper(t, nil)
   510  		backend := &stubserver.StubServer{
   511  			Listener: lis,
   512  			EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) {
   513  				return &testpb.Empty{}, nil
   514  			},
   515  		}
   516  		if err := backend.StartServer(); err != nil {
   517  			t.Fatalf("Failed to start backend: %v", err)
   518  		}
   519  		t.Logf("Started TestService backend at: %q", backend.Address)
   520  		t.Cleanup(func() { backend.Stop() })
   521  
   522  		backends[i] = backend
   523  		addrs[i] = resolver.Address{Addr: backend.Address}
   524  		listeners[i] = lis
   525  	}
   526  
   527  	r := manual.NewBuilderWithScheme("whatever")
   528  	dopts := []grpc.DialOption{
   529  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   530  		grpc.WithResolvers(r),
   531  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   532  	}
   533  	dopts = append(dopts, opts...)
   534  	cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
   535  	if err != nil {
   536  		t.Fatalf("grpc.Dial() failed: %v", err)
   537  	}
   538  	t.Cleanup(func() { cc.Close() })
   539  
   540  	// At this point, the resolver has not returned any addresses to the channel.
   541  	// This RPC must block until the context expires.
   542  	sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
   543  	defer sCancel()
   544  	client := testgrpc.NewTestServiceClient(cc)
   545  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded {
   546  		t.Fatalf("EmptyCall() = %s, want %s", status.Code(err), codes.DeadlineExceeded)
   547  	}
   548  	return cc, r, backends, listeners
   549  }
   550  
   551  // TestPickFirst_AddressUpdateWithAttributes tests the case where an address
   552  // update received by the pick_first LB policy differs in attributes. Addresses
   553  // which differ in attributes are considered different from the perspective of
   554  // subconn creation and connection establishment and the test verifies that new
   555  // connections are created when attributes change.
   556  func (s) TestPickFirst_AddressUpdateWithAttributes(t *testing.T) {
   557  	cc, r, backends, listeners := setupPickFirstWithListenerWrapper(t, 2)
   558  
   559  	// Add a set of attributes to the addresses before pushing them to the
   560  	// pick_first LB policy through the manual resolver.
   561  	addrs := stubBackendsToResolverAddrs(backends)
   562  	for i := range addrs {
   563  		addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-1", fmt.Sprintf("%d", i))
   564  	}
   565  	r.UpdateState(resolver.State{Addresses: addrs})
   566  
   567  	// Ensure that RPCs succeed to the first backend in the list.
   568  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   569  	defer cancel()
   570  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   571  		t.Fatal(err)
   572  	}
   573  
   574  	// Grab the wrapped connection from the listener wrapper. This will be used
   575  	// to verify the connection is closed.
   576  	val, err := listeners[0].NewConnCh.Receive(ctx)
   577  	if err != nil {
   578  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   579  	}
   580  	conn := val.(*testutils.ConnWrapper)
   581  
   582  	// Add another set of attributes to the addresses, and push them to the
   583  	// pick_first LB policy through the manual resolver. Leave the order of the
   584  	// addresses unchanged.
   585  	for i := range addrs {
   586  		addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-2", fmt.Sprintf("%d", i))
   587  	}
   588  	r.UpdateState(resolver.State{Addresses: addrs})
   589  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   590  		t.Fatal(err)
   591  	}
   592  
   593  	// A change in the address attributes results in the new address being
   594  	// considered different to the current address. This will result in the old
   595  	// connection being closed and a new connection to the same backend (since
   596  	// address order is not modified).
   597  	if _, err := conn.CloseCh.Receive(ctx); err != nil {
   598  		t.Fatalf("Timeout when expecting existing connection to be closed: %v", err)
   599  	}
   600  	val, err = listeners[0].NewConnCh.Receive(ctx)
   601  	if err != nil {
   602  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   603  	}
   604  	conn = val.(*testutils.ConnWrapper)
   605  
   606  	// Add another set of attributes to the addresses, and push them to the
   607  	// pick_first LB policy through the manual resolver.  Reverse of the order
   608  	// of addresses.
   609  	for i := range addrs {
   610  		addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-3", fmt.Sprintf("%d", i))
   611  	}
   612  	addrs[0], addrs[1] = addrs[1], addrs[0]
   613  	r.UpdateState(resolver.State{Addresses: addrs})
   614  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   615  		t.Fatal(err)
   616  	}
   617  
   618  	// Ensure that the old connection is closed and a new connection is
   619  	// established to the first address in the new list.
   620  	if _, err := conn.CloseCh.Receive(ctx); err != nil {
   621  		t.Fatalf("Timeout when expecting existing connection to be closed: %v", err)
   622  	}
   623  	_, err = listeners[1].NewConnCh.Receive(ctx)
   624  	if err != nil {
   625  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   626  	}
   627  }
   628  
   629  // TestPickFirst_AddressUpdateWithBalancerAttributes tests the case where an
   630  // address update received by the pick_first LB policy differs in balancer
   631  // attributes, which are meant only for consumption by LB policies. In this
   632  // case, the test verifies that new connections are not created when the address
   633  // update only changes the balancer attributes.
   634  func (s) TestPickFirst_AddressUpdateWithBalancerAttributes(t *testing.T) {
   635  	cc, r, backends, listeners := setupPickFirstWithListenerWrapper(t, 2)
   636  
   637  	// Add a set of balancer attributes to the addresses before pushing them to
   638  	// the pick_first LB policy through the manual resolver.
   639  	addrs := stubBackendsToResolverAddrs(backends)
   640  	for i := range addrs {
   641  		addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-1", fmt.Sprintf("%d", i))
   642  	}
   643  	r.UpdateState(resolver.State{Addresses: addrs})
   644  
   645  	// Ensure that RPCs succeed to the expected backend.
   646  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   647  	defer cancel()
   648  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   649  		t.Fatal(err)
   650  	}
   651  
   652  	// Grab the wrapped connection from the listener wrapper. This will be used
   653  	// to verify the connection is not closed.
   654  	val, err := listeners[0].NewConnCh.Receive(ctx)
   655  	if err != nil {
   656  		t.Fatalf("Failed to receive new connection from wrapped listener: %v", err)
   657  	}
   658  	conn := val.(*testutils.ConnWrapper)
   659  
   660  	// Add a set of balancer attributes to the addresses before pushing them to
   661  	// the pick_first LB policy through the manual resolver. Leave the order of
   662  	// the addresses unchanged.
   663  	for i := range addrs {
   664  		addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-2", fmt.Sprintf("%d", i))
   665  	}
   666  	r.UpdateState(resolver.State{Addresses: addrs})
   667  
   668  	// Ensure that no new connection is established, and ensure that the old
   669  	// connection is not closed.
   670  	for i := range listeners {
   671  		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   672  		defer sCancel()
   673  		if _, err := listeners[i].NewConnCh.Receive(sCtx); err != context.DeadlineExceeded {
   674  			t.Fatalf("Unexpected error when expecting no new connection: %v", err)
   675  		}
   676  	}
   677  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   678  	defer sCancel()
   679  	if _, err := conn.CloseCh.Receive(sCtx); err != context.DeadlineExceeded {
   680  		t.Fatalf("Unexpected error when expecting existing connection to stay active: %v", err)
   681  	}
   682  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   683  		t.Fatal(err)
   684  	}
   685  
   686  	// Add a set of balancer attributes to the addresses before pushing them to
   687  	// the pick_first LB policy through the manual resolver. Reverse of the
   688  	// order of addresses.
   689  	for i := range addrs {
   690  		addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-3", fmt.Sprintf("%d", i))
   691  	}
   692  	addrs[0], addrs[1] = addrs[1], addrs[0]
   693  	r.UpdateState(resolver.State{Addresses: addrs})
   694  
   695  	// Ensure that no new connection is established, and ensure that the old
   696  	// connection is not closed.
   697  	for i := range listeners {
   698  		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   699  		defer sCancel()
   700  		if _, err := listeners[i].NewConnCh.Receive(sCtx); err != context.DeadlineExceeded {
   701  			t.Fatalf("Unexpected error when expecting no new connection: %v", err)
   702  		}
   703  	}
   704  	sCtx, sCancel = context.WithTimeout(ctx, defaultTestShortTimeout)
   705  	defer sCancel()
   706  	if _, err := conn.CloseCh.Receive(sCtx); err != context.DeadlineExceeded {
   707  		t.Fatalf("Unexpected error when expecting existing connection to stay active: %v", err)
   708  	}
   709  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
   710  		t.Fatal(err)
   711  	}
   712  }
   713  
   714  // Tests the case where the pick_first LB policy receives an error from the name
   715  // resolver without previously receiving a good update. Verifies that the
   716  // channel moves to TRANSIENT_FAILURE and that error received from the name
   717  // resolver is propagated to the caller of an RPC.
   718  func (s) TestPickFirst_ResolverError_NoPreviousUpdate(t *testing.T) {
   719  	cc, r, _ := setupPickFirst(t, 0)
   720  
   721  	nrErr := errors.New("error from name resolver")
   722  	r.ReportError(nrErr)
   723  
   724  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   725  	defer cancel()
   726  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   727  
   728  	client := testgrpc.NewTestServiceClient(cc)
   729  	_, err := client.EmptyCall(ctx, &testpb.Empty{})
   730  	if err == nil {
   731  		t.Fatalf("EmptyCall() succeeded when expected to fail with error: %v", nrErr)
   732  	}
   733  	if !strings.Contains(err.Error(), nrErr.Error()) {
   734  		t.Fatalf("EmptyCall() failed with error: %v, want error: %v", err, nrErr)
   735  	}
   736  }
   737  
   738  // Tests the case where the pick_first LB policy receives an error from the name
   739  // resolver after receiving a good update (and the channel is currently READY).
   740  // The test verifies that the channel continues to use the previously received
   741  // good update.
   742  func (s) TestPickFirst_ResolverError_WithPreviousUpdate_Ready(t *testing.T) {
   743  	cc, r, backends := setupPickFirst(t, 1)
   744  
   745  	addrs := stubBackendsToResolverAddrs(backends)
   746  	r.UpdateState(resolver.State{Addresses: addrs})
   747  
   748  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   749  	defer cancel()
   750  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   751  		t.Fatal(err)
   752  	}
   753  
   754  	nrErr := errors.New("error from name resolver")
   755  	r.ReportError(nrErr)
   756  
   757  	// Ensure that RPCs continue to succeed for the next second.
   758  	client := testgrpc.NewTestServiceClient(cc)
   759  	for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) {
   760  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   761  			t.Fatalf("EmptyCall() failed: %v", err)
   762  		}
   763  	}
   764  }
   765  
   766  // Tests the case where the pick_first LB policy receives an error from the name
   767  // resolver after receiving a good update (and the channel is currently in
   768  // CONNECTING state). The test verifies that the channel continues to use the
   769  // previously received good update, and that RPCs don't fail with the error
   770  // received from the name resolver.
   771  func (s) TestPickFirst_ResolverError_WithPreviousUpdate_Connecting(t *testing.T) {
   772  	lis, err := testutils.LocalTCPListener()
   773  	if err != nil {
   774  		t.Fatalf("net.Listen() failed: %v", err)
   775  	}
   776  
   777  	// Listen on a local port and act like a server that blocks until the
   778  	// channel reaches CONNECTING and closes the connection without sending a
   779  	// server preface.
   780  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   781  	defer cancel()
   782  	waitForConnecting := make(chan struct{})
   783  	go func() {
   784  		conn, err := lis.Accept()
   785  		if err != nil {
   786  			t.Errorf("Unexpected error when accepting a connection: %v", err)
   787  		}
   788  		defer conn.Close()
   789  
   790  		select {
   791  		case <-waitForConnecting:
   792  		case <-ctx.Done():
   793  			t.Error("Timeout when waiting for channel to move to CONNECTING state")
   794  		}
   795  	}()
   796  
   797  	r := manual.NewBuilderWithScheme("whatever")
   798  	dopts := []grpc.DialOption{
   799  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   800  		grpc.WithResolvers(r),
   801  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   802  	}
   803  	cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
   804  	if err != nil {
   805  		t.Fatalf("grpc.Dial() failed: %v", err)
   806  	}
   807  	t.Cleanup(func() { cc.Close() })
   808  
   809  	addrs := []resolver.Address{{Addr: lis.Addr().String()}}
   810  	r.UpdateState(resolver.State{Addresses: addrs})
   811  	testutils.AwaitState(ctx, t, cc, connectivity.Connecting)
   812  
   813  	nrErr := errors.New("error from name resolver")
   814  	r.ReportError(nrErr)
   815  
   816  	// RPCs should fail with deadline exceed error as long as they are in
   817  	// CONNECTING and not the error returned by the name resolver.
   818  	client := testgrpc.NewTestServiceClient(cc)
   819  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   820  	defer sCancel()
   821  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); !strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
   822  		t.Fatalf("EmptyCall() failed with error: %v, want error: %v", err, context.DeadlineExceeded)
   823  	}
   824  
   825  	// Closing this channel leads to closing of the connection by our listener.
   826  	// gRPC should see this as a connection error.
   827  	close(waitForConnecting)
   828  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   829  	checkForConnectionError(ctx, t, cc)
   830  }
   831  
   832  // Tests the case where the pick_first LB policy receives an error from the name
   833  // resolver after receiving a good update. The previous good update though has
   834  // seen the channel move to TRANSIENT_FAILURE.  The test verifies that the
   835  // channel fails RPCs with the new error from the resolver.
   836  func (s) TestPickFirst_ResolverError_WithPreviousUpdate_TransientFailure(t *testing.T) {
   837  	lis, err := testutils.LocalTCPListener()
   838  	if err != nil {
   839  		t.Fatalf("net.Listen() failed: %v", err)
   840  	}
   841  
   842  	// Listen on a local port and act like a server that closes the connection
   843  	// without sending a server preface.
   844  	go func() {
   845  		conn, err := lis.Accept()
   846  		if err != nil {
   847  			t.Errorf("Unexpected error when accepting a connection: %v", err)
   848  		}
   849  		conn.Close()
   850  	}()
   851  
   852  	r := manual.NewBuilderWithScheme("whatever")
   853  	dopts := []grpc.DialOption{
   854  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   855  		grpc.WithResolvers(r),
   856  		grpc.WithDefaultServiceConfig(pickFirstServiceConfig),
   857  	}
   858  	cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
   859  	if err != nil {
   860  		t.Fatalf("grpc.Dial() failed: %v", err)
   861  	}
   862  	t.Cleanup(func() { cc.Close() })
   863  
   864  	addrs := []resolver.Address{{Addr: lis.Addr().String()}}
   865  	r.UpdateState(resolver.State{Addresses: addrs})
   866  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   867  	defer cancel()
   868  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   869  	checkForConnectionError(ctx, t, cc)
   870  
   871  	// An error from the name resolver should result in RPCs failing with that
   872  	// error instead of the old error that caused the channel to move to
   873  	// TRANSIENT_FAILURE in the first place.
   874  	nrErr := errors.New("error from name resolver")
   875  	r.ReportError(nrErr)
   876  	client := testgrpc.NewTestServiceClient(cc)
   877  	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   878  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); strings.Contains(err.Error(), nrErr.Error()) {
   879  			break
   880  		}
   881  	}
   882  	if ctx.Err() != nil {
   883  		t.Fatal("Timeout when waiting for RPCs to fail with error returned by the name resolver")
   884  	}
   885  }
   886  
   887  func checkForConnectionError(ctx context.Context, t *testing.T, cc *grpc.ClientConn) {
   888  	t.Helper()
   889  
   890  	// RPCs may fail on the client side in two ways, once the fake server closes
   891  	// the accepted connection:
   892  	// - writing the client preface succeeds, but not reading the server preface
   893  	// - writing the client preface fails
   894  	// In either case, we should see it fail with UNAVAILABLE.
   895  	client := testgrpc.NewTestServiceClient(cc)
   896  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) != codes.Unavailable {
   897  		t.Fatalf("EmptyCall() failed with error: %v, want code %v", err, codes.Unavailable)
   898  	}
   899  }
   900  
   901  // Tests the case where the pick_first LB policy receives an update from the
   902  // name resolver with no addresses after receiving a good update. The test
   903  // verifies that the channel fails RPCs with an error indicating the fact that
   904  // the name resolver returned no addresses.
   905  func (s) TestPickFirst_ResolverError_ZeroAddresses_WithPreviousUpdate(t *testing.T) {
   906  	cc, r, backends := setupPickFirst(t, 1)
   907  
   908  	addrs := stubBackendsToResolverAddrs(backends)
   909  	r.UpdateState(resolver.State{Addresses: addrs})
   910  
   911  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   912  	defer cancel()
   913  	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
   914  		t.Fatal(err)
   915  	}
   916  
   917  	r.UpdateState(resolver.State{})
   918  	wantErr := "produced zero addresses"
   919  	client := testgrpc.NewTestServiceClient(cc)
   920  	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   921  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); strings.Contains(err.Error(), wantErr) {
   922  			break
   923  		}
   924  	}
   925  	if ctx.Err() != nil {
   926  		t.Fatal("Timeout when waiting for RPCs to fail with error returned by the name resolver")
   927  	}
   928  }