google.golang.org/grpc@v1.72.2/test/healthcheck_test.go (about)

     1  /*
     2   *
     3   * Copyright 2018 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package test
    20  
    21  import (
    22  	"context"
    23  	"errors"
    24  	"fmt"
    25  	"net"
    26  	"sync"
    27  	"testing"
    28  	"time"
    29  
    30  	"google.golang.org/grpc"
    31  	"google.golang.org/grpc/balancer"
    32  	"google.golang.org/grpc/balancer/pickfirst"
    33  	"google.golang.org/grpc/balancer/roundrobin"
    34  	"google.golang.org/grpc/codes"
    35  	"google.golang.org/grpc/connectivity"
    36  	"google.golang.org/grpc/credentials/insecure"
    37  	"google.golang.org/grpc/health"
    38  	"google.golang.org/grpc/internal"
    39  	"google.golang.org/grpc/internal/balancer/stub"
    40  	"google.golang.org/grpc/internal/channelz"
    41  	"google.golang.org/grpc/internal/grpctest"
    42  	"google.golang.org/grpc/internal/testutils"
    43  	"google.golang.org/grpc/resolver"
    44  	"google.golang.org/grpc/resolver/manual"
    45  	"google.golang.org/grpc/status"
    46  
    47  	healthgrpc "google.golang.org/grpc/health/grpc_health_v1"
    48  	healthpb "google.golang.org/grpc/health/grpc_health_v1"
    49  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    50  	testpb "google.golang.org/grpc/interop/grpc_testing"
    51  )
    52  
    53  func newTestHealthServer() *testHealthServer {
    54  	return newTestHealthServerWithWatchFunc(defaultWatchFunc)
    55  }
    56  
    57  func newTestHealthServerWithWatchFunc(f healthWatchFunc) *testHealthServer {
    58  	return &testHealthServer{
    59  		watchFunc: f,
    60  		update:    make(chan struct{}, 1),
    61  		status:    make(map[string]healthpb.HealthCheckResponse_ServingStatus),
    62  	}
    63  }
    64  
    65  // defaultWatchFunc will send a HealthCheckResponse to the client whenever SetServingStatus is called.
    66  func defaultWatchFunc(s *testHealthServer, in *healthpb.HealthCheckRequest, stream healthgrpc.Health_WatchServer) error {
    67  	if in.Service != "foo" {
    68  		return status.Error(codes.FailedPrecondition,
    69  			"the defaultWatchFunc only handles request with service name to be \"foo\"")
    70  	}
    71  	var done bool
    72  	for {
    73  		select {
    74  		case <-stream.Context().Done():
    75  			done = true
    76  		case <-s.update:
    77  		}
    78  		if done {
    79  			break
    80  		}
    81  		s.mu.Lock()
    82  		resp := &healthpb.HealthCheckResponse{
    83  			Status: s.status[in.Service],
    84  		}
    85  		s.mu.Unlock()
    86  		stream.SendMsg(resp)
    87  	}
    88  	return nil
    89  }
    90  
    91  type healthWatchFunc func(*testHealthServer, *healthpb.HealthCheckRequest, healthgrpc.Health_WatchServer) error
    92  
// testHealthServer is a health service used by the tests in this file. Watch
// RPCs are delegated to watchFunc, Check always reports SERVING, and the
// per-service statuses are changed through SetServingStatus.
type testHealthServer struct {
	healthgrpc.UnimplementedHealthServer
	// watchFunc handles every Watch RPC received by this server.
	watchFunc healthWatchFunc
	// mu is held while status is read (defaultWatchFunc) or written
	// (SetServingStatus).
	mu        sync.Mutex
	// status maps a service name to its most recently set serving status.
	status    map[string]healthpb.HealthCheckResponse_ServingStatus
	// update is a one-slot buffered channel that SetServingStatus signals
	// after changing status; defaultWatchFunc receives from it.
	update    chan struct{}
}
   100  
   101  func (s *testHealthServer) Check(context.Context, *healthpb.HealthCheckRequest) (*healthpb.HealthCheckResponse, error) {
   102  	return &healthpb.HealthCheckResponse{
   103  		Status: healthpb.HealthCheckResponse_SERVING,
   104  	}, nil
   105  }
   106  
   107  func (s *testHealthServer) Watch(in *healthpb.HealthCheckRequest, stream healthgrpc.Health_WatchServer) error {
   108  	return s.watchFunc(s, in, stream)
   109  }
   110  
   111  // SetServingStatus is called when need to reset the serving status of a service
   112  // or insert a new service entry into the statusMap.
   113  func (s *testHealthServer) SetServingStatus(service string, status healthpb.HealthCheckResponse_ServingStatus) {
   114  	s.mu.Lock()
   115  	s.status[service] = status
   116  	select {
   117  	case <-s.update:
   118  	default:
   119  	}
   120  	s.update <- struct{}{}
   121  	s.mu.Unlock()
   122  }
   123  
   124  func setupHealthCheckWrapper(t *testing.T) (hcEnterChan chan struct{}, hcExitChan chan struct{}) {
   125  	t.Helper()
   126  
   127  	hcEnterChan = make(chan struct{})
   128  	hcExitChan = make(chan struct{})
   129  	origHealthCheckFn := internal.HealthCheckFunc
   130  	internal.HealthCheckFunc = func(ctx context.Context, newStream func(string) (any, error), update func(connectivity.State, error), service string) error {
   131  		close(hcEnterChan)
   132  		defer close(hcExitChan)
   133  		return origHealthCheckFn(ctx, newStream, update, service)
   134  	}
   135  
   136  	t.Cleanup(func() {
   137  		internal.HealthCheckFunc = origHealthCheckFn
   138  	})
   139  
   140  	return
   141  }
   142  
   143  func setupServer(t *testing.T, watchFunc healthWatchFunc) (*grpc.Server, net.Listener, *testHealthServer) {
   144  	t.Helper()
   145  
   146  	lis, err := net.Listen("tcp", "localhost:0")
   147  	if err != nil {
   148  		t.Fatalf("net.Listen() failed: %v", err)
   149  	}
   150  
   151  	var ts *testHealthServer
   152  	if watchFunc != nil {
   153  		ts = newTestHealthServerWithWatchFunc(watchFunc)
   154  	} else {
   155  		ts = newTestHealthServer()
   156  	}
   157  	s := grpc.NewServer()
   158  	healthgrpc.RegisterHealthServer(s, ts)
   159  	testgrpc.RegisterTestServiceServer(s, &testServer{})
   160  	go s.Serve(lis)
   161  	t.Cleanup(func() { s.Stop() })
   162  	return s, lis, ts
   163  }
   164  
// clientConfig carries the optional settings that setupClient applies when it
// creates a ClientConn.
type clientConfig struct {
	// balancerName, when non-empty, is used as the load balancing policy in
	// the default service config.
	balancerName    string
	// extraDialOption is appended after the dial options that setupClient
	// always sets.
	extraDialOption []grpc.DialOption
}
   169  
   170  func setupClient(t *testing.T, c *clientConfig) (*grpc.ClientConn, *manual.Resolver) {
   171  	t.Helper()
   172  
   173  	r := manual.NewBuilderWithScheme("whatever")
   174  	opts := []grpc.DialOption{
   175  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   176  		grpc.WithResolvers(r),
   177  	}
   178  	if c != nil {
   179  		if c.balancerName != "" {
   180  			opts = append(opts, grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, c.balancerName)))
   181  		}
   182  		opts = append(opts, c.extraDialOption...)
   183  	}
   184  
   185  	cc, err := grpc.NewClient(r.Scheme()+":///test.server", opts...)
   186  	if err != nil {
   187  		t.Fatalf("grpc.NewClient() failed: %v", err)
   188  	}
   189  	cc.Connect()
   190  	t.Cleanup(func() { cc.Close() })
   191  	return cc, r
   192  }
   193  
   194  func (s) TestHealthCheckWatchStateChange(t *testing.T) {
   195  	_, lis, ts := setupServer(t, nil)
   196  
   197  	// The table below shows the expected series of addrConn connectivity transitions when server
   198  	// updates its health status. As there's only one addrConn corresponds with the ClientConn in this
   199  	// test, we use ClientConn's connectivity state as the addrConn connectivity state.
   200  	//+------------------------------+-------------------------------------------+
   201  	//| Health Check Returned Status | Expected addrConn Connectivity Transition |
   202  	//+------------------------------+-------------------------------------------+
   203  	//| NOT_SERVING                  | ->TRANSIENT FAILURE                       |
   204  	//| SERVING                      | ->READY                                   |
   205  	//| SERVICE_UNKNOWN              | ->TRANSIENT FAILURE                       |
   206  	//| SERVING                      | ->READY                                   |
   207  	//| UNKNOWN                      | ->TRANSIENT FAILURE                       |
   208  	//+------------------------------+-------------------------------------------+
   209  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_NOT_SERVING)
   210  
   211  	cc, r := setupClient(t, nil)
   212  	r.UpdateState(resolver.State{
   213  		Addresses: []resolver.Address{{Addr: lis.Addr().String()}},
   214  		ServiceConfig: parseServiceConfig(t, r, `{
   215  	"healthCheckConfig": {
   216  		"serviceName": "foo"
   217  	},
   218  	"loadBalancingConfig": [{"round_robin":{}}]
   219  }`)})
   220  
   221  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   222  	defer cancel()
   223  	testutils.AwaitNotState(ctx, t, cc, connectivity.Idle)
   224  	testutils.AwaitNotState(ctx, t, cc, connectivity.Connecting)
   225  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   226  	if s := cc.GetState(); s != connectivity.TransientFailure {
   227  		t.Fatalf("ClientConn is in %v state, want TRANSIENT FAILURE", s)
   228  	}
   229  
   230  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING)
   231  	testutils.AwaitNotState(ctx, t, cc, connectivity.TransientFailure)
   232  	if s := cc.GetState(); s != connectivity.Ready {
   233  		t.Fatalf("ClientConn is in %v state, want READY", s)
   234  	}
   235  
   236  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVICE_UNKNOWN)
   237  	testutils.AwaitNotState(ctx, t, cc, connectivity.Ready)
   238  	if s := cc.GetState(); s != connectivity.TransientFailure {
   239  		t.Fatalf("ClientConn is in %v state, want TRANSIENT FAILURE", s)
   240  	}
   241  
   242  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING)
   243  	testutils.AwaitNotState(ctx, t, cc, connectivity.TransientFailure)
   244  	if s := cc.GetState(); s != connectivity.Ready {
   245  		t.Fatalf("ClientConn is in %v state, want READY", s)
   246  	}
   247  
   248  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_UNKNOWN)
   249  	testutils.AwaitNotState(ctx, t, cc, connectivity.Ready)
   250  	if s := cc.GetState(); s != connectivity.TransientFailure {
   251  		t.Fatalf("ClientConn is in %v state, want TRANSIENT FAILURE", s)
   252  	}
   253  }
   254  
   255  // If Watch returns Unimplemented, then the ClientConn should go into READY state.
   256  func (s) TestHealthCheckHealthServerNotRegistered(t *testing.T) {
   257  	grpctest.TLogger.ExpectError("Subchannel health check is unimplemented at server side, thus health check is disabled")
   258  	s := grpc.NewServer()
   259  	lis, err := net.Listen("tcp", "localhost:0")
   260  	if err != nil {
   261  		t.Fatalf("failed to listen due to err: %v", err)
   262  	}
   263  	go s.Serve(lis)
   264  	defer s.Stop()
   265  
   266  	cc, r := setupClient(t, nil)
   267  	r.UpdateState(resolver.State{
   268  		Addresses: []resolver.Address{{Addr: lis.Addr().String()}},
   269  		ServiceConfig: parseServiceConfig(t, r, fmt.Sprintf(`{
   270  			"healthCheckConfig": {
   271  				"serviceName": "foo"
   272  			},
   273  			"loadBalancingConfig": [{"%s":{}}]
   274  		}`, roundrobin.Name))})
   275  
   276  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   277  	defer cancel()
   278  	testutils.AwaitNotState(ctx, t, cc, connectivity.Idle)
   279  	testutils.AwaitNotState(ctx, t, cc, connectivity.Connecting)
   280  	if s := cc.GetState(); s != connectivity.Ready {
   281  		t.Fatalf("ClientConn is in %v state, want READY", s)
   282  	}
   283  }
   284  
// TestHealthCheckWithGoAway checks that when the server gracefully stops (and
// thereby sends a GoAway), the health check function on the client exits,
// while a streaming RPC that was opened beforehand can still send and receive.
func (s) TestHealthCheckWithGoAway(t *testing.T) {
	s, lis, ts := setupServer(t, nil)
	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING)

	// Wrap the health check function so that its entry and exit are observable
	// through the two channels.
	hcEnterChan, hcExitChan := setupHealthCheckWrapper(t)
	cc, r := setupClient(t, &clientConfig{})
	tc := testgrpc.NewTestServiceClient(cc)
	r.UpdateState(resolver.State{
		Addresses: []resolver.Address{{Addr: lis.Addr().String()}},
		ServiceConfig: parseServiceConfig(t, r, fmt.Sprintf(`{
			"healthCheckConfig": {
				"serviceName": "foo"
			},
			"loadBalancingConfig": [{"%s":{}}]
		}`, roundrobin.Name))})

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	// Make an RPC (retried by verifyResultWithDelay) to confirm that the
	// connection is working before the server is stopped.
	if err := verifyResultWithDelay(func() (bool, error) {
		if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
			return false, fmt.Errorf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	// Open a streaming RPC that is expected to survive the GoAway; it is
	// exercised again at the end of the test.
	stream, err := tc.FullDuplexCall(ctx, grpc.WaitForReady(true))
	if err != nil {
		t.Fatalf("%v.FullDuplexCall(_) = _, %v, want <nil>", tc, err)
	}
	respParam := []*testpb.ResponseParameters{{Size: 1}}
	payload, err := newPayload(testpb.PayloadType_COMPRESSABLE, int32(1))
	if err != nil {
		t.Fatal(err)
	}
	req := &testpb.StreamingOutputCallRequest{
		ResponseParameters: respParam,
		Payload:            payload,
	}
	if err := stream.Send(req); err != nil {
		t.Fatalf("%v.Send(_) = %v, want <nil>", stream, err)
	}
	if _, err := stream.Recv(); err != nil {
		t.Fatalf("%v.Recv() = _, %v, want _, <nil>", stream, err)
	}

	// Non-blocking check: the health check function must still be running.
	select {
	case <-hcExitChan:
		t.Fatal("Health check function has exited, which is not expected.")
	default:
	}

	// Server sends GoAway. GracefulStop runs in its own goroutine so the test
	// can continue while the stream above is still open.
	go s.GracefulStop()

	// Wait for the health check function to exit. On timeout, report whether
	// it was never entered or merely never exited.
	select {
	case <-hcExitChan:
	case <-time.After(5 * time.Second):
		select {
		case <-hcEnterChan:
		default:
			t.Fatal("Health check function has not entered after 5s.")
		}
		t.Fatal("Health check function has not exited after 5s.")
	}

	// The existing RPC should be still good to proceed.
	if err := stream.Send(req); err != nil {
		t.Fatalf("%v.Send(_) = %v, want <nil>", stream, err)
	}
	if _, err := stream.Recv(); err != nil {
		t.Fatalf("%v.Recv() = _, %v, want _, <nil>", stream, err)
	}
}
   364  
   365  func (s) TestHealthCheckWithConnClose(t *testing.T) {
   366  	s, lis, ts := setupServer(t, nil)
   367  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING)
   368  
   369  	hcEnterChan, hcExitChan := setupHealthCheckWrapper(t)
   370  	cc, r := setupClient(t, &clientConfig{})
   371  	tc := testgrpc.NewTestServiceClient(cc)
   372  	r.UpdateState(resolver.State{
   373  		Addresses: []resolver.Address{{Addr: lis.Addr().String()}},
   374  		ServiceConfig: parseServiceConfig(t, r, fmt.Sprintf(`{
   375  			"healthCheckConfig": {
   376  				"serviceName": "foo"
   377  			},
   378  			"loadBalancingConfig": [{"%s":{}}]
   379  		}`, roundrobin.Name))})
   380  
   381  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   382  	defer cancel()
   383  	// make some rpcs to make sure connection is working.
   384  	if err := verifyResultWithDelay(func() (bool, error) {
   385  		if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   386  			return false, fmt.Errorf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
   387  		}
   388  		return true, nil
   389  	}); err != nil {
   390  		t.Fatal(err)
   391  	}
   392  
   393  	select {
   394  	case <-hcExitChan:
   395  		t.Fatal("Health check function has exited, which is not expected.")
   396  	default:
   397  	}
   398  	// server closes the connection
   399  	s.Stop()
   400  
   401  	select {
   402  	case <-hcExitChan:
   403  	case <-time.After(5 * time.Second):
   404  		select {
   405  		case <-hcEnterChan:
   406  		default:
   407  			t.Fatal("Health check function has not entered after 5s.")
   408  		}
   409  		t.Fatal("Health check function has not exited after 5s.")
   410  	}
   411  }
   412  
// TestHealthCheckWithAddrConnDrain checks that the health check function exits
// when its addrConn is drained, while a streaming RPC opened beforehand can
// still send and receive.
//
// addrConn drain happens when addrConn gets torn down due to its address being no longer in the
// address list returned by the resolver.
func (s) TestHealthCheckWithAddrConnDrain(t *testing.T) {
	_, lis, ts := setupServer(t, nil)
	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING)

	// Wrap the health check function so that its entry and exit are observable
	// through the two channels.
	hcEnterChan, hcExitChan := setupHealthCheckWrapper(t)
	cc, r := setupClient(t, &clientConfig{})
	tc := testgrpc.NewTestServiceClient(cc)
	// The parsed service config is kept so the second resolver update below
	// can reuse it and change only the address list.
	sc := parseServiceConfig(t, r, fmt.Sprintf(`{
		"healthCheckConfig": {
			"serviceName": "foo"
		},
		"loadBalancingConfig": [{"%s":{}}]
	}`, roundrobin.Name))
	r.UpdateState(resolver.State{
		Addresses:     []resolver.Address{{Addr: lis.Addr().String()}},
		ServiceConfig: sc,
	})

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	// Make an RPC (retried by verifyResultWithDelay) to confirm that the
	// connection is working.
	if err := verifyResultWithDelay(func() (bool, error) {
		if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
			return false, fmt.Errorf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	// Open a streaming RPC that is expected to keep working after the addrConn
	// is torn down; it is exercised again at the end of the test.
	stream, err := tc.FullDuplexCall(ctx, grpc.WaitForReady(true))
	if err != nil {
		t.Fatalf("%v.FullDuplexCall(_) = _, %v, want <nil>", tc, err)
	}
	respParam := []*testpb.ResponseParameters{{Size: 1}}
	payload, err := newPayload(testpb.PayloadType_COMPRESSABLE, int32(1))
	if err != nil {
		t.Fatal(err)
	}
	req := &testpb.StreamingOutputCallRequest{
		ResponseParameters: respParam,
		Payload:            payload,
	}
	if err := stream.Send(req); err != nil {
		t.Fatalf("%v.Send(_) = %v, want <nil>", stream, err)
	}
	if _, err := stream.Recv(); err != nil {
		t.Fatalf("%v.Recv() = _, %v, want _, <nil>", stream, err)
	}

	// Non-blocking check: the health check function must still be running.
	select {
	case <-hcExitChan:
		t.Fatal("Health check function has exited, which is not expected.")
	default:
	}
	// Trigger teardown of the ac by replacing the server's address with a
	// different one in the resolver state.
	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}, ServiceConfig: sc})

	// Wait for the health check function to exit. On timeout, report whether
	// it was never entered or merely never exited.
	select {
	case <-hcExitChan:
	case <-time.After(5 * time.Second):
		select {
		case <-hcEnterChan:
		default:
			t.Fatal("Health check function has not entered after 5s.")
		}
		t.Fatal("Health check function has not exited after 5s.")
	}

	// The existing RPC should be still good to proceed.
	if err := stream.Send(req); err != nil {
		t.Fatalf("%v.Send(_) = %v, want <nil>", stream, err)
	}
	if _, err := stream.Recv(); err != nil {
		t.Fatalf("%v.Recv() = _, %v, want _, <nil>", stream, err)
	}
}
   493  
   494  // ClientConn close will lead to its addrConns being torn down.
   495  func (s) TestHealthCheckWithClientConnClose(t *testing.T) {
   496  	_, lis, ts := setupServer(t, nil)
   497  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING)
   498  
   499  	hcEnterChan, hcExitChan := setupHealthCheckWrapper(t)
   500  	cc, r := setupClient(t, &clientConfig{})
   501  	tc := testgrpc.NewTestServiceClient(cc)
   502  	r.UpdateState(resolver.State{
   503  		Addresses: []resolver.Address{{Addr: lis.Addr().String()}},
   504  		ServiceConfig: parseServiceConfig(t, r, (fmt.Sprintf(`{
   505  			"healthCheckConfig": {
   506  				"serviceName": "foo"
   507  			},
   508  			"loadBalancingConfig": [{"%s":{}}]
   509  		}`, roundrobin.Name)))})
   510  
   511  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   512  	defer cancel()
   513  	// make some rpcs to make sure connection is working.
   514  	if err := verifyResultWithDelay(func() (bool, error) {
   515  		if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   516  			return false, fmt.Errorf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
   517  		}
   518  		return true, nil
   519  	}); err != nil {
   520  		t.Fatal(err)
   521  	}
   522  
   523  	select {
   524  	case <-hcExitChan:
   525  		t.Fatal("Health check function has exited, which is not expected.")
   526  	default:
   527  	}
   528  
   529  	// trigger addrConn teardown
   530  	cc.Close()
   531  
   532  	select {
   533  	case <-hcExitChan:
   534  	case <-time.After(5 * time.Second):
   535  		select {
   536  		case <-hcEnterChan:
   537  		default:
   538  			t.Fatal("Health check function has not entered after 5s.")
   539  		}
   540  		t.Fatal("Health check function has not exited after 5s.")
   541  	}
   542  }
   543  
// TestHealthCheckWithoutSetConnectivityStateCalledAddrConnShutDown covers the
// case where the health check function returns without the server ever having
// sent a health response, because the addrConn is torn down by a resolver
// update.
//
// This test is to test the logic in the createTransport after the health check function returns which
// closes the skipReset channel(since it has not been closed inside health check func) to unblock
// onGoAway/onClose goroutine.
func (s) TestHealthCheckWithoutSetConnectivityStateCalledAddrConnShutDown(t *testing.T) {
	// This watch handler never sends a response: it only waits for the stream
	// to end or for 5 seconds to pass, whichever comes first.
	watchFunc := func(_ *testHealthServer, in *healthpb.HealthCheckRequest, stream healthgrpc.Health_WatchServer) error {
		if in.Service != "delay" {
			return status.Error(codes.FailedPrecondition,
				"this special Watch function only handles request with service name to be \"delay\"")
		}
		// Do nothing to mock a delay of health check response from server side.
		// This case is to help with the test that covers the condition that setConnectivityState is not
		// called inside HealthCheckFunc before the func returns.
		select {
		case <-stream.Context().Done():
		case <-time.After(5 * time.Second):
		}
		return nil
	}
	_, lis, ts := setupServer(t, watchFunc)
	ts.SetServingStatus("delay", healthpb.HealthCheckResponse_SERVING)

	// Wrap the health check function so that its entry and exit are observable
	// through the two channels.
	hcEnterChan, hcExitChan := setupHealthCheckWrapper(t)
	_, r := setupClient(t, &clientConfig{})

	// The serviceName "delay" is specially handled at server side, where response will not be sent
	// back to client immediately upon receiving the request (client should receive no response until
	// test ends).
	sc := parseServiceConfig(t, r, fmt.Sprintf(`{
		"healthCheckConfig": {
			"serviceName": "delay"
		},
		"loadBalancingConfig": [{"%s":{}}]
	}`, roundrobin.Name))
	r.UpdateState(resolver.State{
		Addresses:     []resolver.Address{{Addr: lis.Addr().String()}},
		ServiceConfig: sc,
	})

	// Non-blocking check: the health check function must not have exited yet.
	select {
	case <-hcExitChan:
		t.Fatal("Health check function has exited, which is not expected.")
	default:
	}

	// Wait until the health check function has been invoked.
	select {
	case <-hcEnterChan:
	case <-time.After(5 * time.Second):
		t.Fatal("Health check function has not been invoked after 5s.")
	}
	// trigger teardown of the ac, ac in SHUTDOWN state
	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}, ServiceConfig: sc})

	// The health check func should exit without calling the setConnectivityState func, as server hasn't sent
	// any response.
	select {
	case <-hcExitChan:
	case <-time.After(5 * time.Second):
		t.Fatal("Health check function has not exited after 5s.")
	}
	// The deferred leakcheck will check whether there's a leaked goroutine, which indicates
	// whether the skipReset channel was closed to unblock the onGoAway/onClose goroutine.
}
   606  
// TestHealthCheckWithoutSetConnectivityStateCalled covers the case where the
// health check function returns without the server ever having sent a health
// response, because the server is stopped and the transport is closed.
//
// This test is to test the logic in the createTransport after the health check function returns which
// closes the allowedToReset channel(since it has not been closed inside health check func) to unblock
// onGoAway/onClose goroutine.
func (s) TestHealthCheckWithoutSetConnectivityStateCalled(t *testing.T) {
	// This watch handler never sends a response: it only waits for the stream
	// to end or for 5 seconds to pass, whichever comes first.
	watchFunc := func(_ *testHealthServer, in *healthpb.HealthCheckRequest, stream healthgrpc.Health_WatchServer) error {
		if in.Service != "delay" {
			return status.Error(codes.FailedPrecondition,
				"this special Watch function only handles request with service name to be \"delay\"")
		}
		// Do nothing to mock a delay of health check response from server side.
		// This case is to help with the test that covers the condition that setConnectivityState is not
		// called inside HealthCheckFunc before the func returns.
		select {
		case <-stream.Context().Done():
		case <-time.After(5 * time.Second):
		}
		return nil
	}
	s, lis, ts := setupServer(t, watchFunc)
	ts.SetServingStatus("delay", healthpb.HealthCheckResponse_SERVING)

	// Wrap the health check function so that its entry and exit are observable
	// through the two channels.
	hcEnterChan, hcExitChan := setupHealthCheckWrapper(t)
	_, r := setupClient(t, &clientConfig{})

	// The serviceName "delay" is specially handled at server side, where response will not be sent
	// back to client immediately upon receiving the request (client should receive no response until
	// test ends).
	r.UpdateState(resolver.State{
		Addresses: []resolver.Address{{Addr: lis.Addr().String()}},
		ServiceConfig: parseServiceConfig(t, r, fmt.Sprintf(`{
			"healthCheckConfig": {
				"serviceName": "delay"
			},
			"loadBalancingConfig": [{"%s":{}}]
		}`, roundrobin.Name))})

	// Non-blocking check: the health check function must not have exited yet.
	select {
	case <-hcExitChan:
		t.Fatal("Health check function has exited, which is not expected.")
	default:
	}

	// Wait until the health check function has been invoked.
	select {
	case <-hcEnterChan:
	case <-time.After(5 * time.Second):
		t.Fatal("Health check function has not been invoked after 5s.")
	}
	// trigger transport being closed
	s.Stop()

	// The health check func should exit without calling the setConnectivityState func, as server hasn't sent
	// any response.
	select {
	case <-hcExitChan:
	case <-time.After(5 * time.Second):
		t.Fatal("Health check function has not exited after 5s.")
	}
	// The deferred leakcheck will check whether there's a leaked goroutine, which indicates
	// whether the allowedToReset channel was closed to unblock the onGoAway/onClose goroutine.
}
   667  
   668  func testHealthCheckDisableWithDialOption(t *testing.T, addr string) {
   669  	hcEnterChan, _ := setupHealthCheckWrapper(t)
   670  	cc, r := setupClient(t, &clientConfig{extraDialOption: []grpc.DialOption{grpc.WithDisableHealthCheck()}})
   671  	tc := testgrpc.NewTestServiceClient(cc)
   672  	r.UpdateState(resolver.State{
   673  		Addresses: []resolver.Address{{Addr: addr}},
   674  		ServiceConfig: parseServiceConfig(t, r, fmt.Sprintf(`{
   675  			"healthCheckConfig": {
   676  				"serviceName": "foo"
   677  			},
   678  			"loadBalancingConfig": [{"%s":{}}]
   679  		}`, roundrobin.Name))})
   680  
   681  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   682  	defer cancel()
   683  	// send some rpcs to make sure transport has been created and is ready for use.
   684  	if err := verifyResultWithDelay(func() (bool, error) {
   685  		if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   686  			return false, fmt.Errorf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
   687  		}
   688  		return true, nil
   689  	}); err != nil {
   690  		t.Fatal(err)
   691  	}
   692  
   693  	select {
   694  	case <-hcEnterChan:
   695  		t.Fatal("Health check function has exited, which is not expected.")
   696  	default:
   697  	}
   698  }
   699  
   700  func testHealthCheckDisableWithBalancer(t *testing.T, addr string) {
   701  	hcEnterChan, _ := setupHealthCheckWrapper(t)
   702  	cc, r := setupClient(t, &clientConfig{})
   703  	tc := testgrpc.NewTestServiceClient(cc)
   704  	r.UpdateState(resolver.State{
   705  		Addresses: []resolver.Address{{Addr: addr}},
   706  		ServiceConfig: parseServiceConfig(t, r, `{
   707  	"healthCheckConfig": {
   708  		"serviceName": "foo"
   709  	},
   710  	"loadBalancingConfig": [{"pick_first":{}}]
   711  }`)})
   712  
   713  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   714  	defer cancel()
   715  	// send some rpcs to make sure transport has been created and is ready for use.
   716  	if err := verifyResultWithDelay(func() (bool, error) {
   717  		if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   718  			return false, fmt.Errorf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
   719  		}
   720  		return true, nil
   721  	}); err != nil {
   722  		t.Fatal(err)
   723  	}
   724  
   725  	select {
   726  	case <-hcEnterChan:
   727  		t.Fatal("Health check function has started, which is not expected.")
   728  	default:
   729  	}
   730  }
   731  
   732  func testHealthCheckDisableWithServiceConfig(t *testing.T, addr string) {
   733  	hcEnterChan, _ := setupHealthCheckWrapper(t)
   734  	cc, r := setupClient(t, &clientConfig{})
   735  	tc := testgrpc.NewTestServiceClient(cc)
   736  	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: addr}}})
   737  
   738  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   739  	defer cancel()
   740  	// send some rpcs to make sure transport has been created and is ready for use.
   741  	if err := verifyResultWithDelay(func() (bool, error) {
   742  		if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   743  			return false, fmt.Errorf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
   744  		}
   745  		return true, nil
   746  	}); err != nil {
   747  		t.Fatal(err)
   748  	}
   749  
   750  	select {
   751  	case <-hcEnterChan:
   752  		t.Fatal("Health check function has started, which is not expected.")
   753  	default:
   754  	}
   755  }
   756  
   757  func (s) TestHealthCheckDisable(t *testing.T) {
   758  	_, lis, ts := setupServer(t, nil)
   759  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING)
   760  
   761  	// test client side disabling configuration.
   762  	testHealthCheckDisableWithDialOption(t, lis.Addr().String())
   763  	testHealthCheckDisableWithBalancer(t, lis.Addr().String())
   764  	testHealthCheckDisableWithServiceConfig(t, lis.Addr().String())
   765  }
   766  
   767  func (s) TestHealthCheckChannelzCountingCallSuccess(t *testing.T) {
   768  	watchFunc := func(_ *testHealthServer, in *healthpb.HealthCheckRequest, _ healthgrpc.Health_WatchServer) error {
   769  		if in.Service != "channelzSuccess" {
   770  			return status.Error(codes.FailedPrecondition,
   771  				"this special Watch function only handles request with service name to be \"channelzSuccess\"")
   772  		}
   773  		return status.Error(codes.OK, "fake success")
   774  	}
   775  	_, lis, _ := setupServer(t, watchFunc)
   776  
   777  	_, r := setupClient(t, nil)
   778  	r.UpdateState(resolver.State{
   779  		Addresses: []resolver.Address{{Addr: lis.Addr().String()}},
   780  		ServiceConfig: parseServiceConfig(t, r, fmt.Sprintf(`{
   781  			"healthCheckConfig": {
   782  				"serviceName": "channelzSuccess"
   783  			},
   784  			"loadBalancingConfig": [{"%s":{}}]
   785  		}`, roundrobin.Name))})
   786  
   787  	if err := verifyResultWithDelay(func() (bool, error) {
   788  		cm, _ := channelz.GetTopChannels(0, 0)
   789  		if len(cm) == 0 {
   790  			return false, errors.New("channelz.GetTopChannels return 0 top channel")
   791  		}
   792  		subChans := cm[0].SubChans()
   793  		if len(subChans) == 0 {
   794  			return false, errors.New("there is 0 subchannel")
   795  		}
   796  		var id int64
   797  		for k := range subChans {
   798  			id = k
   799  			break
   800  		}
   801  		scm := channelz.GetSubChannel(id)
   802  		if scm == nil {
   803  			return false, errors.New("nil subchannel returned")
   804  		}
   805  		// exponential backoff retry may result in more than one health check call.
   806  		cstart, csucc, cfail := scm.ChannelMetrics.CallsStarted.Load(), scm.ChannelMetrics.CallsSucceeded.Load(), scm.ChannelMetrics.CallsFailed.Load()
   807  		if cstart > 0 && csucc > 0 && cfail == 0 {
   808  			return true, nil
   809  		}
   810  		return false, fmt.Errorf("got %d CallsStarted, %d CallsSucceeded %d CallsFailed, want >0 >0 =0", cstart, csucc, cfail)
   811  	}); err != nil {
   812  		t.Fatal(err)
   813  	}
   814  }
   815  
   816  func (s) TestHealthCheckChannelzCountingCallFailure(t *testing.T) {
   817  	watchFunc := func(_ *testHealthServer, in *healthpb.HealthCheckRequest, _ healthgrpc.Health_WatchServer) error {
   818  		if in.Service != "channelzFailure" {
   819  			return status.Error(codes.FailedPrecondition,
   820  				"this special Watch function only handles request with service name to be \"channelzFailure\"")
   821  		}
   822  		return status.Error(codes.Internal, "fake failure")
   823  	}
   824  	_, lis, _ := setupServer(t, watchFunc)
   825  
   826  	_, r := setupClient(t, nil)
   827  	r.UpdateState(resolver.State{
   828  		Addresses: []resolver.Address{{Addr: lis.Addr().String()}},
   829  		ServiceConfig: parseServiceConfig(t, r, fmt.Sprintf(`{
   830  			"healthCheckConfig": {
   831  				"serviceName": "channelzFailure"
   832  			},
   833  			"loadBalancingConfig": [{"%s":{}}]
   834  		}`, roundrobin.Name))})
   835  
   836  	if err := verifyResultWithDelay(func() (bool, error) {
   837  		cm, _ := channelz.GetTopChannels(0, 0)
   838  		if len(cm) == 0 {
   839  			return false, errors.New("channelz.GetTopChannels return 0 top channel")
   840  		}
   841  		subChans := cm[0].SubChans()
   842  		if len(subChans) == 0 {
   843  			return false, errors.New("there is 0 subchannel")
   844  		}
   845  		var id int64
   846  		for k := range subChans {
   847  			id = k
   848  			break
   849  		}
   850  		scm := channelz.GetSubChannel(id)
   851  		if scm == nil {
   852  			return false, errors.New("nil subchannel returned")
   853  		}
   854  		// exponential backoff retry may result in more than one health check call.
   855  		cstart, cfail, csucc := scm.ChannelMetrics.CallsStarted.Load(), scm.ChannelMetrics.CallsFailed.Load(), scm.ChannelMetrics.CallsSucceeded.Load()
   856  		if cstart > 0 && cfail > 0 && csucc == 0 {
   857  			return true, nil
   858  		}
   859  		return false, fmt.Errorf("got %d CallsStarted, %d CallsFailed, %d CallsSucceeded, want >0, >0", cstart, cfail, csucc)
   860  	}); err != nil {
   861  		t.Fatal(err)
   862  	}
   863  }
   864  
   865  // healthCheck is a helper function to make a unary health check RPC and return
   866  // the response.
   867  func healthCheck(d time.Duration, cc *grpc.ClientConn, service string) (*healthpb.HealthCheckResponse, error) {
   868  	ctx, cancel := context.WithTimeout(context.Background(), d)
   869  	defer cancel()
   870  	hc := healthgrpc.NewHealthClient(cc)
   871  	return hc.Check(ctx, &healthpb.HealthCheckRequest{Service: service})
   872  }
   873  
   874  // verifyHealthCheckStatus is a helper function to verify that the current
   875  // health status of the service matches the one passed in 'wantStatus'.
   876  func verifyHealthCheckStatus(t *testing.T, d time.Duration, cc *grpc.ClientConn, service string, wantStatus healthpb.HealthCheckResponse_ServingStatus) {
   877  	t.Helper()
   878  	resp, err := healthCheck(d, cc, service)
   879  	if err != nil {
   880  		t.Fatalf("Health/Check(_, _) = _, %v, want _, <nil>", err)
   881  	}
   882  	if resp.Status != wantStatus {
   883  		t.Fatalf("Got the serving status %v, want %v", resp.Status, wantStatus)
   884  	}
   885  }
   886  
   887  // verifyHealthCheckErrCode is a helper function to verify that a unary health
   888  // check RPC returns an error with a code set to 'wantCode'.
   889  func verifyHealthCheckErrCode(t *testing.T, d time.Duration, cc *grpc.ClientConn, service string, wantCode codes.Code) {
   890  	t.Helper()
   891  	if _, err := healthCheck(d, cc, service); status.Code(err) != wantCode {
   892  		t.Fatalf("Health/Check() got errCode %v, want %v", status.Code(err), wantCode)
   893  	}
   894  }
   895  
   896  // newHealthCheckStream is a helper function to start a health check streaming
   897  // RPC, and returns the stream.
   898  func newHealthCheckStream(t *testing.T, cc *grpc.ClientConn, service string) (healthgrpc.Health_WatchClient, context.CancelFunc) {
   899  	t.Helper()
   900  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   901  	hc := healthgrpc.NewHealthClient(cc)
   902  	stream, err := hc.Watch(ctx, &healthpb.HealthCheckRequest{Service: service})
   903  	if err != nil {
   904  		t.Fatalf("hc.Watch(_, %v) failed: %v", service, err)
   905  	}
   906  	return stream, cancel
   907  }
   908  
   909  // healthWatchChecker is a helper function to verify that the next health
   910  // status returned on the given stream matches the one passed in 'wantStatus'.
   911  func healthWatchChecker(t *testing.T, stream healthgrpc.Health_WatchClient, wantStatus healthpb.HealthCheckResponse_ServingStatus) {
   912  	t.Helper()
   913  	response, err := stream.Recv()
   914  	if err != nil {
   915  		t.Fatalf("stream.Recv() failed: %v", err)
   916  	}
   917  	if response.Status != wantStatus {
   918  		t.Fatalf("got servingStatus %v, want %v", response.Status, wantStatus)
   919  	}
   920  }
   921  
   922  // TestHealthCheckSuccess invokes the unary Check() RPC on the health server in
   923  // a successful case.
   924  func (s) TestHealthCheckSuccess(t *testing.T) {
   925  	for _, e := range listTestEnv() {
   926  		testHealthCheckSuccess(t, e)
   927  	}
   928  }
   929  
   930  func testHealthCheckSuccess(t *testing.T, e env) {
   931  	te := newTest(t, e)
   932  	te.enableHealthServer = true
   933  	te.startServer(&testServer{security: e.security})
   934  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_SERVING)
   935  	defer te.tearDown()
   936  
   937  	verifyHealthCheckErrCode(t, 1*time.Second, te.clientConn(), defaultHealthService, codes.OK)
   938  }
   939  
   940  // TestHealthCheckFailure invokes the unary Check() RPC on the health server
   941  // with an expired context and expects the RPC to fail.
   942  func (s) TestHealthCheckFailure(t *testing.T) {
   943  	e := env{
   944  		name:     "tcp-tls",
   945  		network:  "tcp",
   946  		security: "tls",
   947  		balancer: roundrobin.Name,
   948  	}
   949  	te := newTest(t, e)
   950  	te.declareLogNoise(
   951  		"Failed to dial ",
   952  		"grpc: the client connection is closing; please retry",
   953  	)
   954  	te.enableHealthServer = true
   955  	te.startServer(&testServer{security: e.security})
   956  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_SERVING)
   957  	defer te.tearDown()
   958  
   959  	verifyHealthCheckErrCode(t, 0*time.Second, te.clientConn(), defaultHealthService, codes.DeadlineExceeded)
   960  	awaitNewConnLogOutput()
   961  }
   962  
   963  // TestHealthCheckOff makes a unary Check() RPC on the health server where the
   964  // health status of the defaultHealthService is not set, and therefore expects
   965  // an error code 'codes.NotFound'.
   966  func (s) TestHealthCheckOff(t *testing.T) {
   967  	for _, e := range listTestEnv() {
   968  		// TODO(bradfitz): Temporarily skip this env due to #619.
   969  		if e.name == "handler-tls" {
   970  			continue
   971  		}
   972  		testHealthCheckOff(t, e)
   973  	}
   974  }
   975  
   976  func testHealthCheckOff(t *testing.T, e env) {
   977  	te := newTest(t, e)
   978  	te.enableHealthServer = true
   979  	te.startServer(&testServer{security: e.security})
   980  	defer te.tearDown()
   981  
   982  	verifyHealthCheckErrCode(t, 1*time.Second, te.clientConn(), defaultHealthService, codes.NotFound)
   983  }
   984  
   985  // TestHealthWatchMultipleClients makes a streaming Watch() RPC on the health
   986  // server with multiple clients and expects the same status on both streams.
   987  func (s) TestHealthWatchMultipleClients(t *testing.T) {
   988  	for _, e := range listTestEnv() {
   989  		testHealthWatchMultipleClients(t, e)
   990  	}
   991  }
   992  
   993  func testHealthWatchMultipleClients(t *testing.T, e env) {
   994  	te := newTest(t, e)
   995  	te.enableHealthServer = true
   996  	te.startServer(&testServer{security: e.security})
   997  	defer te.tearDown()
   998  
   999  	cc := te.clientConn()
  1000  	stream1, cf1 := newHealthCheckStream(t, cc, defaultHealthService)
  1001  	defer cf1()
  1002  	healthWatchChecker(t, stream1, healthpb.HealthCheckResponse_SERVICE_UNKNOWN)
  1003  
  1004  	stream2, cf2 := newHealthCheckStream(t, cc, defaultHealthService)
  1005  	defer cf2()
  1006  	healthWatchChecker(t, stream2, healthpb.HealthCheckResponse_SERVICE_UNKNOWN)
  1007  
  1008  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_NOT_SERVING)
  1009  	healthWatchChecker(t, stream1, healthpb.HealthCheckResponse_NOT_SERVING)
  1010  	healthWatchChecker(t, stream2, healthpb.HealthCheckResponse_NOT_SERVING)
  1011  }
  1012  
  1013  // TestHealthWatchSameStatus makes a streaming Watch() RPC on the health server
  1014  // and makes sure that the health status of the server is as expected after
  1015  // multiple calls to SetServingStatus with the same status.
  1016  func (s) TestHealthWatchSameStatus(t *testing.T) {
  1017  	for _, e := range listTestEnv() {
  1018  		testHealthWatchSameStatus(t, e)
  1019  	}
  1020  }
  1021  
  1022  func testHealthWatchSameStatus(t *testing.T, e env) {
  1023  	te := newTest(t, e)
  1024  	te.enableHealthServer = true
  1025  	te.startServer(&testServer{security: e.security})
  1026  	defer te.tearDown()
  1027  
  1028  	stream, cf := newHealthCheckStream(t, te.clientConn(), defaultHealthService)
  1029  	defer cf()
  1030  
  1031  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_SERVICE_UNKNOWN)
  1032  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_SERVING)
  1033  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_SERVING)
  1034  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_SERVING)
  1035  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_NOT_SERVING)
  1036  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_NOT_SERVING)
  1037  }
  1038  
  1039  // TestHealthWatchServiceStatusSetBeforeStartingServer starts a health server
  1040  // on which the health status for the defaultService is set before the gRPC
  1041  // server is started, and expects the correct health status to be returned.
  1042  func (s) TestHealthWatchServiceStatusSetBeforeStartingServer(t *testing.T) {
  1043  	for _, e := range listTestEnv() {
  1044  		testHealthWatchSetServiceStatusBeforeStartingServer(t, e)
  1045  	}
  1046  }
  1047  
  1048  func testHealthWatchSetServiceStatusBeforeStartingServer(t *testing.T, e env) {
  1049  	hs := health.NewServer()
  1050  	te := newTest(t, e)
  1051  	te.healthServer = hs
  1052  	hs.SetServingStatus(defaultHealthService, healthpb.HealthCheckResponse_SERVING)
  1053  	te.startServer(&testServer{security: e.security})
  1054  	defer te.tearDown()
  1055  
  1056  	stream, cf := newHealthCheckStream(t, te.clientConn(), defaultHealthService)
  1057  	defer cf()
  1058  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_SERVING)
  1059  }
  1060  
  1061  // TestHealthWatchDefaultStatusChange verifies the simple case where the
  1062  // service starts off with a SERVICE_UNKNOWN status (because SetServingStatus
  1063  // hasn't been called yet) and then moves to SERVING after SetServingStatus is
  1064  // called.
  1065  func (s) TestHealthWatchDefaultStatusChange(t *testing.T) {
  1066  	for _, e := range listTestEnv() {
  1067  		testHealthWatchDefaultStatusChange(t, e)
  1068  	}
  1069  }
  1070  
  1071  func testHealthWatchDefaultStatusChange(t *testing.T, e env) {
  1072  	te := newTest(t, e)
  1073  	te.enableHealthServer = true
  1074  	te.startServer(&testServer{security: e.security})
  1075  	defer te.tearDown()
  1076  
  1077  	stream, cf := newHealthCheckStream(t, te.clientConn(), defaultHealthService)
  1078  	defer cf()
  1079  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_SERVICE_UNKNOWN)
  1080  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_SERVING)
  1081  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_SERVING)
  1082  }
  1083  
  1084  // TestHealthWatchSetServiceStatusBeforeClientCallsWatch verifies the case
  1085  // where the health status is set to SERVING before the client calls Watch().
  1086  func (s) TestHealthWatchSetServiceStatusBeforeClientCallsWatch(t *testing.T) {
  1087  	for _, e := range listTestEnv() {
  1088  		testHealthWatchSetServiceStatusBeforeClientCallsWatch(t, e)
  1089  	}
  1090  }
  1091  
  1092  func testHealthWatchSetServiceStatusBeforeClientCallsWatch(t *testing.T, e env) {
  1093  	te := newTest(t, e)
  1094  	te.enableHealthServer = true
  1095  	te.startServer(&testServer{security: e.security})
  1096  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_SERVING)
  1097  	defer te.tearDown()
  1098  
  1099  	stream, cf := newHealthCheckStream(t, te.clientConn(), defaultHealthService)
  1100  	defer cf()
  1101  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_SERVING)
  1102  }
  1103  
  1104  // TestHealthWatchOverallServerHealthChange verifies setting the overall status
  1105  // of the server by using the empty service name.
  1106  func (s) TestHealthWatchOverallServerHealthChange(t *testing.T) {
  1107  	for _, e := range listTestEnv() {
  1108  		testHealthWatchOverallServerHealthChange(t, e)
  1109  	}
  1110  }
  1111  
  1112  func testHealthWatchOverallServerHealthChange(t *testing.T, e env) {
  1113  	te := newTest(t, e)
  1114  	te.enableHealthServer = true
  1115  	te.startServer(&testServer{security: e.security})
  1116  	defer te.tearDown()
  1117  
  1118  	stream, cf := newHealthCheckStream(t, te.clientConn(), "")
  1119  	defer cf()
  1120  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_SERVING)
  1121  	te.setHealthServingStatus("", healthpb.HealthCheckResponse_NOT_SERVING)
  1122  	healthWatchChecker(t, stream, healthpb.HealthCheckResponse_NOT_SERVING)
  1123  }
  1124  
  1125  // TestUnknownHandler verifies that an expected error is returned (by setting
  1126  // the unknownHandler on the server) for a service which is not exposed to the
  1127  // client.
  1128  func (s) TestUnknownHandler(t *testing.T) {
  1129  	// An example unknownHandler that returns a different code and a different
  1130  	// method, making sure that we do not expose what methods are implemented to
  1131  	// a client that is not authenticated.
  1132  	unknownHandler := func(any, grpc.ServerStream) error {
  1133  		return status.Error(codes.Unauthenticated, "user unauthenticated")
  1134  	}
  1135  	for _, e := range listTestEnv() {
  1136  		// TODO(bradfitz): Temporarily skip this env due to #619.
  1137  		if e.name == "handler-tls" {
  1138  			continue
  1139  		}
  1140  		testUnknownHandler(t, e, unknownHandler)
  1141  	}
  1142  }
  1143  
  1144  func testUnknownHandler(t *testing.T, e env, unknownHandler grpc.StreamHandler) {
  1145  	te := newTest(t, e)
  1146  	te.unknownHandler = unknownHandler
  1147  	te.startServer(&testServer{security: e.security})
  1148  	defer te.tearDown()
  1149  	verifyHealthCheckErrCode(t, 1*time.Second, te.clientConn(), "", codes.Unauthenticated)
  1150  }
  1151  
  1152  // TestHealthCheckServingStatus makes a streaming Watch() RPC on the health
  1153  // server and verifies a bunch of health status transitions.
  1154  func (s) TestHealthCheckServingStatus(t *testing.T) {
  1155  	for _, e := range listTestEnv() {
  1156  		testHealthCheckServingStatus(t, e)
  1157  	}
  1158  }
  1159  
  1160  func testHealthCheckServingStatus(t *testing.T, e env) {
  1161  	te := newTest(t, e)
  1162  	te.enableHealthServer = true
  1163  	te.startServer(&testServer{security: e.security})
  1164  	defer te.tearDown()
  1165  
  1166  	cc := te.clientConn()
  1167  	verifyHealthCheckStatus(t, 1*time.Second, cc, "", healthpb.HealthCheckResponse_SERVING)
  1168  	verifyHealthCheckErrCode(t, 1*time.Second, cc, defaultHealthService, codes.NotFound)
  1169  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_SERVING)
  1170  	verifyHealthCheckStatus(t, 1*time.Second, cc, defaultHealthService, healthpb.HealthCheckResponse_SERVING)
  1171  	te.setHealthServingStatus(defaultHealthService, healthpb.HealthCheckResponse_NOT_SERVING)
  1172  	verifyHealthCheckStatus(t, 1*time.Second, cc, defaultHealthService, healthpb.HealthCheckResponse_NOT_SERVING)
  1173  }
  1174  
  1175  // Test verifies that registering a nil health listener closes the health
  1176  // client.
  1177  func (s) TestHealthCheckUnregisterHealthListener(t *testing.T) {
  1178  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1179  	defer cancel()
  1180  	hcEnterChan, hcExitChan := setupHealthCheckWrapper(t)
  1181  	scChan := make(chan balancer.SubConn, 1)
  1182  	readyUpdateReceivedCh := make(chan struct{})
  1183  	bf := stub.BalancerFuncs{
  1184  		Init: func(bd *stub.BalancerData) {
  1185  			cc := bd.ClientConn
  1186  			ccw := &subConnStoringCCWrapper{
  1187  				ClientConn: cc,
  1188  				scChan:     scChan,
  1189  				stateListener: func(scs balancer.SubConnState) {
  1190  					if scs.ConnectivityState != connectivity.Ready {
  1191  						return
  1192  					}
  1193  					close(readyUpdateReceivedCh)
  1194  				},
  1195  			}
  1196  			bd.Data = balancer.Get(pickfirst.Name).Build(ccw, bd.BuildOptions)
  1197  		},
  1198  		Close: func(bd *stub.BalancerData) {
  1199  			bd.Data.(balancer.Balancer).Close()
  1200  		},
  1201  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
  1202  			return bd.Data.(balancer.Balancer).UpdateClientConnState(ccs)
  1203  		},
  1204  	}
  1205  
  1206  	stub.Register(t.Name(), bf)
  1207  	_, lis, ts := setupServer(t, nil)
  1208  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING)
  1209  
  1210  	_, r := setupClient(t, nil)
  1211  	svcCfg := fmt.Sprintf(`{
  1212  		"healthCheckConfig": {
  1213  			"serviceName": "foo"
  1214  		},
  1215  		"loadBalancingConfig": [{"%s":{}}]
  1216  	}`, t.Name())
  1217  	r.UpdateState(resolver.State{
  1218  		Addresses:     []resolver.Address{{Addr: lis.Addr().String()}},
  1219  		ServiceConfig: parseServiceConfig(t, r, svcCfg)})
  1220  
  1221  	var sc balancer.SubConn
  1222  	select {
  1223  	case sc = <-scChan:
  1224  	case <-ctx.Done():
  1225  		t.Fatal("Context timed out waiting for SubConn creation")
  1226  	}
  1227  
  1228  	// Wait for the SubConn to enter READY.
  1229  	select {
  1230  	case <-readyUpdateReceivedCh:
  1231  	case <-ctx.Done():
  1232  		t.Fatalf("Context timed out waiting for SubConn to enter READY")
  1233  	}
  1234  
  1235  	// Health check should start only after a health listener is registered.
  1236  	select {
  1237  	case <-hcEnterChan:
  1238  		t.Fatalf("Health service client created prematurely.")
  1239  	case <-time.After(defaultTestShortTimeout):
  1240  	}
  1241  
  1242  	// Register a health listener and verify it receives updates.
  1243  	healthChan := make(chan balancer.SubConnState, 1)
  1244  	sc.RegisterHealthListener(func(scs balancer.SubConnState) {
  1245  		healthChan <- scs
  1246  	})
  1247  
  1248  	select {
  1249  	case <-hcEnterChan:
  1250  	case <-ctx.Done():
  1251  		t.Fatalf("Context timed out waiting for health check to begin.")
  1252  	}
  1253  
  1254  	for readyReceived := false; !readyReceived; {
  1255  		select {
  1256  		case scs := <-healthChan:
  1257  			t.Logf("Received health update: %v", scs)
  1258  			readyReceived = scs.ConnectivityState == connectivity.Ready
  1259  		case <-ctx.Done():
  1260  			t.Fatalf("Context timed out waiting for healthy backend.")
  1261  		}
  1262  	}
  1263  
  1264  	// Registering a nil listener should invalidate the previously registered
  1265  	// listener and close the health service client.
  1266  	sc.RegisterHealthListener(nil)
  1267  	select {
  1268  	case <-hcExitChan:
  1269  	case <-ctx.Done():
  1270  		t.Fatalf("Context timed out waiting for the health client to close.")
  1271  	}
  1272  
  1273  	ts.SetServingStatus("foo", healthpb.HealthCheckResponse_NOT_SERVING)
  1274  
  1275  	// No updates should be received on the listener.
  1276  	select {
  1277  	case scs := <-healthChan:
  1278  		t.Fatalf("Received unexpected health update on the listener: %v", scs)
  1279  	case <-time.After(defaultTestShortTimeout):
  1280  	}
  1281  }