go.etcd.io/etcd@v3.3.27+incompatible/clientv3/integration/black_hole_test.go (about)

     1  // Copyright 2017 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build !cluster_proxy
    16  
    17  package integration
    18  
    19  import (
    20  	"context"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/coreos/etcd/clientv3"
    25  	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
    26  	"github.com/coreos/etcd/integration"
    27  	"github.com/coreos/etcd/pkg/testutil"
    28  	"google.golang.org/grpc"
    29  )
    30  
    31  // TestBalancerUnderBlackholeKeepAliveWatch tests when watch discovers it cannot talk to
    32  // blackholed endpoint, client balancer switches to healthy one.
    33  // TODO: test server-to-client keepalive ping
    34  func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) {
    35  	defer testutil.AfterTest(t)
    36  
    37  	clus := integration.NewClusterV3(t, &integration.ClusterConfig{
    38  		Size:                 2,
    39  		GRPCKeepAliveMinTime: time.Millisecond, // avoid too_many_pings
    40  	})
    41  	defer clus.Terminate(t)
    42  
    43  	eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}
    44  
    45  	ccfg := clientv3.Config{
    46  		Endpoints:            []string{eps[0]},
    47  		DialTimeout:          time.Second,
    48  		DialOptions:          []grpc.DialOption{grpc.WithBlock()},
    49  		DialKeepAliveTime:    time.Second,
    50  		DialKeepAliveTimeout: 500 * time.Millisecond,
    51  	}
    52  
    53  	// gRPC internal implementation related.
    54  	pingInterval := ccfg.DialKeepAliveTime + ccfg.DialKeepAliveTimeout
    55  	// 3s for slow machine to process watch and reset connections
    56  	// TODO: only send healthy endpoint to gRPC so gRPC wont waste time to
    57  	// dial for unhealthy endpoint.
    58  	// then we can reduce 3s to 1s.
    59  	timeout := pingInterval + integration.RequestWaitTimeout
    60  
    61  	cli, err := clientv3.New(ccfg)
    62  	if err != nil {
    63  		t.Fatal(err)
    64  	}
    65  	defer cli.Close()
    66  
    67  	wch := cli.Watch(context.Background(), "foo", clientv3.WithCreatedNotify())
    68  	if _, ok := <-wch; !ok {
    69  		t.Fatalf("watch failed on creation")
    70  	}
    71  
    72  	// endpoint can switch to eps[1] when it detects the failure of eps[0]
    73  	cli.SetEndpoints(eps...)
    74  
    75  	// give enough time for balancer resolution
    76  	time.Sleep(5 * time.Second)
    77  
    78  	clus.Members[0].Blackhole()
    79  
    80  	if _, err = clus.Client(1).Put(context.TODO(), "foo", "bar"); err != nil {
    81  		t.Fatal(err)
    82  	}
    83  	select {
    84  	case <-wch:
    85  	case <-time.After(timeout):
    86  		t.Error("took too long to receive watch events")
    87  	}
    88  
    89  	clus.Members[0].Unblackhole()
    90  
    91  	// waiting for moving eps[0] out of unhealthy, so that it can be re-pined.
    92  	time.Sleep(ccfg.DialTimeout)
    93  
    94  	clus.Members[1].Blackhole()
    95  
    96  	// make sure client[0] can connect to eps[0] after remove the blackhole.
    97  	if _, err = clus.Client(0).Get(context.TODO(), "foo"); err != nil {
    98  		t.Fatal(err)
    99  	}
   100  	if _, err = clus.Client(0).Put(context.TODO(), "foo", "bar1"); err != nil {
   101  		t.Fatal(err)
   102  	}
   103  
   104  	select {
   105  	case <-wch:
   106  	case <-time.After(timeout):
   107  		t.Error("took too long to receive watch events")
   108  	}
   109  }
   110  
   111  func TestBalancerUnderBlackholeNoKeepAlivePut(t *testing.T) {
   112  	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
   113  		_, err := cli.Put(ctx, "foo", "bar")
   114  		if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
   115  			return errExpected
   116  		}
   117  		return err
   118  	})
   119  }
   120  
   121  func TestBalancerUnderBlackholeNoKeepAliveDelete(t *testing.T) {
   122  	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
   123  		_, err := cli.Delete(ctx, "foo")
   124  		if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
   125  			return errExpected
   126  		}
   127  		return err
   128  	})
   129  }
   130  
   131  func TestBalancerUnderBlackholeNoKeepAliveTxn(t *testing.T) {
   132  	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
   133  		_, err := cli.Txn(ctx).
   134  			If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
   135  			Then(clientv3.OpPut("foo", "bar")).
   136  			Else(clientv3.OpPut("foo", "baz")).Commit()
   137  		if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
   138  			return errExpected
   139  		}
   140  		return err
   141  	})
   142  }
   143  
   144  func TestBalancerUnderBlackholeNoKeepAliveLinearizableGet(t *testing.T) {
   145  	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
   146  		_, err := cli.Get(ctx, "a")
   147  		if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
   148  			return errExpected
   149  		}
   150  		return err
   151  	})
   152  }
   153  
   154  func TestBalancerUnderBlackholeNoKeepAliveSerializableGet(t *testing.T) {
   155  	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
   156  		_, err := cli.Get(ctx, "a", clientv3.WithSerializable())
   157  		if isClientTimeout(err) || isServerCtxTimeout(err) {
   158  			return errExpected
   159  		}
   160  		return err
   161  	})
   162  }
   163  
   164  // testBalancerUnderBlackholeNoKeepAlive ensures that first request to blackholed endpoint
   165  // fails due to context timeout, but succeeds on next try, with endpoint switch.
   166  func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Client, context.Context) error) {
   167  	defer testutil.AfterTest(t)
   168  
   169  	clus := integration.NewClusterV3(t, &integration.ClusterConfig{
   170  		Size:               2,
   171  		SkipCreatingClient: true,
   172  	})
   173  	defer clus.Terminate(t)
   174  
   175  	eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}
   176  
   177  	ccfg := clientv3.Config{
   178  		Endpoints:   []string{eps[0]},
   179  		DialTimeout: 1 * time.Second,
   180  		DialOptions: []grpc.DialOption{grpc.WithBlock()},
   181  	}
   182  	cli, err := clientv3.New(ccfg)
   183  	if err != nil {
   184  		t.Fatal(err)
   185  	}
   186  	defer cli.Close()
   187  
   188  	// wait for eps[0] to be pinned
   189  	mustWaitPinReady(t, cli)
   190  
   191  	// add all eps to list, so that when the original pined one fails
   192  	// the client can switch to other available eps
   193  	cli.SetEndpoints(eps...)
   194  
   195  	// blackhole eps[0]
   196  	clus.Members[0].Blackhole()
   197  
   198  	// With round robin balancer, client will make a request to a healthy endpoint
   199  	// within a few requests.
   200  	// TODO: first operation can succeed
   201  	// when gRPC supports better retry on non-delivered request
   202  	for i := 0; i < 5; i++ {
   203  		ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
   204  		err = op(cli, ctx)
   205  		cancel()
   206  		if err == nil {
   207  			break
   208  		} else if err == errExpected {
   209  			t.Logf("#%d: current error %v", i, err)
   210  		} else {
   211  			t.Errorf("#%d: failed with error %v", i, err)
   212  		}
   213  	}
   214  	if err != nil {
   215  		t.Fatal(err)
   216  	}
   217  }