github.com/lfch/etcd-io/tests/v3@v3.0.0-20221004140520-eac99acd3e9d/integration/clientv3/connectivity/black_hole_test.go

// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !cluster_proxy
// +build !cluster_proxy

package connectivity_test

import (
	"context"
	"testing"
	"time"

	"github.com/lfch/etcd-io/api/v3/v3rpc/rpctypes"
	clientv3 "github.com/lfch/etcd-io/client/v3"
	integration2 "github.com/lfch/etcd-io/tests/v3/framework/integration"
	clientv3test "github.com/lfch/etcd-io/tests/v3/integration/clientv3"
	"google.golang.org/grpc"
)

// TestBalancerUnderBlackholeKeepAliveWatch verifies that when a watch
// discovers it cannot talk to a blackholed endpoint, the client balancer
// switches to a healthy one.
// TODO: test server-to-client keepalive ping
func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) {
	integration2.BeforeTest(t)

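	// A 2-member cluster with bridged connections lets the test cut off
	// traffic to one member while the other stays reachable.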
	clus := integration2.NewCluster(t, &integration2.ClusterConfig{
		Size:                 2,
		GRPCKeepAliveMinTime: time.Millisecond, // avoid too_many_pings
		UseBridge:            true,
	})
	defer clus.Terminate(t)

	eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL()}

	ccfg := clientv3.Config{
		Endpoints:            []string{eps[0]},
		DialTimeout:          time.Second,
		DialOptions:          []grpc.DialOption{grpc.WithBlock()},
		DialKeepAliveTime:    time.Second,
		DialKeepAliveTimeout: 500 * time.Millisecond,
	}

	// Related to the gRPC-internal keepalive implementation: a blackholed
	// connection is detected after one keepalive interval plus its timeout.
	pingInterval := ccfg.DialKeepAliveTime + ccfg.DialKeepAliveTimeout
	// Allow extra time for a slow machine to process the watch and reset connections.
	// TODO: once only healthy endpoints are passed to gRPC, it won't waste
	// time dialing the unhealthy endpoint, and this timeout can be reduced.
	timeout := pingInterval + integration2.RequestWaitTimeout

	cli, err := integration2.NewClient(t, ccfg)
	if err != nil {
		t.Fatal(err)
	}
	defer cli.Close()

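	// Open a watch and wait for the watch-created notification so the
	// stream is established before the blackhole is applied.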
	wch := cli.Watch(context.Background(), "foo", clientv3.WithCreatedNotify())
	if _, ok := <-wch; !ok {
		t.Fatalf("watch failed on creation")
	}

	// The client can switch to eps[1] when it detects the failure of eps[0].
	cli.SetEndpoints(eps...)

	// Give enough time for balancer resolution.
	time.Sleep(5 * time.Second)

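	// Blackhole eps[0]: the bridge drops all traffic to member 0, so the
	// client's keepalive pings to it go unanswered.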
	clus.Members[0].Bridge().Blackhole()

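	// Put a key through the healthy member; the watch event must arrive
	// over the connection switched to eps[1].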
	if _, err = clus.Client(1).Put(context.TODO(), "foo", "bar"); err != nil {
		t.Fatal(err)
	}
	select {
	case <-wch:
	case <-time.After(timeout):
		t.Error("took too long to receive watch events")
	}

	clus.Members[0].Bridge().Unblackhole()

	// Wait for eps[0] to be moved out of the unhealthy set so that it can be re-pinned.
	time.Sleep(ccfg.DialTimeout)

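	// Blackhole eps[1] to force the client back onto the recovered eps[0].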
	clus.Members[1].Bridge().Blackhole()

	// Make sure Client(0) can connect to eps[0] after removing the blackhole.
	if _, err = clus.Client(0).Get(context.TODO(), "foo"); err != nil {
		t.Fatal(err)
	}
	if _, err = clus.Client(0).Put(context.TODO(), "foo", "bar1"); err != nil {
		t.Fatal(err)
	}

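	// The watch should deliver the event for the second put as well,
	// showing the stream survived the switch back to eps[0].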
	select {
	case <-wch:
	case <-time.After(timeout):
		t.Error("took too long to receive watch events")
	}
}

func TestBalancerUnderBlackholeNoKeepAlivePut(t *testing.T) {
	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Put(ctx, "foo", "bar")
		if clientv3test.IsClientTimeout(err) || clientv3test.IsServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
			return errExpected
		}
		return err
	})
}

func TestBalancerUnderBlackholeNoKeepAliveDelete(t *testing.T) {
	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Delete(ctx, "foo")
		if clientv3test.IsClientTimeout(err) || clientv3test.IsServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
			return errExpected
		}
		return err
	})
}

func TestBalancerUnderBlackholeNoKeepAliveTxn(t *testing.T) {
	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Txn(ctx).
			If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
			Then(clientv3.OpPut("foo", "bar")).
			Else(clientv3.OpPut("foo", "baz")).Commit()
		if clientv3test.IsClientTimeout(err) || clientv3test.IsServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
			return errExpected
		}
		return err
	})
}

func TestBalancerUnderBlackholeNoKeepAliveLinearizableGet(t *testing.T) {
	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Get(ctx, "a")
		if clientv3test.IsClientTimeout(err) || clientv3test.IsServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
			return errExpected
		}
		return err
	})
}

func TestBalancerUnderBlackholeNoKeepAliveSerializableGet(t *testing.T) {
	testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
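		// Serializable reads are served from the local member without going
		// through the leader, so rpctypes.ErrTimeout is not expected here.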
		_, err := cli.Get(ctx, "a", clientv3.WithSerializable())
		if clientv3test.IsClientTimeout(err) || clientv3test.IsServerCtxTimeout(err) {
			return errExpected
		}
		return err
	})
}

// testBalancerUnderBlackholeNoKeepAlive ensures that the first request to a
// blackholed endpoint fails due to context timeout, but a subsequent attempt
// succeeds once the balancer switches to a healthy endpoint.
func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Client, context.Context) error) {
	integration2.BeforeTest(t)

	clus := integration2.NewCluster(t, &integration2.ClusterConfig{
		Size:      2,
		UseBridge: true,
	})
	defer clus.Terminate(t)

	eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL()}

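	// No keepalive is configured, so a dead connection is only noticed
	// when a request times out.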
	ccfg := clientv3.Config{
		Endpoints:   []string{eps[0]},
		DialTimeout: 1 * time.Second,
		DialOptions: []grpc.DialOption{grpc.WithBlock()},
	}
	cli, err := integration2.NewClient(t, ccfg)
	if err != nil {
		t.Fatal(err)
	}
	defer cli.Close()

	// Wait for eps[0] to be pinned.
	clientv3test.MustWaitPinReady(t, cli)

	// Add all eps to the list so that when the originally pinned endpoint
	// fails, the client can switch to another available endpoint.
	cli.SetEndpoints(eps...)

	// Blackhole eps[0].
	clus.Members[0].Bridge().Blackhole()

	// With the round-robin balancer, the client reaches a healthy endpoint
	// within a few requests.
	// TODO: the first operation can succeed once gRPC supports better retry
	// of requests that were never delivered.
	for i := 0; i < 5; i++ {
		ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
		err = op(cli, ctx)
		cancel()
		if err == nil {
			break
		} else if err == errExpected {
			t.Logf("#%d: current error %v", i, err)
		} else {
			t.Errorf("#%d: failed with error %v", i, err)
		}
	}
	if err != nil {
		t.Fatal(err)
	}
}