github.com/lfch/etcd-io/tests/v3@v3.0.0-20221004140520-eac99acd3e9d/functional/tester/stresser_key.go (about)

     1  // Copyright 2018 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tester
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math/rand"
    21  	"reflect"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/lfch/etcd-io/api/v3/v3rpc/rpctypes"
    27  	"github.com/lfch/etcd-io/client/v3"
    28  	"github.com/lfch/etcd-io/raft/v3"
    29  	"github.com/lfch/etcd-io/server/v3/etcdserver/errors"
    30  	"github.com/lfch/etcd-io/tests/v3/functional/rpcpb"
    31  
    32  	"go.uber.org/zap"
    33  	"golang.org/x/time/rate"
    34  	"google.golang.org/grpc"
    35  	"google.golang.org/grpc/codes"
    36  	"google.golang.org/grpc/status"
    37  )
    38  
// keyStresser generates randomized etcd KV load (puts, ranges, deletes,
// and nested txns) against a single cluster member.
type keyStresser struct {
	lg *zap.Logger

	// m is the cluster member this stresser targets.
	m *rpcpb.Member

	// Relative weights for picking each operation kind; see stressTable.
	weightKVWriteSmall     float64
	weightKVWriteLarge     float64
	weightKVReadOneKey     float64
	weightKVReadRange      float64
	weightKVDeleteOneKey   float64
	weightKVDeleteRange    float64
	weightKVTxnWriteDelete float64

	// Key/value sizing and key-space parameters for generated operations.
	keySize           int
	keyLargeSize      int
	keySuffixRange    int
	keyTxnSuffixRange int
	keyTxnOps         int

	// rateLimiter throttles the aggregate operation rate across workers.
	rateLimiter *rate.Limiter

	// wg tracks the clientsN worker goroutines started by Stress.
	wg       sync.WaitGroup
	clientsN int

	ctx    context.Context
	cancel func()
	cli    *clientv3.Client

	// emu guards ems and paused; retryable-error counts are recorded
	// only while not paused.
	emu    sync.RWMutex
	ems    map[string]int
	paused bool

	// atomicModifiedKeys records the number of keys created and deleted by the stresser.
	atomicModifiedKeys int64

	// stressTable weights and randomly selects among the stress operations.
	stressTable *stressTable
}
    76  
    77  func (s *keyStresser) Stress() error {
    78  	var err error
    79  	s.cli, err = s.m.CreateEtcdClient(grpc.WithBackoffMaxDelay(1 * time.Second))
    80  	if err != nil {
    81  		return fmt.Errorf("%v (%q)", err, s.m.EtcdClientEndpoint)
    82  	}
    83  	s.ctx, s.cancel = context.WithCancel(context.Background())
    84  
    85  	s.wg.Add(s.clientsN)
    86  
    87  	s.stressTable = createStressTable([]stressEntry{
    88  		{weight: s.weightKVWriteSmall, f: newStressPut(s.cli, s.keySuffixRange, s.keySize)},
    89  		{weight: s.weightKVWriteLarge, f: newStressPut(s.cli, s.keySuffixRange, s.keyLargeSize)},
    90  		{weight: s.weightKVReadOneKey, f: newStressRange(s.cli, s.keySuffixRange)},
    91  		{weight: s.weightKVReadRange, f: newStressRangeInterval(s.cli, s.keySuffixRange)},
    92  		{weight: s.weightKVDeleteOneKey, f: newStressDelete(s.cli, s.keySuffixRange)},
    93  		{weight: s.weightKVDeleteRange, f: newStressDeleteInterval(s.cli, s.keySuffixRange)},
    94  		{weight: s.weightKVTxnWriteDelete, f: newStressTxn(s.cli, s.keyTxnSuffixRange, s.keyTxnOps)},
    95  	})
    96  
    97  	s.emu.Lock()
    98  	s.paused = false
    99  	s.ems = make(map[string]int, 100)
   100  	s.emu.Unlock()
   101  	for i := 0; i < s.clientsN; i++ {
   102  		go s.run()
   103  	}
   104  
   105  	s.lg.Info(
   106  		"stress START",
   107  		zap.String("stress-type", "KV"),
   108  		zap.String("endpoint", s.m.EtcdClientEndpoint),
   109  	)
   110  	return nil
   111  }
   112  
   113  func (s *keyStresser) run() {
   114  	defer s.wg.Done()
   115  
   116  	for {
   117  		if err := s.rateLimiter.Wait(s.ctx); err == context.Canceled {
   118  			return
   119  		}
   120  
   121  		// TODO: 10-second is enough timeout to cover leader failure
   122  		// and immediate leader election. Find out what other cases this
   123  		// could be timed out.
   124  		sctx, scancel := context.WithTimeout(s.ctx, 10*time.Second)
   125  		modifiedKeys, err := s.stressTable.choose()(sctx)
   126  		scancel()
   127  		if err == nil {
   128  			atomic.AddInt64(&s.atomicModifiedKeys, modifiedKeys)
   129  			continue
   130  		}
   131  
   132  		if !s.isRetryableError(err) {
   133  			return
   134  		}
   135  
   136  		// only record errors before pausing stressers
   137  		s.emu.Lock()
   138  		if !s.paused {
   139  			s.ems[err.Error()]++
   140  		}
   141  		s.emu.Unlock()
   142  	}
   143  }
   144  
   145  func (s *keyStresser) isRetryableError(err error) bool {
   146  	switch rpctypes.ErrorDesc(err) {
   147  	// retryable
   148  	case context.DeadlineExceeded.Error():
   149  		// This retries when request is triggered at the same time as
   150  		// leader failure. When we terminate the leader, the request to
   151  		// that leader cannot be processed, and times out. Also requests
   152  		// to followers cannot be forwarded to the old leader, so timing out
   153  		// as well. We want to keep stressing until the cluster elects a
   154  		// new leader and start processing requests again.
   155  		return true
   156  	case errors.ErrTimeoutDueToLeaderFail.Error(), errors.ErrTimeout.Error():
   157  		// This retries when request is triggered at the same time as
   158  		// leader failure and follower nodes receive time out errors
   159  		// from losing their leader. Followers should retry to connect
   160  		// to the new leader.
   161  		return true
   162  	case errors.ErrStopped.Error():
   163  		// one of the etcd nodes stopped from failure injection
   164  		return true
   165  	case rpctypes.ErrNotCapable.Error():
   166  		// capability check has not been done (in the beginning)
   167  		return true
   168  	case rpctypes.ErrTooManyRequests.Error():
   169  		// hitting the recovering member.
   170  		return true
   171  	case raft.ErrProposalDropped.Error():
   172  		// removed member, or leadership has changed (old leader got raftpb.MsgProp)
   173  		return true
   174  
   175  	// not retryable.
   176  	case context.Canceled.Error():
   177  		// from stresser.Cancel method:
   178  		return false
   179  	}
   180  
   181  	if status.Convert(err).Code() == codes.Unavailable {
   182  		// gRPC connection errors are translated to status.Unavailable
   183  		return true
   184  	}
   185  
   186  	s.lg.Warn(
   187  		"stress run exiting",
   188  		zap.String("stress-type", "KV"),
   189  		zap.String("endpoint", s.m.EtcdClientEndpoint),
   190  		zap.String("error-type", reflect.TypeOf(err).String()),
   191  		zap.String("error-desc", rpctypes.ErrorDesc(err)),
   192  		zap.Error(err),
   193  	)
   194  	return false
   195  }
   196  
// Pause stops all stress workers and returns the collected error counts.
// It is implemented as a full Close; call Stress again to resume.
func (s *keyStresser) Pause() map[string]int {
	return s.Close()
}
   200  
   201  func (s *keyStresser) Close() map[string]int {
   202  	s.cancel()
   203  	s.cli.Close()
   204  	s.wg.Wait()
   205  
   206  	s.emu.Lock()
   207  	s.paused = true
   208  	ess := s.ems
   209  	s.ems = make(map[string]int, 100)
   210  	s.emu.Unlock()
   211  
   212  	s.lg.Info(
   213  		"stress STOP",
   214  		zap.String("stress-type", "KV"),
   215  		zap.String("endpoint", s.m.EtcdClientEndpoint),
   216  	)
   217  	return ess
   218  }
   219  
// ModifiedKeys returns the total number of keys created and deleted by
// this stresser so far (safe for concurrent use).
func (s *keyStresser) ModifiedKeys() int64 {
	return atomic.LoadInt64(&s.atomicModifiedKeys)
}
   223  
   224  type stressFunc func(ctx context.Context) (modifiedKeys int64, err error)
   225  
   226  type stressEntry struct {
   227  	weight float64
   228  	f      stressFunc
   229  }
   230  
   231  type stressTable struct {
   232  	entries    []stressEntry
   233  	sumWeights float64
   234  }
   235  
   236  func createStressTable(entries []stressEntry) *stressTable {
   237  	st := stressTable{entries: entries}
   238  	for _, entry := range st.entries {
   239  		st.sumWeights += entry.weight
   240  	}
   241  	return &st
   242  }
   243  
   244  func (st *stressTable) choose() stressFunc {
   245  	v := rand.Float64() * st.sumWeights
   246  	var sum float64
   247  	var idx int
   248  	for i := range st.entries {
   249  		sum += st.entries[i].weight
   250  		if sum >= v {
   251  			idx = i
   252  			break
   253  		}
   254  	}
   255  	return st.entries[idx].f
   256  }
   257  
   258  func newStressPut(cli *clientv3.Client, keySuffixRange, keySize int) stressFunc {
   259  	return func(ctx context.Context) (int64, error) {
   260  		_, err := cli.Put(
   261  			ctx,
   262  			fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)),
   263  			string(randBytes(keySize)),
   264  		)
   265  		return 1, err
   266  	}
   267  }
   268  
   269  func newStressTxn(cli *clientv3.Client, keyTxnSuffixRange, txnOps int) stressFunc {
   270  	keys := make([]string, keyTxnSuffixRange)
   271  	for i := range keys {
   272  		keys[i] = fmt.Sprintf("/k%03d", i)
   273  	}
   274  	return writeTxn(cli, keys, txnOps)
   275  }
   276  
// writeTxn returns a stressFunc that runs one transaction over txnOps
// distinct keys randomly sampled from keys. For each selected key the txn
// deletes it if it exists (version > 0) and otherwise writes it; keys after
// the first are wrapped as nested txns appended to both branches.
//
// NOTE(review): the sampling loop below terminates only when
// txnOps <= len(keys); callers (newStressTxn) must guarantee this — confirm
// against the tester configuration.
func writeTxn(cli *clientv3.Client, keys []string, txnOps int) stressFunc {
	return func(ctx context.Context) (int64, error) {
		// Sample txnOps distinct keys by inserting into a set until full.
		ks := make(map[string]struct{}, txnOps)
		for len(ks) != txnOps {
			ks[keys[rand.Intn(len(keys))]] = struct{}{}
		}
		selected := make([]string, 0, txnOps)
		for k := range ks {
			selected = append(selected, k)
		}
		// Ops for the first selected key seed the then/else branches.
		com, delOp, putOp := getTxnOps(selected[0], "bar00")
		thenOps := []clientv3.Op{delOp}
		elseOps := []clientv3.Op{putOp}
		for i := 1; i < txnOps; i++ { // nested txns
			k, v := selected[i], fmt.Sprintf("bar%02d", i)
			com, delOp, putOp = getTxnOps(k, v)
			txnOp := clientv3.OpTxn(
				[]clientv3.Cmp{com},
				[]clientv3.Op{delOp},
				[]clientv3.Op{putOp},
			)
			thenOps = append(thenOps, txnOp)
			elseOps = append(elseOps, txnOp)
		}
		// NOTE(review): when txnOps > 1, com was reassigned in the loop, so
		// the top-level If() tests the LAST selected key rather than
		// selected[0] — confirm this is intended.
		_, err := cli.Txn(ctx).
			If(com).
			Then(thenOps...).
			Else(elseOps...).
			Commit()
		return int64(txnOps), err
	}
}
   309  
   310  func getTxnOps(k, v string) (
   311  	cmp clientv3.Cmp,
   312  	dop clientv3.Op,
   313  	pop clientv3.Op) {
   314  	// if key exists (version > 0)
   315  	cmp = clientv3.Compare(clientv3.Version(k), ">", 0)
   316  	dop = clientv3.OpDelete(k)
   317  	pop = clientv3.OpPut(k, v)
   318  	return cmp, dop, pop
   319  }
   320  
   321  func newStressRange(cli *clientv3.Client, keySuffixRange int) stressFunc {
   322  	return func(ctx context.Context) (int64, error) {
   323  		_, err := cli.Get(ctx, fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)))
   324  		return 0, err
   325  	}
   326  }
   327  
   328  func newStressRangeInterval(cli *clientv3.Client, keySuffixRange int) stressFunc {
   329  	return func(ctx context.Context) (int64, error) {
   330  		start := rand.Intn(keySuffixRange)
   331  		end := start + 500
   332  		_, err := cli.Get(
   333  			ctx,
   334  			fmt.Sprintf("foo%016x", start),
   335  			clientv3.WithRange(fmt.Sprintf("foo%016x", end)),
   336  		)
   337  		return 0, err
   338  	}
   339  }
   340  
   341  func newStressDelete(cli *clientv3.Client, keySuffixRange int) stressFunc {
   342  	return func(ctx context.Context) (int64, error) {
   343  		_, err := cli.Delete(ctx, fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)))
   344  		return 1, err
   345  	}
   346  }
   347  
   348  func newStressDeleteInterval(cli *clientv3.Client, keySuffixRange int) stressFunc {
   349  	return func(ctx context.Context) (int64, error) {
   350  		start := rand.Intn(keySuffixRange)
   351  		end := start + 500
   352  		resp, err := cli.Delete(ctx,
   353  			fmt.Sprintf("foo%016x", start),
   354  			clientv3.WithRange(fmt.Sprintf("foo%016x", end)),
   355  		)
   356  		if err == nil {
   357  			return resp.Deleted, nil
   358  		}
   359  		return 0, err
   360  	}
   361  }