go.etcd.io/etcd@v3.3.27+incompatible/functional/tester/stresser_key.go

// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tester

import (
	"context"
	"fmt"
	"math/rand"
	"reflect"
	"sync"
	"sync/atomic"
	"time"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/etcdserver"
	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
	"github.com/coreos/etcd/functional/rpcpb"

	"go.uber.org/zap"
	"golang.org/x/time/rate"
	"google.golang.org/grpc"
)

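// keyStresser stresses the key-value store of a single etcd member with a
// weighted, randomized mix of puts, ranges, deletes, and transactions.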
type keyStresser struct {
	stype rpcpb.Stresser
	lg    *zap.Logger

	m *rpcpb.Member

	keySize           int
	keyLargeSize      int
	keySuffixRange    int
	keyTxnSuffixRange int
	keyTxnOps         int

	rateLimiter *rate.Limiter

	wg       sync.WaitGroup
	clientsN int

	ctx    context.Context
	cancel func()
	cli    *clientv3.Client

	// emu guards ems and paused.
	emu    sync.RWMutex
	ems    map[string]int
	paused bool

	// atomicModifiedKeys records the number of keys created and deleted by the stresser.
	atomicModifiedKeys int64

	stressTable *stressTable
}

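// Stress connects a client to the member, builds the weighted table of
// stress operations, and starts clientsN worker goroutines that run until
// the stresser is closed.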
func (s *keyStresser) Stress() error {
	var err error
	s.cli, err = s.m.CreateEtcdClient(grpc.WithBackoffMaxDelay(1 * time.Second))
	if err != nil {
		return fmt.Errorf("%v (%q)", err, s.m.EtcdClientEndpoint)
	}
	s.ctx, s.cancel = context.WithCancel(context.Background())

	s.wg.Add(s.clientsN)
	stressEntries := []stressEntry{
		{weight: 0.7, f: newStressPut(s.cli, s.keySuffixRange, s.keySize)},
		{
			weight: 0.7 * float32(s.keySize) / float32(s.keyLargeSize),
			f:      newStressPut(s.cli, s.keySuffixRange, s.keyLargeSize),
		},
		{weight: 0.07, f: newStressRange(s.cli, s.keySuffixRange)},
		{weight: 0.07, f: newStressRangeInterval(s.cli, s.keySuffixRange)},
		{weight: 0.07, f: newStressDelete(s.cli, s.keySuffixRange)},
		{weight: 0.07, f: newStressDeleteInterval(s.cli, s.keySuffixRange)},
	}
	if s.keyTxnSuffixRange > 0 {
		// split the put weight so that writes still make up ~70% of the workload
		stressEntries[0].weight = 0.35
		stressEntries = append(stressEntries, stressEntry{
			weight: 0.35,
			f:      newStressTxn(s.cli, s.keyTxnSuffixRange, s.keyTxnOps),
		})
	}
	s.stressTable = createStressTable(stressEntries)

	s.emu.Lock()
	s.paused = false
	s.ems = make(map[string]int, 100)
	s.emu.Unlock()
	for i := 0; i < s.clientsN; i++ {
		go s.run()
	}

	s.lg.Info(
		"stress START",
		zap.String("stress-type", s.stype.String()),
		zap.String("endpoint", s.m.EtcdClientEndpoint),
	)
	return nil
}

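// run repeatedly executes a randomly chosen stress operation. Errors that
// are expected during failure injection are tolerated and the loop retries;
// cancellation or an unexpected error ends the worker.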
func (s *keyStresser) run() {
	defer s.wg.Done()

	for {
		if err := s.rateLimiter.Wait(s.ctx); err == context.Canceled {
			return
		}

		// TODO: a 10-second timeout is enough to cover a leader failure
		// followed by an immediate leader election. Find out in what other
		// cases requests could time out.
		sctx, scancel := context.WithTimeout(s.ctx, 10*time.Second)
		err, modifiedKeys := s.stressTable.choose()(sctx)
		scancel()
		if err == nil {
			atomic.AddInt64(&s.atomicModifiedKeys, modifiedKeys)
			continue
		}

		switch rpctypes.ErrorDesc(err) {
		case context.DeadlineExceeded.Error():
			// This retries when a request is triggered at the same time as a
			// leader failure. When we terminate the leader, requests to that
			// leader cannot be processed and time out. Requests to followers
			// cannot be forwarded to the old leader, so they time out as
			// well. We want to keep stressing until the cluster elects a new
			// leader and starts processing requests again.
		case etcdserver.ErrTimeoutDueToLeaderFail.Error(), etcdserver.ErrTimeout.Error():
			// This retries when a request is triggered at the same time as a
			// leader failure and follower nodes receive timeout errors after
			// losing their leader. Followers should retry to connect to the
			// new leader.
		case etcdserver.ErrStopped.Error():
			// one of the etcd nodes was stopped by failure injection
		// case transport.ErrConnClosing.Desc:
		// 	// server closed the transport (failure injected node)
		case rpctypes.ErrNotCapable.Error():
			// the capability check has not completed yet (right after startup)
		case rpctypes.ErrTooManyRequests.Error():
			// the request hit a member that is still recovering
		case context.Canceled.Error():
			// the stresser was canceled via Close or Pause
			return
		case grpc.ErrClientConnClosing.Error():
			// the stresser was canceled via Close or Pause
			return
		default:
			s.lg.Warn(
				"stress run exiting",
				zap.String("stress-type", s.stype.String()),
				zap.String("endpoint", s.m.EtcdClientEndpoint),
				zap.String("error-type", reflect.TypeOf(err).String()),
				zap.Error(err),
			)
			return
		}

		// only record errors before pausing stressers
		s.emu.Lock()
		if !s.paused {
			s.ems[err.Error()]++
		}
		s.emu.Unlock()
	}
}

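// Pause stops the stresser and returns the accumulated error counts; it is
// implemented as an alias for Close.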
func (s *keyStresser) Pause() map[string]int {
	return s.Close()
}

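// Close cancels in-flight operations, waits for all worker goroutines to
// exit, and returns a map from error message to occurrence count.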
func (s *keyStresser) Close() map[string]int {
	s.cancel()
	s.cli.Close()
	s.wg.Wait()

	s.emu.Lock()
	s.paused = true
	ess := s.ems
	s.ems = make(map[string]int, 100)
	s.emu.Unlock()

	s.lg.Info(
		"stress STOP",
		zap.String("stress-type", s.stype.String()),
		zap.String("endpoint", s.m.EtcdClientEndpoint),
	)
	return ess
}

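// ModifiedKeys reports the total number of keys created and deleted so far.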
func (s *keyStresser) ModifiedKeys() int64 {
	return atomic.LoadInt64(&s.atomicModifiedKeys)
}

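// stressFunc runs a single stress operation and returns any error together
// with the number of keys the operation modified.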
type stressFunc func(ctx context.Context) (err error, modifiedKeys int64)

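// stressEntry pairs a stress operation with its selection weight.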
type stressEntry struct {
	weight float32
	f      stressFunc
}

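// stressTable picks stress operations at random, with probability
// proportional to their weights.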
type stressTable struct {
	entries    []stressEntry
	sumWeights float32
}

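// createStressTable builds a stressTable, precomputing the sum of all entry
// weights for weighted sampling.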
func createStressTable(entries []stressEntry) *stressTable {
	st := stressTable{entries: entries}
	for _, entry := range st.entries {
		st.sumWeights += entry.weight
	}
	return &st
}

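// choose selects a stress operation by weighted random sampling: it draws a
// uniform value in [0, sumWeights) and returns the first entry whose
// cumulative weight reaches that value.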
func (st *stressTable) choose() stressFunc {
	v := rand.Float32() * st.sumWeights
	var sum float32
	var idx int
	for i := range st.entries {
		sum += st.entries[i].weight
		if sum >= v {
			idx = i
			break
		}
	}
	return st.entries[idx].f
}

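// newStressPut returns a stressFunc that writes keySize random bytes to a
// randomly chosen key in the "foo" keyspace.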
func newStressPut(cli *clientv3.Client, keySuffixRange, keySize int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := cli.Put(
			ctx,
			fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)),
			string(randBytes(keySize)),
		)
		return err, 1
	}
}

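// newStressTxn returns a stressFunc that commits transactions of txnOps
// operations over a fixed keyspace of keyTxnSuffixRange keys.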
func newStressTxn(cli *clientv3.Client, keyTxnSuffixRange, txnOps int) stressFunc {
	keys := make([]string, keyTxnSuffixRange)
	for i := range keys {
		keys[i] = fmt.Sprintf("/k%03d", i)
	}
	return writeTxn(cli, keys, txnOps)
}

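// writeTxn returns a stressFunc that samples txnOps distinct keys and
// commits one transaction of nested sub-transactions: each sub-transaction
// deletes its key if the key already exists and creates it otherwise.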
func writeTxn(cli *clientv3.Client, keys []string, txnOps int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		// sample txnOps distinct keys
		ks := make(map[string]struct{}, txnOps)
		for len(ks) != txnOps {
			ks[keys[rand.Intn(len(keys))]] = struct{}{}
		}
		selected := make([]string, 0, txnOps)
		for k := range ks {
			selected = append(selected, k)
		}
		// the outer txn guards on the first selected key; every remaining
		// key gets its own nested txn so the delete-or-put choice is per key
		firstCom, delOp, putOp := getTxnOps(selected[0], "bar00")
		thenOps := []clientv3.Op{delOp}
		elseOps := []clientv3.Op{putOp}
		for i := 1; i < txnOps; i++ { // nested txns
			k, v := selected[i], fmt.Sprintf("bar%02d", i)
			com, delOp, putOp := getTxnOps(k, v)
			txnOp := clientv3.OpTxn(
				[]clientv3.Cmp{com},
				[]clientv3.Op{delOp},
				[]clientv3.Op{putOp},
			)
			thenOps = append(thenOps, txnOp)
			elseOps = append(elseOps, txnOp)
		}
		_, err := cli.Txn(ctx).
			If(firstCom).
			Then(thenOps...).
			Else(elseOps...).
			Commit()
		return err, int64(txnOps)
	}
}

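// getTxnOps returns, for a single key, the comparison "key exists
// (version > 0)" along with the delete and put operations used on the two
// branches of a transaction.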
func getTxnOps(k, v string) (
	cmp clientv3.Cmp,
	dop clientv3.Op,
	pop clientv3.Op) {
	// if key exists (version > 0)
	cmp = clientv3.Compare(clientv3.Version(k), ">", 0)
	dop = clientv3.OpDelete(k)
	pop = clientv3.OpPut(k, v)
	return cmp, dop, pop
}

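// newStressRange returns a stressFunc that reads a single random key.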
func newStressRange(cli *clientv3.Client, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := cli.Get(ctx, fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)))
		return err, 0
	}
}

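// newStressRangeInterval returns a stressFunc that reads a random range of
// up to 500 keys.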
func newStressRangeInterval(cli *clientv3.Client, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		start := rand.Intn(keySuffixRange)
		end := start + 500
		_, err := cli.Get(
			ctx,
			fmt.Sprintf("foo%016x", start),
			clientv3.WithRange(fmt.Sprintf("foo%016x", end)),
		)
		return err, 0
	}
}

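// newStressDelete returns a stressFunc that deletes a single random key.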
func newStressDelete(cli *clientv3.Client, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := cli.Delete(ctx, fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)))
		return err, 1
	}
}

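// newStressDeleteInterval returns a stressFunc that deletes a random range
// of up to 500 keys and reports how many keys were actually deleted.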
func newStressDeleteInterval(cli *clientv3.Client, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		start := rand.Intn(keySuffixRange)
		end := start + 500
		resp, err := cli.Delete(ctx,
			fmt.Sprintf("foo%016x", start),
			clientv3.WithRange(fmt.Sprintf("foo%016x", end)),
		)
		if err == nil {
			return nil, resp.Deleted
		}
		return err, 0
	}
}