github.com/lfch/etcd-io/tests/v3@v3.0.0-20221004140520-eac99acd3e9d/functional/tester/stresser_key.go (about) 1 // Copyright 2018 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tester 16 17 import ( 18 "context" 19 "fmt" 20 "math/rand" 21 "reflect" 22 "sync" 23 "sync/atomic" 24 "time" 25 26 "github.com/lfch/etcd-io/api/v3/v3rpc/rpctypes" 27 "github.com/lfch/etcd-io/client/v3" 28 "github.com/lfch/etcd-io/raft/v3" 29 "github.com/lfch/etcd-io/server/v3/etcdserver/errors" 30 "github.com/lfch/etcd-io/tests/v3/functional/rpcpb" 31 32 "go.uber.org/zap" 33 "golang.org/x/time/rate" 34 "google.golang.org/grpc" 35 "google.golang.org/grpc/codes" 36 "google.golang.org/grpc/status" 37 ) 38 39 type keyStresser struct { 40 lg *zap.Logger 41 42 m *rpcpb.Member 43 44 weightKVWriteSmall float64 45 weightKVWriteLarge float64 46 weightKVReadOneKey float64 47 weightKVReadRange float64 48 weightKVDeleteOneKey float64 49 weightKVDeleteRange float64 50 weightKVTxnWriteDelete float64 51 52 keySize int 53 keyLargeSize int 54 keySuffixRange int 55 keyTxnSuffixRange int 56 keyTxnOps int 57 58 rateLimiter *rate.Limiter 59 60 wg sync.WaitGroup 61 clientsN int 62 63 ctx context.Context 64 cancel func() 65 cli *clientv3.Client 66 67 emu sync.RWMutex 68 ems map[string]int 69 paused bool 70 71 // atomicModifiedKeys records the number of keys created and deleted by the stresser. 72 atomicModifiedKeys int64 73 74 stressTable *stressTable 75 } 76 77 func (s *keyStresser) Stress() error { 78 var err error 79 s.cli, err = s.m.CreateEtcdClient(grpc.WithBackoffMaxDelay(1 * time.Second)) 80 if err != nil { 81 return fmt.Errorf("%v (%q)", err, s.m.EtcdClientEndpoint) 82 } 83 s.ctx, s.cancel = context.WithCancel(context.Background()) 84 85 s.wg.Add(s.clientsN) 86 87 s.stressTable = createStressTable([]stressEntry{ 88 {weight: s.weightKVWriteSmall, f: newStressPut(s.cli, s.keySuffixRange, s.keySize)}, 89 {weight: s.weightKVWriteLarge, f: newStressPut(s.cli, s.keySuffixRange, s.keyLargeSize)}, 90 {weight: s.weightKVReadOneKey, f: newStressRange(s.cli, s.keySuffixRange)}, 91 {weight: s.weightKVReadRange, f: newStressRangeInterval(s.cli, s.keySuffixRange)}, 92 {weight: s.weightKVDeleteOneKey, f: newStressDelete(s.cli, s.keySuffixRange)}, 93 {weight: s.weightKVDeleteRange, f: newStressDeleteInterval(s.cli, s.keySuffixRange)}, 94 {weight: s.weightKVTxnWriteDelete, f: newStressTxn(s.cli, s.keyTxnSuffixRange, s.keyTxnOps)}, 95 }) 96 97 s.emu.Lock() 98 s.paused = false 99 s.ems = make(map[string]int, 100) 100 s.emu.Unlock() 101 for i := 0; i < s.clientsN; i++ { 102 go s.run() 103 } 104 105 s.lg.Info( 106 "stress START", 107 zap.String("stress-type", "KV"), 108 zap.String("endpoint", s.m.EtcdClientEndpoint), 109 ) 110 return nil 111 } 112 113 func (s *keyStresser) run() { 114 defer s.wg.Done() 115 116 for { 117 if err := s.rateLimiter.Wait(s.ctx); err == context.Canceled { 118 return 119 } 120 121 // TODO: 10-second is enough timeout to cover leader failure 122 // and immediate leader election. Find out what other cases this 123 // could be timed out. 124 sctx, scancel := context.WithTimeout(s.ctx, 10*time.Second) 125 modifiedKeys, err := s.stressTable.choose()(sctx) 126 scancel() 127 if err == nil { 128 atomic.AddInt64(&s.atomicModifiedKeys, modifiedKeys) 129 continue 130 } 131 132 if !s.isRetryableError(err) { 133 return 134 } 135 136 // only record errors before pausing stressers 137 s.emu.Lock() 138 if !s.paused { 139 s.ems[err.Error()]++ 140 } 141 s.emu.Unlock() 142 } 143 } 144 145 func (s *keyStresser) isRetryableError(err error) bool { 146 switch rpctypes.ErrorDesc(err) { 147 // retryable 148 case context.DeadlineExceeded.Error(): 149 // This retries when request is triggered at the same time as 150 // leader failure. When we terminate the leader, the request to 151 // that leader cannot be processed, and times out. Also requests 152 // to followers cannot be forwarded to the old leader, so timing out 153 // as well. We want to keep stressing until the cluster elects a 154 // new leader and start processing requests again. 155 return true 156 case errors.ErrTimeoutDueToLeaderFail.Error(), errors.ErrTimeout.Error(): 157 // This retries when request is triggered at the same time as 158 // leader failure and follower nodes receive time out errors 159 // from losing their leader. Followers should retry to connect 160 // to the new leader. 161 return true 162 case errors.ErrStopped.Error(): 163 // one of the etcd nodes stopped from failure injection 164 return true 165 case rpctypes.ErrNotCapable.Error(): 166 // capability check has not been done (in the beginning) 167 return true 168 case rpctypes.ErrTooManyRequests.Error(): 169 // hitting the recovering member. 170 return true 171 case raft.ErrProposalDropped.Error(): 172 // removed member, or leadership has changed (old leader got raftpb.MsgProp) 173 return true 174 175 // not retryable. 176 case context.Canceled.Error(): 177 // from stresser.Cancel method: 178 return false 179 } 180 181 if status.Convert(err).Code() == codes.Unavailable { 182 // gRPC connection errors are translated to status.Unavailable 183 return true 184 } 185 186 s.lg.Warn( 187 "stress run exiting", 188 zap.String("stress-type", "KV"), 189 zap.String("endpoint", s.m.EtcdClientEndpoint), 190 zap.String("error-type", reflect.TypeOf(err).String()), 191 zap.String("error-desc", rpctypes.ErrorDesc(err)), 192 zap.Error(err), 193 ) 194 return false 195 } 196 197 func (s *keyStresser) Pause() map[string]int { 198 return s.Close() 199 } 200 201 func (s *keyStresser) Close() map[string]int { 202 s.cancel() 203 s.cli.Close() 204 s.wg.Wait() 205 206 s.emu.Lock() 207 s.paused = true 208 ess := s.ems 209 s.ems = make(map[string]int, 100) 210 s.emu.Unlock() 211 212 s.lg.Info( 213 "stress STOP", 214 zap.String("stress-type", "KV"), 215 zap.String("endpoint", s.m.EtcdClientEndpoint), 216 ) 217 return ess 218 } 219 220 func (s *keyStresser) ModifiedKeys() int64 { 221 return atomic.LoadInt64(&s.atomicModifiedKeys) 222 } 223 224 type stressFunc func(ctx context.Context) (modifiedKeys int64, err error) 225 226 type stressEntry struct { 227 weight float64 228 f stressFunc 229 } 230 231 type stressTable struct { 232 entries []stressEntry 233 sumWeights float64 234 } 235 236 func createStressTable(entries []stressEntry) *stressTable { 237 st := stressTable{entries: entries} 238 for _, entry := range st.entries { 239 st.sumWeights += entry.weight 240 } 241 return &st 242 } 243 244 func (st *stressTable) choose() stressFunc { 245 v := rand.Float64() * st.sumWeights 246 var sum float64 247 var idx int 248 for i := range st.entries { 249 sum += st.entries[i].weight 250 if sum >= v { 251 idx = i 252 break 253 } 254 } 255 return st.entries[idx].f 256 } 257 258 func newStressPut(cli *clientv3.Client, keySuffixRange, keySize int) stressFunc { 259 return func(ctx context.Context) (int64, error) { 260 _, err := cli.Put( 261 ctx, 262 fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)), 263 string(randBytes(keySize)), 264 ) 265 return 1, err 266 } 267 } 268 269 func newStressTxn(cli *clientv3.Client, keyTxnSuffixRange, txnOps int) stressFunc { 270 keys := make([]string, keyTxnSuffixRange) 271 for i := range keys { 272 keys[i] = fmt.Sprintf("/k%03d", i) 273 } 274 return writeTxn(cli, keys, txnOps) 275 } 276 277 func writeTxn(cli *clientv3.Client, keys []string, txnOps int) stressFunc { 278 return func(ctx context.Context) (int64, error) { 279 ks := make(map[string]struct{}, txnOps) 280 for len(ks) != txnOps { 281 ks[keys[rand.Intn(len(keys))]] = struct{}{} 282 } 283 selected := make([]string, 0, txnOps) 284 for k := range ks { 285 selected = append(selected, k) 286 } 287 com, delOp, putOp := getTxnOps(selected[0], "bar00") 288 thenOps := []clientv3.Op{delOp} 289 elseOps := []clientv3.Op{putOp} 290 for i := 1; i < txnOps; i++ { // nested txns 291 k, v := selected[i], fmt.Sprintf("bar%02d", i) 292 com, delOp, putOp = getTxnOps(k, v) 293 txnOp := clientv3.OpTxn( 294 []clientv3.Cmp{com}, 295 []clientv3.Op{delOp}, 296 []clientv3.Op{putOp}, 297 ) 298 thenOps = append(thenOps, txnOp) 299 elseOps = append(elseOps, txnOp) 300 } 301 _, err := cli.Txn(ctx). 302 If(com). 303 Then(thenOps...). 304 Else(elseOps...). 305 Commit() 306 return int64(txnOps), err 307 } 308 } 309 310 func getTxnOps(k, v string) ( 311 cmp clientv3.Cmp, 312 dop clientv3.Op, 313 pop clientv3.Op) { 314 // if key exists (version > 0) 315 cmp = clientv3.Compare(clientv3.Version(k), ">", 0) 316 dop = clientv3.OpDelete(k) 317 pop = clientv3.OpPut(k, v) 318 return cmp, dop, pop 319 } 320 321 func newStressRange(cli *clientv3.Client, keySuffixRange int) stressFunc { 322 return func(ctx context.Context) (int64, error) { 323 _, err := cli.Get(ctx, fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))) 324 return 0, err 325 } 326 } 327 328 func newStressRangeInterval(cli *clientv3.Client, keySuffixRange int) stressFunc { 329 return func(ctx context.Context) (int64, error) { 330 start := rand.Intn(keySuffixRange) 331 end := start + 500 332 _, err := cli.Get( 333 ctx, 334 fmt.Sprintf("foo%016x", start), 335 clientv3.WithRange(fmt.Sprintf("foo%016x", end)), 336 ) 337 return 0, err 338 } 339 } 340 341 func newStressDelete(cli *clientv3.Client, keySuffixRange int) stressFunc { 342 return func(ctx context.Context) (int64, error) { 343 _, err := cli.Delete(ctx, fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))) 344 return 1, err 345 } 346 } 347 348 func newStressDeleteInterval(cli *clientv3.Client, keySuffixRange int) stressFunc { 349 return func(ctx context.Context) (int64, error) { 350 start := rand.Intn(keySuffixRange) 351 end := start + 500 352 resp, err := cli.Delete(ctx, 353 fmt.Sprintf("foo%016x", start), 354 clientv3.WithRange(fmt.Sprintf("foo%016x", end)), 355 ) 356 if err == nil { 357 return resp.Deleted, nil 358 } 359 return 0, err 360 } 361 }