go.etcd.io/etcd@v3.3.27+incompatible/functional/tester/stresser_key.go (about)

// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tester

import (
	"context"
	"fmt"
	"math/rand"
	"reflect"
	"sync"
	"sync/atomic"
	"time"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/etcdserver"
	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
	"github.com/coreos/etcd/functional/rpcpb"

	"go.uber.org/zap"
	"golang.org/x/time/rate"
	"google.golang.org/grpc"
)

type keyStresser struct {
	stype rpcpb.Stresser
	lg    *zap.Logger

	m *rpcpb.Member

	keySize           int
	keyLargeSize      int
	keySuffixRange    int
	keyTxnSuffixRange int
	keyTxnOps         int

	rateLimiter *rate.Limiter

	wg       sync.WaitGroup
	clientsN int

	ctx    context.Context
	cancel func()
	cli    *clientv3.Client

	emu    sync.RWMutex
	ems    map[string]int
	paused bool

	// atomicModifiedKeys records the number of keys created and deleted by the stresser.
	atomicModifiedKeys int64

	stressTable *stressTable
}

func (s *keyStresser) Stress() error {
	var err error
	s.cli, err = s.m.CreateEtcdClient(grpc.WithBackoffMaxDelay(1 * time.Second))
	if err != nil {
		return fmt.Errorf("%v (%q)", err, s.m.EtcdClientEndpoint)
	}
	s.ctx, s.cancel = context.WithCancel(context.Background())

	s.wg.Add(s.clientsN)
	var stressEntries = []stressEntry{
		{weight: 0.7, f: newStressPut(s.cli, s.keySuffixRange, s.keySize)},
		{
			weight: 0.7 * float32(s.keySize) / float32(s.keyLargeSize),
			f:      newStressPut(s.cli, s.keySuffixRange, s.keyLargeSize),
		},
		{weight: 0.07, f: newStressRange(s.cli, s.keySuffixRange)},
		{weight: 0.07, f: newStressRangeInterval(s.cli, s.keySuffixRange)},
		{weight: 0.07, f: newStressDelete(s.cli, s.keySuffixRange)},
		{weight: 0.07, f: newStressDeleteInterval(s.cli, s.keySuffixRange)},
	}
	if s.keyTxnSuffixRange > 0 {
		// adjust to make up ±70% of workloads with writes
		stressEntries[0].weight = 0.35
		stressEntries = append(stressEntries, stressEntry{
			weight: 0.35,
			f:      newStressTxn(s.cli, s.keyTxnSuffixRange, s.keyTxnOps),
		})
	}
	s.stressTable = createStressTable(stressEntries)

	s.emu.Lock()
	s.paused = false
	s.ems = make(map[string]int, 100)
	s.emu.Unlock()
	for i := 0; i < s.clientsN; i++ {
		go s.run()
	}

	s.lg.Info(
		"stress START",
		zap.String("stress-type", s.stype.String()),
		zap.String("endpoint", s.m.EtcdClientEndpoint),
	)
	return nil
}

func (s *keyStresser) run() {
	defer s.wg.Done()

	for {
		if err := s.rateLimiter.Wait(s.ctx); err == context.Canceled {
			return
		}

		// TODO: a 10-second timeout is enough to cover a leader failure
		// and an immediate leader election. Find out in what other cases
		// this could time out.
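		// Each iteration draws one operation from the weighted stress
		// table and runs it under a bounded deadline. The switch below
		// classifies the resulting error: known transient errors are
		// recorded and retried, cancellation from Close/Pause ends the
		// goroutine, and anything unexpected is logged before exiting.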
		sctx, scancel := context.WithTimeout(s.ctx, 10*time.Second)
		err, modifiedKeys := s.stressTable.choose()(sctx)
		scancel()
		if err == nil {
			atomic.AddInt64(&s.atomicModifiedKeys, modifiedKeys)
			continue
		}

		switch rpctypes.ErrorDesc(err) {
		case context.DeadlineExceeded.Error():
			// This retries when a request is triggered at the same time as a
			// leader failure. When we terminate the leader, requests to that
			// leader cannot be processed and time out. Requests to followers
			// cannot be forwarded to the old leader either, so they time out
			// as well. We want to keep stressing until the cluster elects a
			// new leader and starts processing requests again.
		case etcdserver.ErrTimeoutDueToLeaderFail.Error(), etcdserver.ErrTimeout.Error():
			// This retries when a request is triggered at the same time as a
			// leader failure and follower nodes receive timeout errors after
			// losing their leader. Followers should retry to connect to the
			// new leader.
		case etcdserver.ErrStopped.Error():
			// one of the etcd nodes stopped from failure injection
		// case transport.ErrConnClosing.Desc:
		// 	// server closed the transport (failure injected node)
		case rpctypes.ErrNotCapable.Error():
			// capability check has not been done (in the beginning)
		case rpctypes.ErrTooManyRequests.Error():
			// hitting the recovering member
		case context.Canceled.Error():
			// from stresser.Cancel method:
			return
		case grpc.ErrClientConnClosing.Error():
			// from stresser.Cancel method:
			return
		default:
			s.lg.Warn(
				"stress run exiting",
				zap.String("stress-type", s.stype.String()),
				zap.String("endpoint", s.m.EtcdClientEndpoint),
				zap.String("error-type", reflect.TypeOf(err).String()),
				zap.Error(err),
			)
			return
		}

		// only record errors before pausing stressers
		s.emu.Lock()
		if !s.paused {
			s.ems[err.Error()]++
		}
		s.emu.Unlock()
	}
}

func (s *keyStresser) Pause() map[string]int {
	return s.Close()
}

func (s *keyStresser) Close() map[string]int {
	s.cancel()
	s.cli.Close()
	s.wg.Wait()

	s.emu.Lock()
	s.paused = true
	ess := s.ems
	s.ems = make(map[string]int, 100)
	s.emu.Unlock()

	s.lg.Info(
		"stress STOP",
		zap.String("stress-type", s.stype.String()),
		zap.String("endpoint", s.m.EtcdClientEndpoint),
	)
	return ess
}

func (s *keyStresser) ModifiedKeys() int64 {
	return atomic.LoadInt64(&s.atomicModifiedKeys)
}

type stressFunc func(ctx context.Context) (err error, modifiedKeys int64)

type stressEntry struct {
	weight float32
	f      stressFunc
}

type stressTable struct {
	entries    []stressEntry
	sumWeights float32
}

func createStressTable(entries []stressEntry) *stressTable {
	st := stressTable{entries: entries}
	for _, entry := range st.entries {
		st.sumWeights += entry.weight
	}
	return &st
}

func (st *stressTable) choose() stressFunc {
	v := rand.Float32() * st.sumWeights
	var sum float32
	var idx int
	for i := range st.entries {
		sum += st.entries[i].weight
		if sum >= v {
			idx = i
			break
		}
	}
	return st.entries[idx].f
}

func newStressPut(cli *clientv3.Client, keySuffixRange, keySize int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := cli.Put(
			ctx,
			fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)),
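			// value: keySize random bytes; randBytes is a helper
			// defined elsewhere in this package (not shown here)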
			string(randBytes(keySize)),
		)
		return err, 1
	}
}

func newStressTxn(cli *clientv3.Client, keyTxnSuffixRange, txnOps int) stressFunc {
	keys := make([]string, keyTxnSuffixRange)
	for i := range keys {
		keys[i] = fmt.Sprintf("/k%03d", i)
	}
	return writeTxn(cli, keys, txnOps)
}

func writeTxn(cli *clientv3.Client, keys []string, txnOps int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		ks := make(map[string]struct{}, txnOps)
		for len(ks) != txnOps {
			ks[keys[rand.Intn(len(keys))]] = struct{}{}
		}
		selected := make([]string, 0, txnOps)
		for k := range ks {
			selected = append(selected, k)
		}
		com, delOp, putOp := getTxnOps(selected[0], "bar00")
		thenOps := []clientv3.Op{delOp}
		elseOps := []clientv3.Op{putOp}
		for i := 1; i < txnOps; i++ { // nested txns
			k, v := selected[i], fmt.Sprintf("bar%02d", i)
			com, delOp, putOp = getTxnOps(k, v)
			txnOp := clientv3.OpTxn(
				[]clientv3.Cmp{com},
				[]clientv3.Op{delOp},
				[]clientv3.Op{putOp},
			)
			thenOps = append(thenOps, txnOp)
			elseOps = append(elseOps, txnOp)
		}
		_, err := cli.Txn(ctx).
			If(com).
			Then(thenOps...).
			Else(elseOps...).
			Commit()
		return err, int64(txnOps)
	}
}

func getTxnOps(k, v string) (
	cmp clientv3.Cmp,
	dop clientv3.Op,
	pop clientv3.Op) {
	// if key exists (version > 0)
	cmp = clientv3.Compare(clientv3.Version(k), ">", 0)
	dop = clientv3.OpDelete(k)
	pop = clientv3.OpPut(k, v)
	return cmp, dop, pop
}

func newStressRange(cli *clientv3.Client, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := cli.Get(ctx, fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)))
		return err, 0
	}
}

func newStressRangeInterval(cli *clientv3.Client, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		start := rand.Intn(keySuffixRange)
		end := start + 500
		_, err := cli.Get(
			ctx,
			fmt.Sprintf("foo%016x", start),
			clientv3.WithRange(fmt.Sprintf("foo%016x", end)),
		)
		return err, 0
	}
}

func newStressDelete(cli *clientv3.Client, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := cli.Delete(ctx, fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)))
		return err, 1
	}
}

func newStressDeleteInterval(cli *clientv3.Client, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		start := rand.Intn(keySuffixRange)
		end := start + 500
		resp, err := cli.Delete(ctx,
			fmt.Sprintf("foo%016x", start),
			clientv3.WithRange(fmt.Sprintf("foo%016x", end)),
		)
		if err == nil {
			return nil, resp.Deleted
		}
		return err, 0
	}
}
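// What follows is an illustrative sketch, not part of the original file: it
// empirically checks that (*stressTable).choose picks entries in proportion
// to their weights, mirroring how Stress weights small puts at 0.7 and reads
// or deletes at 0.07. The function name and iteration count are hypothetical.
func sampleStressTable() {
	counts := make([]int, 2)
	// mark returns a no-op stressFunc that records which entry was chosen.
	mark := func(i int) stressFunc {
		return func(ctx context.Context) (error, int64) {
			counts[i]++
			return nil, 0
		}
	}
	st := createStressTable([]stressEntry{
		{weight: 0.7, f: mark(0)},  // stands in for newStressPut
		{weight: 0.07, f: mark(1)}, // stands in for newStressRange
	})
	for i := 0; i < 10000; i++ {
		st.choose()(context.Background())
	}
	// counts[0]:counts[1] should come out roughly 10:1 (0.7 vs 0.07).
	fmt.Printf("picks: %v\n", counts)
}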