github.com/lfch/etcd-io/tests/v3@v3.0.0-20221004140520-eac99acd3e9d/functional/tester/stresser_lease.go

// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tester

import (
	"context"
	"fmt"
	"math/rand"
	"sync"
	"sync/atomic"
	"time"

	"github.com/lfch/etcd-io/api/v3/v3rpc/rpctypes"
	"github.com/lfch/etcd-io/client/v3"
	"github.com/lfch/etcd-io/tests/v3/functional/rpcpb"

	"go.uber.org/zap"
	"golang.org/x/time/rate"
	"google.golang.org/grpc"
)

const (
	// time to live (in seconds) for a lease
	defaultTTL      = 120
	defaultTTLShort = 2
)

type leaseStresser struct {
	stype rpcpb.StresserType
	lg    *zap.Logger

	m      *rpcpb.Member
	cli    *clientv3.Client
	ctx    context.Context
	cancel func()

	rateLimiter *rate.Limiter
	// atomicModifiedKey records the number of keys created and deleted during a test case
	atomicModifiedKey        int64
	numLeases                int
	keysPerLease             int
	aliveLeases              *atomicLeases
	alivedLeasesWithShortTTL *atomicLeases
	revokedLeases            *atomicLeases
	shortLivedLeases         *atomicLeases

	runWg   sync.WaitGroup
	aliveWg sync.WaitGroup
}
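// The sketch below is editorial, not part of the original file: it shows how
// a leaseStresser might be assembled before Stress is called. The concrete
// StresserType value, limiter rate, and lease/key counts are assumptions for
// illustration only.
func newExampleLeaseStresser(m *rpcpb.Member, lg *zap.Logger) *leaseStresser {
	return &leaseStresser{
		stype:        rpcpb.StresserType_LEASE, // assumed enum value for the lease stresser
		lg:           lg,
		m:            m,
		rateLimiter:  rate.NewLimiter(rate.Limit(100), 100), // ~100 key ops/sec, burst 100
		numLeases:    10,
		keysPerLease: 10,
	}
}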
type atomicLeases struct {
	// rwLock protects the leases map, which is read and
	// modified by multiple goroutines.
	rwLock sync.RWMutex
	leases map[int64]time.Time
}

func (al *atomicLeases) add(leaseID int64, t time.Time) {
	al.rwLock.Lock()
	al.leases[leaseID] = t
	al.rwLock.Unlock()
}

func (al *atomicLeases) update(leaseID int64, t time.Time) {
	al.rwLock.Lock()
	_, ok := al.leases[leaseID]
	if ok {
		al.leases[leaseID] = t
	}
	al.rwLock.Unlock()
}

func (al *atomicLeases) read(leaseID int64) (rv time.Time, ok bool) {
	al.rwLock.RLock()
	rv, ok = al.leases[leaseID]
	al.rwLock.RUnlock()
	return rv, ok
}

func (al *atomicLeases) remove(leaseID int64) {
	al.rwLock.Lock()
	delete(al.leases, leaseID)
	al.rwLock.Unlock()
}

func (al *atomicLeases) getLeasesMap() map[int64]time.Time {
	leasesCopy := make(map[int64]time.Time)
	al.rwLock.RLock()
	for k, v := range al.leases {
		leasesCopy[k] = v
	}
	al.rwLock.RUnlock()
	return leasesCopy
}

func (ls *leaseStresser) setupOnce() error {
	if ls.aliveLeases != nil {
		return nil
	}
	if ls.numLeases == 0 {
		panic("expect numLeases to be set")
	}
	if ls.keysPerLease == 0 {
		panic("expect keysPerLease to be set")
	}

	ls.aliveLeases = &atomicLeases{leases: make(map[int64]time.Time)}
	return nil
}

func (ls *leaseStresser) Stress() error {
	ls.lg.Info(
		"stress START",
		zap.String("stress-type", ls.stype.String()),
		zap.String("endpoint", ls.m.EtcdClientEndpoint),
	)

	if err := ls.setupOnce(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(context.Background())
	ls.ctx = ctx
	ls.cancel = cancel

	cli, err := ls.m.CreateEtcdClient(grpc.WithBackoffMaxDelay(1 * time.Second))
	if err != nil {
		return fmt.Errorf("%v (%s)", err, ls.m.EtcdClientEndpoint)
	}
	ls.cli = cli

	ls.revokedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
	ls.shortLivedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
	ls.alivedLeasesWithShortTTL = &atomicLeases{leases: make(map[int64]time.Time)}

	ls.runWg.Add(1)
	go ls.run()
	return nil
}
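// Editorial sketch (not in the original file): how a caller might drive the
// stresser lifecycle defined above and below. Stress starts the run() loop
// in a goroutine; Close cancels the context and waits for every goroutine
// before reporting. The 10-second duration is an arbitrary assumption.
func exampleDriveLeaseStresser(ls *leaseStresser) {
	if err := ls.Stress(); err != nil {
		return
	}
	time.Sleep(10 * time.Second) // let the stress loop run for a while
	ls.Close()
	fmt.Println("keys modified:", ls.ModifiedKeys())
}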
func (ls *leaseStresser) run() {
	defer ls.runWg.Done()
	ls.restartKeepAlives()
	for {
		// the number of keys created and deleted is roughly 2x the number of created keys for an iteration.
		// the rateLimiter therefore consumes 2x ls.numLeases*ls.keysPerLease tokens, where each token represents a create/delete operation on a key.
		err := ls.rateLimiter.WaitN(ls.ctx, 2*ls.numLeases*ls.keysPerLease)
		if err == context.Canceled {
			return
		}

		ls.lg.Debug(
			"stress creating leases",
			zap.String("stress-type", ls.stype.String()),
			zap.String("endpoint", ls.m.EtcdClientEndpoint),
		)
		ls.createLeases()
		ls.lg.Debug(
			"stress created leases",
			zap.String("stress-type", ls.stype.String()),
			zap.String("endpoint", ls.m.EtcdClientEndpoint),
		)

		ls.lg.Debug(
			"stress dropping leases",
			zap.String("stress-type", ls.stype.String()),
			zap.String("endpoint", ls.m.EtcdClientEndpoint),
		)
		ls.randomlyDropLeases()
		ls.lg.Debug(
			"stress dropped leases",
			zap.String("stress-type", ls.stype.String()),
			zap.String("endpoint", ls.m.EtcdClientEndpoint),
		)
	}
}

func (ls *leaseStresser) restartKeepAlives() {
	for leaseID := range ls.aliveLeases.getLeasesMap() {
		ls.aliveWg.Add(1)
		go func(id int64) {
			ls.keepLeaseAlive(id)
		}(leaseID)
	}
	for leaseID := range ls.alivedLeasesWithShortTTL.getLeasesMap() {
		ls.aliveWg.Add(1)
		go func(id int64) {
			ls.keepLeaseAlive(id)
		}(leaseID)
	}
}

func (ls *leaseStresser) createLeases() {
	ls.createAliveLeasesWithShortTTL()
	ls.createAliveLeases()
	ls.createShortLivedLeases()
}

func (ls *leaseStresser) createAliveLeases() {
	neededLeases := ls.numLeases - len(ls.aliveLeases.getLeasesMap())
	var wg sync.WaitGroup
	for i := 0; i < neededLeases; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			leaseID, err := ls.createLeaseWithKeys(defaultTTL)
			if err != nil {
				ls.lg.Debug(
					"createLeaseWithKeys failed",
					zap.String("endpoint", ls.m.EtcdClientEndpoint),
					zap.Error(err),
				)
				return
			}
			ls.aliveLeases.add(leaseID, time.Now())
			// keep track of all the keep-lease-alive goroutines
			ls.aliveWg.Add(1)
			go ls.keepLeaseAlive(leaseID)
		}()
	}
	wg.Wait()
}

func (ls *leaseStresser) createAliveLeasesWithShortTTL() {
	neededLeases := 2
	var wg sync.WaitGroup
	for i := 0; i < neededLeases; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			leaseID, err := ls.createLeaseWithKeys(defaultTTLShort)
			if err != nil {
				ls.lg.Debug(
					"createLeaseWithKeys failed",
					zap.String("endpoint", ls.m.EtcdClientEndpoint),
					zap.Error(err),
				)
				return
			}
			ls.lg.Debug("createAliveLeasesWithShortTTL", zap.Int64("lease-id", leaseID))
			ls.alivedLeasesWithShortTTL.add(leaseID, time.Now())
			// keep track of all the keep-lease-alive goroutines
			ls.aliveWg.Add(1)
			go ls.keepLeaseAlive(leaseID)
		}()
	}
	wg.Wait()
}
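// tokensPerIteration is an editorial helper restating the rate-limiter
// accounting in run() above: every key attached to a lease is eventually
// deleted as well (by revoke or expiry), so each iteration is budgeted at
// two operations per key. It is not called by the original code.
func tokensPerIteration(numLeases, keysPerLease int) int {
	return 2 * numLeases * keysPerLease
}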
func (ls *leaseStresser) createShortLivedLeases() {
	// one round of createLeases() might not create all the short-lived leases we want due to failures.
	// thus, the remaining short-lived leases are created in future rounds.
	neededLeases := ls.numLeases - len(ls.shortLivedLeases.getLeasesMap())
	var wg sync.WaitGroup
	for i := 0; i < neededLeases; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			leaseID, err := ls.createLeaseWithKeys(defaultTTLShort)
			if err != nil {
				return
			}
			ls.shortLivedLeases.add(leaseID, time.Now())
		}()
	}
	wg.Wait()
}

func (ls *leaseStresser) createLeaseWithKeys(ttl int64) (int64, error) {
	leaseID, err := ls.createLease(ttl)
	if err != nil {
		ls.lg.Debug(
			"createLease failed",
			zap.String("stress-type", ls.stype.String()),
			zap.String("endpoint", ls.m.EtcdClientEndpoint),
			zap.Error(err),
		)
		return -1, err
	}

	ls.lg.Debug(
		"createLease created lease",
		zap.String("stress-type", ls.stype.String()),
		zap.String("endpoint", ls.m.EtcdClientEndpoint),
		zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
	)
	if err := ls.attachKeysWithLease(leaseID); err != nil {
		return -1, err
	}
	return leaseID, nil
}

func (ls *leaseStresser) randomlyDropLeases() {
	var wg sync.WaitGroup
	for l := range ls.aliveLeases.getLeasesMap() {
		wg.Add(1)
		go func(leaseID int64) {
			defer wg.Done()
			dropped, err := ls.randomlyDropLease(leaseID)
			// if randomlyDropLease encountered an error (such as the context being canceled),
			// remove the lease from aliveLeases because we can't tell whether the lease was dropped or not.
			if err != nil {
				ls.lg.Debug(
					"randomlyDropLease failed",
					zap.String("endpoint", ls.m.EtcdClientEndpoint),
					zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
					zap.Error(err),
				)
				ls.aliveLeases.remove(leaseID)
				return
			}
			if !dropped {
				return
			}
			ls.lg.Debug(
				"randomlyDropLease dropped a lease",
				zap.String("stress-type", ls.stype.String()),
				zap.String("endpoint", ls.m.EtcdClientEndpoint),
				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
			)
			ls.revokedLeases.add(leaseID, time.Now())
			ls.aliveLeases.remove(leaseID)
		}(l)
	}
	wg.Wait()
}

func (ls *leaseStresser) createLease(ttl int64) (int64, error) {
	resp, err := ls.cli.Grant(ls.ctx, ttl)
	if err != nil {
		return -1, err
	}
	return int64(resp.ID), nil
}
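// Editorial sketch of the bare clientv3 calls that createLease above and
// randomlyDropLease below wrap: Grant allocates a lease with the given TTL
// (in seconds) and Revoke deletes it together with any attached keys. This
// helper is hypothetical and assumes an already-connected client.
func grantAndRevokeExample(ctx context.Context, cli *clientv3.Client) error {
	resp, err := cli.Grant(ctx, defaultTTLShort) // expires ~2s after the last renewal
	if err != nil {
		return err
	}
	_, err = cli.Revoke(ctx, resp.ID) // resp.ID is already a clientv3.LeaseID
	return err
}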
func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
	defer ls.aliveWg.Done()
	ctx, cancel := context.WithCancel(ls.ctx)
	stream, err := ls.cli.KeepAlive(ctx, clientv3.LeaseID(leaseID))
	defer func() { cancel() }()
	for {
		select {
		case <-time.After(500 * time.Millisecond):
		case <-ls.ctx.Done():
			ls.lg.Debug(
				"keepLeaseAlive context canceled",
				zap.String("stress-type", ls.stype.String()),
				zap.String("endpoint", ls.m.EtcdClientEndpoint),
				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
				zap.Error(ls.ctx.Err()),
			)
			// it is possible that a lease expires during the invariant-checking phase but not during the keepLeaseAlive() phase.
			// this scenario occurs when an alive lease is just about to expire as keepLeaseAlive() exits, and then expires during invariant checking.
			// to circumvent that scenario, we check each lease before the keepalive loop exits to see if it was renewed in the last TTL/2 duration.
			// if it was renewed, invariant checking has at least TTL/2 before the lease expires, which is long enough for the checking to finish.
			// if it was not renewed, we remove the lease from the alive map so that the lease doesn't expire during invariant checking.
			renewTime, ok := ls.aliveLeases.read(leaseID)
			if ok && renewTime.Add(defaultTTL/2*time.Second).Before(time.Now()) {
				ls.aliveLeases.remove(leaseID)
				ls.lg.Debug(
					"keepLeaseAlive lease has not been renewed, dropped it",
					zap.String("stress-type", ls.stype.String()),
					zap.String("endpoint", ls.m.EtcdClientEndpoint),
					zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
				)
			}
			return
		}

		if err != nil {
			ls.lg.Debug(
				"keepLeaseAlive lease creates stream error",
				zap.String("stress-type", ls.stype.String()),
				zap.String("endpoint", ls.m.EtcdClientEndpoint),
				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
				zap.Error(err),
			)
			cancel()
			ctx, cancel = context.WithCancel(ls.ctx)
			stream, err = ls.cli.KeepAlive(ctx, clientv3.LeaseID(leaseID))
			// keep the new context alive for the recreated stream; it is
			// released by the deferred cancel or by the next retry.
			continue
		}
		if err != nil {
			ls.lg.Debug(
				"keepLeaseAlive failed to receive lease keepalive response",
				zap.String("stress-type", ls.stype.String()),
				zap.String("endpoint", ls.m.EtcdClientEndpoint),
				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
				zap.Error(err),
			)
			continue
		}

		ls.lg.Debug(
			"keepLeaseAlive waiting on lease stream",
			zap.String("stress-type", ls.stype.String()),
			zap.String("endpoint", ls.m.EtcdClientEndpoint),
			zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
		)
		leaseRenewTime := time.Now()
		respRC := <-stream
		if respRC == nil {
			ls.lg.Debug(
				"keepLeaseAlive received nil lease keepalive response",
				zap.String("stress-type", ls.stype.String()),
				zap.String("endpoint", ls.m.EtcdClientEndpoint),
				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
			)
			continue
		}

		// the lease expires once its TTL reaches 0;
		// don't send a keepalive if the lease has expired
		if respRC.TTL <= 0 {
			ls.lg.Debug(
				"keepLeaseAlive stream received lease keepalive response TTL <= 0",
				zap.String("stress-type", ls.stype.String()),
				zap.String("endpoint", ls.m.EtcdClientEndpoint),
				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
				zap.Int64("ttl", respRC.TTL),
			)
			ls.aliveLeases.remove(leaseID)
			return
		}
		// renew the lease timestamp only if the lease is present
		ls.lg.Debug(
			"keepLeaseAlive renewed a lease",
			zap.String("stress-type", ls.stype.String()),
			zap.String("endpoint", ls.m.EtcdClientEndpoint),
			zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
		)
		ls.aliveLeases.update(leaseID, leaseRenewTime)
	}
}
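// renewedRecently is an editorial restatement of the freshness check in
// keepLeaseAlive above: a lease renewed within the last TTL/2 seconds leaves
// the invariant checker at least TTL/2 before expiry. Not used by the
// stresser itself.
func renewedRecently(renewTime time.Time, ttlSeconds int64) bool {
	return time.Since(renewTime) < time.Duration(ttlSeconds/2)*time.Second
}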
// attachKeysWithLease attaches keys to the lease.
// each key is the concatenation of the leaseID, '_', and the order of the key's creation,
// e.g. 5186835655248304152_0 for the first created key and 5186835655248304152_1 for the second.
func (ls *leaseStresser) attachKeysWithLease(leaseID int64) error {
	var txnPuts []clientv3.Op
	for j := 0; j < ls.keysPerLease; j++ {
		txnput := clientv3.OpPut(
			fmt.Sprintf("%d_%d", leaseID, j),
			"bar",
			clientv3.WithLease(clientv3.LeaseID(leaseID)),
		)
		txnPuts = append(txnPuts, txnput)
	}
	// keep retrying until the lease is not found or the ctx is canceled
	for ls.ctx.Err() == nil {
		_, err := ls.cli.Txn(ls.ctx).Then(txnPuts...).Commit()
		if err == nil {
			// since all created keys will be deleted too, the number of operations on keys is roughly 2x the number of created keys
			atomic.AddInt64(&ls.atomicModifiedKey, 2*int64(ls.keysPerLease))
			return nil
		}
		if rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
			return err
		}
	}
	return ls.ctx.Err()
}

// randomlyDropLease drops the lease only when rand.Intn(2) returns 0,
// giving each lease a 50% chance of being dropped.
func (ls *leaseStresser) randomlyDropLease(leaseID int64) (bool, error) {
	if rand.Intn(2) != 0 {
		return false, nil
	}

	// keep retrying until the lease is dropped or the ctx is canceled
	for ls.ctx.Err() == nil {
		_, err := ls.cli.Revoke(ls.ctx, clientv3.LeaseID(leaseID))
		if err == nil || rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
			return true, nil
		}
	}

	ls.lg.Debug(
		"randomlyDropLease error",
		zap.String("stress-type", ls.stype.String()),
		zap.String("endpoint", ls.m.EtcdClientEndpoint),
		zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
		zap.Error(ls.ctx.Err()),
	)
	return false, ls.ctx.Err()
}

func (ls *leaseStresser) Pause() map[string]int {
	return ls.Close()
}

func (ls *leaseStresser) Close() map[string]int {
	ls.cancel()
	ls.runWg.Wait()
	ls.aliveWg.Wait()
	ls.cli.Close()
	ls.lg.Info(
		"stress STOP",
		zap.String("stress-type", ls.stype.String()),
		zap.String("endpoint", ls.m.EtcdClientEndpoint),
	)
	return nil
}

func (ls *leaseStresser) ModifiedKeys() int64 {
	return atomic.LoadInt64(&ls.atomicModifiedKey)
}
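// leaseKey is an editorial helper reproducing the key naming scheme
// documented above attachKeysWithLease ("<leaseID>_<index>"); it is
// illustrative only and not called anywhere in this file.
func leaseKey(leaseID int64, i int) string {
	return fmt.Sprintf("%d_%d", leaseID, i)
}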