github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/lease/manager_async_test.go (about) 1 // Copyright 2018 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package lease_test 5 6 import ( 7 "sync" 8 "time" 9 10 "github.com/juju/clock/testclock" 11 "github.com/juju/errors" 12 "github.com/juju/loggo" 13 "github.com/juju/testing" 14 jc "github.com/juju/testing/checkers" 15 "github.com/juju/worker/v3/workertest" 16 gc "gopkg.in/check.v1" 17 18 corelease "github.com/juju/juju/core/lease" 19 coretesting "github.com/juju/juju/testing" 20 "github.com/juju/juju/worker/lease" 21 ) 22 23 type leaseMap = map[corelease.Key]corelease.Info 24 25 // AsyncSuite checks that expiries and claims that block don't prevent 26 // subsequent updates. 27 type AsyncSuite struct { 28 testing.IsolationSuite 29 } 30 31 var _ = gc.Suite(&AsyncSuite{}) 32 33 func (s *AsyncSuite) SetUpTest(c *gc.C) { 34 s.IsolationSuite.SetUpTest(c) 35 logger := loggo.GetLogger("juju.worker.lease") 36 logger.SetLogLevel(loggo.TRACE) 37 logger = loggo.GetLogger("lease_test") 38 logger.SetLogLevel(loggo.TRACE) 39 } 40 41 func (s *AsyncSuite) TestRevokeTimeout(c *gc.C) { 42 // When a timeout happens on revoke we retry. 43 revokeCalls := make(chan struct{}) 44 fix := Fixture{ 45 leases: leaseMap{ 46 key("requiem"): { 47 Holder: "verdi", 48 Expiry: offset(time.Minute), 49 }, 50 }, 51 expectCalls: []call{{ 52 method: "RevokeLease", 53 args: []interface{}{key("requiem"), "verdi"}, 54 err: corelease.ErrTimeout, 55 callback: func(_ leaseMap) { 56 select { 57 case revokeCalls <- struct{}{}: 58 case <-time.After(coretesting.LongWait): 59 c.Errorf("timed out sending revoke") 60 } 61 }, 62 }, { 63 method: "RevokeLease", 64 args: []interface{}{key("requiem"), "verdi"}, 65 callback: func(leases leaseMap) { 66 delete(leases, key("requiem")) 67 close(revokeCalls) 68 }, 69 }}, 70 } 71 fix.RunTest(c, func(manager *lease.Manager, clock *testclock.Clock) { 72 revoker, err := manager.Revoker("namespace", "modelUUID") 73 c.Assert(err, jc.ErrorIsNil) 74 75 result := make(chan error) 76 go func() { 77 result <- revoker.Revoke("requiem", "verdi") 78 }() 79 80 select { 81 case <-revokeCalls: 82 case <-time.After(coretesting.LongWait): 83 c.Fatalf("timed out waiting for revoke") 84 } 85 86 // Two waiters: 87 // - one is the nextTick timer, set for 1 minute in the future 88 // - two is the claim retry timer 89 err = clock.WaitAdvance(50*time.Millisecond, coretesting.LongWait, 2) 90 c.Assert(err, jc.ErrorIsNil) 91 92 select { 93 case err := <-result: 94 c.Assert(err, jc.ErrorIsNil) 95 case <-time.After(coretesting.LongWait): 96 c.Fatalf("timed out waiting for response") 97 } 98 }) 99 } 100 101 func (s *AsyncSuite) TestRevokeRepeatedTimeout(c *gc.C) { 102 // When a timeout happens on revoke we retry - if we hit the retry 103 // limit we should kill the manager. 104 revokeCalls := make(chan struct{}) 105 106 var calls []call 107 for i := 0; i < lease.MaxRetries; i++ { 108 calls = append(calls, call{ 109 method: "RevokeLease", 110 args: []interface{}{key("requiem"), "verdi"}, 111 err: corelease.ErrTimeout, 112 callback: func(_ leaseMap) { 113 select { 114 case revokeCalls <- struct{}{}: 115 case <-time.After(coretesting.LongWait): 116 c.Errorf("timed out sending revoke") 117 } 118 }, 119 }) 120 } 121 fix := Fixture{ 122 leases: leaseMap{ 123 key("requiem"): { 124 Holder: "verdi", 125 Expiry: offset(time.Minute), 126 }, 127 }, 128 expectCalls: calls, 129 expectDirty: true, 130 } 131 fix.RunTest(c, func(manager *lease.Manager, clock *testclock.Clock) { 132 result := make(chan error) 133 revoker, err := manager.Revoker("namespace", "modelUUID") 134 c.Assert(err, jc.ErrorIsNil) 135 go func() { 136 result <- revoker.Revoke("requiem", "verdi") 137 }() 138 139 duration := lease.InitialRetryDelay 140 for i := 0; i < lease.MaxRetries-1; i++ { 141 c.Logf("retry %d", i) 142 select { 143 case <-revokeCalls: 144 case <-result: 145 c.Fatalf("got result too soon") 146 case <-time.After(coretesting.LongWait): 147 c.Fatalf("timed out waiting for revoke call") 148 } 149 150 // There should be 2 waiters: 151 // - nextTick has a timer once things expire 152 // - retryingClaim has an attempt timer 153 c.Assert(clock.WaitAdvance(duration, coretesting.LongWait, 2), jc.ErrorIsNil) 154 duration = time.Duration(float64(duration)*lease.RetryBackoffFactor + 1) 155 } 156 157 select { 158 case <-revokeCalls: 159 case <-time.After(coretesting.LongWait): 160 c.Fatalf("timed out waiting for final revoke call") 161 } 162 163 select { 164 case err := <-result: 165 c.Assert(errors.Cause(err), gc.Equals, corelease.ErrTimeout) 166 case <-time.After(coretesting.LongWait): 167 c.Fatalf("timed out waiting for result") 168 } 169 170 workertest.CheckAlive(c, manager) 171 }) 172 } 173 174 func (s *AsyncSuite) TestClaimSlow(c *gc.C) { 175 slowStarted := make(chan struct{}) 176 slowFinish := make(chan struct{}) 177 178 fix := Fixture{ 179 leases: leaseMap{ 180 key("dmdc"): { 181 Holder: "terry", 182 Expiry: offset(time.Second), 183 }, 184 }, 185 expectCalls: []call{{ 186 method: "ExtendLease", 187 args: []interface{}{ 188 key("dmdc"), 189 corelease.Request{"terry", time.Minute}, 190 }, 191 err: corelease.ErrInvalid, 192 parallelCallback: func(mu *sync.Mutex, leases leaseMap) { 193 select { 194 case slowStarted <- struct{}{}: 195 case <-time.After(coretesting.LongWait): 196 c.Errorf("timed out sending slowStarted") 197 } 198 select { 199 case <-slowFinish: 200 case <-time.After(coretesting.LongWait): 201 c.Errorf("timed out waiting for slowFinish") 202 } 203 mu.Lock() 204 leases[key("dmdc")] = corelease.Info{ 205 Holder: "lance", 206 Expiry: offset(time.Minute), 207 } 208 mu.Unlock() 209 }, 210 }, { 211 method: "ClaimLease", 212 args: []interface{}{ 213 key("antiquisearchers"), 214 corelease.Request{"art", time.Minute}, 215 }, 216 callback: func(leases leaseMap) { 217 leases[key("antiquisearchers")] = corelease.Info{ 218 Holder: "art", 219 Expiry: offset(time.Minute), 220 } 221 }, 222 }}, 223 } 224 fix.RunTest(c, func(manager *lease.Manager, clock *testclock.Clock) { 225 claimer, err := manager.Claimer("namespace", "modelUUID") 226 c.Assert(err, jc.ErrorIsNil) 227 228 response1 := make(chan error) 229 go func() { 230 response1 <- claimer.Claim("dmdc", "terry", time.Minute) 231 }() 232 233 select { 234 case <-slowStarted: 235 case <-time.After(coretesting.LongWait): 236 c.Fatalf("timed out waiting for slowStarted") 237 } 238 response2 := make(chan error) 239 go func() { 240 response2 <- claimer.Claim("antiquisearchers", "art", time.Minute) 241 }() 242 243 // response1 should have failed its claim, and now be waiting to retry 244 // only 1 waiter, which is the 'when should we expire next' timer. 245 c.Assert(clock.WaitAdvance(50*time.Millisecond, testing.LongWait, 1), jc.ErrorIsNil) 246 247 // We should be able to get the response for the second claim 248 // even though the first hasn't come back yet. 249 select { 250 case err := <-response2: 251 c.Assert(err, jc.ErrorIsNil) 252 case <-response1: 253 c.Fatalf("response1 was ready") 254 case <-time.After(coretesting.LongWait): 255 c.Fatalf("timed out waiting for response2") 256 } 257 258 close(slowFinish) 259 260 c.Assert(clock.WaitAdvance(50*time.Millisecond, testing.LongWait, 1), jc.ErrorIsNil) 261 262 // Now response1 should come back. 263 select { 264 case err := <-response1: 265 c.Assert(errors.Cause(err), gc.Equals, corelease.ErrClaimDenied) 266 case <-time.After(coretesting.LongWait): 267 c.Fatalf("timed out waiting for response1") 268 } 269 }) 270 } 271 272 func (s *AsyncSuite) TestClaimTwoErrors(c *gc.C) { 273 oneStarted := make(chan struct{}) 274 oneFinish := make(chan struct{}) 275 twoStarted := make(chan struct{}) 276 twoFinish := make(chan struct{}) 277 278 fix := Fixture{ 279 expectDirty: true, 280 expectCalls: []call{{ 281 method: "ClaimLease", 282 args: []interface{}{ 283 key("one"), 284 corelease.Request{"terry", time.Minute}, 285 }, 286 err: errors.New("terry is bad"), 287 parallelCallback: func(mu *sync.Mutex, leases leaseMap) { 288 close(oneStarted) 289 select { 290 case <-oneFinish: 291 case <-time.After(coretesting.LongWait): 292 c.Errorf("timed out waiting for oneFinish") 293 } 294 }, 295 }, { 296 method: "ClaimLease", 297 args: []interface{}{ 298 key("two"), 299 corelease.Request{"lance", time.Minute}, 300 }, 301 err: errors.New("lance is also bad"), 302 parallelCallback: func(mu *sync.Mutex, leases leaseMap) { 303 close(twoStarted) 304 select { 305 case <-twoFinish: 306 case <-time.After(coretesting.LongWait): 307 c.Errorf("timed out waiting for twoFinish") 308 } 309 }, 310 }}, 311 } 312 fix.RunTest(c, func(manager *lease.Manager, clock *testclock.Clock) { 313 claimer, err := manager.Claimer("namespace", "modelUUID") 314 c.Assert(err, jc.ErrorIsNil) 315 316 response1 := make(chan error) 317 go func() { 318 response1 <- claimer.Claim("one", "terry", time.Minute) 319 }() 320 select { 321 case <-oneStarted: 322 case <-time.After(coretesting.LongWait): 323 c.Fatalf("timed out waiting for oneStarted") 324 } 325 326 response2 := make(chan error) 327 go func() { 328 response2 <- claimer.Claim("two", "lance", time.Minute) 329 }() 330 331 select { 332 case <-twoStarted: 333 case <-time.After(coretesting.LongWait): 334 c.Fatalf("timed out waiting for twoStarted") 335 } 336 337 // By now, both of the claims have had their processing started 338 // by the store, so the lease manager will have two elements 339 // in the wait group. 340 close(oneFinish) 341 // We should be able to get error responses from both of them. 342 select { 343 case err1 := <-response1: 344 c.Check(err1, gc.ErrorMatches, "lease manager stopped") 345 case <-time.After(coretesting.LongWait): 346 c.Fatalf("timed out waiting for response2") 347 } 348 349 close(twoFinish) 350 select { 351 case err2 := <-response2: 352 c.Check(err2, gc.ErrorMatches, "lease manager stopped") 353 case <-time.After(coretesting.LongWait): 354 c.Fatalf("timed out waiting for response2") 355 } 356 357 // Since we unblock one before two, we know the error from 358 // the manager is bad terry 359 err = workertest.CheckKilled(c, manager) 360 c.Assert(err, gc.ErrorMatches, "terry is bad") 361 }) 362 } 363 364 func (s *AsyncSuite) TestClaimTimeout(c *gc.C) { 365 // When a claim times out we retry. 366 claimCalls := make(chan struct{}) 367 fix := Fixture{ 368 expectCalls: []call{{ 369 method: "ClaimLease", 370 args: []interface{}{ 371 key("icecream"), 372 corelease.Request{"rosie", time.Minute}, 373 }, 374 err: corelease.ErrTimeout, 375 callback: func(_ leaseMap) { 376 select { 377 case claimCalls <- struct{}{}: 378 case <-time.After(coretesting.LongWait): 379 c.Fatalf("timed out sending claim") 380 } 381 }, 382 }, { 383 method: "ClaimLease", 384 args: []interface{}{ 385 key("icecream"), 386 corelease.Request{"rosie", time.Minute}, 387 }, 388 callback: func(leases leaseMap) { 389 leases[key("icecream")] = corelease.Info{ 390 Holder: "rosie", 391 Expiry: offset(time.Minute), 392 } 393 }, 394 }}, 395 } 396 fix.RunTest(c, func(manager *lease.Manager, clock *testclock.Clock) { 397 result := make(chan error) 398 claimer, err := manager.Claimer("namespace", "modelUUID") 399 c.Assert(err, jc.ErrorIsNil) 400 go func() { 401 result <- claimer.Claim("icecream", "rosie", time.Minute) 402 }() 403 404 select { 405 case <-claimCalls: 406 case <-time.After(coretesting.LongWait): 407 c.Fatalf("timed out waiting for claim") 408 } 409 410 // Two waiters: 411 // - one is the nextTick timer, set for 1 minute in the future 412 // - two is the claim retry timer 413 err = clock.WaitAdvance(50*time.Millisecond, coretesting.LongWait, 2) 414 c.Assert(err, jc.ErrorIsNil) 415 416 select { 417 case err := <-result: 418 c.Assert(err, jc.ErrorIsNil) 419 case <-time.After(coretesting.LongWait): 420 c.Fatalf("timed out waiting for response") 421 } 422 }) 423 } 424 425 func (s *AsyncSuite) TestClaimNoticesEarlyExpiry(c *gc.C) { 426 fix := Fixture{ 427 leases: leaseMap{ 428 key("dmdc"): { 429 Holder: "terry", 430 Expiry: offset(10 * time.Minute), 431 }, 432 }, 433 expectCalls: []call{{ 434 method: "ClaimLease", 435 args: []interface{}{ 436 key("icecream"), 437 corelease.Request{"rosie", time.Minute}, 438 }, 439 callback: func(leases leaseMap) { 440 leases[key("icecream")] = corelease.Info{ 441 Holder: "rosie", 442 Expiry: offset(time.Minute), 443 } 444 }, 445 }, { 446 method: "ClaimLease", 447 args: []interface{}{ 448 key("fudge"), 449 corelease.Request{"chocolate", time.Minute}, 450 }, 451 callback: func(leases leaseMap) { 452 leases[key("fudge")] = corelease.Info{ 453 Holder: "chocolate", 454 Expiry: offset(2 * time.Minute), 455 } 456 }, 457 }}, 458 } 459 fix.RunTest(c, func(manager *lease.Manager, clock *testclock.Clock) { 460 // When we first start, we should not yet expire because the 461 // Expiry should be 10 minutes into the future. But the first claim 462 // will create an entry that expires in only 1 minute, so we should 463 // reset our expire timeout 464 claimer, err := manager.Claimer("namespace", "modelUUID") 465 c.Assert(err, jc.ErrorIsNil) 466 err = claimer.Claim("icecream", "rosie", time.Minute) 467 c.Assert(err, jc.ErrorIsNil) 468 // We sleep for 30s which *shouldn't* trigger any Expiry. And then we get 469 // another claim that also wants 1 minute duration. But that should not cause the 470 // timer to wake up in 1minute, but the 30s that are remaining. 471 c.Assert(clock.WaitAdvance(30*time.Second, testing.LongWait, 1), jc.ErrorIsNil) 472 // The second claim tries to set a timeout of another minute, but that should 473 // not cause the timer to get reset any later than it already is. 474 // Chocolate is also given a slightly longer timeout (2min after epoch) 475 err = claimer.Claim("fudge", "chocolate", time.Minute) 476 c.Assert(err, jc.ErrorIsNil) 477 // Now when we advance the clock another 30s, it should wake up and 478 // expire "icecream", and then queue up that we should expire "fudge" 479 // 1m later 480 c.Assert(clock.WaitAdvance(30*time.Second, testing.LongWait, 1), jc.ErrorIsNil) 481 }) 482 } 483 484 func (s *AsyncSuite) TestClaimRepeatedTimeout(c *gc.C) { 485 // When a claim times out too many times we give up. 486 claimCalls := make(chan struct{}) 487 var calls []call 488 for i := 0; i < lease.MaxRetries; i++ { 489 calls = append(calls, call{ 490 method: "ClaimLease", 491 args: []interface{}{ 492 key("icecream"), 493 corelease.Request{"rosie", time.Minute}, 494 }, 495 err: corelease.ErrTimeout, 496 callback: func(_ leaseMap) { 497 select { 498 case claimCalls <- struct{}{}: 499 case <-time.After(coretesting.LongWait): 500 c.Fatalf("timed out sending claim") 501 } 502 }, 503 }) 504 } 505 fix := Fixture{ 506 expectCalls: calls, 507 expectDirty: true, 508 } 509 fix.RunTest(c, func(manager *lease.Manager, clock *testclock.Clock) { 510 result := make(chan error) 511 claimer, err := manager.Claimer("namespace", "modelUUID") 512 c.Assert(err, jc.ErrorIsNil) 513 go func() { 514 result <- claimer.Claim("icecream", "rosie", time.Minute) 515 }() 516 517 duration := lease.InitialRetryDelay 518 for i := 0; i < lease.MaxRetries-1; i++ { 519 c.Logf("retry %d", i) 520 select { 521 case <-claimCalls: 522 case <-result: 523 c.Fatalf("got result too soon") 524 case <-time.After(coretesting.LongWait): 525 c.Fatalf("timed out waiting for claim call") 526 } 527 528 // There should be 2 waiters: 529 // - nextTick has a timer once things expire 530 // - retryingClaim has an attempt timer 531 c.Assert(clock.WaitAdvance(duration, coretesting.LongWait, 2), jc.ErrorIsNil) 532 duration = time.Duration(float64(duration)*lease.RetryBackoffFactor + 1) 533 } 534 535 select { 536 case <-claimCalls: 537 case <-time.After(coretesting.LongWait): 538 c.Fatalf("timed out waiting for final claim call") 539 } 540 541 select { 542 case err := <-result: 543 c.Assert(errors.Cause(err), gc.Equals, corelease.ErrTimeout) 544 case <-time.After(coretesting.LongWait): 545 c.Fatalf("timed out waiting for result") 546 } 547 548 workertest.CheckAlive(c, manager) 549 }) 550 } 551 552 func (s *AsyncSuite) TestClaimRepeatedInvalid(c *gc.C) { 553 // When a claim is invalid for too long, we give up 554 claimCalls := make(chan struct{}) 555 var calls []call 556 for i := 0; i < lease.MaxRetries; i++ { 557 calls = append(calls, call{ 558 method: "ClaimLease", 559 args: []interface{}{ 560 key("icecream"), 561 corelease.Request{"rosie", time.Minute}, 562 }, 563 err: corelease.ErrInvalid, 564 callback: func(_ leaseMap) { 565 select { 566 case claimCalls <- struct{}{}: 567 case <-time.After(coretesting.LongWait): 568 c.Fatalf("timed out sending claim") 569 } 570 }, 571 }) 572 } 573 fix := Fixture{ 574 expectCalls: calls, 575 expectDirty: true, 576 } 577 fix.RunTest(c, func(manager *lease.Manager, clock *testclock.Clock) { 578 result := make(chan error) 579 claimer, err := manager.Claimer("namespace", "modelUUID") 580 c.Assert(err, jc.ErrorIsNil) 581 go func() { 582 result <- claimer.Claim("icecream", "rosie", time.Minute) 583 }() 584 585 duration := lease.InitialRetryDelay 586 for i := 0; i < lease.MaxRetries-1; i++ { 587 c.Logf("retry %d", i) 588 select { 589 case <-claimCalls: 590 case <-result: 591 c.Fatalf("got result too soon") 592 case <-time.After(coretesting.LongWait): 593 c.Fatalf("timed out waiting for claim call") 594 } 595 596 // There should be 2 waiters: 597 // - nextTick has a timer once things expire 598 // - retryingClaim has an attempt timer 599 c.Assert(clock.WaitAdvance(duration, coretesting.LongWait, 2), jc.ErrorIsNil) 600 duration = time.Duration(float64(duration)*lease.RetryBackoffFactor + 1) 601 } 602 603 select { 604 case <-claimCalls: 605 case <-time.After(coretesting.LongWait): 606 c.Fatalf("timed out waiting for final claim call") 607 } 608 609 select { 610 case err := <-result: 611 c.Assert(errors.Cause(err), gc.Equals, corelease.ErrClaimDenied) 612 case <-time.After(coretesting.LongWait): 613 c.Fatalf("timed out waiting for result") 614 } 615 616 workertest.CheckAlive(c, manager) 617 }) 618 } 619 620 func (s *AsyncSuite) TestWaitsForGoroutines(c *gc.C) { 621 // The manager should wait for all of its child expire and claim 622 // goroutines to be finished before it stops. 623 claimStarted := make(chan struct{}) 624 claimFinish := make(chan struct{}) 625 fix := Fixture{ 626 leases: leaseMap{ 627 key("legacy"): { 628 Holder: "culprate", 629 Expiry: offset(-time.Second), 630 }, 631 }, 632 expectCalls: []call{{ 633 method: "ClaimLease", 634 args: []interface{}{ 635 key("blooadoath"), 636 corelease.Request{"hand", time.Minute}, 637 }, 638 parallelCallback: func(_ *sync.Mutex, _ leaseMap) { 639 close(claimStarted) 640 <-claimFinish 641 }, 642 }}, 643 } 644 fix.RunTest(c, func(manager *lease.Manager, _ *testclock.Clock) { 645 646 result := make(chan error) 647 claimer, err := manager.Claimer("namespace", "modelUUID") 648 c.Assert(err, jc.ErrorIsNil) 649 go func() { 650 result <- claimer.Claim("blooadoath", "hand", time.Minute) 651 }() 652 653 // Ensure we've called claim in the store and are waiting for 654 // a response. 655 select { 656 case <-claimStarted: 657 case <-time.After(coretesting.LongWait): 658 c.Fatalf("timed out waiting for claim start") 659 } 660 661 // If we kill the manager now it won't finish until the claim 662 // call finishes (no worries about timeouts because we aren't 663 // advancing the test clock). 664 manager.Kill() 665 workertest.CheckAlive(c, manager) 666 667 // Now if we finish the claim, the result comes back. 668 close(claimFinish) 669 670 select { 671 case err := <-result: 672 c.Assert(err, gc.ErrorMatches, "lease manager stopped") 673 case <-time.After(coretesting.LongWait): 674 c.Fatalf("timed out waiting for result") 675 } 676 677 err = workertest.CheckKilled(c, manager) 678 c.Assert(err, jc.ErrorIsNil) 679 }) 680 }