github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/worker/runner_test.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package worker_test 5 6 import ( 7 "fmt" 8 "sort" 9 "sync/atomic" 10 "time" 11 12 "github.com/juju/errors" 13 gitjujutesting "github.com/juju/testing" 14 jc "github.com/juju/testing/checkers" 15 gc "gopkg.in/check.v1" 16 "launchpad.net/tomb" 17 18 "github.com/juju/juju/testing" 19 "github.com/juju/juju/worker" 20 workertesting "github.com/juju/juju/worker/testing" 21 ) 22 23 var ( 24 _ = gc.Suite(&runnerSuite{}) 25 _ = gc.Suite(&workersSuite{}) 26 ) 27 28 type runnerSuite struct { 29 testing.BaseSuite 30 } 31 32 func noneFatal(error) bool { 33 return false 34 } 35 36 func allFatal(error) bool { 37 return true 38 } 39 40 func noImportance(err0, err1 error) bool { 41 return false 42 } 43 44 func (*runnerSuite) TestOneWorkerStart(c *gc.C) { 45 runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond) 46 starter := newTestWorkerStarter() 47 err := runner.StartWorker("id", testWorkerStart(starter)) 48 c.Assert(err, jc.ErrorIsNil) 49 starter.assertStarted(c, true) 50 51 c.Assert(worker.Stop(runner), gc.IsNil) 52 starter.assertStarted(c, false) 53 } 54 55 func (*runnerSuite) TestOneWorkerFinish(c *gc.C) { 56 runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond) 57 starter := newTestWorkerStarter() 58 err := runner.StartWorker("id", testWorkerStart(starter)) 59 c.Assert(err, jc.ErrorIsNil) 60 starter.assertStarted(c, true) 61 62 starter.die <- nil 63 starter.assertStarted(c, false) 64 starter.assertNeverStarted(c) 65 66 c.Assert(worker.Stop(runner), gc.IsNil) 67 } 68 69 func (*runnerSuite) TestOneWorkerRestart(c *gc.C) { 70 runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond) 71 starter := newTestWorkerStarter() 72 err := runner.StartWorker("id", testWorkerStart(starter)) 73 c.Assert(err, jc.ErrorIsNil) 74 starter.assertStarted(c, true) 75 76 // Check it restarts a few times time. 77 for i := 0; i < 3; i++ { 78 starter.die <- fmt.Errorf("an error") 79 starter.assertStarted(c, false) 80 starter.assertStarted(c, true) 81 } 82 83 c.Assert(worker.Stop(runner), gc.IsNil) 84 starter.assertStarted(c, false) 85 } 86 87 func (*runnerSuite) TestOneWorkerStartFatalError(c *gc.C) { 88 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 89 starter := newTestWorkerStarter() 90 starter.startErr = errors.New("cannot start test task") 91 err := runner.StartWorker("id", testWorkerStart(starter)) 92 c.Assert(err, jc.ErrorIsNil) 93 err = runner.Wait() 94 c.Assert(err, gc.Equals, starter.startErr) 95 } 96 97 func (*runnerSuite) TestOneWorkerDieFatalError(c *gc.C) { 98 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 99 starter := newTestWorkerStarter() 100 err := runner.StartWorker("id", testWorkerStart(starter)) 101 c.Assert(err, jc.ErrorIsNil) 102 starter.assertStarted(c, true) 103 dieErr := errors.New("error when running") 104 starter.die <- dieErr 105 err = runner.Wait() 106 c.Assert(err, gc.Equals, dieErr) 107 starter.assertStarted(c, false) 108 } 109 110 func (*runnerSuite) TestOneWorkerStartStop(c *gc.C) { 111 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 112 starter := newTestWorkerStarter() 113 err := runner.StartWorker("id", testWorkerStart(starter)) 114 c.Assert(err, jc.ErrorIsNil) 115 starter.assertStarted(c, true) 116 err = runner.StopWorker("id") 117 c.Assert(err, jc.ErrorIsNil) 118 starter.assertStarted(c, false) 119 c.Assert(worker.Stop(runner), gc.IsNil) 120 } 121 122 func (*runnerSuite) TestOneWorkerStopFatalError(c *gc.C) { 123 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 124 starter := newTestWorkerStarter() 125 starter.stopErr = errors.New("stop error") 126 err := runner.StartWorker("id", testWorkerStart(starter)) 127 c.Assert(err, jc.ErrorIsNil) 128 starter.assertStarted(c, true) 129 err = runner.StopWorker("id") 130 c.Assert(err, jc.ErrorIsNil) 131 err = runner.Wait() 132 c.Assert(err, gc.Equals, starter.stopErr) 133 } 134 135 func (*runnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) { 136 runner := worker.NewRunner(allFatal, noImportance, 3*time.Second) 137 starter := newTestWorkerStarter() 138 starter.stopWait = make(chan struct{}) 139 140 err := runner.StartWorker("id", testWorkerStart(starter)) 141 c.Assert(err, jc.ErrorIsNil) 142 starter.assertStarted(c, true) 143 err = runner.StopWorker("id") 144 c.Assert(err, jc.ErrorIsNil) 145 err = runner.StartWorker("id", testWorkerStart(starter)) 146 c.Assert(err, jc.ErrorIsNil) 147 148 close(starter.stopWait) 149 starter.assertStarted(c, false) 150 // Check that the task is restarted immediately without 151 // the usual restart timeout delay. 152 t0 := time.Now() 153 starter.assertStarted(c, true) 154 restartDuration := time.Since(t0) 155 if restartDuration > 1*time.Second { 156 c.Fatalf("task did not restart immediately") 157 } 158 c.Assert(worker.Stop(runner), gc.IsNil) 159 } 160 161 func (*runnerSuite) TestOneWorkerRestartDelay(c *gc.C) { 162 const delay = 100 * time.Millisecond 163 runner := worker.NewRunner(noneFatal, noImportance, delay) 164 starter := newTestWorkerStarter() 165 err := runner.StartWorker("id", testWorkerStart(starter)) 166 c.Assert(err, jc.ErrorIsNil) 167 starter.assertStarted(c, true) 168 starter.die <- fmt.Errorf("non-fatal error") 169 starter.assertStarted(c, false) 170 t0 := time.Now() 171 starter.assertStarted(c, true) 172 restartDuration := time.Since(t0) 173 if restartDuration < delay { 174 c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, delay) 175 } 176 c.Assert(worker.Stop(runner), gc.IsNil) 177 } 178 179 type errorLevel int 180 181 func (e errorLevel) Error() string { 182 return fmt.Sprintf("error with importance %d", e) 183 } 184 185 func (*runnerSuite) TestErrorImportance(c *gc.C) { 186 moreImportant := func(err0, err1 error) bool { 187 return err0.(errorLevel) > err1.(errorLevel) 188 } 189 id := func(i int) string { return fmt.Sprint(i) } 190 runner := worker.NewRunner(allFatal, moreImportant, time.Millisecond) 191 for i := 0; i < 10; i++ { 192 starter := newTestWorkerStarter() 193 starter.stopErr = errorLevel(i) 194 err := runner.StartWorker(id(i), testWorkerStart(starter)) 195 c.Assert(err, jc.ErrorIsNil) 196 } 197 err := runner.StopWorker(id(4)) 198 c.Assert(err, jc.ErrorIsNil) 199 err = runner.Wait() 200 c.Assert(err, gc.Equals, errorLevel(9)) 201 } 202 203 func (*runnerSuite) TestStartWorkerWhenDead(c *gc.C) { 204 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 205 c.Assert(worker.Stop(runner), gc.IsNil) 206 c.Assert(runner.StartWorker("foo", nil), gc.Equals, worker.ErrDead) 207 } 208 209 func (*runnerSuite) TestStopWorkerWhenDead(c *gc.C) { 210 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 211 c.Assert(worker.Stop(runner), gc.IsNil) 212 c.Assert(runner.StopWorker("foo"), gc.Equals, worker.ErrDead) 213 } 214 215 func (*runnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) { 216 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 217 var starters []*testWorkerStarter 218 for i := 0; i < 10; i++ { 219 starter := newTestWorkerStarter() 220 err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter)) 221 c.Assert(err, jc.ErrorIsNil) 222 starters = append(starters, starter) 223 } 224 for _, starter := range starters { 225 starter.assertStarted(c, true) 226 } 227 dieErr := errors.New("fatal error") 228 starters[4].die <- dieErr 229 err := runner.Wait() 230 c.Assert(err, gc.Equals, dieErr) 231 for _, starter := range starters { 232 starter.assertStarted(c, false) 233 } 234 } 235 236 func (*runnerSuite) TestFatalErrorWhileStarting(c *gc.C) { 237 // Original deadlock problem that this tests for: 238 // A worker dies with fatal error while another worker 239 // is inside start(). runWorker can't send startInfo on startedc. 240 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 241 242 slowStarter := newTestWorkerStarter() 243 // make the startNotify channel synchronous so 244 // we can delay the start indefinitely. 245 slowStarter.startNotify = make(chan bool) 246 247 err := runner.StartWorker("slow starter", testWorkerStart(slowStarter)) 248 c.Assert(err, jc.ErrorIsNil) 249 250 fatalStarter := newTestWorkerStarter() 251 fatalStarter.startErr = fmt.Errorf("a fatal error") 252 253 err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter)) 254 c.Assert(err, jc.ErrorIsNil) 255 256 // Wait for the runner loop to react to the fatal 257 // error and go into final shutdown mode. 258 time.Sleep(10 * time.Millisecond) 259 260 // At this point, the loop is in shutdown mode, but the 261 // slowStarter's worker is still in its start function. 262 // When the start function continues (the first assertStarted 263 // allows that to happen) and returns the new Worker, 264 // runWorker will try to send it on runner.startedc. 265 // This test makes sure that succeeds ok. 266 267 slowStarter.assertStarted(c, true) 268 slowStarter.assertStarted(c, false) 269 err = runner.Wait() 270 c.Assert(err, gc.Equals, fatalStarter.startErr) 271 } 272 273 func (*runnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) { 274 // Original deadlock problem that this tests for: 275 // A worker tries to call StartWorker in its start function 276 // at the same time another worker dies with a fatal error. 277 // It might not be able to send on startc. 278 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 279 280 selfStarter := newTestWorkerStarter() 281 // make the startNotify channel synchronous so 282 // we can delay the start indefinitely. 283 selfStarter.startNotify = make(chan bool) 284 selfStarter.hook = func() { 285 runner.StartWorker("another", func() (worker.Worker, error) { 286 return nil, fmt.Errorf("no worker started") 287 }) 288 } 289 err := runner.StartWorker("self starter", testWorkerStart(selfStarter)) 290 c.Assert(err, jc.ErrorIsNil) 291 292 fatalStarter := newTestWorkerStarter() 293 fatalStarter.startErr = fmt.Errorf("a fatal error") 294 295 err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter)) 296 c.Assert(err, jc.ErrorIsNil) 297 298 // Wait for the runner loop to react to the fatal 299 // error and go into final shutdown mode. 300 time.Sleep(10 * time.Millisecond) 301 302 // At this point, the loop is in shutdown mode, but the 303 // selfStarter's worker is still in its start function. 304 // When the start function continues (the first assertStarted 305 // allows that to happen) it will try to create a new 306 // worker. This failed in an earlier version of the code because the 307 // loop was not ready to receive start requests. 308 309 selfStarter.assertStarted(c, true) 310 selfStarter.assertStarted(c, false) 311 err = runner.Wait() 312 c.Assert(err, gc.Equals, fatalStarter.startErr) 313 } 314 315 type testWorkerStarter struct { 316 startCount int32 317 318 // startNotify receives true when the worker starts 319 // and false when it exits. If startErr is non-nil, 320 // it sends false only. 321 startNotify chan bool 322 323 // If stopWait is non-nil, the worker will 324 // wait for a value to be sent on it before 325 // exiting. 326 stopWait chan struct{} 327 328 // Sending a value on die causes the worker 329 // to die with the given error. 330 die chan error 331 332 // If startErr is non-nil, the worker will die immediately 333 // with this error after starting. 334 startErr error 335 336 // If stopErr is non-nil, the worker will die with this 337 // error when asked to stop. 338 stopErr error 339 340 // The hook function is called after starting the worker. 341 hook func() 342 } 343 344 func newTestWorkerStarter() *testWorkerStarter { 345 return &testWorkerStarter{ 346 die: make(chan error, 1), 347 startNotify: make(chan bool, 100), 348 hook: func() {}, 349 } 350 } 351 352 func (starter *testWorkerStarter) assertStarted(c *gc.C, started bool) { 353 select { 354 case isStarted := <-starter.startNotify: 355 c.Assert(isStarted, gc.Equals, started) 356 case <-time.After(1 * time.Second): 357 c.Fatalf("timed out waiting for start notification") 358 } 359 } 360 361 func (starter *testWorkerStarter) assertNeverStarted(c *gc.C) { 362 select { 363 case isStarted := <-starter.startNotify: 364 c.Fatalf("got unexpected start notification: %v", isStarted) 365 case <-time.After(worker.RestartDelay + testing.ShortWait): 366 } 367 } 368 369 func testWorkerStart(starter *testWorkerStarter) func() (worker.Worker, error) { 370 return func() (worker.Worker, error) { 371 return starter.start() 372 } 373 } 374 375 func (starter *testWorkerStarter) start() (worker.Worker, error) { 376 if count := atomic.AddInt32(&starter.startCount, 1); count != 1 { 377 panic(fmt.Errorf("unexpected start count %d; expected 1", count)) 378 } 379 if starter.startErr != nil { 380 starter.startNotify <- false 381 return nil, starter.startErr 382 } 383 task := &testWorker{ 384 starter: starter, 385 } 386 starter.startNotify <- true 387 go task.run() 388 return task, nil 389 } 390 391 type testWorker struct { 392 starter *testWorkerStarter 393 tomb tomb.Tomb 394 } 395 396 func (t *testWorker) Kill() { 397 t.tomb.Kill(nil) 398 } 399 400 func (t *testWorker) Wait() error { 401 return t.tomb.Wait() 402 } 403 404 func (t *testWorker) run() { 405 defer t.tomb.Done() 406 407 t.starter.hook() 408 select { 409 case <-t.tomb.Dying(): 410 t.tomb.Kill(t.starter.stopErr) 411 case err := <-t.starter.die: 412 t.tomb.Kill(err) 413 } 414 if t.starter.stopWait != nil { 415 <-t.starter.stopWait 416 } 417 t.starter.startNotify <- false 418 if count := atomic.AddInt32(&t.starter.startCount, -1); count != 0 { 419 panic(fmt.Errorf("unexpected start count %d; expected 0", count)) 420 } 421 } 422 423 type workersSuite struct { 424 testing.BaseSuite 425 426 calls []string 427 stub *gitjujutesting.Stub 428 } 429 430 func (s *workersSuite) SetUpTest(c *gc.C) { 431 s.BaseSuite.SetUpTest(c) 432 433 s.stub = &gitjujutesting.Stub{} 434 s.calls = nil 435 } 436 437 func (s *workersSuite) newWorkerFunc(id string) func() (worker.Worker, error) { 438 return func() (worker.Worker, error) { 439 s.calls = append(s.calls, id) 440 return nil, nil 441 } 442 } 443 444 func (*workersSuite) TestIDsOkay(c *gc.C) { 445 newWorker := func() (worker.Worker, error) { return nil, nil } 446 447 workers := worker.NewWorkers() 448 err := workers.Add("spam", newWorker) 449 c.Assert(err, jc.ErrorIsNil) 450 err = workers.Add("eggs", newWorker) 451 c.Assert(err, jc.ErrorIsNil) 452 ids := workers.IDs() 453 454 c.Check(ids, jc.DeepEquals, []string{"spam", "eggs"}) 455 } 456 457 func (*workersSuite) TestIDsEmpty(c *gc.C) { 458 workers := worker.NewWorkers() 459 ids := workers.IDs() 460 461 c.Check(ids, gc.HasLen, 0) 462 } 463 464 func (*workersSuite) TestAddAlreadyRegistered(c *gc.C) { 465 newWorker := func() (worker.Worker, error) { return nil, nil } 466 467 workers := worker.NewWorkers() 468 err := workers.Add("spam", newWorker) 469 c.Assert(err, jc.ErrorIsNil) 470 err = workers.Add("spam", newWorker) 471 472 c.Check(err, gc.ErrorMatches, `.*already registered.*`) 473 } 474 475 func (s *workersSuite) TestStartOkay(c *gc.C) { 476 runner := workertesting.NewStubRunner(s.stub) 477 runner.CallWhenStarted = true 478 479 workers := worker.NewWorkers() 480 expected := []string{"spam", "eggs", "ham"} 481 for _, id := range expected { 482 err := workers.Add(id, s.newWorkerFunc(id)) 483 c.Assert(err, jc.ErrorIsNil) 484 } 485 err := workers.Start(runner) 486 c.Assert(err, jc.ErrorIsNil) 487 488 // We would use s.stub.CheckCalls if functions could be compared... 489 runner.CheckCallIDs(c, "StartWorker", expected...) 490 sort.Strings(s.calls) 491 sort.Strings(expected) 492 c.Check(s.calls, jc.DeepEquals, expected) 493 } 494 495 func (s *workersSuite) TestStartError(c *gc.C) { 496 runner := workertesting.NewStubRunner(s.stub) 497 failure := errors.Errorf("<failed>") 498 s.stub.SetErrors(nil, failure) 499 500 workers := worker.NewWorkers() 501 expected := []string{"spam", "eggs", "ham"} 502 for _, id := range expected { 503 err := workers.Add(id, s.newWorkerFunc(id)) 504 c.Assert(err, jc.ErrorIsNil) 505 } 506 err := workers.Start(runner) 507 508 s.stub.CheckCallNames(c, "StartWorker", "StartWorker") 509 c.Check(errors.Cause(err), gc.Equals, failure) 510 }