github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/runner_test.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package worker_test 5 6 import ( 7 "fmt" 8 "sync/atomic" 9 "time" 10 11 "github.com/juju/errors" 12 jc "github.com/juju/testing/checkers" 13 gc "gopkg.in/check.v1" 14 "gopkg.in/tomb.v1" 15 16 "github.com/juju/juju/testing" 17 "github.com/juju/juju/worker" 18 ) 19 20 type RunnerSuite struct { 21 testing.BaseSuite 22 } 23 24 var _ = gc.Suite(&RunnerSuite{}) 25 26 func noneFatal(error) bool { 27 return false 28 } 29 30 func allFatal(error) bool { 31 return true 32 } 33 34 func noImportance(err0, err1 error) bool { 35 return false 36 } 37 38 func (*RunnerSuite) TestOneWorkerStart(c *gc.C) { 39 runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond) 40 starter := newTestWorkerStarter() 41 err := runner.StartWorker("id", testWorkerStart(starter)) 42 c.Assert(err, jc.ErrorIsNil) 43 starter.assertStarted(c, true) 44 45 c.Assert(worker.Stop(runner), gc.IsNil) 46 starter.assertStarted(c, false) 47 } 48 49 func (*RunnerSuite) TestOneWorkerFinish(c *gc.C) { 50 runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond) 51 starter := newTestWorkerStarter() 52 err := runner.StartWorker("id", testWorkerStart(starter)) 53 c.Assert(err, jc.ErrorIsNil) 54 starter.assertStarted(c, true) 55 56 starter.die <- nil 57 starter.assertStarted(c, false) 58 starter.assertNeverStarted(c) 59 60 c.Assert(worker.Stop(runner), gc.IsNil) 61 } 62 63 func (*RunnerSuite) TestOneWorkerRestart(c *gc.C) { 64 runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond) 65 starter := newTestWorkerStarter() 66 err := runner.StartWorker("id", testWorkerStart(starter)) 67 c.Assert(err, jc.ErrorIsNil) 68 starter.assertStarted(c, true) 69 70 // Check it restarts a few times time. 71 for i := 0; i < 3; i++ { 72 starter.die <- fmt.Errorf("an error") 73 starter.assertStarted(c, false) 74 starter.assertStarted(c, true) 75 } 76 77 c.Assert(worker.Stop(runner), gc.IsNil) 78 starter.assertStarted(c, false) 79 } 80 81 func (*RunnerSuite) TestOneWorkerStartFatalError(c *gc.C) { 82 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 83 starter := newTestWorkerStarter() 84 starter.startErr = errors.New("cannot start test task") 85 err := runner.StartWorker("id", testWorkerStart(starter)) 86 c.Assert(err, jc.ErrorIsNil) 87 err = runner.Wait() 88 c.Assert(err, gc.Equals, starter.startErr) 89 } 90 91 func (*RunnerSuite) TestOneWorkerDieFatalError(c *gc.C) { 92 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 93 starter := newTestWorkerStarter() 94 err := runner.StartWorker("id", testWorkerStart(starter)) 95 c.Assert(err, jc.ErrorIsNil) 96 starter.assertStarted(c, true) 97 dieErr := errors.New("error when running") 98 starter.die <- dieErr 99 err = runner.Wait() 100 c.Assert(err, gc.Equals, dieErr) 101 starter.assertStarted(c, false) 102 } 103 104 func (*RunnerSuite) TestOneWorkerStartStop(c *gc.C) { 105 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 106 starter := newTestWorkerStarter() 107 err := runner.StartWorker("id", testWorkerStart(starter)) 108 c.Assert(err, jc.ErrorIsNil) 109 starter.assertStarted(c, true) 110 err = runner.StopWorker("id") 111 c.Assert(err, jc.ErrorIsNil) 112 starter.assertStarted(c, false) 113 c.Assert(worker.Stop(runner), gc.IsNil) 114 } 115 116 func (*RunnerSuite) TestOneWorkerStopFatalError(c *gc.C) { 117 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 118 starter := newTestWorkerStarter() 119 starter.stopErr = errors.New("stop error") 120 err := runner.StartWorker("id", testWorkerStart(starter)) 121 c.Assert(err, jc.ErrorIsNil) 122 starter.assertStarted(c, true) 123 err = runner.StopWorker("id") 124 c.Assert(err, jc.ErrorIsNil) 125 err = runner.Wait() 126 c.Assert(err, gc.Equals, starter.stopErr) 127 } 128 129 func (*RunnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) { 130 runner := worker.NewRunner(allFatal, noImportance, 3*time.Second) 131 starter := newTestWorkerStarter() 132 starter.stopWait = make(chan struct{}) 133 134 // Start a worker, and wait for it. 135 err := runner.StartWorker("id", testWorkerStart(starter)) 136 c.Assert(err, jc.ErrorIsNil) 137 starter.assertStarted(c, true) 138 139 // XXX the above does not imply the *runner* knows it's started. 140 // voodoo sleep ahoy! 141 time.Sleep(testing.ShortWait) 142 143 // Stop the worker, which will block... 144 err = runner.StopWorker("id") 145 c.Assert(err, jc.ErrorIsNil) 146 147 // While it's still blocked, try to start another. 148 err = runner.StartWorker("id", testWorkerStart(starter)) 149 c.Assert(err, jc.ErrorIsNil) 150 151 // Unblock the stopping worker, and check that the task is 152 // restarted immediately without the usual restart timeout 153 // delay. 154 t0 := time.Now() 155 close(starter.stopWait) 156 starter.assertStarted(c, false) // stop notification 157 starter.assertStarted(c, true) // start notification 158 restartDuration := time.Since(t0) 159 if restartDuration > 1*time.Second { 160 c.Fatalf("task did not restart immediately") 161 } 162 c.Assert(worker.Stop(runner), gc.IsNil) 163 } 164 165 func (*RunnerSuite) TestOneWorkerRestartDelay(c *gc.C) { 166 const delay = 100 * time.Millisecond 167 runner := worker.NewRunner(noneFatal, noImportance, delay) 168 starter := newTestWorkerStarter() 169 err := runner.StartWorker("id", testWorkerStart(starter)) 170 c.Assert(err, jc.ErrorIsNil) 171 starter.assertStarted(c, true) 172 starter.die <- fmt.Errorf("non-fatal error") 173 starter.assertStarted(c, false) 174 t0 := time.Now() 175 starter.assertStarted(c, true) 176 restartDuration := time.Since(t0) 177 if restartDuration < delay { 178 c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, delay) 179 } 180 c.Assert(worker.Stop(runner), gc.IsNil) 181 } 182 183 type errorLevel int 184 185 func (e errorLevel) Error() string { 186 return fmt.Sprintf("error with importance %d", e) 187 } 188 189 func (*RunnerSuite) TestErrorImportance(c *gc.C) { 190 moreImportant := func(err0, err1 error) bool { 191 return err0.(errorLevel) > err1.(errorLevel) 192 } 193 id := func(i int) string { return fmt.Sprint(i) } 194 runner := worker.NewRunner(allFatal, moreImportant, time.Millisecond) 195 for i := 0; i < 10; i++ { 196 starter := newTestWorkerStarter() 197 starter.stopErr = errorLevel(i) 198 err := runner.StartWorker(id(i), testWorkerStart(starter)) 199 c.Assert(err, jc.ErrorIsNil) 200 } 201 err := runner.StopWorker(id(4)) 202 c.Assert(err, jc.ErrorIsNil) 203 err = runner.Wait() 204 c.Assert(err, gc.Equals, errorLevel(9)) 205 } 206 207 func (*RunnerSuite) TestStartWorkerWhenDead(c *gc.C) { 208 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 209 c.Assert(worker.Stop(runner), gc.IsNil) 210 c.Assert(runner.StartWorker("foo", nil), gc.Equals, worker.ErrDead) 211 } 212 213 func (*RunnerSuite) TestStopWorkerWhenDead(c *gc.C) { 214 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 215 c.Assert(worker.Stop(runner), gc.IsNil) 216 c.Assert(runner.StopWorker("foo"), gc.Equals, worker.ErrDead) 217 } 218 219 func (*RunnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) { 220 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 221 var starters []*testWorkerStarter 222 for i := 0; i < 10; i++ { 223 starter := newTestWorkerStarter() 224 err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter)) 225 c.Assert(err, jc.ErrorIsNil) 226 starters = append(starters, starter) 227 } 228 for _, starter := range starters { 229 starter.assertStarted(c, true) 230 } 231 dieErr := errors.New("fatal error") 232 starters[4].die <- dieErr 233 err := runner.Wait() 234 c.Assert(err, gc.Equals, dieErr) 235 for _, starter := range starters { 236 starter.assertStarted(c, false) 237 } 238 } 239 240 func (*RunnerSuite) TestFatalErrorWhileStarting(c *gc.C) { 241 // Original deadlock problem that this tests for: 242 // A worker dies with fatal error while another worker 243 // is inside start(). runWorker can't send startInfo on startedc. 244 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 245 246 slowStarter := newTestWorkerStarter() 247 // make the startNotify channel synchronous so 248 // we can delay the start indefinitely. 249 slowStarter.startNotify = make(chan bool) 250 251 err := runner.StartWorker("slow starter", testWorkerStart(slowStarter)) 252 c.Assert(err, jc.ErrorIsNil) 253 254 fatalStarter := newTestWorkerStarter() 255 fatalStarter.startErr = fmt.Errorf("a fatal error") 256 257 err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter)) 258 c.Assert(err, jc.ErrorIsNil) 259 260 // Wait for the runner loop to react to the fatal 261 // error and go into final shutdown mode. 262 time.Sleep(10 * time.Millisecond) 263 264 // At this point, the loop is in shutdown mode, but the 265 // slowStarter's worker is still in its start function. 266 // When the start function continues (the first assertStarted 267 // allows that to happen) and returns the new Worker, 268 // runWorker will try to send it on runner.startedc. 269 // This test makes sure that succeeds ok. 270 271 slowStarter.assertStarted(c, true) 272 slowStarter.assertStarted(c, false) 273 err = runner.Wait() 274 c.Assert(err, gc.Equals, fatalStarter.startErr) 275 } 276 277 func (*RunnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) { 278 // Original deadlock problem that this tests for: 279 // A worker tries to call StartWorker in its start function 280 // at the same time another worker dies with a fatal error. 281 // It might not be able to send on startc. 282 runner := worker.NewRunner(allFatal, noImportance, time.Millisecond) 283 284 selfStarter := newTestWorkerStarter() 285 // make the startNotify channel synchronous so 286 // we can delay the start indefinitely. 287 selfStarter.startNotify = make(chan bool) 288 selfStarter.hook = func() { 289 runner.StartWorker("another", func() (worker.Worker, error) { 290 return nil, fmt.Errorf("no worker started") 291 }) 292 } 293 err := runner.StartWorker("self starter", testWorkerStart(selfStarter)) 294 c.Assert(err, jc.ErrorIsNil) 295 296 fatalStarter := newTestWorkerStarter() 297 fatalStarter.startErr = fmt.Errorf("a fatal error") 298 299 err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter)) 300 c.Assert(err, jc.ErrorIsNil) 301 302 // Wait for the runner loop to react to the fatal 303 // error and go into final shutdown mode. 304 time.Sleep(10 * time.Millisecond) 305 306 // At this point, the loop is in shutdown mode, but the 307 // selfStarter's worker is still in its start function. 308 // When the start function continues (the first assertStarted 309 // allows that to happen) it will try to create a new 310 // worker. This failed in an earlier version of the code because the 311 // loop was not ready to receive start requests. 312 313 selfStarter.assertStarted(c, true) 314 selfStarter.assertStarted(c, false) 315 err = runner.Wait() 316 c.Assert(err, gc.Equals, fatalStarter.startErr) 317 } 318 319 type testWorkerStarter struct { 320 startCount int32 321 322 // startNotify receives true when the worker starts 323 // and false when it exits. If startErr is non-nil, 324 // it sends false only. 325 startNotify chan bool 326 327 // If stopWait is non-nil, the worker will 328 // wait for a value to be sent on it before 329 // exiting. 330 stopWait chan struct{} 331 332 // Sending a value on die causes the worker 333 // to die with the given error. 334 die chan error 335 336 // If startErr is non-nil, the worker will die immediately 337 // with this error after starting. 338 startErr error 339 340 // If stopErr is non-nil, the worker will die with this 341 // error when asked to stop. 342 stopErr error 343 344 // The hook function is called after starting the worker. 345 hook func() 346 } 347 348 func newTestWorkerStarter() *testWorkerStarter { 349 return &testWorkerStarter{ 350 die: make(chan error, 1), 351 startNotify: make(chan bool, 100), 352 hook: func() {}, 353 } 354 } 355 356 func (starter *testWorkerStarter) assertStarted(c *gc.C, started bool) { 357 select { 358 case isStarted := <-starter.startNotify: 359 c.Assert(isStarted, gc.Equals, started) 360 case <-time.After(testing.LongWait): 361 c.Fatalf("timed out waiting for start notification") 362 } 363 } 364 365 func (starter *testWorkerStarter) assertNeverStarted(c *gc.C) { 366 select { 367 case isStarted := <-starter.startNotify: 368 c.Fatalf("got unexpected start notification: %v", isStarted) 369 case <-time.After(worker.RestartDelay + testing.ShortWait): 370 } 371 } 372 373 func testWorkerStart(starter *testWorkerStarter) func() (worker.Worker, error) { 374 return func() (worker.Worker, error) { 375 return starter.start() 376 } 377 } 378 379 func (starter *testWorkerStarter) start() (worker.Worker, error) { 380 if count := atomic.AddInt32(&starter.startCount, 1); count != 1 { 381 panic(fmt.Errorf("unexpected start count %d; expected 1", count)) 382 } 383 if starter.startErr != nil { 384 starter.startNotify <- false 385 return nil, starter.startErr 386 } 387 task := &testWorker{ 388 starter: starter, 389 } 390 starter.startNotify <- true 391 go task.run() 392 return task, nil 393 } 394 395 type testWorker struct { 396 starter *testWorkerStarter 397 tomb tomb.Tomb 398 } 399 400 func (t *testWorker) Kill() { 401 t.tomb.Kill(nil) 402 } 403 404 func (t *testWorker) Wait() error { 405 return t.tomb.Wait() 406 } 407 408 func (t *testWorker) run() { 409 defer t.tomb.Done() 410 411 t.starter.hook() 412 select { 413 case <-t.tomb.Dying(): 414 t.tomb.Kill(t.starter.stopErr) 415 case err := <-t.starter.die: 416 t.tomb.Kill(err) 417 } 418 if t.starter.stopWait != nil { 419 <-t.starter.stopWait 420 } 421 t.starter.startNotify <- false 422 if count := atomic.AddInt32(&t.starter.startCount, -1); count != 0 { 423 panic(fmt.Errorf("unexpected start count %d; expected 0", count)) 424 } 425 }