github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/worker/runner_test.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package worker_test 5 6 import ( 7 "errors" 8 "fmt" 9 "sync/atomic" 10 "time" 11 12 jc "github.com/juju/testing/checkers" 13 gc "gopkg.in/check.v1" 14 "launchpad.net/tomb" 15 16 "github.com/juju/juju/testing" 17 "github.com/juju/juju/worker" 18 ) 19 20 type runnerSuite struct { 21 testing.BaseSuite 22 } 23 24 var _ = gc.Suite(&runnerSuite{}) 25 26 func noneFatal(error) bool { 27 return false 28 } 29 30 func allFatal(error) bool { 31 return true 32 } 33 34 func noImportance(err0, err1 error) bool { 35 return false 36 } 37 38 func (s *runnerSuite) SetUpTest(c *gc.C) { 39 s.BaseSuite.SetUpTest(c) 40 s.PatchValue(&worker.RestartDelay, time.Duration(0)) 41 } 42 43 func (*runnerSuite) TestOneWorkerStart(c *gc.C) { 44 runner := worker.NewRunner(noneFatal, noImportance) 45 starter := newTestWorkerStarter() 46 err := runner.StartWorker("id", testWorkerStart(starter)) 47 c.Assert(err, jc.ErrorIsNil) 48 starter.assertStarted(c, true) 49 50 c.Assert(worker.Stop(runner), gc.IsNil) 51 starter.assertStarted(c, false) 52 } 53 54 func (*runnerSuite) TestOneWorkerFinish(c *gc.C) { 55 runner := worker.NewRunner(noneFatal, noImportance) 56 starter := newTestWorkerStarter() 57 err := runner.StartWorker("id", testWorkerStart(starter)) 58 c.Assert(err, jc.ErrorIsNil) 59 starter.assertStarted(c, true) 60 61 starter.die <- nil 62 starter.assertStarted(c, false) 63 starter.assertNeverStarted(c) 64 65 c.Assert(worker.Stop(runner), gc.IsNil) 66 } 67 68 func (*runnerSuite) TestOneWorkerRestart(c *gc.C) { 69 runner := worker.NewRunner(noneFatal, noImportance) 70 starter := newTestWorkerStarter() 71 err := runner.StartWorker("id", testWorkerStart(starter)) 72 c.Assert(err, jc.ErrorIsNil) 73 starter.assertStarted(c, true) 74 75 // Check it restarts a few times time. 76 for i := 0; i < 3; i++ { 77 starter.die <- fmt.Errorf("an error") 78 starter.assertStarted(c, false) 79 starter.assertStarted(c, true) 80 } 81 82 c.Assert(worker.Stop(runner), gc.IsNil) 83 starter.assertStarted(c, false) 84 } 85 86 func (*runnerSuite) TestOneWorkerStartFatalError(c *gc.C) { 87 runner := worker.NewRunner(allFatal, noImportance) 88 starter := newTestWorkerStarter() 89 starter.startErr = errors.New("cannot start test task") 90 err := runner.StartWorker("id", testWorkerStart(starter)) 91 c.Assert(err, jc.ErrorIsNil) 92 err = runner.Wait() 93 c.Assert(err, gc.Equals, starter.startErr) 94 } 95 96 func (*runnerSuite) TestOneWorkerDieFatalError(c *gc.C) { 97 runner := worker.NewRunner(allFatal, noImportance) 98 starter := newTestWorkerStarter() 99 err := runner.StartWorker("id", testWorkerStart(starter)) 100 c.Assert(err, jc.ErrorIsNil) 101 starter.assertStarted(c, true) 102 dieErr := errors.New("error when running") 103 starter.die <- dieErr 104 err = runner.Wait() 105 c.Assert(err, gc.Equals, dieErr) 106 starter.assertStarted(c, false) 107 } 108 109 func (*runnerSuite) TestOneWorkerStartStop(c *gc.C) { 110 runner := worker.NewRunner(allFatal, noImportance) 111 starter := newTestWorkerStarter() 112 err := runner.StartWorker("id", testWorkerStart(starter)) 113 c.Assert(err, jc.ErrorIsNil) 114 starter.assertStarted(c, true) 115 err = runner.StopWorker("id") 116 c.Assert(err, jc.ErrorIsNil) 117 starter.assertStarted(c, false) 118 c.Assert(worker.Stop(runner), gc.IsNil) 119 } 120 121 func (*runnerSuite) TestOneWorkerStopFatalError(c *gc.C) { 122 runner := worker.NewRunner(allFatal, noImportance) 123 starter := newTestWorkerStarter() 124 starter.stopErr = errors.New("stop error") 125 err := runner.StartWorker("id", testWorkerStart(starter)) 126 c.Assert(err, jc.ErrorIsNil) 127 starter.assertStarted(c, true) 128 err = runner.StopWorker("id") 129 c.Assert(err, jc.ErrorIsNil) 130 err = runner.Wait() 131 c.Assert(err, gc.Equals, starter.stopErr) 132 } 133 134 func (*runnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) { 135 worker.RestartDelay = 3 * time.Second 136 runner := worker.NewRunner(allFatal, noImportance) 137 starter := newTestWorkerStarter() 138 starter.stopWait = make(chan struct{}) 139 140 err := runner.StartWorker("id", testWorkerStart(starter)) 141 c.Assert(err, jc.ErrorIsNil) 142 starter.assertStarted(c, true) 143 err = runner.StopWorker("id") 144 c.Assert(err, jc.ErrorIsNil) 145 err = runner.StartWorker("id", testWorkerStart(starter)) 146 c.Assert(err, jc.ErrorIsNil) 147 148 close(starter.stopWait) 149 starter.assertStarted(c, false) 150 // Check that the task is restarted immediately without 151 // the usual restart timeout delay. 152 t0 := time.Now() 153 starter.assertStarted(c, true) 154 restartDuration := time.Since(t0) 155 if restartDuration > 1*time.Second { 156 c.Fatalf("task did not restart immediately") 157 } 158 c.Assert(worker.Stop(runner), gc.IsNil) 159 } 160 161 func (*runnerSuite) TestOneWorkerRestartDelay(c *gc.C) { 162 worker.RestartDelay = 100 * time.Millisecond 163 runner := worker.NewRunner(noneFatal, noImportance) 164 starter := newTestWorkerStarter() 165 err := runner.StartWorker("id", testWorkerStart(starter)) 166 c.Assert(err, jc.ErrorIsNil) 167 starter.assertStarted(c, true) 168 starter.die <- fmt.Errorf("non-fatal error") 169 starter.assertStarted(c, false) 170 t0 := time.Now() 171 starter.assertStarted(c, true) 172 restartDuration := time.Since(t0) 173 if restartDuration < worker.RestartDelay { 174 c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, worker.RestartDelay) 175 } 176 c.Assert(worker.Stop(runner), gc.IsNil) 177 } 178 179 type errorLevel int 180 181 func (e errorLevel) Error() string { 182 return fmt.Sprintf("error with importance %d", e) 183 } 184 185 func (*runnerSuite) TestErrorImportance(c *gc.C) { 186 moreImportant := func(err0, err1 error) bool { 187 return err0.(errorLevel) > err1.(errorLevel) 188 } 189 id := func(i int) string { return fmt.Sprint(i) } 190 runner := worker.NewRunner(allFatal, moreImportant) 191 for i := 0; i < 10; i++ { 192 starter := newTestWorkerStarter() 193 starter.stopErr = errorLevel(i) 194 err := runner.StartWorker(id(i), testWorkerStart(starter)) 195 c.Assert(err, jc.ErrorIsNil) 196 } 197 err := runner.StopWorker(id(4)) 198 c.Assert(err, jc.ErrorIsNil) 199 err = runner.Wait() 200 c.Assert(err, gc.Equals, errorLevel(9)) 201 } 202 203 func (*runnerSuite) TestStartWorkerWhenDead(c *gc.C) { 204 runner := worker.NewRunner(allFatal, noImportance) 205 c.Assert(worker.Stop(runner), gc.IsNil) 206 c.Assert(runner.StartWorker("foo", nil), gc.Equals, worker.ErrDead) 207 } 208 209 func (*runnerSuite) TestStopWorkerWhenDead(c *gc.C) { 210 runner := worker.NewRunner(allFatal, noImportance) 211 c.Assert(worker.Stop(runner), gc.IsNil) 212 c.Assert(runner.StopWorker("foo"), gc.Equals, worker.ErrDead) 213 } 214 215 func (*runnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) { 216 runner := worker.NewRunner(allFatal, noImportance) 217 var starters []*testWorkerStarter 218 for i := 0; i < 10; i++ { 219 starter := newTestWorkerStarter() 220 err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter)) 221 c.Assert(err, jc.ErrorIsNil) 222 starters = append(starters, starter) 223 } 224 for _, starter := range starters { 225 starter.assertStarted(c, true) 226 } 227 dieErr := errors.New("fatal error") 228 starters[4].die <- dieErr 229 err := runner.Wait() 230 c.Assert(err, gc.Equals, dieErr) 231 for _, starter := range starters { 232 starter.assertStarted(c, false) 233 } 234 } 235 236 func (*runnerSuite) TestFatalErrorWhileStarting(c *gc.C) { 237 // Original deadlock problem that this tests for: 238 // A worker dies with fatal error while another worker 239 // is inside start(). runWorker can't send startInfo on startedc. 240 runner := worker.NewRunner(allFatal, noImportance) 241 242 slowStarter := newTestWorkerStarter() 243 // make the startNotify channel synchronous so 244 // we can delay the start indefinitely. 245 slowStarter.startNotify = make(chan bool) 246 247 err := runner.StartWorker("slow starter", testWorkerStart(slowStarter)) 248 c.Assert(err, jc.ErrorIsNil) 249 250 fatalStarter := newTestWorkerStarter() 251 fatalStarter.startErr = fmt.Errorf("a fatal error") 252 253 err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter)) 254 c.Assert(err, jc.ErrorIsNil) 255 256 // Wait for the runner loop to react to the fatal 257 // error and go into final shutdown mode. 258 time.Sleep(10 * time.Millisecond) 259 260 // At this point, the loop is in shutdown mode, but the 261 // slowStarter's worker is still in its start function. 262 // When the start function continues (the first assertStarted 263 // allows that to happen) and returns the new Worker, 264 // runWorker will try to send it on runner.startedc. 265 // This test makes sure that succeeds ok. 266 267 slowStarter.assertStarted(c, true) 268 slowStarter.assertStarted(c, false) 269 err = runner.Wait() 270 c.Assert(err, gc.Equals, fatalStarter.startErr) 271 } 272 273 func (*runnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) { 274 // Original deadlock problem that this tests for: 275 // A worker tries to call StartWorker in its start function 276 // at the same time another worker dies with a fatal error. 277 // It might not be able to send on startc. 278 runner := worker.NewRunner(allFatal, noImportance) 279 280 selfStarter := newTestWorkerStarter() 281 // make the startNotify channel synchronous so 282 // we can delay the start indefinitely. 283 selfStarter.startNotify = make(chan bool) 284 selfStarter.hook = func() { 285 runner.StartWorker("another", func() (worker.Worker, error) { 286 return nil, fmt.Errorf("no worker started") 287 }) 288 } 289 err := runner.StartWorker("self starter", testWorkerStart(selfStarter)) 290 c.Assert(err, jc.ErrorIsNil) 291 292 fatalStarter := newTestWorkerStarter() 293 fatalStarter.startErr = fmt.Errorf("a fatal error") 294 295 err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter)) 296 c.Assert(err, jc.ErrorIsNil) 297 298 // Wait for the runner loop to react to the fatal 299 // error and go into final shutdown mode. 300 time.Sleep(10 * time.Millisecond) 301 302 // At this point, the loop is in shutdown mode, but the 303 // selfStarter's worker is still in its start function. 304 // When the start function continues (the first assertStarted 305 // allows that to happen) it will try to create a new 306 // worker. This failed in an earlier version of the code because the 307 // loop was not ready to receive start requests. 308 309 selfStarter.assertStarted(c, true) 310 selfStarter.assertStarted(c, false) 311 err = runner.Wait() 312 c.Assert(err, gc.Equals, fatalStarter.startErr) 313 } 314 315 type testWorkerStarter struct { 316 startCount int32 317 318 // startNotify receives true when the worker starts 319 // and false when it exits. If startErr is non-nil, 320 // it sends false only. 321 startNotify chan bool 322 323 // If stopWait is non-nil, the worker will 324 // wait for a value to be sent on it before 325 // exiting. 326 stopWait chan struct{} 327 328 // Sending a value on die causes the worker 329 // to die with the given error. 330 die chan error 331 332 // If startErr is non-nil, the worker will die immediately 333 // with this error after starting. 334 startErr error 335 336 // If stopErr is non-nil, the worker will die with this 337 // error when asked to stop. 338 stopErr error 339 340 // The hook function is called after starting the worker. 341 hook func() 342 } 343 344 func newTestWorkerStarter() *testWorkerStarter { 345 return &testWorkerStarter{ 346 die: make(chan error, 1), 347 startNotify: make(chan bool, 100), 348 hook: func() {}, 349 } 350 } 351 352 func (starter *testWorkerStarter) assertStarted(c *gc.C, started bool) { 353 select { 354 case isStarted := <-starter.startNotify: 355 c.Assert(isStarted, gc.Equals, started) 356 case <-time.After(1 * time.Second): 357 c.Fatalf("timed out waiting for start notification") 358 } 359 } 360 361 func (starter *testWorkerStarter) assertNeverStarted(c *gc.C) { 362 select { 363 case isStarted := <-starter.startNotify: 364 c.Fatalf("got unexpected start notification: %v", isStarted) 365 case <-time.After(worker.RestartDelay + testing.ShortWait): 366 } 367 } 368 369 func testWorkerStart(starter *testWorkerStarter) func() (worker.Worker, error) { 370 return func() (worker.Worker, error) { 371 return starter.start() 372 } 373 } 374 375 func (starter *testWorkerStarter) start() (worker.Worker, error) { 376 if count := atomic.AddInt32(&starter.startCount, 1); count != 1 { 377 panic(fmt.Errorf("unexpected start count %d; expected 1", count)) 378 } 379 if starter.startErr != nil { 380 starter.startNotify <- false 381 return nil, starter.startErr 382 } 383 task := &testWorker{ 384 starter: starter, 385 } 386 starter.startNotify <- true 387 go task.run() 388 return task, nil 389 } 390 391 type testWorker struct { 392 starter *testWorkerStarter 393 tomb tomb.Tomb 394 } 395 396 func (t *testWorker) Kill() { 397 t.tomb.Kill(nil) 398 } 399 400 func (t *testWorker) Wait() error { 401 return t.tomb.Wait() 402 } 403 404 func (t *testWorker) run() { 405 defer t.tomb.Done() 406 407 t.starter.hook() 408 select { 409 case <-t.tomb.Dying(): 410 t.tomb.Kill(t.starter.stopErr) 411 case err := <-t.starter.die: 412 t.tomb.Kill(err) 413 } 414 if t.starter.stopWait != nil { 415 <-t.starter.stopWait 416 } 417 t.starter.startNotify <- false 418 if count := atomic.AddInt32(&t.starter.startCount, -1); count != 0 { 419 panic(fmt.Errorf("unexpected start count %d; expected 0", count)) 420 } 421 }