github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/worker/runner_test.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package worker_test 5 6 import ( 7 "errors" 8 "fmt" 9 "sync/atomic" 10 "time" 11 12 jc "github.com/juju/testing/checkers" 13 gc "gopkg.in/check.v1" 14 "launchpad.net/tomb" 15 16 "github.com/juju/juju/testing" 17 "github.com/juju/juju/worker" 18 ) 19 20 type runnerSuite struct { 21 testing.BaseSuite 22 } 23 24 var _ = gc.Suite(&runnerSuite{}) 25 26 func noneFatal(error) bool { 27 return false 28 } 29 30 func allFatal(error) bool { 31 return true 32 } 33 34 func noImportance(err0, err1 error) bool { 35 return false 36 } 37 38 func (s *runnerSuite) SetUpTest(c *gc.C) { 39 s.BaseSuite.SetUpTest(c) 40 // Avoid patching RestartDealy to zero, as it changes worker behaviour. 41 s.PatchValue(&worker.RestartDelay, time.Duration(time.Millisecond)) 42 } 43 44 func (*runnerSuite) TestOneWorkerStart(c *gc.C) { 45 runner := worker.NewRunner(noneFatal, noImportance) 46 starter := newTestWorkerStarter() 47 err := runner.StartWorker("id", testWorkerStart(starter)) 48 c.Assert(err, jc.ErrorIsNil) 49 starter.assertStarted(c, true) 50 51 c.Assert(worker.Stop(runner), gc.IsNil) 52 starter.assertStarted(c, false) 53 } 54 55 func (*runnerSuite) TestOneWorkerFinish(c *gc.C) { 56 runner := worker.NewRunner(noneFatal, noImportance) 57 starter := newTestWorkerStarter() 58 err := runner.StartWorker("id", testWorkerStart(starter)) 59 c.Assert(err, jc.ErrorIsNil) 60 starter.assertStarted(c, true) 61 62 starter.die <- nil 63 starter.assertStarted(c, false) 64 starter.assertNeverStarted(c) 65 66 c.Assert(worker.Stop(runner), gc.IsNil) 67 } 68 69 func (*runnerSuite) TestOneWorkerRestart(c *gc.C) { 70 runner := worker.NewRunner(noneFatal, noImportance) 71 starter := newTestWorkerStarter() 72 err := runner.StartWorker("id", testWorkerStart(starter)) 73 c.Assert(err, jc.ErrorIsNil) 74 starter.assertStarted(c, true) 75 76 // Check it restarts a few times time. 77 for i := 0; i < 3; i++ { 78 starter.die <- fmt.Errorf("an error") 79 starter.assertStarted(c, false) 80 starter.assertStarted(c, true) 81 } 82 83 c.Assert(worker.Stop(runner), gc.IsNil) 84 starter.assertStarted(c, false) 85 } 86 87 func (*runnerSuite) TestOneWorkerStartFatalError(c *gc.C) { 88 runner := worker.NewRunner(allFatal, noImportance) 89 starter := newTestWorkerStarter() 90 starter.startErr = errors.New("cannot start test task") 91 err := runner.StartWorker("id", testWorkerStart(starter)) 92 c.Assert(err, jc.ErrorIsNil) 93 err = runner.Wait() 94 c.Assert(err, gc.Equals, starter.startErr) 95 } 96 97 func (*runnerSuite) TestOneWorkerDieFatalError(c *gc.C) { 98 runner := worker.NewRunner(allFatal, noImportance) 99 starter := newTestWorkerStarter() 100 err := runner.StartWorker("id", testWorkerStart(starter)) 101 c.Assert(err, jc.ErrorIsNil) 102 starter.assertStarted(c, true) 103 dieErr := errors.New("error when running") 104 starter.die <- dieErr 105 err = runner.Wait() 106 c.Assert(err, gc.Equals, dieErr) 107 starter.assertStarted(c, false) 108 } 109 110 func (*runnerSuite) TestOneWorkerStartStop(c *gc.C) { 111 runner := worker.NewRunner(allFatal, noImportance) 112 starter := newTestWorkerStarter() 113 err := runner.StartWorker("id", testWorkerStart(starter)) 114 c.Assert(err, jc.ErrorIsNil) 115 starter.assertStarted(c, true) 116 err = runner.StopWorker("id") 117 c.Assert(err, jc.ErrorIsNil) 118 starter.assertStarted(c, false) 119 c.Assert(worker.Stop(runner), gc.IsNil) 120 } 121 122 func (*runnerSuite) TestOneWorkerStopFatalError(c *gc.C) { 123 runner := worker.NewRunner(allFatal, noImportance) 124 starter := newTestWorkerStarter() 125 starter.stopErr = errors.New("stop error") 126 err := runner.StartWorker("id", testWorkerStart(starter)) 127 c.Assert(err, jc.ErrorIsNil) 128 starter.assertStarted(c, true) 129 err = runner.StopWorker("id") 130 c.Assert(err, jc.ErrorIsNil) 131 err = runner.Wait() 132 c.Assert(err, gc.Equals, starter.stopErr) 133 } 134 135 func (*runnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) { 136 worker.RestartDelay = 3 * time.Second 137 runner := worker.NewRunner(allFatal, noImportance) 138 starter := newTestWorkerStarter() 139 starter.stopWait = make(chan struct{}) 140 141 err := runner.StartWorker("id", testWorkerStart(starter)) 142 c.Assert(err, jc.ErrorIsNil) 143 starter.assertStarted(c, true) 144 err = runner.StopWorker("id") 145 c.Assert(err, jc.ErrorIsNil) 146 err = runner.StartWorker("id", testWorkerStart(starter)) 147 c.Assert(err, jc.ErrorIsNil) 148 149 close(starter.stopWait) 150 starter.assertStarted(c, false) 151 // Check that the task is restarted immediately without 152 // the usual restart timeout delay. 153 t0 := time.Now() 154 starter.assertStarted(c, true) 155 restartDuration := time.Since(t0) 156 if restartDuration > 1*time.Second { 157 c.Fatalf("task did not restart immediately") 158 } 159 c.Assert(worker.Stop(runner), gc.IsNil) 160 } 161 162 func (*runnerSuite) TestOneWorkerRestartDelay(c *gc.C) { 163 worker.RestartDelay = 100 * time.Millisecond 164 runner := worker.NewRunner(noneFatal, noImportance) 165 starter := newTestWorkerStarter() 166 err := runner.StartWorker("id", testWorkerStart(starter)) 167 c.Assert(err, jc.ErrorIsNil) 168 starter.assertStarted(c, true) 169 starter.die <- fmt.Errorf("non-fatal error") 170 starter.assertStarted(c, false) 171 t0 := time.Now() 172 starter.assertStarted(c, true) 173 restartDuration := time.Since(t0) 174 if restartDuration < worker.RestartDelay { 175 c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, worker.RestartDelay) 176 } 177 c.Assert(worker.Stop(runner), gc.IsNil) 178 } 179 180 type errorLevel int 181 182 func (e errorLevel) Error() string { 183 return fmt.Sprintf("error with importance %d", e) 184 } 185 186 func (*runnerSuite) TestErrorImportance(c *gc.C) { 187 moreImportant := func(err0, err1 error) bool { 188 return err0.(errorLevel) > err1.(errorLevel) 189 } 190 id := func(i int) string { return fmt.Sprint(i) } 191 runner := worker.NewRunner(allFatal, moreImportant) 192 for i := 0; i < 10; i++ { 193 starter := newTestWorkerStarter() 194 starter.stopErr = errorLevel(i) 195 err := runner.StartWorker(id(i), testWorkerStart(starter)) 196 c.Assert(err, jc.ErrorIsNil) 197 } 198 err := runner.StopWorker(id(4)) 199 c.Assert(err, jc.ErrorIsNil) 200 err = runner.Wait() 201 c.Assert(err, gc.Equals, errorLevel(9)) 202 } 203 204 func (*runnerSuite) TestStartWorkerWhenDead(c *gc.C) { 205 runner := worker.NewRunner(allFatal, noImportance) 206 c.Assert(worker.Stop(runner), gc.IsNil) 207 c.Assert(runner.StartWorker("foo", nil), gc.Equals, worker.ErrDead) 208 } 209 210 func (*runnerSuite) TestStopWorkerWhenDead(c *gc.C) { 211 runner := worker.NewRunner(allFatal, noImportance) 212 c.Assert(worker.Stop(runner), gc.IsNil) 213 c.Assert(runner.StopWorker("foo"), gc.Equals, worker.ErrDead) 214 } 215 216 func (*runnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) { 217 runner := worker.NewRunner(allFatal, noImportance) 218 var starters []*testWorkerStarter 219 for i := 0; i < 10; i++ { 220 starter := newTestWorkerStarter() 221 err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter)) 222 c.Assert(err, jc.ErrorIsNil) 223 starters = append(starters, starter) 224 } 225 for _, starter := range starters { 226 starter.assertStarted(c, true) 227 } 228 dieErr := errors.New("fatal error") 229 starters[4].die <- dieErr 230 err := runner.Wait() 231 c.Assert(err, gc.Equals, dieErr) 232 for _, starter := range starters { 233 starter.assertStarted(c, false) 234 } 235 } 236 237 func (*runnerSuite) TestFatalErrorWhileStarting(c *gc.C) { 238 // Original deadlock problem that this tests for: 239 // A worker dies with fatal error while another worker 240 // is inside start(). runWorker can't send startInfo on startedc. 241 runner := worker.NewRunner(allFatal, noImportance) 242 243 slowStarter := newTestWorkerStarter() 244 // make the startNotify channel synchronous so 245 // we can delay the start indefinitely. 246 slowStarter.startNotify = make(chan bool) 247 248 err := runner.StartWorker("slow starter", testWorkerStart(slowStarter)) 249 c.Assert(err, jc.ErrorIsNil) 250 251 fatalStarter := newTestWorkerStarter() 252 fatalStarter.startErr = fmt.Errorf("a fatal error") 253 254 err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter)) 255 c.Assert(err, jc.ErrorIsNil) 256 257 // Wait for the runner loop to react to the fatal 258 // error and go into final shutdown mode. 259 time.Sleep(10 * time.Millisecond) 260 261 // At this point, the loop is in shutdown mode, but the 262 // slowStarter's worker is still in its start function. 263 // When the start function continues (the first assertStarted 264 // allows that to happen) and returns the new Worker, 265 // runWorker will try to send it on runner.startedc. 266 // This test makes sure that succeeds ok. 267 268 slowStarter.assertStarted(c, true) 269 slowStarter.assertStarted(c, false) 270 err = runner.Wait() 271 c.Assert(err, gc.Equals, fatalStarter.startErr) 272 } 273 274 func (*runnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) { 275 // Original deadlock problem that this tests for: 276 // A worker tries to call StartWorker in its start function 277 // at the same time another worker dies with a fatal error. 278 // It might not be able to send on startc. 279 runner := worker.NewRunner(allFatal, noImportance) 280 281 selfStarter := newTestWorkerStarter() 282 // make the startNotify channel synchronous so 283 // we can delay the start indefinitely. 284 selfStarter.startNotify = make(chan bool) 285 selfStarter.hook = func() { 286 runner.StartWorker("another", func() (worker.Worker, error) { 287 return nil, fmt.Errorf("no worker started") 288 }) 289 } 290 err := runner.StartWorker("self starter", testWorkerStart(selfStarter)) 291 c.Assert(err, jc.ErrorIsNil) 292 293 fatalStarter := newTestWorkerStarter() 294 fatalStarter.startErr = fmt.Errorf("a fatal error") 295 296 err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter)) 297 c.Assert(err, jc.ErrorIsNil) 298 299 // Wait for the runner loop to react to the fatal 300 // error and go into final shutdown mode. 301 time.Sleep(10 * time.Millisecond) 302 303 // At this point, the loop is in shutdown mode, but the 304 // selfStarter's worker is still in its start function. 305 // When the start function continues (the first assertStarted 306 // allows that to happen) it will try to create a new 307 // worker. This failed in an earlier version of the code because the 308 // loop was not ready to receive start requests. 309 310 selfStarter.assertStarted(c, true) 311 selfStarter.assertStarted(c, false) 312 err = runner.Wait() 313 c.Assert(err, gc.Equals, fatalStarter.startErr) 314 } 315 316 type testWorkerStarter struct { 317 startCount int32 318 319 // startNotify receives true when the worker starts 320 // and false when it exits. If startErr is non-nil, 321 // it sends false only. 322 startNotify chan bool 323 324 // If stopWait is non-nil, the worker will 325 // wait for a value to be sent on it before 326 // exiting. 327 stopWait chan struct{} 328 329 // Sending a value on die causes the worker 330 // to die with the given error. 331 die chan error 332 333 // If startErr is non-nil, the worker will die immediately 334 // with this error after starting. 335 startErr error 336 337 // If stopErr is non-nil, the worker will die with this 338 // error when asked to stop. 339 stopErr error 340 341 // The hook function is called after starting the worker. 342 hook func() 343 } 344 345 func newTestWorkerStarter() *testWorkerStarter { 346 return &testWorkerStarter{ 347 die: make(chan error, 1), 348 startNotify: make(chan bool, 100), 349 hook: func() {}, 350 } 351 } 352 353 func (starter *testWorkerStarter) assertStarted(c *gc.C, started bool) { 354 select { 355 case isStarted := <-starter.startNotify: 356 c.Assert(isStarted, gc.Equals, started) 357 case <-time.After(1 * time.Second): 358 c.Fatalf("timed out waiting for start notification") 359 } 360 } 361 362 func (starter *testWorkerStarter) assertNeverStarted(c *gc.C) { 363 select { 364 case isStarted := <-starter.startNotify: 365 c.Fatalf("got unexpected start notification: %v", isStarted) 366 case <-time.After(worker.RestartDelay + testing.ShortWait): 367 } 368 } 369 370 func testWorkerStart(starter *testWorkerStarter) func() (worker.Worker, error) { 371 return func() (worker.Worker, error) { 372 return starter.start() 373 } 374 } 375 376 func (starter *testWorkerStarter) start() (worker.Worker, error) { 377 if count := atomic.AddInt32(&starter.startCount, 1); count != 1 { 378 panic(fmt.Errorf("unexpected start count %d; expected 1", count)) 379 } 380 if starter.startErr != nil { 381 starter.startNotify <- false 382 return nil, starter.startErr 383 } 384 task := &testWorker{ 385 starter: starter, 386 } 387 starter.startNotify <- true 388 go task.run() 389 return task, nil 390 } 391 392 type testWorker struct { 393 starter *testWorkerStarter 394 tomb tomb.Tomb 395 } 396 397 func (t *testWorker) Kill() { 398 t.tomb.Kill(nil) 399 } 400 401 func (t *testWorker) Wait() error { 402 return t.tomb.Wait() 403 } 404 405 func (t *testWorker) run() { 406 defer t.tomb.Done() 407 408 t.starter.hook() 409 select { 410 case <-t.tomb.Dying(): 411 t.tomb.Kill(t.starter.stopErr) 412 case err := <-t.starter.die: 413 t.tomb.Kill(err) 414 } 415 if t.starter.stopWait != nil { 416 <-t.starter.stopWait 417 } 418 t.starter.startNotify <- false 419 if count := atomic.AddInt32(&t.starter.startCount, -1); count != 0 { 420 panic(fmt.Errorf("unexpected start count %d; expected 0", count)) 421 } 422 }