github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/runner_test.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package worker_test
     5  
     6  import (
     7  	"fmt"
     8  	"sync/atomic"
     9  	"time"
    10  
    11  	"github.com/juju/errors"
    12  	jc "github.com/juju/testing/checkers"
    13  	gc "gopkg.in/check.v1"
    14  	"gopkg.in/tomb.v1"
    15  
    16  	"github.com/juju/juju/testing"
    17  	"github.com/juju/juju/worker"
    18  )
    19  
// RunnerSuite groups the tests in this file that exercise runners created
// with worker.NewRunner. It embeds testing.BaseSuite from the juju testing
// package.
type RunnerSuite struct {
	testing.BaseSuite
}

// Register RunnerSuite with gocheck.
var _ = gc.Suite(&RunnerSuite{})
    25  
    26  func noneFatal(error) bool {
    27  	return false
    28  }
    29  
    30  func allFatal(error) bool {
    31  	return true
    32  }
    33  
    34  func noImportance(err0, err1 error) bool {
    35  	return false
    36  }
    37  
    38  func (*RunnerSuite) TestOneWorkerStart(c *gc.C) {
    39  	runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond)
    40  	starter := newTestWorkerStarter()
    41  	err := runner.StartWorker("id", testWorkerStart(starter))
    42  	c.Assert(err, jc.ErrorIsNil)
    43  	starter.assertStarted(c, true)
    44  
    45  	c.Assert(worker.Stop(runner), gc.IsNil)
    46  	starter.assertStarted(c, false)
    47  }
    48  
    49  func (*RunnerSuite) TestOneWorkerFinish(c *gc.C) {
    50  	runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond)
    51  	starter := newTestWorkerStarter()
    52  	err := runner.StartWorker("id", testWorkerStart(starter))
    53  	c.Assert(err, jc.ErrorIsNil)
    54  	starter.assertStarted(c, true)
    55  
    56  	starter.die <- nil
    57  	starter.assertStarted(c, false)
    58  	starter.assertNeverStarted(c)
    59  
    60  	c.Assert(worker.Stop(runner), gc.IsNil)
    61  }
    62  
    63  func (*RunnerSuite) TestOneWorkerRestart(c *gc.C) {
    64  	runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond)
    65  	starter := newTestWorkerStarter()
    66  	err := runner.StartWorker("id", testWorkerStart(starter))
    67  	c.Assert(err, jc.ErrorIsNil)
    68  	starter.assertStarted(c, true)
    69  
    70  	// Check it restarts a few times time.
    71  	for i := 0; i < 3; i++ {
    72  		starter.die <- fmt.Errorf("an error")
    73  		starter.assertStarted(c, false)
    74  		starter.assertStarted(c, true)
    75  	}
    76  
    77  	c.Assert(worker.Stop(runner), gc.IsNil)
    78  	starter.assertStarted(c, false)
    79  }
    80  
    81  func (*RunnerSuite) TestOneWorkerStartFatalError(c *gc.C) {
    82  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
    83  	starter := newTestWorkerStarter()
    84  	starter.startErr = errors.New("cannot start test task")
    85  	err := runner.StartWorker("id", testWorkerStart(starter))
    86  	c.Assert(err, jc.ErrorIsNil)
    87  	err = runner.Wait()
    88  	c.Assert(err, gc.Equals, starter.startErr)
    89  }
    90  
    91  func (*RunnerSuite) TestOneWorkerDieFatalError(c *gc.C) {
    92  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
    93  	starter := newTestWorkerStarter()
    94  	err := runner.StartWorker("id", testWorkerStart(starter))
    95  	c.Assert(err, jc.ErrorIsNil)
    96  	starter.assertStarted(c, true)
    97  	dieErr := errors.New("error when running")
    98  	starter.die <- dieErr
    99  	err = runner.Wait()
   100  	c.Assert(err, gc.Equals, dieErr)
   101  	starter.assertStarted(c, false)
   102  }
   103  
   104  func (*RunnerSuite) TestOneWorkerStartStop(c *gc.C) {
   105  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   106  	starter := newTestWorkerStarter()
   107  	err := runner.StartWorker("id", testWorkerStart(starter))
   108  	c.Assert(err, jc.ErrorIsNil)
   109  	starter.assertStarted(c, true)
   110  	err = runner.StopWorker("id")
   111  	c.Assert(err, jc.ErrorIsNil)
   112  	starter.assertStarted(c, false)
   113  	c.Assert(worker.Stop(runner), gc.IsNil)
   114  }
   115  
   116  func (*RunnerSuite) TestOneWorkerStopFatalError(c *gc.C) {
   117  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   118  	starter := newTestWorkerStarter()
   119  	starter.stopErr = errors.New("stop error")
   120  	err := runner.StartWorker("id", testWorkerStart(starter))
   121  	c.Assert(err, jc.ErrorIsNil)
   122  	starter.assertStarted(c, true)
   123  	err = runner.StopWorker("id")
   124  	c.Assert(err, jc.ErrorIsNil)
   125  	err = runner.Wait()
   126  	c.Assert(err, gc.Equals, starter.stopErr)
   127  }
   128  
   129  func (*RunnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) {
   130  	runner := worker.NewRunner(allFatal, noImportance, 3*time.Second)
   131  	starter := newTestWorkerStarter()
   132  	starter.stopWait = make(chan struct{})
   133  
   134  	// Start a worker, and wait for it.
   135  	err := runner.StartWorker("id", testWorkerStart(starter))
   136  	c.Assert(err, jc.ErrorIsNil)
   137  	starter.assertStarted(c, true)
   138  
   139  	// XXX the above does not imply the *runner* knows it's started.
   140  	// voodoo sleep ahoy!
   141  	time.Sleep(testing.ShortWait)
   142  
   143  	// Stop the worker, which will block...
   144  	err = runner.StopWorker("id")
   145  	c.Assert(err, jc.ErrorIsNil)
   146  
   147  	// While it's still blocked, try to start another.
   148  	err = runner.StartWorker("id", testWorkerStart(starter))
   149  	c.Assert(err, jc.ErrorIsNil)
   150  
   151  	// Unblock the stopping worker, and check that the task is
   152  	// restarted immediately without the usual restart timeout
   153  	// delay.
   154  	t0 := time.Now()
   155  	close(starter.stopWait)
   156  	starter.assertStarted(c, false) // stop notification
   157  	starter.assertStarted(c, true)  // start notification
   158  	restartDuration := time.Since(t0)
   159  	if restartDuration > 1*time.Second {
   160  		c.Fatalf("task did not restart immediately")
   161  	}
   162  	c.Assert(worker.Stop(runner), gc.IsNil)
   163  }
   164  
   165  func (*RunnerSuite) TestOneWorkerRestartDelay(c *gc.C) {
   166  	const delay = 100 * time.Millisecond
   167  	runner := worker.NewRunner(noneFatal, noImportance, delay)
   168  	starter := newTestWorkerStarter()
   169  	err := runner.StartWorker("id", testWorkerStart(starter))
   170  	c.Assert(err, jc.ErrorIsNil)
   171  	starter.assertStarted(c, true)
   172  	starter.die <- fmt.Errorf("non-fatal error")
   173  	starter.assertStarted(c, false)
   174  	t0 := time.Now()
   175  	starter.assertStarted(c, true)
   176  	restartDuration := time.Since(t0)
   177  	if restartDuration < delay {
   178  		c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, delay)
   179  	}
   180  	c.Assert(worker.Stop(runner), gc.IsNil)
   181  }
   182  
   183  type errorLevel int
   184  
   185  func (e errorLevel) Error() string {
   186  	return fmt.Sprintf("error with importance %d", e)
   187  }
   188  
   189  func (*RunnerSuite) TestErrorImportance(c *gc.C) {
   190  	moreImportant := func(err0, err1 error) bool {
   191  		return err0.(errorLevel) > err1.(errorLevel)
   192  	}
   193  	id := func(i int) string { return fmt.Sprint(i) }
   194  	runner := worker.NewRunner(allFatal, moreImportant, time.Millisecond)
   195  	for i := 0; i < 10; i++ {
   196  		starter := newTestWorkerStarter()
   197  		starter.stopErr = errorLevel(i)
   198  		err := runner.StartWorker(id(i), testWorkerStart(starter))
   199  		c.Assert(err, jc.ErrorIsNil)
   200  	}
   201  	err := runner.StopWorker(id(4))
   202  	c.Assert(err, jc.ErrorIsNil)
   203  	err = runner.Wait()
   204  	c.Assert(err, gc.Equals, errorLevel(9))
   205  }
   206  
   207  func (*RunnerSuite) TestStartWorkerWhenDead(c *gc.C) {
   208  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   209  	c.Assert(worker.Stop(runner), gc.IsNil)
   210  	c.Assert(runner.StartWorker("foo", nil), gc.Equals, worker.ErrDead)
   211  }
   212  
   213  func (*RunnerSuite) TestStopWorkerWhenDead(c *gc.C) {
   214  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   215  	c.Assert(worker.Stop(runner), gc.IsNil)
   216  	c.Assert(runner.StopWorker("foo"), gc.Equals, worker.ErrDead)
   217  }
   218  
   219  func (*RunnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) {
   220  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   221  	var starters []*testWorkerStarter
   222  	for i := 0; i < 10; i++ {
   223  		starter := newTestWorkerStarter()
   224  		err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter))
   225  		c.Assert(err, jc.ErrorIsNil)
   226  		starters = append(starters, starter)
   227  	}
   228  	for _, starter := range starters {
   229  		starter.assertStarted(c, true)
   230  	}
   231  	dieErr := errors.New("fatal error")
   232  	starters[4].die <- dieErr
   233  	err := runner.Wait()
   234  	c.Assert(err, gc.Equals, dieErr)
   235  	for _, starter := range starters {
   236  		starter.assertStarted(c, false)
   237  	}
   238  }
   239  
   240  func (*RunnerSuite) TestFatalErrorWhileStarting(c *gc.C) {
   241  	// Original deadlock problem that this tests for:
   242  	// A worker dies with fatal error while another worker
   243  	// is inside start(). runWorker can't send startInfo on startedc.
   244  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   245  
   246  	slowStarter := newTestWorkerStarter()
   247  	// make the startNotify channel synchronous so
   248  	// we can delay the start indefinitely.
   249  	slowStarter.startNotify = make(chan bool)
   250  
   251  	err := runner.StartWorker("slow starter", testWorkerStart(slowStarter))
   252  	c.Assert(err, jc.ErrorIsNil)
   253  
   254  	fatalStarter := newTestWorkerStarter()
   255  	fatalStarter.startErr = fmt.Errorf("a fatal error")
   256  
   257  	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
   258  	c.Assert(err, jc.ErrorIsNil)
   259  
   260  	// Wait for the runner loop to react to the fatal
   261  	// error and go into final shutdown mode.
   262  	time.Sleep(10 * time.Millisecond)
   263  
   264  	// At this point, the loop is in shutdown mode, but the
   265  	// slowStarter's worker is still in its start function.
   266  	// When the start function continues (the first assertStarted
   267  	// allows that to happen) and returns the new Worker,
   268  	// runWorker will try to send it on runner.startedc.
   269  	// This test makes sure that succeeds ok.
   270  
   271  	slowStarter.assertStarted(c, true)
   272  	slowStarter.assertStarted(c, false)
   273  	err = runner.Wait()
   274  	c.Assert(err, gc.Equals, fatalStarter.startErr)
   275  }
   276  
   277  func (*RunnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) {
   278  	// Original deadlock problem that this tests for:
   279  	// A worker tries to call StartWorker in its start function
   280  	// at the same time another worker dies with a fatal error.
   281  	// It might not be able to send on startc.
   282  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   283  
   284  	selfStarter := newTestWorkerStarter()
   285  	// make the startNotify channel synchronous so
   286  	// we can delay the start indefinitely.
   287  	selfStarter.startNotify = make(chan bool)
   288  	selfStarter.hook = func() {
   289  		runner.StartWorker("another", func() (worker.Worker, error) {
   290  			return nil, fmt.Errorf("no worker started")
   291  		})
   292  	}
   293  	err := runner.StartWorker("self starter", testWorkerStart(selfStarter))
   294  	c.Assert(err, jc.ErrorIsNil)
   295  
   296  	fatalStarter := newTestWorkerStarter()
   297  	fatalStarter.startErr = fmt.Errorf("a fatal error")
   298  
   299  	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
   300  	c.Assert(err, jc.ErrorIsNil)
   301  
   302  	// Wait for the runner loop to react to the fatal
   303  	// error and go into final shutdown mode.
   304  	time.Sleep(10 * time.Millisecond)
   305  
   306  	// At this point, the loop is in shutdown mode, but the
   307  	// selfStarter's worker is still in its start function.
   308  	// When the start function continues (the first assertStarted
   309  	// allows that to happen) it will try to create a new
   310  	// worker. This failed in an earlier version of the code because the
   311  	// loop was not ready to receive start requests.
   312  
   313  	selfStarter.assertStarted(c, true)
   314  	selfStarter.assertStarted(c, false)
   315  	err = runner.Wait()
   316  	c.Assert(err, gc.Equals, fatalStarter.startErr)
   317  }
   318  
// testWorkerStarter is the test fixture behind the start functions given to
// the runner (see testWorkerStart). It reports worker starts and exits on
// startNotify and lets a test control how and when the worker dies.
type testWorkerStarter struct {
	// startCount is incremented by start and decremented when the
	// worker's run goroutine exits; both sides panic if the resulting
	// value is not the expected 1 or 0. It is only accessed through
	// sync/atomic.
	startCount int32

	// startNotify receives true when the worker starts
	// and false when it exits. If startErr is non-nil,
	// it receives false only. newTestWorkerStarter gives it a
	// buffer of 100; some tests replace it with an unbuffered
	// channel so that start blocks until the notification is read.
	startNotify chan bool

	// If stopWait is non-nil, the worker will block
	// receiving from it before sending its exit
	// notification. Closing the channel releases it too.
	stopWait chan struct{}

	// Sending a value on die causes the running worker
	// to kill its tomb with the given error and exit.
	die chan error

	// If startErr is non-nil, start creates no worker: it
	// sends false on startNotify and returns this error.
	startErr error

	// If stopErr is non-nil, the worker will kill its tomb
	// with this error when asked to stop.
	stopErr error

	// The hook function is called at the beginning of the worker's
	// run goroutine. newTestWorkerStarter sets it to a no-op.
	hook func()
}
   347  
   348  func newTestWorkerStarter() *testWorkerStarter {
   349  	return &testWorkerStarter{
   350  		die:         make(chan error, 1),
   351  		startNotify: make(chan bool, 100),
   352  		hook:        func() {},
   353  	}
   354  }
   355  
   356  func (starter *testWorkerStarter) assertStarted(c *gc.C, started bool) {
   357  	select {
   358  	case isStarted := <-starter.startNotify:
   359  		c.Assert(isStarted, gc.Equals, started)
   360  	case <-time.After(testing.LongWait):
   361  		c.Fatalf("timed out waiting for start notification")
   362  	}
   363  }
   364  
   365  func (starter *testWorkerStarter) assertNeverStarted(c *gc.C) {
   366  	select {
   367  	case isStarted := <-starter.startNotify:
   368  		c.Fatalf("got unexpected start notification: %v", isStarted)
   369  	case <-time.After(worker.RestartDelay + testing.ShortWait):
   370  	}
   371  }
   372  
   373  func testWorkerStart(starter *testWorkerStarter) func() (worker.Worker, error) {
   374  	return func() (worker.Worker, error) {
   375  		return starter.start()
   376  	}
   377  }
   378  
   379  func (starter *testWorkerStarter) start() (worker.Worker, error) {
   380  	if count := atomic.AddInt32(&starter.startCount, 1); count != 1 {
   381  		panic(fmt.Errorf("unexpected start count %d; expected 1", count))
   382  	}
   383  	if starter.startErr != nil {
   384  		starter.startNotify <- false
   385  		return nil, starter.startErr
   386  	}
   387  	task := &testWorker{
   388  		starter: starter,
   389  	}
   390  	starter.startNotify <- true
   391  	go task.run()
   392  	return task, nil
   393  }
   394  
// testWorker is the worker created by testWorkerStarter.start and returned
// from it as a worker.Worker.
type testWorker struct {
	// starter is the fixture that created this worker; run reads its
	// control fields and reports the exit on its startNotify channel.
	starter *testWorkerStarter
	// tomb tracks the worker's lifetime: Kill and Wait delegate to it and
	// run marks it done on exit.
	tomb tomb.Tomb
}
   399  
   400  func (t *testWorker) Kill() {
   401  	t.tomb.Kill(nil)
   402  }
   403  
   404  func (t *testWorker) Wait() error {
   405  	return t.tomb.Wait()
   406  }
   407  
   408  func (t *testWorker) run() {
   409  	defer t.tomb.Done()
   410  
   411  	t.starter.hook()
   412  	select {
   413  	case <-t.tomb.Dying():
   414  		t.tomb.Kill(t.starter.stopErr)
   415  	case err := <-t.starter.die:
   416  		t.tomb.Kill(err)
   417  	}
   418  	if t.starter.stopWait != nil {
   419  		<-t.starter.stopWait
   420  	}
   421  	t.starter.startNotify <- false
   422  	if count := atomic.AddInt32(&t.starter.startCount, -1); count != 0 {
   423  		panic(fmt.Errorf("unexpected start count %d; expected 0", count))
   424  	}
   425  }