github.com/mwhudson/juju@v0.0.0-20160512215208-90ff01f3497f/worker/runner_test.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package worker_test
     5  
     6  import (
     7  	"fmt"
     8  	"sort"
     9  	"sync/atomic"
    10  	"time"
    11  
    12  	"github.com/juju/errors"
    13  	gitjujutesting "github.com/juju/testing"
    14  	jc "github.com/juju/testing/checkers"
    15  	gc "gopkg.in/check.v1"
    16  	"launchpad.net/tomb"
    17  
    18  	"github.com/juju/juju/testing"
    19  	"github.com/juju/juju/worker"
    20  	workertesting "github.com/juju/juju/worker/testing"
    21  )
    22  
    23  var (
    24  	_ = gc.Suite(&runnerSuite{})
    25  	_ = gc.Suite(&workersSuite{})
    26  )
    27  
    28  type runnerSuite struct {
    29  	testing.BaseSuite
    30  }
    31  
    32  func noneFatal(error) bool {
    33  	return false
    34  }
    35  
    36  func allFatal(error) bool {
    37  	return true
    38  }
    39  
    40  func noImportance(err0, err1 error) bool {
    41  	return false
    42  }
    43  
    44  func (*runnerSuite) TestOneWorkerStart(c *gc.C) {
    45  	runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond)
    46  	starter := newTestWorkerStarter()
    47  	err := runner.StartWorker("id", testWorkerStart(starter))
    48  	c.Assert(err, jc.ErrorIsNil)
    49  	starter.assertStarted(c, true)
    50  
    51  	c.Assert(worker.Stop(runner), gc.IsNil)
    52  	starter.assertStarted(c, false)
    53  }
    54  
    55  func (*runnerSuite) TestOneWorkerFinish(c *gc.C) {
    56  	runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond)
    57  	starter := newTestWorkerStarter()
    58  	err := runner.StartWorker("id", testWorkerStart(starter))
    59  	c.Assert(err, jc.ErrorIsNil)
    60  	starter.assertStarted(c, true)
    61  
    62  	starter.die <- nil
    63  	starter.assertStarted(c, false)
    64  	starter.assertNeverStarted(c)
    65  
    66  	c.Assert(worker.Stop(runner), gc.IsNil)
    67  }
    68  
    69  func (*runnerSuite) TestOneWorkerRestart(c *gc.C) {
    70  	runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond)
    71  	starter := newTestWorkerStarter()
    72  	err := runner.StartWorker("id", testWorkerStart(starter))
    73  	c.Assert(err, jc.ErrorIsNil)
    74  	starter.assertStarted(c, true)
    75  
    76  	// Check it restarts a few times time.
    77  	for i := 0; i < 3; i++ {
    78  		starter.die <- fmt.Errorf("an error")
    79  		starter.assertStarted(c, false)
    80  		starter.assertStarted(c, true)
    81  	}
    82  
    83  	c.Assert(worker.Stop(runner), gc.IsNil)
    84  	starter.assertStarted(c, false)
    85  }
    86  
    87  func (*runnerSuite) TestOneWorkerStartFatalError(c *gc.C) {
    88  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
    89  	starter := newTestWorkerStarter()
    90  	starter.startErr = errors.New("cannot start test task")
    91  	err := runner.StartWorker("id", testWorkerStart(starter))
    92  	c.Assert(err, jc.ErrorIsNil)
    93  	err = runner.Wait()
    94  	c.Assert(err, gc.Equals, starter.startErr)
    95  }
    96  
    97  func (*runnerSuite) TestOneWorkerDieFatalError(c *gc.C) {
    98  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
    99  	starter := newTestWorkerStarter()
   100  	err := runner.StartWorker("id", testWorkerStart(starter))
   101  	c.Assert(err, jc.ErrorIsNil)
   102  	starter.assertStarted(c, true)
   103  	dieErr := errors.New("error when running")
   104  	starter.die <- dieErr
   105  	err = runner.Wait()
   106  	c.Assert(err, gc.Equals, dieErr)
   107  	starter.assertStarted(c, false)
   108  }
   109  
   110  func (*runnerSuite) TestOneWorkerStartStop(c *gc.C) {
   111  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   112  	starter := newTestWorkerStarter()
   113  	err := runner.StartWorker("id", testWorkerStart(starter))
   114  	c.Assert(err, jc.ErrorIsNil)
   115  	starter.assertStarted(c, true)
   116  	err = runner.StopWorker("id")
   117  	c.Assert(err, jc.ErrorIsNil)
   118  	starter.assertStarted(c, false)
   119  	c.Assert(worker.Stop(runner), gc.IsNil)
   120  }
   121  
   122  func (*runnerSuite) TestOneWorkerStopFatalError(c *gc.C) {
   123  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   124  	starter := newTestWorkerStarter()
   125  	starter.stopErr = errors.New("stop error")
   126  	err := runner.StartWorker("id", testWorkerStart(starter))
   127  	c.Assert(err, jc.ErrorIsNil)
   128  	starter.assertStarted(c, true)
   129  	err = runner.StopWorker("id")
   130  	c.Assert(err, jc.ErrorIsNil)
   131  	err = runner.Wait()
   132  	c.Assert(err, gc.Equals, starter.stopErr)
   133  }
   134  
   135  func (*runnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) {
   136  	runner := worker.NewRunner(allFatal, noImportance, 3*time.Second)
   137  	starter := newTestWorkerStarter()
   138  	starter.stopWait = make(chan struct{})
   139  
   140  	err := runner.StartWorker("id", testWorkerStart(starter))
   141  	c.Assert(err, jc.ErrorIsNil)
   142  	starter.assertStarted(c, true)
   143  	err = runner.StopWorker("id")
   144  	c.Assert(err, jc.ErrorIsNil)
   145  	err = runner.StartWorker("id", testWorkerStart(starter))
   146  	c.Assert(err, jc.ErrorIsNil)
   147  
   148  	close(starter.stopWait)
   149  	starter.assertStarted(c, false)
   150  	// Check that the task is restarted immediately without
   151  	// the usual restart timeout delay.
   152  	t0 := time.Now()
   153  	starter.assertStarted(c, true)
   154  	restartDuration := time.Since(t0)
   155  	if restartDuration > 1*time.Second {
   156  		c.Fatalf("task did not restart immediately")
   157  	}
   158  	c.Assert(worker.Stop(runner), gc.IsNil)
   159  }
   160  
   161  func (*runnerSuite) TestOneWorkerRestartDelay(c *gc.C) {
   162  	const delay = 100 * time.Millisecond
   163  	runner := worker.NewRunner(noneFatal, noImportance, delay)
   164  	starter := newTestWorkerStarter()
   165  	err := runner.StartWorker("id", testWorkerStart(starter))
   166  	c.Assert(err, jc.ErrorIsNil)
   167  	starter.assertStarted(c, true)
   168  	starter.die <- fmt.Errorf("non-fatal error")
   169  	starter.assertStarted(c, false)
   170  	t0 := time.Now()
   171  	starter.assertStarted(c, true)
   172  	restartDuration := time.Since(t0)
   173  	if restartDuration < delay {
   174  		c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, delay)
   175  	}
   176  	c.Assert(worker.Stop(runner), gc.IsNil)
   177  }
   178  
   179  type errorLevel int
   180  
   181  func (e errorLevel) Error() string {
   182  	return fmt.Sprintf("error with importance %d", e)
   183  }
   184  
   185  func (*runnerSuite) TestErrorImportance(c *gc.C) {
   186  	moreImportant := func(err0, err1 error) bool {
   187  		return err0.(errorLevel) > err1.(errorLevel)
   188  	}
   189  	id := func(i int) string { return fmt.Sprint(i) }
   190  	runner := worker.NewRunner(allFatal, moreImportant, time.Millisecond)
   191  	for i := 0; i < 10; i++ {
   192  		starter := newTestWorkerStarter()
   193  		starter.stopErr = errorLevel(i)
   194  		err := runner.StartWorker(id(i), testWorkerStart(starter))
   195  		c.Assert(err, jc.ErrorIsNil)
   196  	}
   197  	err := runner.StopWorker(id(4))
   198  	c.Assert(err, jc.ErrorIsNil)
   199  	err = runner.Wait()
   200  	c.Assert(err, gc.Equals, errorLevel(9))
   201  }
   202  
   203  func (*runnerSuite) TestStartWorkerWhenDead(c *gc.C) {
   204  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   205  	c.Assert(worker.Stop(runner), gc.IsNil)
   206  	c.Assert(runner.StartWorker("foo", nil), gc.Equals, worker.ErrDead)
   207  }
   208  
   209  func (*runnerSuite) TestStopWorkerWhenDead(c *gc.C) {
   210  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   211  	c.Assert(worker.Stop(runner), gc.IsNil)
   212  	c.Assert(runner.StopWorker("foo"), gc.Equals, worker.ErrDead)
   213  }
   214  
   215  func (*runnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) {
   216  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   217  	var starters []*testWorkerStarter
   218  	for i := 0; i < 10; i++ {
   219  		starter := newTestWorkerStarter()
   220  		err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter))
   221  		c.Assert(err, jc.ErrorIsNil)
   222  		starters = append(starters, starter)
   223  	}
   224  	for _, starter := range starters {
   225  		starter.assertStarted(c, true)
   226  	}
   227  	dieErr := errors.New("fatal error")
   228  	starters[4].die <- dieErr
   229  	err := runner.Wait()
   230  	c.Assert(err, gc.Equals, dieErr)
   231  	for _, starter := range starters {
   232  		starter.assertStarted(c, false)
   233  	}
   234  }
   235  
   236  func (*runnerSuite) TestFatalErrorWhileStarting(c *gc.C) {
   237  	// Original deadlock problem that this tests for:
   238  	// A worker dies with fatal error while another worker
   239  	// is inside start(). runWorker can't send startInfo on startedc.
   240  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   241  
   242  	slowStarter := newTestWorkerStarter()
   243  	// make the startNotify channel synchronous so
   244  	// we can delay the start indefinitely.
   245  	slowStarter.startNotify = make(chan bool)
   246  
   247  	err := runner.StartWorker("slow starter", testWorkerStart(slowStarter))
   248  	c.Assert(err, jc.ErrorIsNil)
   249  
   250  	fatalStarter := newTestWorkerStarter()
   251  	fatalStarter.startErr = fmt.Errorf("a fatal error")
   252  
   253  	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
   254  	c.Assert(err, jc.ErrorIsNil)
   255  
   256  	// Wait for the runner loop to react to the fatal
   257  	// error and go into final shutdown mode.
   258  	time.Sleep(10 * time.Millisecond)
   259  
   260  	// At this point, the loop is in shutdown mode, but the
   261  	// slowStarter's worker is still in its start function.
   262  	// When the start function continues (the first assertStarted
   263  	// allows that to happen) and returns the new Worker,
   264  	// runWorker will try to send it on runner.startedc.
   265  	// This test makes sure that succeeds ok.
   266  
   267  	slowStarter.assertStarted(c, true)
   268  	slowStarter.assertStarted(c, false)
   269  	err = runner.Wait()
   270  	c.Assert(err, gc.Equals, fatalStarter.startErr)
   271  }
   272  
   273  func (*runnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) {
   274  	// Original deadlock problem that this tests for:
   275  	// A worker tries to call StartWorker in its start function
   276  	// at the same time another worker dies with a fatal error.
   277  	// It might not be able to send on startc.
   278  	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
   279  
   280  	selfStarter := newTestWorkerStarter()
   281  	// make the startNotify channel synchronous so
   282  	// we can delay the start indefinitely.
   283  	selfStarter.startNotify = make(chan bool)
   284  	selfStarter.hook = func() {
   285  		runner.StartWorker("another", func() (worker.Worker, error) {
   286  			return nil, fmt.Errorf("no worker started")
   287  		})
   288  	}
   289  	err := runner.StartWorker("self starter", testWorkerStart(selfStarter))
   290  	c.Assert(err, jc.ErrorIsNil)
   291  
   292  	fatalStarter := newTestWorkerStarter()
   293  	fatalStarter.startErr = fmt.Errorf("a fatal error")
   294  
   295  	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
   296  	c.Assert(err, jc.ErrorIsNil)
   297  
   298  	// Wait for the runner loop to react to the fatal
   299  	// error and go into final shutdown mode.
   300  	time.Sleep(10 * time.Millisecond)
   301  
   302  	// At this point, the loop is in shutdown mode, but the
   303  	// selfStarter's worker is still in its start function.
   304  	// When the start function continues (the first assertStarted
   305  	// allows that to happen) it will try to create a new
   306  	// worker. This failed in an earlier version of the code because the
   307  	// loop was not ready to receive start requests.
   308  
   309  	selfStarter.assertStarted(c, true)
   310  	selfStarter.assertStarted(c, false)
   311  	err = runner.Wait()
   312  	c.Assert(err, gc.Equals, fatalStarter.startErr)
   313  }
   314  
   315  type testWorkerStarter struct {
   316  	startCount int32
   317  
   318  	// startNotify receives true when the worker starts
   319  	// and false when it exits. If startErr is non-nil,
   320  	// it sends false only.
   321  	startNotify chan bool
   322  
   323  	// If stopWait is non-nil, the worker will
   324  	// wait for a value to be sent on it before
   325  	// exiting.
   326  	stopWait chan struct{}
   327  
   328  	// Sending a value on die causes the worker
   329  	// to die with the given error.
   330  	die chan error
   331  
   332  	// If startErr is non-nil, the worker will die immediately
   333  	// with this error after starting.
   334  	startErr error
   335  
   336  	// If stopErr is non-nil, the worker will die with this
   337  	// error when asked to stop.
   338  	stopErr error
   339  
   340  	// The hook function is called after starting the worker.
   341  	hook func()
   342  }
   343  
   344  func newTestWorkerStarter() *testWorkerStarter {
   345  	return &testWorkerStarter{
   346  		die:         make(chan error, 1),
   347  		startNotify: make(chan bool, 100),
   348  		hook:        func() {},
   349  	}
   350  }
   351  
   352  func (starter *testWorkerStarter) assertStarted(c *gc.C, started bool) {
   353  	select {
   354  	case isStarted := <-starter.startNotify:
   355  		c.Assert(isStarted, gc.Equals, started)
   356  	case <-time.After(1 * time.Second):
   357  		c.Fatalf("timed out waiting for start notification")
   358  	}
   359  }
   360  
   361  func (starter *testWorkerStarter) assertNeverStarted(c *gc.C) {
   362  	select {
   363  	case isStarted := <-starter.startNotify:
   364  		c.Fatalf("got unexpected start notification: %v", isStarted)
   365  	case <-time.After(worker.RestartDelay + testing.ShortWait):
   366  	}
   367  }
   368  
   369  func testWorkerStart(starter *testWorkerStarter) func() (worker.Worker, error) {
   370  	return func() (worker.Worker, error) {
   371  		return starter.start()
   372  	}
   373  }
   374  
   375  func (starter *testWorkerStarter) start() (worker.Worker, error) {
   376  	if count := atomic.AddInt32(&starter.startCount, 1); count != 1 {
   377  		panic(fmt.Errorf("unexpected start count %d; expected 1", count))
   378  	}
   379  	if starter.startErr != nil {
   380  		starter.startNotify <- false
   381  		return nil, starter.startErr
   382  	}
   383  	task := &testWorker{
   384  		starter: starter,
   385  	}
   386  	starter.startNotify <- true
   387  	go task.run()
   388  	return task, nil
   389  }
   390  
   391  type testWorker struct {
   392  	starter *testWorkerStarter
   393  	tomb    tomb.Tomb
   394  }
   395  
   396  func (t *testWorker) Kill() {
   397  	t.tomb.Kill(nil)
   398  }
   399  
   400  func (t *testWorker) Wait() error {
   401  	return t.tomb.Wait()
   402  }
   403  
   404  func (t *testWorker) run() {
   405  	defer t.tomb.Done()
   406  
   407  	t.starter.hook()
   408  	select {
   409  	case <-t.tomb.Dying():
   410  		t.tomb.Kill(t.starter.stopErr)
   411  	case err := <-t.starter.die:
   412  		t.tomb.Kill(err)
   413  	}
   414  	if t.starter.stopWait != nil {
   415  		<-t.starter.stopWait
   416  	}
   417  	t.starter.startNotify <- false
   418  	if count := atomic.AddInt32(&t.starter.startCount, -1); count != 0 {
   419  		panic(fmt.Errorf("unexpected start count %d; expected 0", count))
   420  	}
   421  }
   422  
   423  type workersSuite struct {
   424  	testing.BaseSuite
   425  
   426  	calls []string
   427  	stub  *gitjujutesting.Stub
   428  }
   429  
   430  func (s *workersSuite) SetUpTest(c *gc.C) {
   431  	s.BaseSuite.SetUpTest(c)
   432  
   433  	s.stub = &gitjujutesting.Stub{}
   434  	s.calls = nil
   435  }
   436  
   437  func (s *workersSuite) newWorkerFunc(id string) func() (worker.Worker, error) {
   438  	return func() (worker.Worker, error) {
   439  		s.calls = append(s.calls, id)
   440  		return nil, nil
   441  	}
   442  }
   443  
   444  func (*workersSuite) TestIDsOkay(c *gc.C) {
   445  	newWorker := func() (worker.Worker, error) { return nil, nil }
   446  
   447  	workers := worker.NewWorkers()
   448  	err := workers.Add("spam", newWorker)
   449  	c.Assert(err, jc.ErrorIsNil)
   450  	err = workers.Add("eggs", newWorker)
   451  	c.Assert(err, jc.ErrorIsNil)
   452  	ids := workers.IDs()
   453  
   454  	c.Check(ids, jc.DeepEquals, []string{"spam", "eggs"})
   455  }
   456  
   457  func (*workersSuite) TestIDsEmpty(c *gc.C) {
   458  	workers := worker.NewWorkers()
   459  	ids := workers.IDs()
   460  
   461  	c.Check(ids, gc.HasLen, 0)
   462  }
   463  
   464  func (*workersSuite) TestAddAlreadyRegistered(c *gc.C) {
   465  	newWorker := func() (worker.Worker, error) { return nil, nil }
   466  
   467  	workers := worker.NewWorkers()
   468  	err := workers.Add("spam", newWorker)
   469  	c.Assert(err, jc.ErrorIsNil)
   470  	err = workers.Add("spam", newWorker)
   471  
   472  	c.Check(err, gc.ErrorMatches, `.*already registered.*`)
   473  }
   474  
   475  func (s *workersSuite) TestStartOkay(c *gc.C) {
   476  	runner := workertesting.NewStubRunner(s.stub)
   477  	runner.CallWhenStarted = true
   478  
   479  	workers := worker.NewWorkers()
   480  	expected := []string{"spam", "eggs", "ham"}
   481  	for _, id := range expected {
   482  		err := workers.Add(id, s.newWorkerFunc(id))
   483  		c.Assert(err, jc.ErrorIsNil)
   484  	}
   485  	err := workers.Start(runner)
   486  	c.Assert(err, jc.ErrorIsNil)
   487  
   488  	// We would use s.stub.CheckCalls if functions could be compared...
   489  	runner.CheckCallIDs(c, "StartWorker", expected...)
   490  	sort.Strings(s.calls)
   491  	sort.Strings(expected)
   492  	c.Check(s.calls, jc.DeepEquals, expected)
   493  }
   494  
   495  func (s *workersSuite) TestStartError(c *gc.C) {
   496  	runner := workertesting.NewStubRunner(s.stub)
   497  	failure := errors.Errorf("<failed>")
   498  	s.stub.SetErrors(nil, failure)
   499  
   500  	workers := worker.NewWorkers()
   501  	expected := []string{"spam", "eggs", "ham"}
   502  	for _, id := range expected {
   503  		err := workers.Add(id, s.newWorkerFunc(id))
   504  		c.Assert(err, jc.ErrorIsNil)
   505  	}
   506  	err := workers.Start(runner)
   507  
   508  	s.stub.CheckCallNames(c, "StartWorker", "StartWorker")
   509  	c.Check(errors.Cause(err), gc.Equals, failure)
   510  }