github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/worker/runner_test.go

github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/worker/runner_test.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package worker_test
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"sync/atomic"
    10  	"time"
    11  
    12  	jc "github.com/juju/testing/checkers"
    13  	gc "gopkg.in/check.v1"
    14  	"launchpad.net/tomb"
    15  
    16  	"github.com/juju/juju/testing"
    17  	"github.com/juju/juju/worker"
    18  )
    19  
    20  type runnerSuite struct {
    21  	testing.BaseSuite
    22  }
    23  
    24  var _ = gc.Suite(&runnerSuite{})
    25  
    26  func noneFatal(error) bool {
    27  	return false
    28  }
    29  
    30  func allFatal(error) bool {
    31  	return true
    32  }
    33  
    34  func noImportance(err0, err1 error) bool {
    35  	return false
    36  }
    37  
    38  func (s *runnerSuite) SetUpTest(c *gc.C) {
    39  	s.BaseSuite.SetUpTest(c)
    40  	s.PatchValue(&worker.RestartDelay, time.Duration(0))
    41  }
    42  
    43  func (*runnerSuite) TestOneWorkerStart(c *gc.C) {
    44  	runner := worker.NewRunner(noneFatal, noImportance)
    45  	starter := newTestWorkerStarter()
    46  	err := runner.StartWorker("id", testWorkerStart(starter))
    47  	c.Assert(err, jc.ErrorIsNil)
    48  	starter.assertStarted(c, true)
    49  
    50  	c.Assert(worker.Stop(runner), gc.IsNil)
    51  	starter.assertStarted(c, false)
    52  }
    53  
    54  func (*runnerSuite) TestOneWorkerFinish(c *gc.C) {
    55  	runner := worker.NewRunner(noneFatal, noImportance)
    56  	starter := newTestWorkerStarter()
    57  	err := runner.StartWorker("id", testWorkerStart(starter))
    58  	c.Assert(err, jc.ErrorIsNil)
    59  	starter.assertStarted(c, true)
    60  
    61  	starter.die <- nil
    62  	starter.assertStarted(c, false)
    63  	starter.assertNeverStarted(c)
    64  
    65  	c.Assert(worker.Stop(runner), gc.IsNil)
    66  }
    67  
    68  func (*runnerSuite) TestOneWorkerRestart(c *gc.C) {
    69  	runner := worker.NewRunner(noneFatal, noImportance)
    70  	starter := newTestWorkerStarter()
    71  	err := runner.StartWorker("id", testWorkerStart(starter))
    72  	c.Assert(err, jc.ErrorIsNil)
    73  	starter.assertStarted(c, true)
    74  
    75  	// Check it restarts a few times time.
    76  	for i := 0; i < 3; i++ {
    77  		starter.die <- fmt.Errorf("an error")
    78  		starter.assertStarted(c, false)
    79  		starter.assertStarted(c, true)
    80  	}
    81  
    82  	c.Assert(worker.Stop(runner), gc.IsNil)
    83  	starter.assertStarted(c, false)
    84  }
    85  
    86  func (*runnerSuite) TestOneWorkerStartFatalError(c *gc.C) {
    87  	runner := worker.NewRunner(allFatal, noImportance)
    88  	starter := newTestWorkerStarter()
    89  	starter.startErr = errors.New("cannot start test task")
    90  	err := runner.StartWorker("id", testWorkerStart(starter))
    91  	c.Assert(err, jc.ErrorIsNil)
    92  	err = runner.Wait()
    93  	c.Assert(err, gc.Equals, starter.startErr)
    94  }
    95  
    96  func (*runnerSuite) TestOneWorkerDieFatalError(c *gc.C) {
    97  	runner := worker.NewRunner(allFatal, noImportance)
    98  	starter := newTestWorkerStarter()
    99  	err := runner.StartWorker("id", testWorkerStart(starter))
   100  	c.Assert(err, jc.ErrorIsNil)
   101  	starter.assertStarted(c, true)
   102  	dieErr := errors.New("error when running")
   103  	starter.die <- dieErr
   104  	err = runner.Wait()
   105  	c.Assert(err, gc.Equals, dieErr)
   106  	starter.assertStarted(c, false)
   107  }
   108  
   109  func (*runnerSuite) TestOneWorkerStartStop(c *gc.C) {
   110  	runner := worker.NewRunner(allFatal, noImportance)
   111  	starter := newTestWorkerStarter()
   112  	err := runner.StartWorker("id", testWorkerStart(starter))
   113  	c.Assert(err, jc.ErrorIsNil)
   114  	starter.assertStarted(c, true)
   115  	err = runner.StopWorker("id")
   116  	c.Assert(err, jc.ErrorIsNil)
   117  	starter.assertStarted(c, false)
   118  	c.Assert(worker.Stop(runner), gc.IsNil)
   119  }
   120  
   121  func (*runnerSuite) TestOneWorkerStopFatalError(c *gc.C) {
   122  	runner := worker.NewRunner(allFatal, noImportance)
   123  	starter := newTestWorkerStarter()
   124  	starter.stopErr = errors.New("stop error")
   125  	err := runner.StartWorker("id", testWorkerStart(starter))
   126  	c.Assert(err, jc.ErrorIsNil)
   127  	starter.assertStarted(c, true)
   128  	err = runner.StopWorker("id")
   129  	c.Assert(err, jc.ErrorIsNil)
   130  	err = runner.Wait()
   131  	c.Assert(err, gc.Equals, starter.stopErr)
   132  }
   133  
   134  func (*runnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) {
   135  	worker.RestartDelay = 3 * time.Second
   136  	runner := worker.NewRunner(allFatal, noImportance)
   137  	starter := newTestWorkerStarter()
   138  	starter.stopWait = make(chan struct{})
   139  
   140  	err := runner.StartWorker("id", testWorkerStart(starter))
   141  	c.Assert(err, jc.ErrorIsNil)
   142  	starter.assertStarted(c, true)
   143  	err = runner.StopWorker("id")
   144  	c.Assert(err, jc.ErrorIsNil)
   145  	err = runner.StartWorker("id", testWorkerStart(starter))
   146  	c.Assert(err, jc.ErrorIsNil)
   147  
   148  	close(starter.stopWait)
   149  	starter.assertStarted(c, false)
   150  	// Check that the task is restarted immediately without
   151  	// the usual restart timeout delay.
   152  	t0 := time.Now()
   153  	starter.assertStarted(c, true)
   154  	restartDuration := time.Since(t0)
   155  	if restartDuration > 1*time.Second {
   156  		c.Fatalf("task did not restart immediately")
   157  	}
   158  	c.Assert(worker.Stop(runner), gc.IsNil)
   159  }
   160  
   161  func (*runnerSuite) TestOneWorkerRestartDelay(c *gc.C) {
   162  	worker.RestartDelay = 100 * time.Millisecond
   163  	runner := worker.NewRunner(noneFatal, noImportance)
   164  	starter := newTestWorkerStarter()
   165  	err := runner.StartWorker("id", testWorkerStart(starter))
   166  	c.Assert(err, jc.ErrorIsNil)
   167  	starter.assertStarted(c, true)
   168  	starter.die <- fmt.Errorf("non-fatal error")
   169  	starter.assertStarted(c, false)
   170  	t0 := time.Now()
   171  	starter.assertStarted(c, true)
   172  	restartDuration := time.Since(t0)
   173  	if restartDuration < worker.RestartDelay {
   174  		c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, worker.RestartDelay)
   175  	}
   176  	c.Assert(worker.Stop(runner), gc.IsNil)
   177  }
   178  
   179  type errorLevel int
   180  
   181  func (e errorLevel) Error() string {
   182  	return fmt.Sprintf("error with importance %d", e)
   183  }
   184  
   185  func (*runnerSuite) TestErrorImportance(c *gc.C) {
   186  	moreImportant := func(err0, err1 error) bool {
   187  		return err0.(errorLevel) > err1.(errorLevel)
   188  	}
   189  	id := func(i int) string { return fmt.Sprint(i) }
   190  	runner := worker.NewRunner(allFatal, moreImportant)
   191  	for i := 0; i < 10; i++ {
   192  		starter := newTestWorkerStarter()
   193  		starter.stopErr = errorLevel(i)
   194  		err := runner.StartWorker(id(i), testWorkerStart(starter))
   195  		c.Assert(err, jc.ErrorIsNil)
   196  	}
   197  	err := runner.StopWorker(id(4))
   198  	c.Assert(err, jc.ErrorIsNil)
   199  	err = runner.Wait()
   200  	c.Assert(err, gc.Equals, errorLevel(9))
   201  }
   202  
   203  func (*runnerSuite) TestStartWorkerWhenDead(c *gc.C) {
   204  	runner := worker.NewRunner(allFatal, noImportance)
   205  	c.Assert(worker.Stop(runner), gc.IsNil)
   206  	c.Assert(runner.StartWorker("foo", nil), gc.Equals, worker.ErrDead)
   207  }
   208  
   209  func (*runnerSuite) TestStopWorkerWhenDead(c *gc.C) {
   210  	runner := worker.NewRunner(allFatal, noImportance)
   211  	c.Assert(worker.Stop(runner), gc.IsNil)
   212  	c.Assert(runner.StopWorker("foo"), gc.Equals, worker.ErrDead)
   213  }
   214  
   215  func (*runnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) {
   216  	runner := worker.NewRunner(allFatal, noImportance)
   217  	var starters []*testWorkerStarter
   218  	for i := 0; i < 10; i++ {
   219  		starter := newTestWorkerStarter()
   220  		err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter))
   221  		c.Assert(err, jc.ErrorIsNil)
   222  		starters = append(starters, starter)
   223  	}
   224  	for _, starter := range starters {
   225  		starter.assertStarted(c, true)
   226  	}
   227  	dieErr := errors.New("fatal error")
   228  	starters[4].die <- dieErr
   229  	err := runner.Wait()
   230  	c.Assert(err, gc.Equals, dieErr)
   231  	for _, starter := range starters {
   232  		starter.assertStarted(c, false)
   233  	}
   234  }
   235  
   236  func (*runnerSuite) TestFatalErrorWhileStarting(c *gc.C) {
   237  	// Original deadlock problem that this tests for:
   238  	// A worker dies with fatal error while another worker
   239  	// is inside start(). runWorker can't send startInfo on startedc.
   240  	runner := worker.NewRunner(allFatal, noImportance)
   241  
   242  	slowStarter := newTestWorkerStarter()
   243  	// make the startNotify channel synchronous so
   244  	// we can delay the start indefinitely.
   245  	slowStarter.startNotify = make(chan bool)
   246  
   247  	err := runner.StartWorker("slow starter", testWorkerStart(slowStarter))
   248  	c.Assert(err, jc.ErrorIsNil)
   249  
   250  	fatalStarter := newTestWorkerStarter()
   251  	fatalStarter.startErr = fmt.Errorf("a fatal error")
   252  
   253  	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
   254  	c.Assert(err, jc.ErrorIsNil)
   255  
   256  	// Wait for the runner loop to react to the fatal
   257  	// error and go into final shutdown mode.
   258  	time.Sleep(10 * time.Millisecond)
   259  
   260  	// At this point, the loop is in shutdown mode, but the
   261  	// slowStarter's worker is still in its start function.
   262  	// When the start function continues (the first assertStarted
   263  	// allows that to happen) and returns the new Worker,
   264  	// runWorker will try to send it on runner.startedc.
   265  	// This test makes sure that succeeds ok.
   266  
   267  	slowStarter.assertStarted(c, true)
   268  	slowStarter.assertStarted(c, false)
   269  	err = runner.Wait()
   270  	c.Assert(err, gc.Equals, fatalStarter.startErr)
   271  }
   272  
   273  func (*runnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) {
   274  	// Original deadlock problem that this tests for:
   275  	// A worker tries to call StartWorker in its start function
   276  	// at the same time another worker dies with a fatal error.
   277  	// It might not be able to send on startc.
   278  	runner := worker.NewRunner(allFatal, noImportance)
   279  
   280  	selfStarter := newTestWorkerStarter()
   281  	// make the startNotify channel synchronous so
   282  	// we can delay the start indefinitely.
   283  	selfStarter.startNotify = make(chan bool)
   284  	selfStarter.hook = func() {
   285  		runner.StartWorker("another", func() (worker.Worker, error) {
   286  			return nil, fmt.Errorf("no worker started")
   287  		})
   288  	}
   289  	err := runner.StartWorker("self starter", testWorkerStart(selfStarter))
   290  	c.Assert(err, jc.ErrorIsNil)
   291  
   292  	fatalStarter := newTestWorkerStarter()
   293  	fatalStarter.startErr = fmt.Errorf("a fatal error")
   294  
   295  	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
   296  	c.Assert(err, jc.ErrorIsNil)
   297  
   298  	// Wait for the runner loop to react to the fatal
   299  	// error and go into final shutdown mode.
   300  	time.Sleep(10 * time.Millisecond)
   301  
   302  	// At this point, the loop is in shutdown mode, but the
   303  	// selfStarter's worker is still in its start function.
   304  	// When the start function continues (the first assertStarted
   305  	// allows that to happen) it will try to create a new
   306  	// worker. This failed in an earlier version of the code because the
   307  	// loop was not ready to receive start requests.
   308  
   309  	selfStarter.assertStarted(c, true)
   310  	selfStarter.assertStarted(c, false)
   311  	err = runner.Wait()
   312  	c.Assert(err, gc.Equals, fatalStarter.startErr)
   313  }
   314  
   315  type testWorkerStarter struct {
   316  	startCount int32
   317  
   318  	// startNotify receives true when the worker starts
   319  	// and false when it exits. If startErr is non-nil,
   320  	// it sends false only.
   321  	startNotify chan bool
   322  
   323  	// If stopWait is non-nil, the worker will
   324  	// wait for a value to be sent on it before
   325  	// exiting.
   326  	stopWait chan struct{}
   327  
   328  	// Sending a value on die causes the worker
   329  	// to die with the given error.
   330  	die chan error
   331  
   332  	// If startErr is non-nil, the worker will die immediately
   333  	// with this error after starting.
   334  	startErr error
   335  
   336  	// If stopErr is non-nil, the worker will die with this
   337  	// error when asked to stop.
   338  	stopErr error
   339  
   340  	// The hook function is called after starting the worker.
   341  	hook func()
   342  }
   343  
   344  func newTestWorkerStarter() *testWorkerStarter {
   345  	return &testWorkerStarter{
   346  		die:         make(chan error, 1),
   347  		startNotify: make(chan bool, 100),
   348  		hook:        func() {},
   349  	}
   350  }
   351  
   352  func (starter *testWorkerStarter) assertStarted(c *gc.C, started bool) {
   353  	select {
   354  	case isStarted := <-starter.startNotify:
   355  		c.Assert(isStarted, gc.Equals, started)
   356  	case <-time.After(1 * time.Second):
   357  		c.Fatalf("timed out waiting for start notification")
   358  	}
   359  }
   360  
   361  func (starter *testWorkerStarter) assertNeverStarted(c *gc.C) {
   362  	select {
   363  	case isStarted := <-starter.startNotify:
   364  		c.Fatalf("got unexpected start notification: %v", isStarted)
   365  	case <-time.After(worker.RestartDelay + testing.ShortWait):
   366  	}
   367  }
   368  
   369  func testWorkerStart(starter *testWorkerStarter) func() (worker.Worker, error) {
   370  	return func() (worker.Worker, error) {
   371  		return starter.start()
   372  	}
   373  }
   374  
   375  func (starter *testWorkerStarter) start() (worker.Worker, error) {
   376  	if count := atomic.AddInt32(&starter.startCount, 1); count != 1 {
   377  		panic(fmt.Errorf("unexpected start count %d; expected 1", count))
   378  	}
   379  	if starter.startErr != nil {
   380  		starter.startNotify <- false
   381  		return nil, starter.startErr
   382  	}
   383  	task := &testWorker{
   384  		starter: starter,
   385  	}
   386  	starter.startNotify <- true
   387  	go task.run()
   388  	return task, nil
   389  }
   390  
   391  type testWorker struct {
   392  	starter *testWorkerStarter
   393  	tomb    tomb.Tomb
   394  }
   395  
   396  func (t *testWorker) Kill() {
   397  	t.tomb.Kill(nil)
   398  }
   399  
   400  func (t *testWorker) Wait() error {
   401  	return t.tomb.Wait()
   402  }
   403  
   404  func (t *testWorker) run() {
   405  	defer t.tomb.Done()
   406  
   407  	t.starter.hook()
   408  	select {
   409  	case <-t.tomb.Dying():
   410  		t.tomb.Kill(t.starter.stopErr)
   411  	case err := <-t.starter.die:
   412  		t.tomb.Kill(err)
   413  	}
   414  	if t.starter.stopWait != nil {
   415  		<-t.starter.stopWait
   416  	}
   417  	t.starter.startNotify <- false
   418  	if count := atomic.AddInt32(&t.starter.startCount, -1); count != 0 {
   419  		panic(fmt.Errorf("unexpected start count %d; expected 0", count))
   420  	}
   421  }