github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/worker/runner_test.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package worker_test
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"sync/atomic"
    10  	"time"
    11  
    12  	jc "github.com/juju/testing/checkers"
    13  	gc "gopkg.in/check.v1"
    14  	"launchpad.net/tomb"
    15  
    16  	"github.com/juju/juju/testing"
    17  	"github.com/juju/juju/worker"
    18  )
    19  
// runnerSuite groups the tests for worker.Runner. It embeds
// testing.BaseSuite, whose SetUpTest is invoked from this suite's own
// SetUpTest.
type runnerSuite struct {
	testing.BaseSuite
}

// Register the suite with gocheck.
var _ = gc.Suite(&runnerSuite{})
    25  
    26  func noneFatal(error) bool {
    27  	return false
    28  }
    29  
    30  func allFatal(error) bool {
    31  	return true
    32  }
    33  
    34  func noImportance(err0, err1 error) bool {
    35  	return false
    36  }
    37  
    38  func (s *runnerSuite) SetUpTest(c *gc.C) {
    39  	s.BaseSuite.SetUpTest(c)
    40  	// Avoid patching RestartDealy to zero, as it changes worker behaviour.
    41  	s.PatchValue(&worker.RestartDelay, time.Duration(time.Millisecond))
    42  }
    43  
    44  func (*runnerSuite) TestOneWorkerStart(c *gc.C) {
    45  	runner := worker.NewRunner(noneFatal, noImportance)
    46  	starter := newTestWorkerStarter()
    47  	err := runner.StartWorker("id", testWorkerStart(starter))
    48  	c.Assert(err, jc.ErrorIsNil)
    49  	starter.assertStarted(c, true)
    50  
    51  	c.Assert(worker.Stop(runner), gc.IsNil)
    52  	starter.assertStarted(c, false)
    53  }
    54  
    55  func (*runnerSuite) TestOneWorkerFinish(c *gc.C) {
    56  	runner := worker.NewRunner(noneFatal, noImportance)
    57  	starter := newTestWorkerStarter()
    58  	err := runner.StartWorker("id", testWorkerStart(starter))
    59  	c.Assert(err, jc.ErrorIsNil)
    60  	starter.assertStarted(c, true)
    61  
    62  	starter.die <- nil
    63  	starter.assertStarted(c, false)
    64  	starter.assertNeverStarted(c)
    65  
    66  	c.Assert(worker.Stop(runner), gc.IsNil)
    67  }
    68  
    69  func (*runnerSuite) TestOneWorkerRestart(c *gc.C) {
    70  	runner := worker.NewRunner(noneFatal, noImportance)
    71  	starter := newTestWorkerStarter()
    72  	err := runner.StartWorker("id", testWorkerStart(starter))
    73  	c.Assert(err, jc.ErrorIsNil)
    74  	starter.assertStarted(c, true)
    75  
    76  	// Check it restarts a few times time.
    77  	for i := 0; i < 3; i++ {
    78  		starter.die <- fmt.Errorf("an error")
    79  		starter.assertStarted(c, false)
    80  		starter.assertStarted(c, true)
    81  	}
    82  
    83  	c.Assert(worker.Stop(runner), gc.IsNil)
    84  	starter.assertStarted(c, false)
    85  }
    86  
    87  func (*runnerSuite) TestOneWorkerStartFatalError(c *gc.C) {
    88  	runner := worker.NewRunner(allFatal, noImportance)
    89  	starter := newTestWorkerStarter()
    90  	starter.startErr = errors.New("cannot start test task")
    91  	err := runner.StartWorker("id", testWorkerStart(starter))
    92  	c.Assert(err, jc.ErrorIsNil)
    93  	err = runner.Wait()
    94  	c.Assert(err, gc.Equals, starter.startErr)
    95  }
    96  
    97  func (*runnerSuite) TestOneWorkerDieFatalError(c *gc.C) {
    98  	runner := worker.NewRunner(allFatal, noImportance)
    99  	starter := newTestWorkerStarter()
   100  	err := runner.StartWorker("id", testWorkerStart(starter))
   101  	c.Assert(err, jc.ErrorIsNil)
   102  	starter.assertStarted(c, true)
   103  	dieErr := errors.New("error when running")
   104  	starter.die <- dieErr
   105  	err = runner.Wait()
   106  	c.Assert(err, gc.Equals, dieErr)
   107  	starter.assertStarted(c, false)
   108  }
   109  
   110  func (*runnerSuite) TestOneWorkerStartStop(c *gc.C) {
   111  	runner := worker.NewRunner(allFatal, noImportance)
   112  	starter := newTestWorkerStarter()
   113  	err := runner.StartWorker("id", testWorkerStart(starter))
   114  	c.Assert(err, jc.ErrorIsNil)
   115  	starter.assertStarted(c, true)
   116  	err = runner.StopWorker("id")
   117  	c.Assert(err, jc.ErrorIsNil)
   118  	starter.assertStarted(c, false)
   119  	c.Assert(worker.Stop(runner), gc.IsNil)
   120  }
   121  
   122  func (*runnerSuite) TestOneWorkerStopFatalError(c *gc.C) {
   123  	runner := worker.NewRunner(allFatal, noImportance)
   124  	starter := newTestWorkerStarter()
   125  	starter.stopErr = errors.New("stop error")
   126  	err := runner.StartWorker("id", testWorkerStart(starter))
   127  	c.Assert(err, jc.ErrorIsNil)
   128  	starter.assertStarted(c, true)
   129  	err = runner.StopWorker("id")
   130  	c.Assert(err, jc.ErrorIsNil)
   131  	err = runner.Wait()
   132  	c.Assert(err, gc.Equals, starter.stopErr)
   133  }
   134  
   135  func (*runnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) {
   136  	worker.RestartDelay = 3 * time.Second
   137  	runner := worker.NewRunner(allFatal, noImportance)
   138  	starter := newTestWorkerStarter()
   139  	starter.stopWait = make(chan struct{})
   140  
   141  	err := runner.StartWorker("id", testWorkerStart(starter))
   142  	c.Assert(err, jc.ErrorIsNil)
   143  	starter.assertStarted(c, true)
   144  	err = runner.StopWorker("id")
   145  	c.Assert(err, jc.ErrorIsNil)
   146  	err = runner.StartWorker("id", testWorkerStart(starter))
   147  	c.Assert(err, jc.ErrorIsNil)
   148  
   149  	close(starter.stopWait)
   150  	starter.assertStarted(c, false)
   151  	// Check that the task is restarted immediately without
   152  	// the usual restart timeout delay.
   153  	t0 := time.Now()
   154  	starter.assertStarted(c, true)
   155  	restartDuration := time.Since(t0)
   156  	if restartDuration > 1*time.Second {
   157  		c.Fatalf("task did not restart immediately")
   158  	}
   159  	c.Assert(worker.Stop(runner), gc.IsNil)
   160  }
   161  
   162  func (*runnerSuite) TestOneWorkerRestartDelay(c *gc.C) {
   163  	worker.RestartDelay = 100 * time.Millisecond
   164  	runner := worker.NewRunner(noneFatal, noImportance)
   165  	starter := newTestWorkerStarter()
   166  	err := runner.StartWorker("id", testWorkerStart(starter))
   167  	c.Assert(err, jc.ErrorIsNil)
   168  	starter.assertStarted(c, true)
   169  	starter.die <- fmt.Errorf("non-fatal error")
   170  	starter.assertStarted(c, false)
   171  	t0 := time.Now()
   172  	starter.assertStarted(c, true)
   173  	restartDuration := time.Since(t0)
   174  	if restartDuration < worker.RestartDelay {
   175  		c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, worker.RestartDelay)
   176  	}
   177  	c.Assert(worker.Stop(runner), gc.IsNil)
   178  }
   179  
   180  type errorLevel int
   181  
   182  func (e errorLevel) Error() string {
   183  	return fmt.Sprintf("error with importance %d", e)
   184  }
   185  
   186  func (*runnerSuite) TestErrorImportance(c *gc.C) {
   187  	moreImportant := func(err0, err1 error) bool {
   188  		return err0.(errorLevel) > err1.(errorLevel)
   189  	}
   190  	id := func(i int) string { return fmt.Sprint(i) }
   191  	runner := worker.NewRunner(allFatal, moreImportant)
   192  	for i := 0; i < 10; i++ {
   193  		starter := newTestWorkerStarter()
   194  		starter.stopErr = errorLevel(i)
   195  		err := runner.StartWorker(id(i), testWorkerStart(starter))
   196  		c.Assert(err, jc.ErrorIsNil)
   197  	}
   198  	err := runner.StopWorker(id(4))
   199  	c.Assert(err, jc.ErrorIsNil)
   200  	err = runner.Wait()
   201  	c.Assert(err, gc.Equals, errorLevel(9))
   202  }
   203  
   204  func (*runnerSuite) TestStartWorkerWhenDead(c *gc.C) {
   205  	runner := worker.NewRunner(allFatal, noImportance)
   206  	c.Assert(worker.Stop(runner), gc.IsNil)
   207  	c.Assert(runner.StartWorker("foo", nil), gc.Equals, worker.ErrDead)
   208  }
   209  
   210  func (*runnerSuite) TestStopWorkerWhenDead(c *gc.C) {
   211  	runner := worker.NewRunner(allFatal, noImportance)
   212  	c.Assert(worker.Stop(runner), gc.IsNil)
   213  	c.Assert(runner.StopWorker("foo"), gc.Equals, worker.ErrDead)
   214  }
   215  
   216  func (*runnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) {
   217  	runner := worker.NewRunner(allFatal, noImportance)
   218  	var starters []*testWorkerStarter
   219  	for i := 0; i < 10; i++ {
   220  		starter := newTestWorkerStarter()
   221  		err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter))
   222  		c.Assert(err, jc.ErrorIsNil)
   223  		starters = append(starters, starter)
   224  	}
   225  	for _, starter := range starters {
   226  		starter.assertStarted(c, true)
   227  	}
   228  	dieErr := errors.New("fatal error")
   229  	starters[4].die <- dieErr
   230  	err := runner.Wait()
   231  	c.Assert(err, gc.Equals, dieErr)
   232  	for _, starter := range starters {
   233  		starter.assertStarted(c, false)
   234  	}
   235  }
   236  
   237  func (*runnerSuite) TestFatalErrorWhileStarting(c *gc.C) {
   238  	// Original deadlock problem that this tests for:
   239  	// A worker dies with fatal error while another worker
   240  	// is inside start(). runWorker can't send startInfo on startedc.
   241  	runner := worker.NewRunner(allFatal, noImportance)
   242  
   243  	slowStarter := newTestWorkerStarter()
   244  	// make the startNotify channel synchronous so
   245  	// we can delay the start indefinitely.
   246  	slowStarter.startNotify = make(chan bool)
   247  
   248  	err := runner.StartWorker("slow starter", testWorkerStart(slowStarter))
   249  	c.Assert(err, jc.ErrorIsNil)
   250  
   251  	fatalStarter := newTestWorkerStarter()
   252  	fatalStarter.startErr = fmt.Errorf("a fatal error")
   253  
   254  	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
   255  	c.Assert(err, jc.ErrorIsNil)
   256  
   257  	// Wait for the runner loop to react to the fatal
   258  	// error and go into final shutdown mode.
   259  	time.Sleep(10 * time.Millisecond)
   260  
   261  	// At this point, the loop is in shutdown mode, but the
   262  	// slowStarter's worker is still in its start function.
   263  	// When the start function continues (the first assertStarted
   264  	// allows that to happen) and returns the new Worker,
   265  	// runWorker will try to send it on runner.startedc.
   266  	// This test makes sure that succeeds ok.
   267  
   268  	slowStarter.assertStarted(c, true)
   269  	slowStarter.assertStarted(c, false)
   270  	err = runner.Wait()
   271  	c.Assert(err, gc.Equals, fatalStarter.startErr)
   272  }
   273  
   274  func (*runnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) {
   275  	// Original deadlock problem that this tests for:
   276  	// A worker tries to call StartWorker in its start function
   277  	// at the same time another worker dies with a fatal error.
   278  	// It might not be able to send on startc.
   279  	runner := worker.NewRunner(allFatal, noImportance)
   280  
   281  	selfStarter := newTestWorkerStarter()
   282  	// make the startNotify channel synchronous so
   283  	// we can delay the start indefinitely.
   284  	selfStarter.startNotify = make(chan bool)
   285  	selfStarter.hook = func() {
   286  		runner.StartWorker("another", func() (worker.Worker, error) {
   287  			return nil, fmt.Errorf("no worker started")
   288  		})
   289  	}
   290  	err := runner.StartWorker("self starter", testWorkerStart(selfStarter))
   291  	c.Assert(err, jc.ErrorIsNil)
   292  
   293  	fatalStarter := newTestWorkerStarter()
   294  	fatalStarter.startErr = fmt.Errorf("a fatal error")
   295  
   296  	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
   297  	c.Assert(err, jc.ErrorIsNil)
   298  
   299  	// Wait for the runner loop to react to the fatal
   300  	// error and go into final shutdown mode.
   301  	time.Sleep(10 * time.Millisecond)
   302  
   303  	// At this point, the loop is in shutdown mode, but the
   304  	// selfStarter's worker is still in its start function.
   305  	// When the start function continues (the first assertStarted
   306  	// allows that to happen) it will try to create a new
   307  	// worker. This failed in an earlier version of the code because the
   308  	// loop was not ready to receive start requests.
   309  
   310  	selfStarter.assertStarted(c, true)
   311  	selfStarter.assertStarted(c, false)
   312  	err = runner.Wait()
   313  	c.Assert(err, gc.Equals, fatalStarter.startErr)
   314  }
   315  
// testWorkerStarter is the test double behind the start functions handed
// to the runner. Its fields configure how the workers it creates behave
// and let a test observe when they start and exit.
type testWorkerStarter struct {
	// startCount is incremented by start and decremented when a worker's
	// run loop exits; both places panic if the resulting value is not
	// the expected 1 or 0 respectively.
	startCount int32

	// startNotify receives true when the worker starts
	// and false when it exits. If startErr is non-nil,
	// start sends false only and no worker is created.
	startNotify chan bool

	// If stopWait is non-nil, the worker blocks on a receive
	// from it (closing the channel also releases it) before
	// sending its final notification and exiting.
	stopWait chan struct{}

	// Sending a value on die causes the worker
	// to die with the given error.
	die chan error

	// If startErr is non-nil, start returns this error
	// instead of creating a worker.
	startErr error

	// If stopErr is non-nil, the worker will die with this
	// error when asked to stop.
	stopErr error

	// The hook function is called at the beginning of the
	// worker's run loop, on the worker's own goroutine.
	hook func()
}
   344  
   345  func newTestWorkerStarter() *testWorkerStarter {
   346  	return &testWorkerStarter{
   347  		die:         make(chan error, 1),
   348  		startNotify: make(chan bool, 100),
   349  		hook:        func() {},
   350  	}
   351  }
   352  
   353  func (starter *testWorkerStarter) assertStarted(c *gc.C, started bool) {
   354  	select {
   355  	case isStarted := <-starter.startNotify:
   356  		c.Assert(isStarted, gc.Equals, started)
   357  	case <-time.After(1 * time.Second):
   358  		c.Fatalf("timed out waiting for start notification")
   359  	}
   360  }
   361  
   362  func (starter *testWorkerStarter) assertNeverStarted(c *gc.C) {
   363  	select {
   364  	case isStarted := <-starter.startNotify:
   365  		c.Fatalf("got unexpected start notification: %v", isStarted)
   366  	case <-time.After(worker.RestartDelay + testing.ShortWait):
   367  	}
   368  }
   369  
   370  func testWorkerStart(starter *testWorkerStarter) func() (worker.Worker, error) {
   371  	return func() (worker.Worker, error) {
   372  		return starter.start()
   373  	}
   374  }
   375  
   376  func (starter *testWorkerStarter) start() (worker.Worker, error) {
   377  	if count := atomic.AddInt32(&starter.startCount, 1); count != 1 {
   378  		panic(fmt.Errorf("unexpected start count %d; expected 1", count))
   379  	}
   380  	if starter.startErr != nil {
   381  		starter.startNotify <- false
   382  		return nil, starter.startErr
   383  	}
   384  	task := &testWorker{
   385  		starter: starter,
   386  	}
   387  	starter.startNotify <- true
   388  	go task.run()
   389  	return task, nil
   390  }
   391  
// testWorker is the worker value returned by testWorkerStarter.start.
type testWorker struct {
	// starter is the testWorkerStarter that created this worker; run
	// reads its configuration and reports back through its channels.
	starter *testWorkerStarter
	// tomb tracks the worker's lifecycle; Kill, Wait and run operate
	// on it.
	tomb tomb.Tomb
}
   396  
// Kill asks the worker to stop by killing its tomb with a nil reason.
func (t *testWorker) Kill() {
	t.tomb.Kill(nil)
}
   400  
// Wait returns the result of waiting on the worker's tomb.
func (t *testWorker) Wait() error {
	return t.tomb.Wait()
}
   404  
   405  func (t *testWorker) run() {
   406  	defer t.tomb.Done()
   407  
   408  	t.starter.hook()
   409  	select {
   410  	case <-t.tomb.Dying():
   411  		t.tomb.Kill(t.starter.stopErr)
   412  	case err := <-t.starter.die:
   413  		t.tomb.Kill(err)
   414  	}
   415  	if t.starter.stopWait != nil {
   416  		<-t.starter.stopWait
   417  	}
   418  	t.starter.startNotify <- false
   419  	if count := atomic.AddInt32(&t.starter.startCount, -1); count != 0 {
   420  		panic(fmt.Errorf("unexpected start count %d; expected 0", count))
   421  	}
   422  }