github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/worker/peergrouper/worker_test.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package peergrouper
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"time"
    10  
    11  	jc "github.com/juju/testing/checkers"
    12  	"github.com/juju/utils/voyeur"
    13  	gc "launchpad.net/gocheck"
    14  
    15  	"github.com/juju/juju/instance"
    16  	"github.com/juju/juju/juju/testing"
    17  	statetesting "github.com/juju/juju/state/testing"
    18  	coretesting "github.com/juju/juju/testing"
    19  	"github.com/juju/juju/worker"
    20  )
    21  
    22  type workerJujuConnSuite struct {
    23  	testing.JujuConnSuite
    24  }
    25  
    26  var _ = gc.Suite(&workerJujuConnSuite{})
    27  
    28  func (s *workerJujuConnSuite) TestStartStop(c *gc.C) {
    29  	w, err := New(s.State)
    30  	c.Assert(err, gc.IsNil)
    31  	err = worker.Stop(w)
    32  	c.Assert(err, gc.IsNil)
    33  }
    34  
    35  func (s *workerJujuConnSuite) TestPublisherSetsAPIHostPorts(c *gc.C) {
    36  	st := newFakeState()
    37  	initState(c, st, 3)
    38  
    39  	watcher := s.State.WatchAPIHostPorts()
    40  	cwatch := statetesting.NewNotifyWatcherC(c, s.State, watcher)
    41  	cwatch.AssertOneChange()
    42  
    43  	statePublish := newPublisher(s.State)
    44  
    45  	// Wrap the publisher so that we can call StartSync immediately
    46  	// after the publishAPIServers method is called.
    47  	publish := func(apiServers [][]instance.HostPort, instanceIds []instance.Id) error {
    48  		err := statePublish.publishAPIServers(apiServers, instanceIds)
    49  		s.State.StartSync()
    50  		return err
    51  	}
    52  
    53  	w := newWorker(st, publisherFunc(publish))
    54  	defer func() {
    55  		c.Check(worker.Stop(w), gc.IsNil)
    56  	}()
    57  
    58  	cwatch.AssertOneChange()
    59  	hps, err := s.State.APIHostPorts()
    60  	c.Assert(err, gc.IsNil)
    61  	assertAPIHostPorts(c, hps, expectedAPIHostPorts(3))
    62  }
    63  
    64  type workerSuite struct {
    65  	coretesting.BaseSuite
    66  }
    67  
    68  var _ = gc.Suite(&workerSuite{})
    69  
    70  func (s *workerSuite) SetUpTest(c *gc.C) {
    71  	s.BaseSuite.SetUpTest(c)
    72  	resetErrors()
    73  }
    74  
    75  // initState initializes the fake state with a single
    76  // replicaset member and numMachines machines
    77  // primed to vote.
    78  func initState(c *gc.C, st *fakeState, numMachines int) {
    79  	var ids []string
    80  	for i := 10; i < 10+numMachines; i++ {
    81  		id := fmt.Sprint(i)
    82  		m := st.addMachine(id, true)
    83  		m.setInstanceId(instance.Id("id-" + id))
    84  		m.setStateHostPort(fmt.Sprintf("0.1.2.%d:%d", i, mongoPort))
    85  		ids = append(ids, id)
    86  		c.Assert(m.MongoHostPorts(), gc.HasLen, 1)
    87  
    88  		m.setAPIHostPorts(addressesWithPort(apiPort, fmt.Sprintf("0.1.2.%d", i)))
    89  	}
    90  	st.machine("10").SetHasVote(true)
    91  	st.setStateServers(ids...)
    92  	st.session.Set(mkMembers("0v"))
    93  	st.session.setStatus(mkStatuses("0p"))
    94  	st.check = checkInvariants
    95  }
    96  
    97  // expectedAPIHostPorts returns the expected addresses
    98  // of the machines as created by initState.
    99  func expectedAPIHostPorts(n int) [][]instance.HostPort {
   100  	servers := make([][]instance.HostPort, n)
   101  	for i := range servers {
   102  		servers[i] = []instance.HostPort{{
   103  			Address: instance.NewAddress(fmt.Sprintf("0.1.2.%d", i+10), instance.NetworkUnknown),
   104  			Port:    apiPort,
   105  		}}
   106  	}
   107  	return servers
   108  }
   109  
   110  func addressesWithPort(port int, addrs ...string) []instance.HostPort {
   111  	return instance.AddressesWithPort(instance.NewAddresses(addrs...), port)
   112  }
   113  
   114  func (s *workerSuite) TestSetsAndUpdatesMembers(c *gc.C) {
   115  	s.PatchValue(&pollInterval, 5*time.Millisecond)
   116  
   117  	st := newFakeState()
   118  	initState(c, st, 3)
   119  
   120  	memberWatcher := st.session.members.Watch()
   121  	mustNext(c, memberWatcher)
   122  	assertMembers(c, memberWatcher.Value(), mkMembers("0v"))
   123  
   124  	logger.Infof("starting worker")
   125  	w := newWorker(st, noPublisher{})
   126  	defer func() {
   127  		c.Check(worker.Stop(w), gc.IsNil)
   128  	}()
   129  
   130  	// Wait for the worker to set the initial members.
   131  	mustNext(c, memberWatcher)
   132  	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2"))
   133  
   134  	// Update the status of the new members
   135  	// and check that they become voting.
   136  	c.Logf("updating new member status")
   137  	st.session.setStatus(mkStatuses("0p 1s 2s"))
   138  	mustNext(c, memberWatcher)
   139  	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v"))
   140  
   141  	c.Logf("adding another machine")
   142  	// Add another machine.
   143  	m13 := st.addMachine("13", false)
   144  	m13.setStateHostPort(fmt.Sprintf("0.1.2.%d:%d", 13, mongoPort))
   145  	st.setStateServers("10", "11", "12", "13")
   146  
   147  	c.Logf("waiting for new member to be added")
   148  	mustNext(c, memberWatcher)
   149  	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3"))
   150  
   151  	// Remove vote from an existing member;
   152  	// and give it to the new machine.
   153  	// Also set the status of the new machine to
   154  	// healthy.
   155  	c.Logf("removing vote from machine 10 and adding it to machine 13")
   156  	st.machine("10").setWantsVote(false)
   157  	st.machine("13").setWantsVote(true)
   158  
   159  	st.session.setStatus(mkStatuses("0p 1s 2s 3s"))
   160  
   161  	// Check that the new machine gets the vote and the
   162  	// old machine loses it.
   163  	c.Logf("waiting for vote switch")
   164  	mustNext(c, memberWatcher)
   165  	assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v"))
   166  
   167  	c.Logf("removing old machine")
   168  	// Remove the old machine.
   169  	st.removeMachine("10")
   170  	st.setStateServers("11", "12", "13")
   171  
   172  	// Check that it's removed from the members.
   173  	c.Logf("waiting for removal")
   174  	mustNext(c, memberWatcher)
   175  	assertMembers(c, memberWatcher.Value(), mkMembers("1v 2v 3v"))
   176  }
   177  
   178  func (s *workerSuite) TestHasVoteMaintainedEvenWhenReplicaSetFails(c *gc.C) {
   179  	st := newFakeState()
   180  
   181  	// Simulate a state where we have four state servers,
   182  	// one has gone down, and we're replacing it:
   183  	// 0 - hasvote true, wantsvote false, down
   184  	// 1 - hasvote true, wantsvote true
   185  	// 2 - hasvote true, wantsvote true
   186  	// 3 - hasvote false, wantsvote true
   187  	//
   188  	// When it starts, the worker should move the vote from
   189  	// 0 to 3. We'll arrange things so that it will succeed in
   190  	// setting the membership but fail setting the HasVote
   191  	// to false.
   192  	initState(c, st, 4)
   193  	st.machine("10").SetHasVote(true)
   194  	st.machine("11").SetHasVote(true)
   195  	st.machine("12").SetHasVote(true)
   196  	st.machine("13").SetHasVote(false)
   197  
   198  	st.machine("10").setWantsVote(false)
   199  	st.machine("11").setWantsVote(true)
   200  	st.machine("12").setWantsVote(true)
   201  	st.machine("13").setWantsVote(true)
   202  
   203  	st.session.Set(mkMembers("0v 1v 2v 3"))
   204  	st.session.setStatus(mkStatuses("0H 1p 2s 3s"))
   205  
   206  	// Make the worker fail to set HasVote to false
   207  	// after changing the replica set membership.
   208  	setErrorFor("Machine.SetHasVote * false", errors.New("frood"))
   209  
   210  	memberWatcher := st.session.members.Watch()
   211  	mustNext(c, memberWatcher)
   212  	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3"))
   213  
   214  	w := newWorker(st, noPublisher{})
   215  	done := make(chan error)
   216  	go func() {
   217  		done <- w.Wait()
   218  	}()
   219  
   220  	// Wait for the worker to set the initial members.
   221  	mustNext(c, memberWatcher)
   222  	assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v"))
   223  
   224  	// The worker should encounter an error setting the
   225  	// has-vote status to false and exit.
   226  	select {
   227  	case err := <-done:
   228  		c.Assert(err, gc.ErrorMatches, `cannot set voting status of "[0-9]+" to false: frood`)
   229  	case <-time.After(coretesting.LongWait):
   230  		c.Fatalf("timed out waiting for worker to exit")
   231  	}
   232  
   233  	// Start the worker again - although the membership should
   234  	// not change, the HasVote status should be updated correctly.
   235  	resetErrors()
   236  	w = newWorker(st, noPublisher{})
   237  
   238  	// Watch all the machines for changes, so we can check
   239  	// their has-vote status without polling.
   240  	changed := make(chan struct{}, 1)
   241  	for i := 10; i < 14; i++ {
   242  		watcher := st.machine(fmt.Sprint(i)).val.Watch()
   243  		defer watcher.Close()
   244  		go func() {
   245  			for watcher.Next() {
   246  				select {
   247  				case changed <- struct{}{}:
   248  				default:
   249  				}
   250  			}
   251  		}()
   252  	}
   253  	timeout := time.After(coretesting.LongWait)
   254  loop:
   255  	for {
   256  		select {
   257  		case <-changed:
   258  			correct := true
   259  			for i := 10; i < 14; i++ {
   260  				hasVote := st.machine(fmt.Sprint(i)).HasVote()
   261  				expectHasVote := i != 10
   262  				if hasVote != expectHasVote {
   263  					correct = false
   264  				}
   265  			}
   266  			if correct {
   267  				break loop
   268  			}
   269  		case <-timeout:
   270  			c.Fatalf("timed out waiting for vote to be set")
   271  		}
   272  	}
   273  }
   274  
   275  func (s *workerSuite) TestAddressChange(c *gc.C) {
   276  	st := newFakeState()
   277  	initState(c, st, 3)
   278  
   279  	memberWatcher := st.session.members.Watch()
   280  	mustNext(c, memberWatcher)
   281  	assertMembers(c, memberWatcher.Value(), mkMembers("0v"))
   282  
   283  	logger.Infof("starting worker")
   284  	w := newWorker(st, noPublisher{})
   285  	defer func() {
   286  		c.Check(worker.Stop(w), gc.IsNil)
   287  	}()
   288  
   289  	// Wait for the worker to set the initial members.
   290  	mustNext(c, memberWatcher)
   291  	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2"))
   292  
   293  	// Change an address and wait for it to be changed in the
   294  	// members.
   295  	st.machine("11").setStateHostPort("0.1.99.99:9876")
   296  
   297  	mustNext(c, memberWatcher)
   298  	expectMembers := mkMembers("0v 1 2")
   299  	expectMembers[1].Address = "0.1.99.99:9876"
   300  	assertMembers(c, memberWatcher.Value(), expectMembers)
   301  }
   302  
   303  var fatalErrorsTests = []struct {
   304  	errPattern string
   305  	err        error
   306  	expectErr  string
   307  }{{
   308  	errPattern: "State.StateServerInfo",
   309  	expectErr:  "cannot get state server info: sample",
   310  }, {
   311  	errPattern: "Machine.SetHasVote 11 true",
   312  	expectErr:  `cannot set voting status of "11" to true: sample`,
   313  }, {
   314  	errPattern: "Session.CurrentStatus",
   315  	expectErr:  "cannot get replica set status: sample",
   316  }, {
   317  	errPattern: "Session.CurrentMembers",
   318  	expectErr:  "cannot get replica set members: sample",
   319  }, {
   320  	errPattern: "State.Machine *",
   321  	expectErr:  `cannot get machine "10": sample`,
   322  }, {
   323  	errPattern: "Machine.InstanceId *",
   324  	expectErr:  `cannot get API server info: sample`,
   325  }}
   326  
   327  func (s *workerSuite) TestFatalErrors(c *gc.C) {
   328  	s.PatchValue(&pollInterval, 5*time.Millisecond)
   329  	for i, test := range fatalErrorsTests {
   330  		c.Logf("test %d: %s -> %s", i, test.errPattern, test.expectErr)
   331  		resetErrors()
   332  		st := newFakeState()
   333  		st.session.InstantlyReady = true
   334  		initState(c, st, 3)
   335  		setErrorFor(test.errPattern, errors.New("sample"))
   336  		w := newWorker(st, noPublisher{})
   337  		done := make(chan error)
   338  		go func() {
   339  			done <- w.Wait()
   340  		}()
   341  		select {
   342  		case err := <-done:
   343  			c.Assert(err, gc.ErrorMatches, test.expectErr)
   344  		case <-time.After(coretesting.LongWait):
   345  			c.Fatalf("timed out waiting for error")
   346  		}
   347  	}
   348  }
   349  
   350  func (s *workerSuite) TestSetMembersErrorIsNotFatal(c *gc.C) {
   351  	st := newFakeState()
   352  	initState(c, st, 3)
   353  	st.session.setStatus(mkStatuses("0p 1s 2s"))
   354  	var isSet voyeur.Value
   355  	count := 0
   356  	setErrorFuncFor("Session.Set", func() error {
   357  		isSet.Set(count)
   358  		count++
   359  		return errors.New("sample")
   360  	})
   361  	s.PatchValue(&initialRetryInterval, 10*time.Microsecond)
   362  	s.PatchValue(&maxRetryInterval, coretesting.ShortWait/4)
   363  
   364  	expectedIterations := 0
   365  	for d := initialRetryInterval; d < maxRetryInterval*2; d *= 2 {
   366  		expectedIterations++
   367  	}
   368  
   369  	w := newWorker(st, noPublisher{})
   370  	defer func() {
   371  		c.Check(worker.Stop(w), gc.IsNil)
   372  	}()
   373  	isSetWatcher := isSet.Watch()
   374  
   375  	n0 := mustNext(c, isSetWatcher).(int)
   376  	time.Sleep(maxRetryInterval * 2)
   377  	n1 := mustNext(c, isSetWatcher).(int)
   378  
   379  	// The worker should have backed off exponentially...
   380  	c.Assert(n1-n0, jc.LessThan, expectedIterations+1)
   381  	c.Logf("actual iterations %d; expected iterations %d", n1-n0, expectedIterations)
   382  
   383  	// ... but only up to the maximum retry interval
   384  	n0 = mustNext(c, isSetWatcher).(int)
   385  	time.Sleep(maxRetryInterval * 2)
   386  	n1 = mustNext(c, isSetWatcher).(int)
   387  
   388  	c.Assert(n1-n0, jc.LessThan, 3)
   389  }
   390  
   391  type publisherFunc func(apiServers [][]instance.HostPort, instanceIds []instance.Id) error
   392  
   393  func (f publisherFunc) publishAPIServers(apiServers [][]instance.HostPort, instanceIds []instance.Id) error {
   394  	return f(apiServers, instanceIds)
   395  }
   396  
   397  func (s *workerSuite) TestStateServersArePublished(c *gc.C) {
   398  	publishCh := make(chan [][]instance.HostPort)
   399  	publish := func(apiServers [][]instance.HostPort, instanceIds []instance.Id) error {
   400  		publishCh <- apiServers
   401  		return nil
   402  	}
   403  
   404  	st := newFakeState()
   405  	initState(c, st, 3)
   406  	w := newWorker(st, publisherFunc(publish))
   407  	defer func() {
   408  		c.Check(worker.Stop(w), gc.IsNil)
   409  	}()
   410  	select {
   411  	case servers := <-publishCh:
   412  		assertAPIHostPorts(c, servers, expectedAPIHostPorts(3))
   413  	case <-time.After(coretesting.LongWait):
   414  		c.Fatalf("timed out waiting for publish")
   415  	}
   416  
   417  	// Change one of the servers' API addresses and check that it's published.
   418  
   419  	newMachine10APIHostPorts := addressesWithPort(apiPort, "0.2.8.124")
   420  	st.machine("10").setAPIHostPorts(newMachine10APIHostPorts)
   421  	select {
   422  	case servers := <-publishCh:
   423  		expected := expectedAPIHostPorts(3)
   424  		expected[0] = newMachine10APIHostPorts
   425  		assertAPIHostPorts(c, servers, expected)
   426  	case <-time.After(coretesting.LongWait):
   427  		c.Fatalf("timed out waiting for publish")
   428  	}
   429  }
   430  
   431  func (s *workerSuite) TestWorkerRetriesOnPublishError(c *gc.C) {
   432  	s.PatchValue(&pollInterval, coretesting.LongWait+time.Second)
   433  	s.PatchValue(&initialRetryInterval, 5*time.Millisecond)
   434  	s.PatchValue(&maxRetryInterval, initialRetryInterval)
   435  
   436  	publishCh := make(chan [][]instance.HostPort, 100)
   437  
   438  	count := 0
   439  	publish := func(apiServers [][]instance.HostPort, instanceIds []instance.Id) error {
   440  		publishCh <- apiServers
   441  		count++
   442  		if count <= 3 {
   443  			return fmt.Errorf("publish error")
   444  		}
   445  		return nil
   446  	}
   447  	st := newFakeState()
   448  	initState(c, st, 3)
   449  
   450  	w := newWorker(st, publisherFunc(publish))
   451  	defer func() {
   452  		c.Check(worker.Stop(w), gc.IsNil)
   453  	}()
   454  
   455  	for i := 0; i < 4; i++ {
   456  		select {
   457  		case servers := <-publishCh:
   458  			assertAPIHostPorts(c, servers, expectedAPIHostPorts(3))
   459  		case <-time.After(coretesting.LongWait):
   460  			c.Fatalf("timed out waiting for publish #%d", i)
   461  		}
   462  	}
   463  	select {
   464  	case <-publishCh:
   465  		c.Errorf("unexpected publish event")
   466  	case <-time.After(coretesting.ShortWait):
   467  	}
   468  }
   469  
   470  func (s *workerSuite) TestWorkerPublishesInstanceIds(c *gc.C) {
   471  	s.PatchValue(&pollInterval, coretesting.LongWait+time.Second)
   472  	s.PatchValue(&initialRetryInterval, 5*time.Millisecond)
   473  	s.PatchValue(&maxRetryInterval, initialRetryInterval)
   474  
   475  	publishCh := make(chan []instance.Id, 100)
   476  
   477  	publish := func(apiServers [][]instance.HostPort, instanceIds []instance.Id) error {
   478  		publishCh <- instanceIds
   479  		return nil
   480  	}
   481  	st := newFakeState()
   482  	initState(c, st, 3)
   483  
   484  	w := newWorker(st, publisherFunc(publish))
   485  	defer func() {
   486  		c.Check(worker.Stop(w), gc.IsNil)
   487  	}()
   488  
   489  	select {
   490  	case instanceIds := <-publishCh:
   491  		c.Assert(instanceIds, jc.SameContents, []instance.Id{"id-10", "id-11", "id-12"})
   492  	case <-time.After(coretesting.LongWait):
   493  		c.Errorf("timed out waiting for publish")
   494  	}
   495  }
   496  
   497  // mustNext waits for w's value to be set and returns it.
   498  func mustNext(c *gc.C, w *voyeur.Watcher) (val interface{}) {
   499  	done := make(chan bool)
   500  	go func() {
   501  		c.Logf("mustNext %p", w)
   502  		ok := w.Next()
   503  		val = w.Value()
   504  		c.Logf("mustNext done %p, ok %v", w, ok)
   505  		done <- ok
   506  	}()
   507  	select {
   508  	case ok := <-done:
   509  		c.Assert(ok, jc.IsTrue)
   510  		return
   511  	case <-time.After(coretesting.LongWait):
   512  		c.Fatalf("timed out waiting for value to be set")
   513  	}
   514  	panic("unreachable")
   515  }
   516  
// noPublisher is a publisher whose publishAPIServers method does
// nothing. The tests in this file pass it to newWorker when they do
// not look at what is published.
type noPublisher struct{}

// publishAPIServers ignores its arguments and always returns nil.
func (noPublisher) publishAPIServers(apiServers [][]instance.HostPort, instanceIds []instance.Id) error {
	return nil
}