github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/worker/peergrouper/worker_test.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package peergrouper
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"time"
    10  
    11  	jc "github.com/juju/testing/checkers"
    12  	"github.com/juju/utils/voyeur"
    13  	gc "gopkg.in/check.v1"
    14  
    15  	"github.com/juju/juju/instance"
    16  	"github.com/juju/juju/network"
    17  	coretesting "github.com/juju/juju/testing"
    18  	"github.com/juju/juju/worker"
    19  )
    20  
    21  type TestIPVersion struct {
    22  	version           string
    23  	formatHostPort    string
    24  	formatHost        string
    25  	machineFormatHost string
    26  	extraHostPort     string
    27  	extraHost         string
    28  	extraAddress      string
    29  	addressType       network.AddressType
    30  }
    31  
    32  var (
    33  	testIPv4 = TestIPVersion{
    34  		version:           "IPv4",
    35  		formatHostPort:    "0.1.2.%d:%d",
    36  		formatHost:        "0.1.2.%d",
    37  		machineFormatHost: "0.1.2.%d",
    38  		extraHostPort:     "0.1.99.99:9876",
    39  		extraHost:         "0.1.99.13",
    40  		extraAddress:      "0.1.99.13:1234",
    41  		addressType:       network.IPv4Address,
    42  	}
    43  	testIPv6 = TestIPVersion{
    44  		version:           "IPv6",
    45  		formatHostPort:    "[2001:DB8::%d]:%d",
    46  		formatHost:        "[2001:DB8::%d]",
    47  		machineFormatHost: "2001:DB8::%d",
    48  		extraHostPort:     "[2001:DB8::99:99]:9876",
    49  		extraHost:         "2001:DB8::99:13",
    50  		extraAddress:      "[2001:DB8::99:13]:1234",
    51  		addressType:       network.IPv6Address,
    52  	}
    53  )
    54  
    55  // DoTestForIPv4AndIPv6 runs the passed test for IPv4 and IPv6.
    56  func DoTestForIPv4AndIPv6(t func(ipVersion TestIPVersion)) {
    57  	t(testIPv4)
    58  	t(testIPv6)
    59  }
    60  
    61  type workerSuite struct {
    62  	coretesting.BaseSuite
    63  }
    64  
    65  var _ = gc.Suite(&workerSuite{})
    66  
    67  func (s *workerSuite) SetUpTest(c *gc.C) {
    68  	s.BaseSuite.SetUpTest(c)
    69  	resetErrors()
    70  }
    71  
    72  // InitState initializes the fake state with a single
    73  // replicaset member and numMachines machines
    74  // primed to vote.
    75  func InitState(c *gc.C, st *fakeState, numMachines int, ipVersion TestIPVersion) {
    76  	var ids []string
    77  	for i := 10; i < 10+numMachines; i++ {
    78  		id := fmt.Sprint(i)
    79  		m := st.addMachine(id, true)
    80  		m.setInstanceId(instance.Id("id-" + id))
    81  		m.setStateHostPort(fmt.Sprintf(ipVersion.formatHostPort, i, mongoPort))
    82  		ids = append(ids, id)
    83  		c.Assert(m.MongoHostPorts(), gc.HasLen, 1)
    84  
    85  		m.setAPIHostPorts(addressesWithPort(apiPort, fmt.Sprintf(ipVersion.formatHost, i)))
    86  	}
    87  	st.machine("10").SetHasVote(true)
    88  	st.setStateServers(ids...)
    89  	st.session.Set(mkMembers("0v", ipVersion))
    90  	st.session.setStatus(mkStatuses("0p", ipVersion))
    91  	st.check = checkInvariants
    92  }
    93  
    94  // ExpectedAPIHostPorts returns the expected addresses
    95  // of the machines as created by InitState.
    96  func ExpectedAPIHostPorts(n int, ipVersion TestIPVersion) [][]network.HostPort {
    97  	servers := make([][]network.HostPort, n)
    98  	for i := range servers {
    99  		servers[i] = []network.HostPort{{
   100  			Address: network.NewAddress(fmt.Sprintf(ipVersion.formatHost, i+10), network.ScopeUnknown),
   101  			Port:    apiPort,
   102  		}}
   103  	}
   104  	return servers
   105  }
   106  
   107  func addressesWithPort(port int, addrs ...string) []network.HostPort {
   108  	return network.AddressesWithPort(network.NewAddresses(addrs...), port)
   109  }
   110  
   111  func (s *workerSuite) TestSetsAndUpdatesMembers(c *gc.C) {
   112  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   113  		s.PatchValue(&pollInterval, 5*time.Millisecond)
   114  
   115  		st := NewFakeState()
   116  		InitState(c, st, 3, ipVersion)
   117  
   118  		memberWatcher := st.session.members.Watch()
   119  		mustNext(c, memberWatcher)
   120  		assertMembers(c, memberWatcher.Value(), mkMembers("0v", ipVersion))
   121  
   122  		logger.Infof("starting worker")
   123  		w := newWorker(st, noPublisher{})
   124  		defer func() {
   125  			c.Check(worker.Stop(w), gc.IsNil)
   126  		}()
   127  
   128  		// Wait for the worker to set the initial members.
   129  		mustNext(c, memberWatcher)
   130  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2", ipVersion))
   131  
   132  		// Update the status of the new members
   133  		// and check that they become voting.
   134  		c.Logf("updating new member status")
   135  		st.session.setStatus(mkStatuses("0p 1s 2s", ipVersion))
   136  		mustNext(c, memberWatcher)
   137  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v", ipVersion))
   138  
   139  		c.Logf("adding another machine")
   140  		// Add another machine.
   141  		m13 := st.addMachine("13", false)
   142  		m13.setStateHostPort(fmt.Sprintf(ipVersion.formatHostPort, 13, mongoPort))
   143  		st.setStateServers("10", "11", "12", "13")
   144  
   145  		c.Logf("waiting for new member to be added")
   146  		mustNext(c, memberWatcher)
   147  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3", ipVersion))
   148  
   149  		// Remove vote from an existing member;
   150  		// and give it to the new machine.
   151  		// Also set the status of the new machine to
   152  		// healthy.
   153  		c.Logf("removing vote from machine 10 and adding it to machine 13")
   154  		st.machine("10").setWantsVote(false)
   155  		st.machine("13").setWantsVote(true)
   156  
   157  		st.session.setStatus(mkStatuses("0p 1s 2s 3s", ipVersion))
   158  
   159  		// Check that the new machine gets the vote and the
   160  		// old machine loses it.
   161  		c.Logf("waiting for vote switch")
   162  		mustNext(c, memberWatcher)
   163  		assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v", ipVersion))
   164  
   165  		c.Logf("removing old machine")
   166  		// Remove the old machine.
   167  		st.removeMachine("10")
   168  		st.setStateServers("11", "12", "13")
   169  
   170  		// Check that it's removed from the members.
   171  		c.Logf("waiting for removal")
   172  		mustNext(c, memberWatcher)
   173  		assertMembers(c, memberWatcher.Value(), mkMembers("1v 2v 3v", ipVersion))
   174  	})
   175  }
   176  
   177  func (s *workerSuite) TestHasVoteMaintainedEvenWhenReplicaSetFails(c *gc.C) {
   178  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   179  		st := NewFakeState()
   180  
   181  		// Simulate a state where we have four state servers,
   182  		// one has gone down, and we're replacing it:
   183  		// 0 - hasvote true, wantsvote false, down
   184  		// 1 - hasvote true, wantsvote true
   185  		// 2 - hasvote true, wantsvote true
   186  		// 3 - hasvote false, wantsvote true
   187  		//
   188  		// When it starts, the worker should move the vote from
   189  		// 0 to 3. We'll arrange things so that it will succeed in
   190  		// setting the membership but fail setting the HasVote
   191  		// to false.
   192  		InitState(c, st, 4, ipVersion)
   193  		st.machine("10").SetHasVote(true)
   194  		st.machine("11").SetHasVote(true)
   195  		st.machine("12").SetHasVote(true)
   196  		st.machine("13").SetHasVote(false)
   197  
   198  		st.machine("10").setWantsVote(false)
   199  		st.machine("11").setWantsVote(true)
   200  		st.machine("12").setWantsVote(true)
   201  		st.machine("13").setWantsVote(true)
   202  
   203  		st.session.Set(mkMembers("0v 1v 2v 3", ipVersion))
   204  		st.session.setStatus(mkStatuses("0H 1p 2s 3s", ipVersion))
   205  
   206  		// Make the worker fail to set HasVote to false
   207  		// after changing the replica set membership.
   208  		setErrorFor("Machine.SetHasVote * false", errors.New("frood"))
   209  
   210  		memberWatcher := st.session.members.Watch()
   211  		mustNext(c, memberWatcher)
   212  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3", ipVersion))
   213  
   214  		w := newWorker(st, noPublisher{})
   215  		done := make(chan error)
   216  		go func() {
   217  			done <- w.Wait()
   218  		}()
   219  
   220  		// Wait for the worker to set the initial members.
   221  		mustNext(c, memberWatcher)
   222  		assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v", ipVersion))
   223  
   224  		// The worker should encounter an error setting the
   225  		// has-vote status to false and exit.
   226  		select {
   227  		case err := <-done:
   228  			c.Assert(err, gc.ErrorMatches, `cannot set voting status of "[0-9]+" to false: frood`)
   229  		case <-time.After(coretesting.LongWait):
   230  			c.Fatalf("timed out waiting for worker to exit")
   231  		}
   232  
   233  		// Start the worker again - although the membership should
   234  		// not change, the HasVote status should be updated correctly.
   235  		resetErrors()
   236  		w = newWorker(st, noPublisher{})
   237  
   238  		// Watch all the machines for changes, so we can check
   239  		// their has-vote status without polling.
   240  		changed := make(chan struct{}, 1)
   241  		for i := 10; i < 14; i++ {
   242  			watcher := st.machine(fmt.Sprint(i)).val.Watch()
   243  			defer watcher.Close()
   244  			go func() {
   245  				for watcher.Next() {
   246  					select {
   247  					case changed <- struct{}{}:
   248  					default:
   249  					}
   250  				}
   251  			}()
   252  		}
   253  		timeout := time.After(coretesting.LongWait)
   254  	loop:
   255  		for {
   256  			select {
   257  			case <-changed:
   258  				correct := true
   259  				for i := 10; i < 14; i++ {
   260  					hasVote := st.machine(fmt.Sprint(i)).HasVote()
   261  					expectHasVote := i != 10
   262  					if hasVote != expectHasVote {
   263  						correct = false
   264  					}
   265  				}
   266  				if correct {
   267  					break loop
   268  				}
   269  			case <-timeout:
   270  				c.Fatalf("timed out waiting for vote to be set")
   271  			}
   272  		}
   273  	})
   274  }
   275  
   276  func (s *workerSuite) TestAddressChange(c *gc.C) {
   277  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   278  		st := NewFakeState()
   279  		InitState(c, st, 3, ipVersion)
   280  
   281  		memberWatcher := st.session.members.Watch()
   282  		mustNext(c, memberWatcher)
   283  		assertMembers(c, memberWatcher.Value(), mkMembers("0v", ipVersion))
   284  
   285  		logger.Infof("starting worker")
   286  		w := newWorker(st, noPublisher{})
   287  		defer func() {
   288  			c.Check(worker.Stop(w), gc.IsNil)
   289  		}()
   290  
   291  		// Wait for the worker to set the initial members.
   292  		mustNext(c, memberWatcher)
   293  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2", ipVersion))
   294  
   295  		// Change an address and wait for it to be changed in the
   296  		// members.
   297  		st.machine("11").setStateHostPort(ipVersion.extraHostPort)
   298  
   299  		mustNext(c, memberWatcher)
   300  		expectMembers := mkMembers("0v 1 2", ipVersion)
   301  		expectMembers[1].Address = ipVersion.extraHostPort
   302  		assertMembers(c, memberWatcher.Value(), expectMembers)
   303  		resetErrors()
   304  	})
   305  }
   306  
   307  var fatalErrorsTests = []struct {
   308  	errPattern string
   309  	err        error
   310  	expectErr  string
   311  }{{
   312  	errPattern: "State.StateServerInfo",
   313  	expectErr:  "cannot get state server info: sample",
   314  }, {
   315  	errPattern: "Machine.SetHasVote 11 true",
   316  	expectErr:  `cannot set voting status of "11" to true: sample`,
   317  }, {
   318  	errPattern: "Session.CurrentStatus",
   319  	expectErr:  "cannot get replica set status: sample",
   320  }, {
   321  	errPattern: "Session.CurrentMembers",
   322  	expectErr:  "cannot get replica set members: sample",
   323  }, {
   324  	errPattern: "State.Machine *",
   325  	expectErr:  `cannot get machine "10": sample`,
   326  }, {
   327  	errPattern: "Machine.InstanceId *",
   328  	expectErr:  `cannot get API server info: sample`,
   329  }}
   330  
   331  func (s *workerSuite) TestFatalErrors(c *gc.C) {
   332  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   333  		s.PatchValue(&pollInterval, 5*time.Millisecond)
   334  		for i, testCase := range fatalErrorsTests {
   335  			c.Logf("test %d: %s -> %s", i, testCase.errPattern, testCase.expectErr)
   336  			resetErrors()
   337  			st := NewFakeState()
   338  			st.session.InstantlyReady = true
   339  			InitState(c, st, 3, ipVersion)
   340  			setErrorFor(testCase.errPattern, errors.New("sample"))
   341  			w := newWorker(st, noPublisher{})
   342  			done := make(chan error)
   343  			go func() {
   344  				done <- w.Wait()
   345  			}()
   346  			select {
   347  			case err := <-done:
   348  				c.Assert(err, gc.ErrorMatches, testCase.expectErr)
   349  			case <-time.After(coretesting.LongWait):
   350  				c.Fatalf("timed out waiting for error")
   351  			}
   352  		}
   353  	})
   354  }
   355  
   356  func (s *workerSuite) TestSetMembersErrorIsNotFatal(c *gc.C) {
   357  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   358  		st := NewFakeState()
   359  		InitState(c, st, 3, ipVersion)
   360  		st.session.setStatus(mkStatuses("0p 1s 2s", ipVersion))
   361  		var setCount voyeur.Value
   362  		setErrorFuncFor("Session.Set", func() error {
   363  			setCount.Set(true)
   364  			return errors.New("sample")
   365  		})
   366  		s.PatchValue(&initialRetryInterval, 10*time.Microsecond)
   367  		s.PatchValue(&maxRetryInterval, coretesting.ShortWait/4)
   368  
   369  		w := newWorker(st, noPublisher{})
   370  		defer func() {
   371  			c.Check(worker.Stop(w), gc.IsNil)
   372  		}()
   373  
   374  		// See that the worker is retrying.
   375  		setCountW := setCount.Watch()
   376  		mustNext(c, setCountW)
   377  		mustNext(c, setCountW)
   378  		mustNext(c, setCountW)
   379  
   380  		resetErrors()
   381  	})
   382  }
   383  
   384  type PublisherFunc func(apiServers [][]network.HostPort, instanceIds []instance.Id) error
   385  
   386  func (f PublisherFunc) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   387  	return f(apiServers, instanceIds)
   388  }
   389  
   390  func (s *workerSuite) TestStateServersArePublished(c *gc.C) {
   391  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   392  		publishCh := make(chan [][]network.HostPort)
   393  		publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   394  			publishCh <- apiServers
   395  			return nil
   396  		}
   397  
   398  		st := NewFakeState()
   399  		InitState(c, st, 3, ipVersion)
   400  		w := newWorker(st, PublisherFunc(publish))
   401  		defer func() {
   402  			c.Check(worker.Stop(w), gc.IsNil)
   403  		}()
   404  		select {
   405  		case servers := <-publishCh:
   406  			AssertAPIHostPorts(c, servers, ExpectedAPIHostPorts(3, ipVersion))
   407  		case <-time.After(coretesting.LongWait):
   408  			c.Fatalf("timed out waiting for publish")
   409  		}
   410  
   411  		// Change one of the servers' API addresses and check that it's published.
   412  		var newMachine10APIHostPorts []network.HostPort
   413  		newMachine10APIHostPorts = addressesWithPort(apiPort, ipVersion.extraHostPort)
   414  		st.machine("10").setAPIHostPorts(newMachine10APIHostPorts)
   415  		select {
   416  		case servers := <-publishCh:
   417  			expected := ExpectedAPIHostPorts(3, ipVersion)
   418  			expected[0] = newMachine10APIHostPorts
   419  			AssertAPIHostPorts(c, servers, expected)
   420  		case <-time.After(coretesting.LongWait):
   421  			c.Fatalf("timed out waiting for publish")
   422  		}
   423  	})
   424  }
   425  
   426  func (s *workerSuite) TestWorkerRetriesOnPublishError(c *gc.C) {
   427  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   428  		s.PatchValue(&pollInterval, coretesting.LongWait+time.Second)
   429  		s.PatchValue(&initialRetryInterval, 5*time.Millisecond)
   430  		s.PatchValue(&maxRetryInterval, initialRetryInterval)
   431  
   432  		publishCh := make(chan [][]network.HostPort, 100)
   433  
   434  		count := 0
   435  		publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   436  			publishCh <- apiServers
   437  			count++
   438  			if count <= 3 {
   439  				return fmt.Errorf("publish error")
   440  			}
   441  			return nil
   442  		}
   443  		st := NewFakeState()
   444  		InitState(c, st, 3, ipVersion)
   445  
   446  		w := newWorker(st, PublisherFunc(publish))
   447  		defer func() {
   448  			c.Check(worker.Stop(w), gc.IsNil)
   449  		}()
   450  
   451  		for i := 0; i < 4; i++ {
   452  			select {
   453  			case servers := <-publishCh:
   454  				AssertAPIHostPorts(c, servers, ExpectedAPIHostPorts(3, ipVersion))
   455  			case <-time.After(coretesting.LongWait):
   456  				c.Fatalf("timed out waiting for publish #%d", i)
   457  			}
   458  		}
   459  		select {
   460  		case <-publishCh:
   461  			c.Errorf("unexpected publish event")
   462  		case <-time.After(coretesting.ShortWait):
   463  		}
   464  	})
   465  }
   466  
   467  func (s *workerSuite) TestWorkerPublishesInstanceIds(c *gc.C) {
   468  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   469  		s.PatchValue(&pollInterval, coretesting.LongWait+time.Second)
   470  		s.PatchValue(&initialRetryInterval, 5*time.Millisecond)
   471  		s.PatchValue(&maxRetryInterval, initialRetryInterval)
   472  
   473  		publishCh := make(chan []instance.Id, 100)
   474  
   475  		publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   476  			publishCh <- instanceIds
   477  			return nil
   478  		}
   479  		st := NewFakeState()
   480  		InitState(c, st, 3, ipVersion)
   481  
   482  		w := newWorker(st, PublisherFunc(publish))
   483  		defer func() {
   484  			c.Check(worker.Stop(w), gc.IsNil)
   485  		}()
   486  
   487  		select {
   488  		case instanceIds := <-publishCh:
   489  			c.Assert(instanceIds, jc.SameContents, []instance.Id{"id-10", "id-11", "id-12"})
   490  		case <-time.After(coretesting.LongWait):
   491  			c.Errorf("timed out waiting for publish")
   492  		}
   493  	})
   494  }
   495  
   496  // mustNext waits for w's value to be set and returns it.
   497  func mustNext(c *gc.C, w *voyeur.Watcher) (val interface{}) {
   498  	type voyeurResult struct {
   499  		ok  bool
   500  		val interface{}
   501  	}
   502  	done := make(chan voyeurResult)
   503  	go func() {
   504  		c.Logf("mustNext %p", w)
   505  		ok := w.Next()
   506  		val = w.Value()
   507  		c.Logf("mustNext done %p, ok: %v, val: %#v", w, ok, val)
   508  		done <- voyeurResult{ok, val}
   509  	}()
   510  	select {
   511  	case result := <-done:
   512  		c.Assert(result.ok, jc.IsTrue)
   513  		return result.val
   514  	case <-time.After(coretesting.LongWait):
   515  		c.Fatalf("timed out waiting for value to be set")
   516  	}
   517  	panic("unreachable")
   518  }
   519  
   520  type noPublisher struct{}
   521  
   522  func (noPublisher) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   523  	return nil
   524  }