github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/worker/peergrouper/worker_test.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package peergrouper
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"time"
    10  
    11  	jc "github.com/juju/testing/checkers"
    12  	"github.com/juju/utils/voyeur"
    13  	gc "gopkg.in/check.v1"
    14  
    15  	"github.com/juju/juju/instance"
    16  	"github.com/juju/juju/network"
    17  	coretesting "github.com/juju/juju/testing"
    18  	"github.com/juju/juju/worker"
    19  )
    20  
    21  type TestIPVersion struct {
    22  	version           string
    23  	formatHostPort    string
    24  	formatHost        string
    25  	machineFormatHost string
    26  	extraHostPort     string
    27  	extraHost         string
    28  	extraAddress      string
    29  	addressType       network.AddressType
    30  }
    31  
    32  var (
    33  	testIPv4 = TestIPVersion{
    34  		version:           "IPv4",
    35  		formatHostPort:    "0.1.2.%d:%d",
    36  		formatHost:        "0.1.2.%d",
    37  		machineFormatHost: "0.1.2.%d",
    38  		extraHostPort:     "0.1.99.99:9876",
    39  		extraHost:         "0.1.99.13",
    40  		extraAddress:      "0.1.99.13:1234",
    41  		addressType:       network.IPv4Address,
    42  	}
    43  	testIPv6 = TestIPVersion{
    44  		version:           "IPv6",
    45  		formatHostPort:    "[2001:DB8::%d]:%d",
    46  		formatHost:        "[2001:DB8::%d]",
    47  		machineFormatHost: "2001:DB8::%d",
    48  		extraHostPort:     "[2001:DB8::99:99]:9876",
    49  		extraHost:         "2001:DB8::99:13",
    50  		extraAddress:      "[2001:DB8::99:13]:1234",
    51  		addressType:       network.IPv6Address,
    52  	}
    53  )
    54  
    55  // DoTestForIPv4AndIPv6 runs the passed test for IPv4 and IPv6.
    56  func DoTestForIPv4AndIPv6(t func(ipVersion TestIPVersion)) {
    57  	t(testIPv4)
    58  	t(testIPv6)
    59  }
    60  
    61  type workerSuite struct {
    62  	coretesting.BaseSuite
    63  }
    64  
    65  var _ = gc.Suite(&workerSuite{})
    66  
    67  func (s *workerSuite) SetUpTest(c *gc.C) {
    68  	s.BaseSuite.SetUpTest(c)
    69  	resetErrors()
    70  }
    71  
    72  // InitState initializes the fake state with a single
    73  // replicaset member and numMachines machines
    74  // primed to vote.
    75  func InitState(c *gc.C, st *fakeState, numMachines int, ipVersion TestIPVersion) {
    76  	var ids []string
    77  	for i := 10; i < 10+numMachines; i++ {
    78  		id := fmt.Sprint(i)
    79  		m := st.addMachine(id, true)
    80  		m.setInstanceId(instance.Id("id-" + id))
    81  		m.setStateHostPort(fmt.Sprintf(ipVersion.formatHostPort, i, mongoPort))
    82  		ids = append(ids, id)
    83  		c.Assert(m.MongoHostPorts(), gc.HasLen, 1)
    84  
    85  		m.setAPIHostPorts(network.NewHostPorts(
    86  			apiPort, fmt.Sprintf(ipVersion.formatHost, i),
    87  		))
    88  	}
    89  	st.machine("10").SetHasVote(true)
    90  	st.setStateServers(ids...)
    91  	st.session.Set(mkMembers("0v", ipVersion))
    92  	st.session.setStatus(mkStatuses("0p", ipVersion))
    93  	st.check = checkInvariants
    94  }
    95  
    96  // ExpectedAPIHostPorts returns the expected addresses
    97  // of the machines as created by InitState.
    98  func ExpectedAPIHostPorts(n int, ipVersion TestIPVersion) [][]network.HostPort {
    99  	servers := make([][]network.HostPort, n)
   100  	for i := range servers {
   101  		servers[i] = network.NewHostPorts(
   102  			apiPort,
   103  			fmt.Sprintf(ipVersion.formatHost, i+10),
   104  		)
   105  	}
   106  	return servers
   107  }
   108  
   109  func (s *workerSuite) TestSetsAndUpdatesMembers(c *gc.C) {
   110  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   111  		s.PatchValue(&pollInterval, 5*time.Millisecond)
   112  
   113  		st := NewFakeState()
   114  		InitState(c, st, 3, ipVersion)
   115  
   116  		memberWatcher := st.session.members.Watch()
   117  		mustNext(c, memberWatcher)
   118  		assertMembers(c, memberWatcher.Value(), mkMembers("0v", ipVersion))
   119  
   120  		logger.Infof("starting worker")
   121  		w := newWorker(st, noPublisher{})
   122  		defer func() {
   123  			c.Check(worker.Stop(w), gc.IsNil)
   124  		}()
   125  
   126  		// Wait for the worker to set the initial members.
   127  		mustNext(c, memberWatcher)
   128  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2", ipVersion))
   129  
   130  		// Update the status of the new members
   131  		// and check that they become voting.
   132  		c.Logf("updating new member status")
   133  		st.session.setStatus(mkStatuses("0p 1s 2s", ipVersion))
   134  		mustNext(c, memberWatcher)
   135  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v", ipVersion))
   136  
   137  		c.Logf("adding another machine")
   138  		// Add another machine.
   139  		m13 := st.addMachine("13", false)
   140  		m13.setStateHostPort(fmt.Sprintf(ipVersion.formatHostPort, 13, mongoPort))
   141  		st.setStateServers("10", "11", "12", "13")
   142  
   143  		c.Logf("waiting for new member to be added")
   144  		mustNext(c, memberWatcher)
   145  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3", ipVersion))
   146  
   147  		// Remove vote from an existing member;
   148  		// and give it to the new machine.
   149  		// Also set the status of the new machine to
   150  		// healthy.
   151  		c.Logf("removing vote from machine 10 and adding it to machine 13")
   152  		st.machine("10").setWantsVote(false)
   153  		st.machine("13").setWantsVote(true)
   154  
   155  		st.session.setStatus(mkStatuses("0p 1s 2s 3s", ipVersion))
   156  
   157  		// Check that the new machine gets the vote and the
   158  		// old machine loses it.
   159  		c.Logf("waiting for vote switch")
   160  		mustNext(c, memberWatcher)
   161  		assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v", ipVersion))
   162  
   163  		c.Logf("removing old machine")
   164  		// Remove the old machine.
   165  		st.removeMachine("10")
   166  		st.setStateServers("11", "12", "13")
   167  
   168  		// Check that it's removed from the members.
   169  		c.Logf("waiting for removal")
   170  		mustNext(c, memberWatcher)
   171  		assertMembers(c, memberWatcher.Value(), mkMembers("1v 2v 3v", ipVersion))
   172  	})
   173  }
   174  
   175  func (s *workerSuite) TestHasVoteMaintainedEvenWhenReplicaSetFails(c *gc.C) {
   176  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   177  		st := NewFakeState()
   178  
   179  		// Simulate a state where we have four state servers,
   180  		// one has gone down, and we're replacing it:
   181  		// 0 - hasvote true, wantsvote false, down
   182  		// 1 - hasvote true, wantsvote true
   183  		// 2 - hasvote true, wantsvote true
   184  		// 3 - hasvote false, wantsvote true
   185  		//
   186  		// When it starts, the worker should move the vote from
   187  		// 0 to 3. We'll arrange things so that it will succeed in
   188  		// setting the membership but fail setting the HasVote
   189  		// to false.
   190  		InitState(c, st, 4, ipVersion)
   191  		st.machine("10").SetHasVote(true)
   192  		st.machine("11").SetHasVote(true)
   193  		st.machine("12").SetHasVote(true)
   194  		st.machine("13").SetHasVote(false)
   195  
   196  		st.machine("10").setWantsVote(false)
   197  		st.machine("11").setWantsVote(true)
   198  		st.machine("12").setWantsVote(true)
   199  		st.machine("13").setWantsVote(true)
   200  
   201  		st.session.Set(mkMembers("0v 1v 2v 3", ipVersion))
   202  		st.session.setStatus(mkStatuses("0H 1p 2s 3s", ipVersion))
   203  
   204  		// Make the worker fail to set HasVote to false
   205  		// after changing the replica set membership.
   206  		setErrorFor("Machine.SetHasVote * false", errors.New("frood"))
   207  
   208  		memberWatcher := st.session.members.Watch()
   209  		mustNext(c, memberWatcher)
   210  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3", ipVersion))
   211  
   212  		w := newWorker(st, noPublisher{})
   213  		done := make(chan error)
   214  		go func() {
   215  			done <- w.Wait()
   216  		}()
   217  
   218  		// Wait for the worker to set the initial members.
   219  		mustNext(c, memberWatcher)
   220  		assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v", ipVersion))
   221  
   222  		// The worker should encounter an error setting the
   223  		// has-vote status to false and exit.
   224  		select {
   225  		case err := <-done:
   226  			c.Assert(err, gc.ErrorMatches, `cannot set voting status of "[0-9]+" to false: frood`)
   227  		case <-time.After(coretesting.LongWait):
   228  			c.Fatalf("timed out waiting for worker to exit")
   229  		}
   230  
   231  		// Start the worker again - although the membership should
   232  		// not change, the HasVote status should be updated correctly.
   233  		resetErrors()
   234  		w = newWorker(st, noPublisher{})
   235  
   236  		// Watch all the machines for changes, so we can check
   237  		// their has-vote status without polling.
   238  		changed := make(chan struct{}, 1)
   239  		for i := 10; i < 14; i++ {
   240  			watcher := st.machine(fmt.Sprint(i)).val.Watch()
   241  			defer watcher.Close()
   242  			go func() {
   243  				for watcher.Next() {
   244  					select {
   245  					case changed <- struct{}{}:
   246  					default:
   247  					}
   248  				}
   249  			}()
   250  		}
   251  		timeout := time.After(coretesting.LongWait)
   252  	loop:
   253  		for {
   254  			select {
   255  			case <-changed:
   256  				correct := true
   257  				for i := 10; i < 14; i++ {
   258  					hasVote := st.machine(fmt.Sprint(i)).HasVote()
   259  					expectHasVote := i != 10
   260  					if hasVote != expectHasVote {
   261  						correct = false
   262  					}
   263  				}
   264  				if correct {
   265  					break loop
   266  				}
   267  			case <-timeout:
   268  				c.Fatalf("timed out waiting for vote to be set")
   269  			}
   270  		}
   271  	})
   272  }
   273  
   274  func (s *workerSuite) TestAddressChange(c *gc.C) {
   275  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   276  		st := NewFakeState()
   277  		InitState(c, st, 3, ipVersion)
   278  
   279  		memberWatcher := st.session.members.Watch()
   280  		mustNext(c, memberWatcher)
   281  		assertMembers(c, memberWatcher.Value(), mkMembers("0v", ipVersion))
   282  
   283  		logger.Infof("starting worker")
   284  		w := newWorker(st, noPublisher{})
   285  		defer func() {
   286  			c.Check(worker.Stop(w), gc.IsNil)
   287  		}()
   288  
   289  		// Wait for the worker to set the initial members.
   290  		mustNext(c, memberWatcher)
   291  		assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2", ipVersion))
   292  
   293  		// Change an address and wait for it to be changed in the
   294  		// members.
   295  		st.machine("11").setStateHostPort(ipVersion.extraHostPort)
   296  
   297  		mustNext(c, memberWatcher)
   298  		expectMembers := mkMembers("0v 1 2", ipVersion)
   299  		expectMembers[1].Address = ipVersion.extraHostPort
   300  		assertMembers(c, memberWatcher.Value(), expectMembers)
   301  		resetErrors()
   302  	})
   303  }
   304  
   305  var fatalErrorsTests = []struct {
   306  	errPattern string
   307  	err        error
   308  	expectErr  string
   309  }{{
   310  	errPattern: "State.StateServerInfo",
   311  	expectErr:  "cannot get state server info: sample",
   312  }, {
   313  	errPattern: "Machine.SetHasVote 11 true",
   314  	expectErr:  `cannot set voting status of "11" to true: sample`,
   315  }, {
   316  	errPattern: "Session.CurrentStatus",
   317  	expectErr:  "cannot get replica set status: sample",
   318  }, {
   319  	errPattern: "Session.CurrentMembers",
   320  	expectErr:  "cannot get replica set members: sample",
   321  }, {
   322  	errPattern: "State.Machine *",
   323  	expectErr:  `cannot get machine "10": sample`,
   324  }, {
   325  	errPattern: "Machine.InstanceId *",
   326  	expectErr:  `cannot get API server info: sample`,
   327  }}
   328  
   329  func (s *workerSuite) TestFatalErrors(c *gc.C) {
   330  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   331  		s.PatchValue(&pollInterval, 5*time.Millisecond)
   332  		for i, testCase := range fatalErrorsTests {
   333  			c.Logf("test %d: %s -> %s", i, testCase.errPattern, testCase.expectErr)
   334  			resetErrors()
   335  			st := NewFakeState()
   336  			st.session.InstantlyReady = true
   337  			InitState(c, st, 3, ipVersion)
   338  			setErrorFor(testCase.errPattern, errors.New("sample"))
   339  			w := newWorker(st, noPublisher{})
   340  			done := make(chan error)
   341  			go func() {
   342  				done <- w.Wait()
   343  			}()
   344  			select {
   345  			case err := <-done:
   346  				c.Assert(err, gc.ErrorMatches, testCase.expectErr)
   347  			case <-time.After(coretesting.LongWait):
   348  				c.Fatalf("timed out waiting for error")
   349  			}
   350  		}
   351  	})
   352  }
   353  
   354  func (s *workerSuite) TestSetMembersErrorIsNotFatal(c *gc.C) {
   355  	coretesting.SkipIfI386(c, "lp:1425569")
   356  
   357  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   358  		st := NewFakeState()
   359  		InitState(c, st, 3, ipVersion)
   360  		st.session.setStatus(mkStatuses("0p 1s 2s", ipVersion))
   361  		var setCount voyeur.Value
   362  		setErrorFuncFor("Session.Set", func() error {
   363  			setCount.Set(true)
   364  			return errors.New("sample")
   365  		})
   366  		s.PatchValue(&initialRetryInterval, 10*time.Microsecond)
   367  		s.PatchValue(&maxRetryInterval, coretesting.ShortWait/4)
   368  
   369  		w := newWorker(st, noPublisher{})
   370  		defer func() {
   371  			c.Check(worker.Stop(w), gc.IsNil)
   372  		}()
   373  
   374  		// See that the worker is retrying.
   375  		setCountW := setCount.Watch()
   376  		mustNext(c, setCountW)
   377  		mustNext(c, setCountW)
   378  		mustNext(c, setCountW)
   379  
   380  		resetErrors()
   381  	})
   382  }
   383  
   384  type PublisherFunc func(apiServers [][]network.HostPort, instanceIds []instance.Id) error
   385  
   386  func (f PublisherFunc) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   387  	return f(apiServers, instanceIds)
   388  }
   389  
   390  func (s *workerSuite) TestStateServersArePublished(c *gc.C) {
   391  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   392  		publishCh := make(chan [][]network.HostPort)
   393  		publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   394  			publishCh <- apiServers
   395  			return nil
   396  		}
   397  
   398  		st := NewFakeState()
   399  		InitState(c, st, 3, ipVersion)
   400  		w := newWorker(st, PublisherFunc(publish))
   401  		defer func() {
   402  			c.Check(worker.Stop(w), gc.IsNil)
   403  		}()
   404  		select {
   405  		case servers := <-publishCh:
   406  			AssertAPIHostPorts(c, servers, ExpectedAPIHostPorts(3, ipVersion))
   407  		case <-time.After(coretesting.LongWait):
   408  			c.Fatalf("timed out waiting for publish")
   409  		}
   410  
   411  		// Change one of the servers' API addresses and check that it's published.
   412  		var newMachine10APIHostPorts []network.HostPort
   413  		newMachine10APIHostPorts = network.NewHostPorts(apiPort, ipVersion.extraHostPort)
   414  		st.machine("10").setAPIHostPorts(newMachine10APIHostPorts)
   415  		select {
   416  		case servers := <-publishCh:
   417  			expected := ExpectedAPIHostPorts(3, ipVersion)
   418  			expected[0] = newMachine10APIHostPorts
   419  			AssertAPIHostPorts(c, servers, expected)
   420  		case <-time.After(coretesting.LongWait):
   421  			c.Fatalf("timed out waiting for publish")
   422  		}
   423  	})
   424  }
   425  
   426  func (s *workerSuite) TestWorkerRetriesOnPublishError(c *gc.C) {
   427  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   428  		s.PatchValue(&pollInterval, coretesting.LongWait+time.Second)
   429  		s.PatchValue(&initialRetryInterval, 5*time.Millisecond)
   430  		s.PatchValue(&maxRetryInterval, initialRetryInterval)
   431  
   432  		publishCh := make(chan [][]network.HostPort, 100)
   433  
   434  		count := 0
   435  		publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   436  			publishCh <- apiServers
   437  			count++
   438  			if count <= 3 {
   439  				return fmt.Errorf("publish error")
   440  			}
   441  			return nil
   442  		}
   443  		st := NewFakeState()
   444  		InitState(c, st, 3, ipVersion)
   445  
   446  		w := newWorker(st, PublisherFunc(publish))
   447  		defer func() {
   448  			c.Check(worker.Stop(w), gc.IsNil)
   449  		}()
   450  
   451  		for i := 0; i < 4; i++ {
   452  			select {
   453  			case servers := <-publishCh:
   454  				AssertAPIHostPorts(c, servers, ExpectedAPIHostPorts(3, ipVersion))
   455  			case <-time.After(coretesting.LongWait):
   456  				c.Fatalf("timed out waiting for publish #%d", i)
   457  			}
   458  		}
   459  		select {
   460  		case <-publishCh:
   461  			c.Errorf("unexpected publish event")
   462  		case <-time.After(coretesting.ShortWait):
   463  		}
   464  	})
   465  }
   466  
   467  func (s *workerSuite) TestWorkerPublishesInstanceIds(c *gc.C) {
   468  	DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) {
   469  		s.PatchValue(&pollInterval, coretesting.LongWait+time.Second)
   470  		s.PatchValue(&initialRetryInterval, 5*time.Millisecond)
   471  		s.PatchValue(&maxRetryInterval, initialRetryInterval)
   472  
   473  		publishCh := make(chan []instance.Id, 100)
   474  
   475  		publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   476  			publishCh <- instanceIds
   477  			return nil
   478  		}
   479  		st := NewFakeState()
   480  		InitState(c, st, 3, ipVersion)
   481  
   482  		w := newWorker(st, PublisherFunc(publish))
   483  		defer func() {
   484  			c.Check(worker.Stop(w), gc.IsNil)
   485  		}()
   486  
   487  		select {
   488  		case instanceIds := <-publishCh:
   489  			c.Assert(instanceIds, jc.SameContents, []instance.Id{"id-10", "id-11", "id-12"})
   490  		case <-time.After(coretesting.LongWait):
   491  			c.Errorf("timed out waiting for publish")
   492  		}
   493  	})
   494  }
   495  
   496  // mustNext waits for w's value to be set and returns it.
   497  func mustNext(c *gc.C, w *voyeur.Watcher) (val interface{}) {
   498  	type voyeurResult struct {
   499  		ok  bool
   500  		val interface{}
   501  	}
   502  	done := make(chan voyeurResult)
   503  	go func() {
   504  		c.Logf("mustNext %p", w)
   505  		ok := w.Next()
   506  		val = w.Value()
   507  		c.Logf("mustNext done %p, ok: %v, val: %#v", w, ok, val)
   508  		done <- voyeurResult{ok, val}
   509  	}()
   510  	select {
   511  	case result := <-done:
   512  		c.Assert(result.ok, jc.IsTrue)
   513  		return result.val
   514  	case <-time.After(coretesting.LongWait):
   515  		c.Fatalf("timed out waiting for value to be set")
   516  	}
   517  	panic("unreachable")
   518  }
   519  
   520  type noPublisher struct{}
   521  
   522  func (noPublisher) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
   523  	return nil
   524  }