github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/raft/raftclusterer/worker_test.go (about)

     1  // Copyright 2018 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package raftclusterer_test
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/hashicorp/raft"
    10  	"github.com/juju/pubsub"
    11  	jc "github.com/juju/testing/checkers"
    12  	gc "gopkg.in/check.v1"
    13  	"gopkg.in/juju/names.v2"
    14  	"gopkg.in/juju/worker.v1"
    15  	"gopkg.in/juju/worker.v1/workertest"
    16  
    17  	"github.com/juju/juju/pubsub/apiserver"
    18  	"github.com/juju/juju/pubsub/centralhub"
    19  	coretesting "github.com/juju/juju/testing"
    20  	jujuraft "github.com/juju/juju/worker/raft"
    21  	"github.com/juju/juju/worker/raft/raftclusterer"
    22  	"github.com/juju/juju/worker/raft/rafttest"
    23  )
    24  
    25  type workerFixture struct {
    26  	rafttest.RaftFixture
    27  	hub    *pubsub.StructuredHub
    28  	config raftclusterer.Config
    29  }
    30  
    31  func (s *workerFixture) SetUpTest(c *gc.C) {
    32  	s.FSM = &jujuraft.SimpleFSM{}
    33  	s.RaftFixture.SetUpTest(c)
    34  	s.hub = centralhub.New(names.NewMachineTag("0"))
    35  	s.config = raftclusterer.Config{
    36  		Raft: s.Raft,
    37  		Hub:  s.hub,
    38  	}
    39  }
    40  
    41  type WorkerValidationSuite struct {
    42  	workerFixture
    43  }
    44  
    45  var _ = gc.Suite(&WorkerValidationSuite{})
    46  
    47  func (s *WorkerValidationSuite) TestValidateErrors(c *gc.C) {
    48  	type test struct {
    49  		f      func(*raftclusterer.Config)
    50  		expect string
    51  	}
    52  	tests := []test{{
    53  		func(cfg *raftclusterer.Config) { cfg.Raft = nil },
    54  		"nil Raft not valid",
    55  	}, {
    56  		func(cfg *raftclusterer.Config) { cfg.Hub = nil },
    57  		"nil Hub not valid",
    58  	}}
    59  	for i, test := range tests {
    60  		c.Logf("test #%d (%s)", i, test.expect)
    61  		s.testValidateError(c, test.f, test.expect)
    62  	}
    63  }
    64  
    65  func (s *WorkerValidationSuite) testValidateError(c *gc.C, f func(*raftclusterer.Config), expect string) {
    66  	config := s.config
    67  	f(&config)
    68  	w, err := raftclusterer.NewWorker(config)
    69  	if !c.Check(err, gc.NotNil) {
    70  		workertest.DirtyKill(c, w)
    71  		return
    72  	}
    73  	c.Check(w, gc.IsNil)
    74  	c.Check(err, gc.ErrorMatches, expect)
    75  }
    76  
    77  type WorkerSuite struct {
    78  	workerFixture
    79  	worker worker.Worker
    80  	reqs   chan apiserver.DetailsRequest
    81  }
    82  
    83  var _ = gc.Suite(&WorkerSuite{})
    84  
    85  func (s *WorkerSuite) SetUpTest(c *gc.C) {
    86  	s.workerFixture.SetUpTest(c)
    87  	s.reqs = make(chan apiserver.DetailsRequest, 10)
    88  
    89  	// Use a local variable to send to the channel in the callback, so
    90  	// we don't get races when a subsequent test overwrites s.reqs
    91  	// with a new channel.
    92  	reqs := s.reqs
    93  	unsubscribe, err := s.hub.Subscribe(
    94  		apiserver.DetailsRequestTopic,
    95  		func(topic string, req apiserver.DetailsRequest, err error) {
    96  			c.Check(err, jc.ErrorIsNil)
    97  			reqs <- req
    98  		},
    99  	)
   100  	c.Assert(err, jc.ErrorIsNil)
   101  	s.AddCleanup(func(c *gc.C) { unsubscribe() })
   102  
   103  	worker, err := raftclusterer.NewWorker(s.config)
   104  	c.Assert(err, jc.ErrorIsNil)
   105  	s.AddCleanup(func(c *gc.C) {
   106  		workertest.DirtyKill(c, worker)
   107  	})
   108  	s.worker = worker
   109  }
   110  
   111  func (s *WorkerSuite) TestCleanKill(c *gc.C) {
   112  	workertest.CleanKill(c, s.worker)
   113  }
   114  
   115  func (s *WorkerSuite) TestAddRemoveServers(c *gc.C) {
   116  	// Create 4 servers: 0, 1, 2, and 3, where all servers can connect
   117  	// bidirectionally.
   118  	raft1, _, transport1, _, _ := s.NewRaft(c, "1", &jujuraft.SimpleFSM{})
   119  	_, _, transport2, _, _ := s.NewRaft(c, "2", &jujuraft.SimpleFSM{})
   120  	_, _, transport3, _, _ := s.NewRaft(c, "3", &jujuraft.SimpleFSM{})
   121  	connectTransports(s.Transport, transport1, transport2, transport3)
   122  
   123  	machine0Address := string(s.Transport.LocalAddr())
   124  	machine1Address := string(transport1.LocalAddr())
   125  	machine2Address := string(transport2.LocalAddr())
   126  	machine3Address := string(transport3.LocalAddr())
   127  
   128  	raft1Observations := make(chan raft.Observation, 1)
   129  	raft1Observer := raft.NewObserver(raft1Observations, false, func(o *raft.Observation) bool {
   130  		_, ok := o.Data.(raft.LeaderObservation)
   131  		return ok
   132  	})
   133  	raft1.RegisterObserver(raft1Observer)
   134  	defer raft1.DeregisterObserver(raft1Observer)
   135  
   136  	// Add machines 1 and 2.
   137  	s.publishDetails(c, map[string]string{
   138  		"0": machine0Address,
   139  		"1": machine1Address,
   140  		"2": machine2Address,
   141  	})
   142  	rafttest.CheckConfiguration(c, s.Raft, []raft.Server{{
   143  		ID:       "0",
   144  		Address:  raft.ServerAddress(machine0Address),
   145  		Suffrage: raft.Voter,
   146  	}, {
   147  		ID:       "1",
   148  		Address:  raft.ServerAddress(machine1Address),
   149  		Suffrage: raft.Voter,
   150  	}, {
   151  		ID:       "2",
   152  		Address:  raft.ServerAddress(machine2Address),
   153  		Suffrage: raft.Voter,
   154  	}})
   155  
   156  	select {
   157  	case <-raft1Observations:
   158  		c.Assert(raft1.Leader(), gc.Equals, s.Transport.LocalAddr())
   159  	case <-time.After(coretesting.LongWait):
   160  		c.Fatal("timed out waiting for leader observation")
   161  	}
   162  
   163  	// Remove machine 1, add machine 3.
   164  	s.publishDetails(c, map[string]string{
   165  		"0": machine0Address,
   166  		"2": machine2Address,
   167  		"3": machine3Address,
   168  	})
   169  	rafttest.CheckConfiguration(c, raft1, []raft.Server{{
   170  		ID:       "0",
   171  		Address:  raft.ServerAddress(machine0Address),
   172  		Suffrage: raft.Voter,
   173  	}, {
   174  		ID:       "2",
   175  		Address:  raft.ServerAddress(machine2Address),
   176  		Suffrage: raft.Voter,
   177  	}, {
   178  		ID:       "3",
   179  		Address:  raft.ServerAddress(machine3Address),
   180  		Suffrage: raft.Voter,
   181  	}})
   182  }
   183  
   184  func (s *WorkerSuite) TestChangeLocalServer(c *gc.C) {
   185  	// This test asserts that a configuration change which updates a
   186  	// raft leader's address does not result in a leadership change.
   187  
   188  	// Machine 0's address will be updated to a non-localhost address, and
   189  	// two new servers are added.
   190  
   191  	// We add 1 and 2, and change 0's address. Changing machine 0's
   192  	// address should not affect its leadership.
   193  	raft1, _, transport1, _, _ := s.NewRaft(c, "1", &jujuraft.SimpleFSM{})
   194  	_, _, transport2, _, _ := s.NewRaft(c, "2", &jujuraft.SimpleFSM{})
   195  	connectTransports(s.Transport, transport1, transport2)
   196  	machine1Address := string(transport1.LocalAddr())
   197  	machine2Address := string(transport2.LocalAddr())
   198  
   199  	alternateAddress := "testing.invalid:1234"
   200  	c.Assert(s.Raft.Leader(), gc.Not(gc.Equals), alternateAddress)
   201  	s.publishDetails(c, map[string]string{
   202  		"0": alternateAddress,
   203  		"1": machine1Address,
   204  		"2": machine2Address,
   205  	})
   206  	//Check configuration asserts that the raft configuration should have
   207  	//been updated to reflect the two added machines and that the address of
   208  	//the leader has been changed.
   209  	rafttest.CheckConfiguration(c, raft1, []raft.Server{{
   210  		ID:       "0",
   211  		Address:  raft.ServerAddress(alternateAddress),
   212  		Suffrage: raft.Voter,
   213  	}, {
   214  		ID:       "1",
   215  		Address:  raft.ServerAddress(machine1Address),
   216  		Suffrage: raft.Voter,
   217  	}, {
   218  		ID:       "2",
   219  		Address:  raft.ServerAddress(machine2Address),
   220  		Suffrage: raft.Voter,
   221  	}})
   222  
   223  	// Machine 0 should still be the leader.
   224  	future := s.Raft.VerifyLeader()
   225  	c.Assert(future.Error(), jc.ErrorIsNil)
   226  }
   227  
   228  func (s *WorkerSuite) TestDisappearingAddresses(c *gc.C) {
   229  	// If we had 3 servers but the peergrouper publishes an update
   230  	// that sets all of their addresses to "", ignore that change.
   231  	raft1, _, transport1, _, _ := s.NewRaft(c, "1", &jujuraft.SimpleFSM{})
   232  	_, _, transport2, _, _ := s.NewRaft(c, "2", &jujuraft.SimpleFSM{})
   233  	connectTransports(s.Transport, transport1, transport2)
   234  	machine0Address := string(s.Transport.LocalAddr())
   235  	machine1Address := string(transport1.LocalAddr())
   236  	machine2Address := string(transport2.LocalAddr())
   237  
   238  	s.publishDetails(c, map[string]string{
   239  		"0": machine0Address,
   240  		"1": machine1Address,
   241  		"2": machine2Address,
   242  	})
   243  	expectedConfiguration := []raft.Server{{
   244  		ID:       "0",
   245  		Address:  raft.ServerAddress(machine0Address),
   246  		Suffrage: raft.Voter,
   247  	}, {
   248  		ID:       "1",
   249  		Address:  raft.ServerAddress(machine1Address),
   250  		Suffrage: raft.Voter,
   251  	}, {
   252  		ID:       "2",
   253  		Address:  raft.ServerAddress(machine2Address),
   254  		Suffrage: raft.Voter,
   255  	}}
   256  	rafttest.CheckConfiguration(c, raft1, expectedConfiguration)
   257  
   258  	s.publishDetails(c, map[string]string{
   259  		"0": "",
   260  		"1": "",
   261  		"2": "",
   262  	})
   263  	// Check that it ignores the update - removing all servers isn't
   264  	// something that we should allow.
   265  	rafttest.CheckConfiguration(c, raft1, expectedConfiguration)
   266  
   267  	// But publishing an update with one machines with a blank address
   268  	// should still remove it.
   269  	s.publishDetails(c, map[string]string{
   270  		"0": machine0Address,
   271  		"1": "",
   272  		"2": machine2Address,
   273  	})
   274  	rafttest.CheckConfiguration(c, raft1, []raft.Server{
   275  		expectedConfiguration[0],
   276  		expectedConfiguration[2],
   277  	})
   278  }
   279  
   280  func (s *WorkerSuite) TestRequestsDetails(c *gc.C) {
   281  	// The worker is started in SetUpTest.
   282  	select {
   283  	case req := <-s.reqs:
   284  		c.Assert(req, gc.Equals, apiserver.DetailsRequest{
   285  			Requester: "raft-clusterer",
   286  			LocalOnly: true,
   287  		})
   288  	case <-time.After(coretesting.LongWait):
   289  		c.Fatalf("timed out waiting for details request")
   290  	}
   291  }
   292  
   293  func (s *WorkerSuite) TestDemotesAServerWhenThereAre2(c *gc.C) {
   294  	// Create 3 servers: 0, 1 and 2, where all servers can connect
   295  	// bidirectionally.
   296  	raft1, _, transport1, _, _ := s.NewRaft(c, "1", &jujuraft.SimpleFSM{})
   297  	raft2, _, transport2, _, _ := s.NewRaft(c, "2", &jujuraft.SimpleFSM{})
   298  	connectTransports(s.Transport, transport1, transport2)
   299  
   300  	machine0Address := string(s.Transport.LocalAddr())
   301  	machine1Address := string(transport1.LocalAddr())
   302  	machine2Address := string(transport2.LocalAddr())
   303  
   304  	raft1Observations := make(chan raft.Observation, 1)
   305  	raft1Observer := raft.NewObserver(raft1Observations, false, func(o *raft.Observation) bool {
   306  		_, ok := o.Data.(raft.LeaderObservation)
   307  		return ok
   308  	})
   309  	raft1.RegisterObserver(raft1Observer)
   310  	defer raft1.DeregisterObserver(raft1Observer)
   311  
   312  	// Add machines 1 and 2.
   313  	s.publishDetails(c, map[string]string{
   314  		"0": machine0Address,
   315  		"1": machine1Address,
   316  		"2": machine2Address,
   317  	})
   318  	rafttest.CheckConfiguration(c, s.Raft, []raft.Server{{
   319  		ID:       "0",
   320  		Address:  raft.ServerAddress(machine0Address),
   321  		Suffrage: raft.Voter,
   322  	}, {
   323  		ID:       "1",
   324  		Address:  raft.ServerAddress(machine1Address),
   325  		Suffrage: raft.Voter,
   326  	}, {
   327  		ID:       "2",
   328  		Address:  raft.ServerAddress(machine2Address),
   329  		Suffrage: raft.Voter,
   330  	}})
   331  
   332  	select {
   333  	case <-raft1Observations:
   334  		c.Assert(raft1.Leader(), gc.Equals, s.Transport.LocalAddr())
   335  	case <-time.After(coretesting.LongWait):
   336  		c.Fatal("timed out waiting for leader observation")
   337  	}
   338  
   339  	// Remove machine 1.
   340  	s.publishDetails(c, map[string]string{
   341  		"0": machine0Address,
   342  		"2": machine2Address,
   343  	})
   344  	f := raft1.Shutdown()
   345  	c.Assert(f.Error(), jc.ErrorIsNil)
   346  	rafttest.CheckConfiguration(c, raft2, []raft.Server{{
   347  		ID:       "0",
   348  		Address:  raft.ServerAddress(machine0Address),
   349  		Suffrage: raft.Voter,
   350  	}, {
   351  		ID:       "2",
   352  		Address:  raft.ServerAddress(machine2Address),
   353  		Suffrage: raft.Nonvoter,
   354  	}})
   355  }
   356  
   357  func (s *WorkerSuite) TestPromotesAServerWhenThereAre3Again(c *gc.C) {
   358  	// Create 2 servers: 0 and 1, where both servers can connect
   359  	// bidirectionally.
   360  	raft1, _, transport1, _, _ := s.NewRaft(c, "1", &jujuraft.SimpleFSM{})
   361  	connectTransports(s.Transport, transport1)
   362  
   363  	machine0Address := string(s.Transport.LocalAddr())
   364  	machine1Address := string(transport1.LocalAddr())
   365  
   366  	raft1Observations := make(chan raft.Observation, 1)
   367  	raft1Observer := raft.NewObserver(raft1Observations, false, func(o *raft.Observation) bool {
   368  		_, ok := o.Data.(raft.LeaderObservation)
   369  		return ok
   370  	})
   371  	raft1.RegisterObserver(raft1Observer)
   372  	defer raft1.DeregisterObserver(raft1Observer)
   373  
   374  	// Add machine 1.
   375  	s.publishDetails(c, map[string]string{
   376  		"0": machine0Address,
   377  		"1": machine1Address,
   378  	})
   379  	rafttest.CheckConfiguration(c, s.Raft, []raft.Server{{
   380  		ID:       "0",
   381  		Address:  raft.ServerAddress(machine0Address),
   382  		Suffrage: raft.Voter,
   383  	}, {
   384  		ID:       "1",
   385  		Address:  raft.ServerAddress(machine1Address),
   386  		Suffrage: raft.Nonvoter,
   387  	}})
   388  
   389  	select {
   390  	case <-raft1Observations:
   391  		c.Assert(raft1.Leader(), gc.Equals, s.Transport.LocalAddr())
   392  	case <-time.After(coretesting.LongWait):
   393  		c.Fatal("timed out waiting for leader observation")
   394  	}
   395  
   396  	// Add machine 2.
   397  	raft2, _, transport2, _, _ := s.NewRaft(c, "2", &jujuraft.SimpleFSM{})
   398  	connectTransports(s.Transport, transport1, transport2)
   399  	machine2Address := string(transport2.LocalAddr())
   400  
   401  	s.publishDetails(c, map[string]string{
   402  		"0": machine0Address,
   403  		"1": machine1Address,
   404  		"2": machine2Address,
   405  	})
   406  	rafttest.CheckConfiguration(c, raft2, []raft.Server{{
   407  		ID:       "0",
   408  		Address:  raft.ServerAddress(machine0Address),
   409  		Suffrage: raft.Voter,
   410  	}, {
   411  		ID:       "1",
   412  		Address:  raft.ServerAddress(machine1Address),
   413  		Suffrage: raft.Voter,
   414  	}, {
   415  		ID:       "2",
   416  		Address:  raft.ServerAddress(machine2Address),
   417  		Suffrage: raft.Voter,
   418  	}})
   419  }
   420  
   421  func (s *WorkerSuite) TestKeepsNonvoterIfAddressChanges(c *gc.C) {
   422  	// Create 2 servers: 0 and 1, where both servers can connect
   423  	// bidirectionally.
   424  	raft1, _, transport1, _, _ := s.NewRaft(c, "1", &jujuraft.SimpleFSM{})
   425  	connectTransports(s.Transport, transport1)
   426  
   427  	machine0Address := string(s.Transport.LocalAddr())
   428  	machine1Address := string(transport1.LocalAddr())
   429  
   430  	raft1Observations := make(chan raft.Observation, 1)
   431  	raft1Observer := raft.NewObserver(raft1Observations, false, func(o *raft.Observation) bool {
   432  		_, ok := o.Data.(raft.LeaderObservation)
   433  		return ok
   434  	})
   435  	raft1.RegisterObserver(raft1Observer)
   436  	defer raft1.DeregisterObserver(raft1Observer)
   437  
   438  	// Add machine 1.
   439  	s.publishDetails(c, map[string]string{
   440  		"0": machine0Address,
   441  		"1": machine1Address,
   442  	})
   443  	rafttest.CheckConfiguration(c, s.Raft, []raft.Server{{
   444  		ID:       "0",
   445  		Address:  raft.ServerAddress(machine0Address),
   446  		Suffrage: raft.Voter,
   447  	}, {
   448  		ID:       "1",
   449  		Address:  raft.ServerAddress(machine1Address),
   450  		Suffrage: raft.Nonvoter,
   451  	}})
   452  
   453  	select {
   454  	case <-raft1Observations:
   455  		c.Assert(raft1.Leader(), gc.Equals, s.Transport.LocalAddr())
   456  	case <-time.After(coretesting.LongWait):
   457  		c.Fatal("timed out waiting for leader observation")
   458  	}
   459  
   460  	// Update the non-voting server's address - ensure it doesn't
   461  	// accidentally get promoted to voting at the same time.
   462  	alternateAddress := "testing.invalid:1234"
   463  	s.publishDetails(c, map[string]string{
   464  		"0": machine0Address,
   465  		"1": alternateAddress,
   466  	})
   467  	rafttest.CheckConfiguration(c, s.Raft, []raft.Server{{
   468  		ID:       "0",
   469  		Address:  raft.ServerAddress(machine0Address),
   470  		Suffrage: raft.Voter,
   471  	}, {
   472  		ID:       "1",
   473  		Address:  raft.ServerAddress(alternateAddress),
   474  		Suffrage: raft.Nonvoter,
   475  	}})
   476  }
   477  
   478  func (s *WorkerSuite) TestDemotesLeaderIfRemoved(c *gc.C) {
   479  	// Create 3 servers: 0, 1 and 2, where all servers can connect
   480  	// bidirectionally.
   481  	raft1, _, transport1, _, _ := s.NewRaft(c, "1", &jujuraft.SimpleFSM{})
   482  	_, _, transport2, _, _ := s.NewRaft(c, "2", &jujuraft.SimpleFSM{})
   483  	connectTransports(s.Transport, transport1, transport2)
   484  
   485  	machine0Address := string(s.Transport.LocalAddr())
   486  	machine1Address := string(transport1.LocalAddr())
   487  	machine2Address := string(transport2.LocalAddr())
   488  
   489  	raft1Observations := make(chan raft.Observation, 1)
   490  	raft1Observer := raft.NewObserver(raft1Observations, false, func(o *raft.Observation) bool {
   491  		_, ok := o.Data.(raft.LeaderObservation)
   492  		return ok
   493  	})
   494  	raft1.RegisterObserver(raft1Observer)
   495  	defer raft1.DeregisterObserver(raft1Observer)
   496  
   497  	// Add machines 1 and 2.
   498  	s.publishDetails(c, map[string]string{
   499  		"0": machine0Address,
   500  		"1": machine1Address,
   501  		"2": machine2Address,
   502  	})
   503  	rafttest.CheckConfiguration(c, s.Raft, []raft.Server{{
   504  		ID:       "0",
   505  		Address:  raft.ServerAddress(machine0Address),
   506  		Suffrage: raft.Voter,
   507  	}, {
   508  		ID:       "1",
   509  		Address:  raft.ServerAddress(machine1Address),
   510  		Suffrage: raft.Voter,
   511  	}, {
   512  		ID:       "2",
   513  		Address:  raft.ServerAddress(machine2Address),
   514  		Suffrage: raft.Voter,
   515  	}})
   516  
   517  	select {
   518  	case <-raft1Observations:
   519  		c.Assert(raft1.Leader(), gc.Equals, s.Transport.LocalAddr())
   520  	case <-time.After(coretesting.LongWait):
   521  		c.Fatal("timed out waiting for leader observation")
   522  	}
   523  
   524  	// Remove machine 0. This should prompt the clusterer to demote
   525  	// the leader but not remove it - the new leader after the
   526  	// election will remove it instead.
   527  	s.publishDetails(c, map[string]string{
   528  		"1": machine1Address,
   529  		"2": machine2Address,
   530  	})
   531  	rafttest.CheckConfiguration(c, raft1, []raft.Server{{
   532  		ID:       "0",
   533  		Address:  raft.ServerAddress(machine0Address),
   534  		Suffrage: raft.Nonvoter,
   535  	}, {
   536  		ID:       "1",
   537  		Address:  raft.ServerAddress(machine1Address),
   538  		Suffrage: raft.Voter,
   539  	}, {
   540  		ID:       "2",
   541  		Address:  raft.ServerAddress(machine2Address),
   542  		Suffrage: raft.Voter,
   543  	}})
   544  }
   545  
   546  func (s *WorkerSuite) publishDetails(c *gc.C, serverAddrs map[string]string) {
   547  	details := makeDetails(serverAddrs)
   548  	received, err := s.hub.Publish(apiserver.DetailsTopic, details)
   549  	c.Assert(err, jc.ErrorIsNil)
   550  	select {
   551  	case <-received:
   552  	case <-time.After(coretesting.LongWait):
   553  		c.Fatal("timed out waiting for details to be received")
   554  	}
   555  }
   556  
   557  // Connect the provided transport bidirectionally.
   558  func connectTransports(transports ...raft.LoopbackTransport) {
   559  	for _, t1 := range transports {
   560  		for _, t2 := range transports {
   561  			if t1 == t2 {
   562  				continue
   563  			}
   564  			t1.Connect(t2.LocalAddr(), t2)
   565  		}
   566  	}
   567  }
   568  
   569  func makeDetails(serverInfo map[string]string) apiserver.Details {
   570  	servers := make(map[string]apiserver.APIServer)
   571  	for id, address := range serverInfo {
   572  		servers[id] = apiserver.APIServer{
   573  			ID:              id,
   574  			InternalAddress: address,
   575  		}
   576  	}
   577  	return apiserver.Details{Servers: servers}
   578  }