github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/raft/raftclusterer/worker.go

// Copyright 2018 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package raftclusterer

import (
	"github.com/hashicorp/raft"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/pubsub"
	"gopkg.in/juju/worker.v1"
	"gopkg.in/juju/worker.v1/catacomb"

	"github.com/juju/juju/pubsub/apiserver"
)

var (
	logger = loggo.GetLogger("juju.worker.raft.raftclusterer")
)

// Config holds the configuration necessary to run a worker for
// maintaining the raft cluster configuration.
type Config struct {
	Raft *raft.Raft
	Hub  *pubsub.StructuredHub
}

// Validate validates the raft worker configuration.
func (config Config) Validate() error {
	if config.Hub == nil {
		return errors.NotValidf("nil Hub")
	}
	if config.Raft == nil {
		return errors.NotValidf("nil Raft")
	}
	return nil
}

// NewWorker returns a new worker responsible for maintaining
// the raft cluster configuration.
func NewWorker(config Config) (worker.Worker, error) {
	if err := config.Validate(); err != nil {
		return nil, errors.Trace(err)
	}
	w := &Worker{
		config:        config,
		serverDetails: make(chan apiserver.Details),
	}
	// Subscribe to API server address changes.
	unsubscribe, err := config.Hub.Subscribe(
		apiserver.DetailsTopic,
		w.apiserverDetailsChanged,
	)
	if err != nil {
		return nil, errors.Annotate(err, "subscribing to apiserver details")
	}
	// Now that we're subscribed, request the current API server details.
	req := apiserver.DetailsRequest{
		Requester: "raft-clusterer",
		LocalOnly: true,
	}
	if _, err := config.Hub.Publish(apiserver.DetailsRequestTopic, req); err != nil {
		return nil, errors.Annotate(err, "requesting current apiserver details")
	}

	if err := catacomb.Invoke(catacomb.Plan{
		Site: &w.catacomb,
		Work: func() error {
			defer unsubscribe()
			return w.loop()
		},
	}); err != nil {
		unsubscribe()
		return nil, errors.Trace(err)
	}
	return w, nil
}

// Worker is a worker that manages raft cluster configuration.
type Worker struct {
	catacomb catacomb.Catacomb
	config   Config

	serverDetails chan apiserver.Details
}

// Kill is part of the worker.Worker interface.
func (w *Worker) Kill() {
	w.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
func (w *Worker) Wait() error {
	return w.catacomb.Wait()
}

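// loop fetches the initial raft cluster configuration and then
// reconciles it with the API server details published on the hub,
// until the worker is killed.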
func (w *Worker) loop() error {
	// Get the initial raft cluster configuration.
	servers, prevIndex, err := w.getConfiguration()
	if err != nil {
		return errors.Annotate(err, "getting raft configuration")
	}

	for {
		select {
		case <-w.catacomb.Dying():
			return w.catacomb.ErrDying()
		case details := <-w.serverDetails:
			prevIndex, err = w.updateConfiguration(servers, prevIndex, details)
			if err != nil {
				return errors.Annotate(err, "updating raft configuration")
			}
		}
	}
}

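// getConfiguration returns the current raft cluster configuration as a
// map keyed by server ID, along with the index of that configuration
// for use as prevIndex in subsequent changes.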
func (w *Worker) getConfiguration() (map[raft.ServerID]*raft.Server, uint64, error) {
	future := w.config.Raft.GetConfiguration()
	prevIndex, err := w.waitIndexFuture(future)
	if err != nil {
		return nil, 0, errors.Trace(err)
	}
	servers := make(map[raft.ServerID]*raft.Server)
	config := future.Configuration()
	for i := range config.Servers {
		server := config.Servers[i]
		servers[server.ID] = &server
	}
	return servers, prevIndex, nil
}

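// updateConfiguration applies the configuration changes needed to bring
// the raft cluster in line with the given API server details, returning
// the index to use as prevIndex for later changes.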
func (w *Worker) updateConfiguration(
	configuredServers map[raft.ServerID]*raft.Server,
	prevIndex uint64,
	details apiserver.Details,
) (uint64, error) {
	newServers := make(map[raft.ServerID]raft.ServerAddress)
	for _, server := range details.Servers {
		if server.InternalAddress == "" {
			continue
		}
		serverID := raft.ServerID(server.ID)
		serverAddress := raft.ServerAddress(server.InternalAddress)
		newServers[serverID] = serverAddress
	}
	ops := w.configOps(configuredServers, newServers)
	for _, op := range ops {
		var err error
		prevIndex, err = w.waitIndexFuture(op(prevIndex))
		if err != nil {
			return 0, errors.Trace(err)
		}
	}
	return prevIndex, nil
}

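// configOps compares the configured raft servers with the servers
// reported in the API server details and returns the operations needed
// to reconcile them: additions and address updates first, then
// removals, demoting the local (leader) server last if it is to be
// removed. The configuredServers map is updated in place.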
func (w *Worker) configOps(configuredServers map[raft.ServerID]*raft.Server, newServers map[raft.ServerID]raft.ServerAddress) []configOp {
	if len(newServers) == 0 {
		logger.Infof("peergrouper reported 0 API server addresses; not removing all servers from cluster")
		return nil
	}
	// This worker can only run on the raft leader, ergo Leader
	// returns the local address.
	localAddr := w.config.Raft.Leader()

	// Add new servers and update addresses first, then remove old
	// servers. If the local server is to be removed, we demote it
	// last, to ensure that the other servers can elect a leader
	// amongst themselves and then remove this server.
	var ops []configOp
	for id, newAddr := range newServers {
		if server, ok := configuredServers[id]; ok {
			if server.Address == newAddr {
				continue
			}
			logger.Infof("server %q (%s) address changed from %q to %q", id, server.Suffrage, server.Address, newAddr)
			// Update the address of the server but take care not to
			// change its suffrage.
			server.Address = newAddr
			if server.Suffrage == raft.Voter {
				ops = append(ops, w.addVoterOp(id, newAddr))
			} else {
				ops = append(ops, w.addNonvoterOp(id, newAddr))
			}
		} else {
			logger.Infof("server %q added with address %q", id, newAddr)
			ops = append(ops, w.addVoterOp(id, newAddr))
			configuredServers[id] = &raft.Server{
				ID:       id,
				Address:  newAddr,
				Suffrage: raft.Voter,
			}
		}
	}
	var demoteLocal *raft.Server
	for id, server := range configuredServers {
		if _, ok := newServers[id]; ok {
			continue
		}
		if server.Address == localAddr {
			demoteLocal = server
			continue
		}
		delete(configuredServers, id)
		logger.Infof("server %q removed", id)
		ops = append(ops, w.removeServerOp(id))
	}
	if demoteLocal != nil {
		// The local (leader) server was removed, so we demote it in
		// the raft configuration. This will prompt another server to
		// become leader, which can then remove this server and make
		// any other changes needed (like ensuring an odd number of
		// voters).
		logger.Infof("leader %q being removed - demoting so the new leader can remove it", demoteLocal.ID)
		demoteLocal.Suffrage = raft.Nonvoter
		ops = append(ops, w.demoteVoterOp(demoteLocal.ID))
		return ops
	}

	// Prevent there from being an even number of voters, to avoid
	// problems from having a split quorum (especially in the case of
	// 2).
	correctionOps := w.correctVoterCountOps(configuredServers, localAddr)
	ops = append(ops, correctionOps...)
	return ops
}

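// correctVoterCountOps returns the operations needed to keep the number
// of voters odd: promoting the sole nonvoter when the total number of
// servers is odd, or demoting a voter other than the leader when it is
// even.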
func (w *Worker) correctVoterCountOps(configuredServers map[raft.ServerID]*raft.Server, localAddr raft.ServerAddress) []configOp {
	var voters, nonvoters []*raft.Server
	for _, server := range configuredServers {
		if server.Suffrage == raft.Nonvoter {
			nonvoters = append(nonvoters, server)
		} else {
			voters = append(voters, server)
		}
	}

	// We should have at most one nonvoter; bail out if we find more
	// than that.
	if len(nonvoters) > 1 {
		logger.Errorf("expected at most one nonvoter, found %d: %#v", len(nonvoters), nonvoters)
		return nil
	}

	needNonvoter := len(configuredServers)%2 == 0
	if !needNonvoter && len(nonvoters) == 1 {
		promote := nonvoters[0]
		logger.Infof("promoting nonvoter %q to maintain odd voter count", promote.ID)
		promote.Suffrage = raft.Voter
		return []configOp{w.addVoterOp(promote.ID, promote.Address)}
	}
	if needNonvoter && len(nonvoters) == 0 {
		// Find a voter that isn't us and demote it.
		for _, voter := range voters {
			if voter.Address == localAddr {
				continue
			}
			logger.Infof("demoting voter %q to maintain odd voter count", voter.ID)
			voter.Suffrage = raft.Nonvoter
			return []configOp{w.demoteVoterOp(voter.ID)}
		}
	}

	// No correction needed.
	return nil
}

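// configOp issues a single raft configuration change, given the index
// of the previously applied change, and returns a future for it.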
type configOp func(prevIndex uint64) raft.IndexFuture

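// removeServerOp returns an operation that removes the server with the
// given ID from the raft cluster.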
func (w *Worker) removeServerOp(id raft.ServerID) configOp {
	return func(prevIndex uint64) raft.IndexFuture {
		return w.config.Raft.RemoveServer(id, prevIndex, 0)
	}
}

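// addVoterOp returns an operation that adds the server with the given
// ID and address to the raft cluster as a voter.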
func (w *Worker) addVoterOp(id raft.ServerID, addr raft.ServerAddress) configOp {
	return func(prevIndex uint64) raft.IndexFuture {
		return w.config.Raft.AddVoter(id, addr, prevIndex, 0)
	}
}

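// addNonvoterOp returns an operation that adds the server with the
// given ID and address to the raft cluster as a nonvoter.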
func (w *Worker) addNonvoterOp(id raft.ServerID, addr raft.ServerAddress) configOp {
	return func(prevIndex uint64) raft.IndexFuture {
		return w.config.Raft.AddNonvoter(id, addr, prevIndex, 0)
	}
}

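// demoteVoterOp returns an operation that demotes the server with the
// given ID to a nonvoter, leaving it in the cluster.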
func (w *Worker) demoteVoterOp(id raft.ServerID) configOp {
	return func(prevIndex uint64) raft.IndexFuture {
		return w.config.Raft.DemoteVoter(id, prevIndex, 0)
	}
}

// waitIndexFuture waits for the future to return, or for the worker
// to be killed, whichever happens first. If the worker is dying, then
// the catacomb's ErrDying() is returned.
func (w *Worker) waitIndexFuture(f raft.IndexFuture) (uint64, error) {
	errch := make(chan error, 1)
	go func() {
		errch <- f.Error()
	}()
	select {
	case <-w.catacomb.Dying():
		return 0, w.catacomb.ErrDying()
	case err := <-errch:
		return f.Index(), err
	}
}

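// apiserverDetailsChanged is the callback subscribed to the hub; it
// forwards published API server details to the worker's main loop.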
func (w *Worker) apiserverDetailsChanged(topic string, details apiserver.Details, err error) {
	if err != nil {
		// This should never happen, so treat it as fatal.
		w.catacomb.Kill(errors.Annotate(err, "apiserver details callback failed"))
		return
	}
	select {
	case w.serverDetails <- details:
	case <-w.catacomb.Dying():
	}
}