github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/raft/worker.go

// Copyright 2018 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package raft

import (
	"io"
	"log"
	"os"
	"path/filepath"
	"time"

	humanize "github.com/dustin/go-humanize"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/raft-boltdb"
	"github.com/juju/clock"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/prometheus/client_golang/prometheus"
	"gopkg.in/juju/worker.v1"
	"gopkg.in/juju/worker.v1/catacomb"

	"github.com/juju/juju/worker/raft/raftutil"
)

const (
	// defaultSnapshotRetention is the number of
	// snapshots to retain on disk by default.
	defaultSnapshotRetention = 2

	// bootstrapAddress is the raft server address
	// configured for the bootstrap node. This address
	// will be replaced once the raftclusterer worker
	// observes an address for the server.
	bootstrapAddress raft.ServerAddress = "localhost"

	// LoopTimeout is the maximum time we will wait for the raft
	// object to be constructed and the main loop to start. This
	// avoids hard-to-debug problems where the transport hangs and
	// the worker never really starts even though it appears to
	// have; if it crashes instead, the logging gives a path to
	// the problem.
	LoopTimeout = 1 * time.Minute

	// noLeaderTimeout is how long a follower will wait for contact
	// from the leader before restarting. Restarting lets us see
	// configuration changes (force-appended by the raft-backstop
	// worker) so that we can become a voter again if the leader was
	// removed, leaving a 2-node cluster without quorum.
	noLeaderTimeout = 1 * time.Minute

	// noLeaderFrequency is how long the raft worker waits between
	// checks of whether it is in contact with the leader.
	noLeaderFrequency = 10 * time.Second
)

var (
	// ErrWorkerStopped is returned by Worker.Raft if the
	// worker has been explicitly stopped.
	ErrWorkerStopped = errors.New("raft worker stopped")

	// ErrStartTimeout is returned by NewWorker if the worker loop
	// didn't start within LoopTimeout.
	ErrStartTimeout = errors.New("timed out waiting for worker loop")

	// ErrNoLeaderTimeout is returned by the worker loop if we've gone
	// too long without contact from the leader. It gives the worker a
	// chance to see any configuration changes the backstop worker
	// might have force-appended to the raft log.
	ErrNoLeaderTimeout = errors.New("timed out waiting for leader contact")
)

// Logger represents the logging methods called.
type Logger interface {
	Warningf(message string, args ...interface{})
	Errorf(message string, args ...interface{})
	Logf(level loggo.Level, message string, args ...interface{})
}

// Config is the configuration required for running a raft worker.
type Config struct {
	// FSM is the raft.FSM to use for this raft worker. This
	// must be non-nil for NewWorker, and nil for Bootstrap.
	FSM raft.FSM

	// Logger is the logger for this worker.
	Logger Logger

	// StorageDir is the directory in which to store raft
	// artifacts: logs, snapshots, etc. It is expected that
	// this directory is under the full control of the raft
	// worker.
	StorageDir string

	// LocalID is the raft.ServerID of this worker.
	LocalID raft.ServerID

	// Transport is the raft.Transport to use for communication
	// between raft servers. This must be non-nil for NewWorker,
	// and nil for Bootstrap.
	//
	// The raft worker expects the server address to exactly
	// match the server ID, which is the stringified agent tag.
	// The transport internally maps the server address to one
	// or more network addresses, e.g. by looking up the API
	// connection information in the state database.
	Transport raft.Transport

	// Clock is used for timeouts in the worker (although not inside
	// raft).
	Clock clock.Clock

	// NoLeaderTimeout, if non-zero, will override the default
	// timeout for leader contact before restarting.
	NoLeaderTimeout time.Duration

	// ElectionTimeout, if non-zero, will override the default
	// raft election timeout.
	ElectionTimeout time.Duration

	// HeartbeatTimeout, if non-zero, will override the default
	// raft heartbeat timeout.
	HeartbeatTimeout time.Duration

	// LeaderLeaseTimeout, if non-zero, will override the default
	// raft leader lease timeout.
	LeaderLeaseTimeout time.Duration

	// SnapshotRetention is the non-negative number of snapshots
	// to retain on disk. If zero, defaults to 2.
	SnapshotRetention int

	// PrometheusRegisterer is used to register the raft metrics.
	PrometheusRegisterer prometheus.Registerer
}

// Validate validates the raft worker configuration.
func (config Config) Validate() error {
	if config.FSM == nil {
		return errors.NotValidf("nil FSM")
	}
	if config.Logger == nil {
		return errors.NotValidf("nil Logger")
	}
	if config.StorageDir == "" {
		return errors.NotValidf("empty StorageDir")
	}
	if config.LocalID == "" {
		return errors.NotValidf("empty LocalID")
	}
	if config.SnapshotRetention < 0 {
		return errors.NotValidf("negative SnapshotRetention")
	}
	if config.Transport == nil {
		return errors.NotValidf("nil Transport")
	}
	if config.Clock == nil {
		return errors.NotValidf("nil Clock")
	}
	return nil
}

// Bootstrap bootstraps the raft cluster, using the given configuration.
//
// This is only to be called once, at the beginning of the raft cluster's
// lifetime, by the bootstrap machine agent.
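//
// A minimal sketch of the expected call (the field values here are
// illustrative, not taken from real agent code). FSM and Transport
// must be left nil; Bootstrap supplies its own:
//
//	err := Bootstrap(Config{
//		Logger:     logger,
//		StorageDir: storageDir,
//		LocalID:    "machine-0",
//		Clock:      clock.WallClock,
//	})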
func Bootstrap(config Config) error {
	if config.FSM != nil {
		return errors.NotValidf("non-nil FSM during Bootstrap")
	}
	if config.Transport != nil {
		return errors.NotValidf("non-nil Transport during Bootstrap")
	}

	// During bootstrap we use an in-memory transport. We just need
	// to make sure we use the same local address as we'll use later.
	_, transport := raft.NewInmemTransport(bootstrapAddress)
	defer transport.Close()
	config.Transport = transport

	// During bootstrap we do not need a real FSM, but newWorker
	// requires one, so use a placeholder whose methods are never
	// invoked.
	config.FSM = BootstrapFSM{}

	w, err := newWorker(config)
	if err != nil {
		return errors.Trace(err)
	}
	defer worker.Stop(w)

	r, err := w.Raft()
	if err != nil {
		return errors.Trace(err)
	}

	if err := r.BootstrapCluster(raft.Configuration{
		Servers: []raft.Server{{
			ID:      config.LocalID,
			Address: bootstrapAddress,
		}},
	}).Error(); err != nil {
		return errors.Annotate(err, "bootstrapping raft cluster")
	}
	return errors.Annotate(worker.Stop(w), "stopping bootstrap raft worker")
}

// NewWorker returns a new raft worker with the given configuration.
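//
// A minimal usage sketch (the fsm, logger, transport, and storageDir
// shown are assumed to be supplied by the caller; the names are
// illustrative):
//
//	w, err := NewWorker(Config{
//		FSM:        fsm,       // caller-provided raft.FSM
//		Logger:     logger,
//		StorageDir: storageDir,
//		LocalID:    "machine-0", // the stringified agent tag
//		Transport:  transport,   // caller-provided raft.Transport
//		Clock:      clock.WallClock,
//	})
//	if err != nil {
//		return errors.Trace(err)
//	}
//	r, err := w.(*Worker).Raft() // blocks until the loop is running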
func NewWorker(config Config) (worker.Worker, error) {
	return newWorker(config)
}

func newWorker(config Config) (*Worker, error) {
	if err := config.Validate(); err != nil {
		return nil, errors.Trace(err)
	}
	if config.NoLeaderTimeout == 0 {
		config.NoLeaderTimeout = noLeaderTimeout
	}
	raftConfig, err := NewRaftConfig(config)
	if err != nil {
		return nil, errors.Trace(err)
	}
	w := &Worker{
		config:     config,
		raftCh:     make(chan *raft.Raft),
		logStoreCh: make(chan raft.LogStore),
	}
	if err := catacomb.Invoke(catacomb.Plan{
		Site: &w.catacomb,
		Work: func() error {
			return w.loop(raftConfig)
		},
	}); err != nil {
		return nil, errors.Trace(err)
	}
	// Wait for the loop to be started.
	select {
	case <-config.Clock.After(LoopTimeout):
		w.catacomb.Kill(ErrStartTimeout)
		return nil, ErrStartTimeout
	case <-w.raftCh:
	}
	return w, nil
}

// Worker is a worker that manages a raft.Raft instance.
type Worker struct {
	catacomb catacomb.Catacomb
	config   Config

	raftCh     chan *raft.Raft
	logStoreCh chan raft.LogStore
}

// Raft returns the raft.Raft managed by this worker, or
// an error if the worker has stopped.
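//
// Raft blocks until the worker's main loop is running and ready to
// hand out the raft instance.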
func (w *Worker) Raft() (*raft.Raft, error) {
	select {
	case <-w.catacomb.Dying():
		err := w.catacomb.Err()
		if err != nil {
			return nil, err
		}
		return nil, ErrWorkerStopped
	case r := <-w.raftCh:
		return r, nil
	}
}

// LogStore returns the raft.LogStore managed by this worker, or
// an error if the worker has stopped.
func (w *Worker) LogStore() (raft.LogStore, error) {
	select {
	case <-w.catacomb.Dying():
		err := w.catacomb.Err()
		if err != nil {
			return nil, err
		}
		return nil, ErrWorkerStopped
	case logStore := <-w.logStoreCh:
		return logStore, nil
	}
}

// Kill is part of the worker.Worker interface.
func (w *Worker) Kill() {
	w.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
func (w *Worker) Wait() error {
	return w.catacomb.Wait()
}

func (w *Worker) loop(raftConfig *raft.Config) (loopErr error) {
	// Register the metrics.
	if w.config.PrometheusRegisterer != nil {
		registerMetrics(w.config.PrometheusRegisterer, w.config.Logger)
	}

	rawLogStore, err := NewLogStore(w.config.StorageDir)
	if err != nil {
		return errors.Trace(err)
	}
	// We need to make sure access to the LogStore methods (+ closing)
	// is synchronised, but we don't need to synchronise the
	// StableStore methods, because we aren't giving out a reference
	// to the StableStore - only the raft instance uses it.
	logStore := &syncLogStore{store: rawLogStore}
	defer logStore.Close()

	snapshotRetention := w.config.SnapshotRetention
	if snapshotRetention == 0 {
		snapshotRetention = defaultSnapshotRetention
	}
	snapshotStore, err := NewSnapshotStore(w.config.StorageDir, snapshotRetention, w.config.Logger)
	if err != nil {
		return errors.Trace(err)
	}

	r, err := raft.NewRaft(raftConfig, w.config.FSM, logStore, rawLogStore, snapshotStore, w.config.Transport)
	if err != nil {
		return errors.Trace(err)
	}
	defer func() {
		if err := r.Shutdown().Error(); err != nil {
			if loopErr == nil {
				loopErr = err
			} else {
				w.config.Logger.Warningf("raft shutdown failed: %s", err)
			}
		}
	}()

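	// Watch for the raft node shutting itself down. Passing
	// blocking=true to NewObserver means raft blocks on sending each
	// matching observation rather than dropping it, so the shutdown
	// cannot be missed.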
	shutdown := make(chan raft.Observation)
	observer := raft.NewObserver(shutdown, true, func(o *raft.Observation) bool {
		return o.Data == raft.Shutdown
	})
	r.RegisterObserver(observer)
	defer r.DeregisterObserver(observer)

	// Every noLeaderFrequency (10 seconds) we check whether the
	// no-leader timeout should trip.
	noLeaderCheck := w.config.Clock.After(noLeaderFrequency)
	lastContact := w.config.Clock.Now()

	for {
		select {
		case <-w.catacomb.Dying():
			return w.catacomb.ErrDying()
		case <-shutdown:
			// The raft server shut down without this worker
			// telling it to do so. This typically means that
			// the local node was removed from the cluster
			// configuration, causing it to shut down.
			return errors.New("raft shutdown")
		case now := <-noLeaderCheck:
			noLeaderCheck = w.config.Clock.After(noLeaderFrequency)
			if r.State() == raft.Leader {
				lastContact = now
				continue
			}
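			// LastContact returns the zero time if this node has
			// never heard from a leader; in that case keep our
			// existing estimate.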
			var zeroTime time.Time
			if latest := r.LastContact(); latest != zeroTime {
				lastContact = latest
			}
			if now.After(lastContact.Add(w.config.NoLeaderTimeout)) {
				w.config.Logger.Errorf("last leader contact earlier than %s", humanize.Time(lastContact))
				return ErrNoLeaderTimeout
			}
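		// Hand the raft instance and log store out to callers of
		// Raft() and LogStore(). The channels are unbuffered, so
		// each send completes only when a caller is receiving.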
		case w.raftCh <- r:
		case w.logStoreCh <- logStore:
		}
	}
}

// NewRaftConfig makes a raft config struct from the worker config
// struct passed in.
func NewRaftConfig(config Config) (*raft.Config, error) {
	raftConfig := raft.DefaultConfig()
	raftConfig.LocalID = config.LocalID
	// Setting ShutdownOnRemove to true would mean the raft node also
	// stops when it is demoted while it is the leader, so we disable
	// it.
	raftConfig.ShutdownOnRemove = false

	logWriter := &raftutil.LoggoWriter{config.Logger, loggo.DEBUG}
	raftConfig.Logger = log.New(logWriter, "", 0)

	maybeOverrideDuration := func(d time.Duration, target *time.Duration) {
		if d != 0 {
			*target = d
		}
	}
	maybeOverrideDuration(config.ElectionTimeout, &raftConfig.ElectionTimeout)
	maybeOverrideDuration(config.HeartbeatTimeout, &raftConfig.HeartbeatTimeout)
	maybeOverrideDuration(config.LeaderLeaseTimeout, &raftConfig.LeaderLeaseTimeout)

	if err := raft.ValidateConfig(raftConfig); err != nil {
		return nil, errors.Annotate(err, "validating raft config")
	}
	return raftConfig, nil
}

// NewLogStore opens a boltDB logstore in the specified directory. If
// the directory doesn't already exist it'll be created.
func NewLogStore(dir string) (*raftboltdb.BoltStore, error) {
	if err := os.MkdirAll(dir, 0700); err != nil {
		return nil, errors.Trace(err)
	}
	logs, err := raftboltdb.New(raftboltdb.Options{
		Path: filepath.Join(dir, "logs"),
	})
	if err != nil {
		return nil, errors.Annotate(err, "failed to create bolt store for raft logs")
	}
	return logs, nil
}

// NewSnapshotStore opens a file-based snapshot store in the specified
// directory. If the directory doesn't exist it'll be created.
func NewSnapshotStore(
	dir string,
	retain int,
	logger Logger,
) (raft.SnapshotStore, error) {
	const logPrefix = "[snapshot] "
	if err := os.MkdirAll(dir, 0700); err != nil {
		return nil, errors.Trace(err)
	}
	logWriter := &raftutil.LoggoWriter{logger, loggo.DEBUG}
	logLogger := log.New(logWriter, logPrefix, 0)

	snaps, err := raft.NewFileSnapshotStoreWithLogger(dir, retain, logLogger)
	if err != nil {
		return nil, errors.Annotate(err, "failed to create file snapshot store")
	}
	return snaps, nil
}

// BootstrapFSM is a minimal implementation of raft.FSM for use during
// bootstrap. Its methods should never be invoked.
type BootstrapFSM struct{}

// Apply is part of raft.FSM.
func (BootstrapFSM) Apply(log *raft.Log) interface{} {
	panic("Apply should not be called during bootstrap")
}

// Snapshot is part of raft.FSM.
func (BootstrapFSM) Snapshot() (raft.FSMSnapshot, error) {
	panic("Snapshot should not be called during bootstrap")
}

// Restore is part of raft.FSM.
func (BootstrapFSM) Restore(io.ReadCloser) error {
	panic("Restore should not be called during bootstrap")
}