// Source: github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/raft/worker.go

// Copyright 2018 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package raft

import (
	"io"
	"log"
	"os"
	"path/filepath"
	"time"

	humanize "github.com/dustin/go-humanize"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/raft-boltdb"
	"github.com/juju/clock"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/prometheus/client_golang/prometheus"
	"gopkg.in/juju/worker.v1"
	"gopkg.in/juju/worker.v1/catacomb"

	"github.com/juju/juju/worker/raft/raftutil"
)

const (
	// defaultSnapshotRetention is the number of
	// snapshots to retain on disk by default. It is used
	// when Config.SnapshotRetention is zero.
	defaultSnapshotRetention = 2

	// bootstrapAddress is the raft server address
	// configured for the bootstrap node. This address
	// will be replaced once the raftclusterer worker
	// observes an address for the server.
	bootstrapAddress raft.ServerAddress = "localhost"

	// LoopTimeout is the max time we will wait until the raft object
	// is constructed and the main loop is started. This is to avoid
	// hard-to-debug problems where the transport hung and so this
	// worker wasn't really started even though it seemed like it
	// was. If it crashes instead the logging will give a path to the
	// problem.
	LoopTimeout = 1 * time.Minute

	// noLeaderTimeout is how long a follower will wait for contact
	// from the leader before restarting. This allows us to see config
	// changes (force-appended by the raft-backstop worker) to allow
	// us to become voting again if the leader was removed leaving a
	// 2-node cluster without quorum. It is the default applied when
	// Config.NoLeaderTimeout is zero.
	noLeaderTimeout = 1 * time.Minute

	// noLeaderFrequency is how long the raft worker waits between
	// checks of whether it's in contact with the leader.
	noLeaderFrequency = 10 * time.Second
)

var (
	// ErrWorkerStopped is returned by Worker.Raft if the
	// worker has been explicitly stopped.
	ErrWorkerStopped = errors.New("raft worker stopped")

	// ErrStartTimeout is returned by NewWorker if the worker loop
	// didn't start within LoopTimeout.
	ErrStartTimeout = errors.New("timed out waiting for worker loop")

	// ErrNoLeaderTimeout is returned by the worker loop if we've gone
	// too long without contact from the leader. It gives the worker a
	// chance to see any configuration changes the backstop worker
	// might have force-appended to the raft log.
	ErrNoLeaderTimeout = errors.New("timed out waiting for leader contact")
)

// Logger represents the logging methods called by the raft worker.
type Logger interface {
	Warningf(message string, args ...interface{})
	Errorf(message string, args ...interface{})
	Logf(level loggo.Level, message string, args ...interface{})
}

// Config is the configuration required for running a raft worker.
type Config struct {
	// FSM is the raft.FSM to use for this raft worker. This
	// must be non-nil for NewWorker, and nil for Bootstrap.
	FSM raft.FSM

	// Logger is the logger for this worker.
	Logger Logger

	// StorageDir is the directory in which to store raft
	// artifacts: logs, snapshots, etc. It is expected that
	// this directory is under the full control of the raft
	// worker.
	StorageDir string

	// LocalID is the raft.ServerID of this worker.
	LocalID raft.ServerID

	// Transport is the raft.Transport to use for communication
	// between raft servers. This must be non-nil for NewWorker,
	// and nil for Bootstrap.
	//
	// The raft worker expects the server address to exactly
	// match the server ID, which is the stringified agent tag.
	// The transport internally maps the server address to one
	// or more network addresses, i.e. by looking up the API
	// connection information in the state database.
	Transport raft.Transport

	// Clock is used for timeouts in the worker (although not inside
	// raft).
	Clock clock.Clock

	// NoLeaderTimeout, if non-zero, will override the default
	// timeout for leader contact before restarting.
	NoLeaderTimeout time.Duration

	// ElectionTimeout, if non-zero, will override the default
	// raft election timeout.
	ElectionTimeout time.Duration

	// HeartbeatTimeout, if non-zero, will override the default
	// raft heartbeat timeout.
	HeartbeatTimeout time.Duration

	// LeaderLeaseTimeout, if non-zero, will override the default
	// raft leader lease timeout.
	LeaderLeaseTimeout time.Duration

	// SnapshotRetention is the non-negative number of snapshots
	// to retain on disk. If zero, defaults to 2.
	SnapshotRetention int

	// PrometheusRegisterer is used to register the raft metrics.
	// It is optional: Validate does not require it to be set.
	PrometheusRegisterer prometheus.Registerer
}

// Validate validates the raft worker configuration.
//
// The checks run in a fixed order and the first failing one is
// reported as a "not valid" error. The duration overrides and
// PrometheusRegisterer are not checked here.
func (config Config) Validate() error {
	if config.FSM == nil {
		return errors.NotValidf("nil FSM")
	}
	if config.Logger == nil {
		return errors.NotValidf("nil Logger")
	}
	if config.StorageDir == "" {
		return errors.NotValidf("empty StorageDir")
	}
	if config.LocalID == "" {
		return errors.NotValidf("empty LocalID")
	}
	// Zero is allowed: it selects the default retention.
	if config.SnapshotRetention < 0 {
		return errors.NotValidf("negative SnapshotRetention")
	}
	if config.Transport == nil {
		return errors.NotValidf("nil Transport")
	}
	if config.Clock == nil {
		return errors.NotValidf("nil Clock")
	}
	return nil
}

// Bootstrap bootstraps the raft cluster, using the given configuration.
//
// This is only to be called once, at the beginning of the raft cluster's
// lifetime, by the bootstrap machine agent.
167 func Bootstrap(config Config) error { 168 if config.FSM != nil { 169 return errors.NotValidf("non-nil FSM during Bootstrap") 170 } 171 if config.Transport != nil { 172 return errors.NotValidf("non-nil Transport during Bootstrap") 173 } 174 175 // During bootstrap we use an in-memory transport. We just need 176 // to make sure we use the same local address as we'll use later. 177 _, transport := raft.NewInmemTransport(bootstrapAddress) 178 defer transport.Close() 179 config.Transport = transport 180 181 // During bootstrap, we do not require an FSM. 182 config.FSM = BootstrapFSM{} 183 184 w, err := newWorker(config) 185 if err != nil { 186 return errors.Trace(err) 187 } 188 defer worker.Stop(w) 189 190 r, err := w.Raft() 191 if err != nil { 192 return errors.Trace(err) 193 } 194 195 if err := r.BootstrapCluster(raft.Configuration{ 196 Servers: []raft.Server{{ 197 ID: config.LocalID, 198 Address: bootstrapAddress, 199 }}, 200 }).Error(); err != nil { 201 return errors.Annotate(err, "bootstrapping raft cluster") 202 } 203 return errors.Annotate(worker.Stop(w), "stopping bootstrap raft worker") 204 } 205 206 // NewWorker returns a new raft worker, with the given configuration. 207 func NewWorker(config Config) (worker.Worker, error) { 208 return newWorker(config) 209 } 210 211 func newWorker(config Config) (*Worker, error) { 212 if err := config.Validate(); err != nil { 213 return nil, errors.Trace(err) 214 } 215 if config.NoLeaderTimeout == 0 { 216 config.NoLeaderTimeout = noLeaderTimeout 217 } 218 raftConfig, err := NewRaftConfig(config) 219 if err != nil { 220 return nil, errors.Trace(err) 221 } 222 w := &Worker{ 223 config: config, 224 raftCh: make(chan *raft.Raft), 225 logStoreCh: make(chan raft.LogStore), 226 } 227 if err := catacomb.Invoke(catacomb.Plan{ 228 Site: &w.catacomb, 229 Work: func() error { 230 return w.loop(raftConfig) 231 }, 232 }); err != nil { 233 return nil, errors.Trace(err) 234 } 235 // Wait for the loop to be started. 
236 select { 237 case <-config.Clock.After(LoopTimeout): 238 w.catacomb.Kill(ErrStartTimeout) 239 return nil, ErrStartTimeout 240 case <-w.raftCh: 241 } 242 return w, nil 243 } 244 245 // Worker is a worker that manages a raft.Raft instance. 246 type Worker struct { 247 catacomb catacomb.Catacomb 248 config Config 249 250 raftCh chan *raft.Raft 251 logStoreCh chan raft.LogStore 252 } 253 254 // Raft returns the raft.Raft managed by this worker, or 255 // an error if the worker has stopped. 256 func (w *Worker) Raft() (*raft.Raft, error) { 257 select { 258 case <-w.catacomb.Dying(): 259 err := w.catacomb.Err() 260 if err != nil { 261 return nil, err 262 } 263 return nil, ErrWorkerStopped 264 case raft := <-w.raftCh: 265 return raft, nil 266 } 267 } 268 269 // LogStore returns the raft.LogStore managed by this worker, or 270 // an error if the worker has stopped. 271 func (w *Worker) LogStore() (raft.LogStore, error) { 272 select { 273 case <-w.catacomb.Dying(): 274 err := w.catacomb.Err() 275 if err != nil { 276 return nil, err 277 } 278 return nil, ErrWorkerStopped 279 case logStore := <-w.logStoreCh: 280 return logStore, nil 281 } 282 } 283 284 // Kill is part of the worker.Worker interface. 285 func (w *Worker) Kill() { 286 w.catacomb.Kill(nil) 287 } 288 289 // Wait is part of the worker.Worker interface. 290 func (w *Worker) Wait() error { 291 return w.catacomb.Wait() 292 } 293 294 func (w *Worker) loop(raftConfig *raft.Config) (loopErr error) { 295 // Register the metrics. 296 if w.config.PrometheusRegisterer != nil { 297 registerMetrics(w.config.PrometheusRegisterer, w.config.Logger) 298 } 299 300 rawLogStore, err := NewLogStore(w.config.StorageDir) 301 if err != nil { 302 return errors.Trace(err) 303 } 304 // We need to make sure access to the LogStore methods (+ closing) 305 // is synchronised, but we don't need to synchronise the 306 // StableStore methods, because we aren't giving out a reference 307 // to the StableStore - only the raft instance uses it. 
308 logStore := &syncLogStore{store: rawLogStore} 309 defer logStore.Close() 310 311 snapshotRetention := w.config.SnapshotRetention 312 if snapshotRetention == 0 { 313 snapshotRetention = defaultSnapshotRetention 314 } 315 snapshotStore, err := NewSnapshotStore(w.config.StorageDir, snapshotRetention, w.config.Logger) 316 if err != nil { 317 return errors.Trace(err) 318 } 319 320 r, err := raft.NewRaft(raftConfig, w.config.FSM, logStore, rawLogStore, snapshotStore, w.config.Transport) 321 if err != nil { 322 return errors.Trace(err) 323 } 324 defer func() { 325 if err := r.Shutdown().Error(); err != nil { 326 if loopErr == nil { 327 loopErr = err 328 } else { 329 w.config.Logger.Warningf("raft shutdown failed: %s", err) 330 } 331 } 332 }() 333 334 shutdown := make(chan raft.Observation) 335 observer := raft.NewObserver(shutdown, true, func(o *raft.Observation) bool { 336 return o.Data == raft.Shutdown 337 }) 338 r.RegisterObserver(observer) 339 defer r.DeregisterObserver(observer) 340 341 // Every 10 seconds we check whether the no-leader timeout should 342 // trip. 343 noLeaderCheck := w.config.Clock.After(noLeaderFrequency) 344 lastContact := w.config.Clock.Now() 345 346 for { 347 select { 348 case <-w.catacomb.Dying(): 349 return w.catacomb.ErrDying() 350 case <-shutdown: 351 // The raft server shutdown without this worker 352 // telling it to do so. This typically means that 353 // the local node was removed from the cluster 354 // configuration, causing it to shutdown. 
355 return errors.New("raft shutdown") 356 case now := <-noLeaderCheck: 357 noLeaderCheck = w.config.Clock.After(noLeaderFrequency) 358 if r.State() == raft.Leader { 359 lastContact = now 360 continue 361 } 362 var zeroTime time.Time 363 if latest := r.LastContact(); latest != zeroTime { 364 lastContact = latest 365 } 366 if now.After(lastContact.Add(w.config.NoLeaderTimeout)) { 367 w.config.Logger.Errorf("last leader contact earlier than %s", humanize.Time(lastContact)) 368 return ErrNoLeaderTimeout 369 } 370 case w.raftCh <- r: 371 case w.logStoreCh <- logStore: 372 } 373 } 374 } 375 376 // NewRaftConfig makes a raft config struct from the worker config 377 // struct passed in. 378 func NewRaftConfig(config Config) (*raft.Config, error) { 379 raftConfig := raft.DefaultConfig() 380 raftConfig.LocalID = config.LocalID 381 // Having ShutdownOnRemove true means that the raft node also 382 // stops when it's demoted if it's the leader. 383 raftConfig.ShutdownOnRemove = false 384 385 logWriter := &raftutil.LoggoWriter{config.Logger, loggo.DEBUG} 386 raftConfig.Logger = log.New(logWriter, "", 0) 387 388 maybeOverrideDuration := func(d time.Duration, target *time.Duration) { 389 if d != 0 { 390 *target = d 391 } 392 } 393 maybeOverrideDuration(config.ElectionTimeout, &raftConfig.ElectionTimeout) 394 maybeOverrideDuration(config.HeartbeatTimeout, &raftConfig.HeartbeatTimeout) 395 maybeOverrideDuration(config.LeaderLeaseTimeout, &raftConfig.LeaderLeaseTimeout) 396 397 if err := raft.ValidateConfig(raftConfig); err != nil { 398 return nil, errors.Annotate(err, "validating raft config") 399 } 400 return raftConfig, nil 401 } 402 403 // NewLogStore opens a boltDB logstore in the specified directory. If 404 // the directory doesn't already exist it'll be created. 
405 func NewLogStore(dir string) (*raftboltdb.BoltStore, error) { 406 if err := os.MkdirAll(dir, 0700); err != nil { 407 return nil, errors.Trace(err) 408 } 409 logs, err := raftboltdb.New(raftboltdb.Options{ 410 Path: filepath.Join(dir, "logs"), 411 }) 412 if err != nil { 413 return nil, errors.Annotate(err, "failed to create bolt store for raft logs") 414 } 415 return logs, nil 416 } 417 418 // NewSnapshotStore opens a file-based snapshot store in the specified 419 // directory. If the directory doesn't exist it'll be created. 420 func NewSnapshotStore( 421 dir string, 422 retain int, 423 logger Logger, 424 ) (raft.SnapshotStore, error) { 425 const logPrefix = "[snapshot] " 426 if err := os.MkdirAll(dir, 0700); err != nil { 427 return nil, errors.Trace(err) 428 } 429 logWriter := &raftutil.LoggoWriter{logger, loggo.DEBUG} 430 logLogger := log.New(logWriter, logPrefix, 0) 431 432 snaps, err := raft.NewFileSnapshotStoreWithLogger(dir, retain, logLogger) 433 if err != nil { 434 return nil, errors.Annotate(err, "failed to create file snapshot store") 435 } 436 return snaps, nil 437 } 438 439 // BootstrapFSM is a minimal implementation of raft.FSM for use during 440 // bootstrap. Its methods should never be invoked. 441 type BootstrapFSM struct{} 442 443 // Apply is part of raft.FSM. 444 func (BootstrapFSM) Apply(log *raft.Log) interface{} { 445 panic("Apply should not be called during bootstrap") 446 } 447 448 // Snapshot is part of raft.FSM. 449 func (BootstrapFSM) Snapshot() (raft.FSMSnapshot, error) { 450 panic("Snapshot should not be called during bootstrap") 451 } 452 453 // Restore is part of raft.FSM. 454 func (BootstrapFSM) Restore(io.ReadCloser) error { 455 panic("Restore should not be called during bootstrap") 456 }