github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/raft/worker_test.go (about) 1 // Copyright 2018 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package raft_test 5 6 import ( 7 "log" 8 "time" 9 10 coreraft "github.com/hashicorp/raft" 11 "github.com/juju/clock" 12 "github.com/juju/clock/testclock" 13 "github.com/juju/loggo" 14 "github.com/juju/testing" 15 jc "github.com/juju/testing/checkers" 16 gc "gopkg.in/check.v1" 17 "gopkg.in/juju/worker.v1/workertest" 18 19 coretesting "github.com/juju/juju/testing" 20 "github.com/juju/juju/worker/raft" 21 "github.com/juju/juju/worker/raft/rafttest" 22 "github.com/juju/juju/worker/raft/raftutil" 23 ) 24 25 type workerFixture struct { 26 testing.IsolationSuite 27 fsm *raft.SimpleFSM 28 config raft.Config 29 } 30 31 func (s *workerFixture) SetUpTest(c *gc.C) { 32 s.IsolationSuite.SetUpTest(c) 33 s.fsm = &raft.SimpleFSM{} 34 s.config = raft.Config{ 35 FSM: s.fsm, 36 Logger: loggo.GetLogger("juju.worker.raft_test"), 37 StorageDir: c.MkDir(), 38 LocalID: "123", 39 Transport: s.newTransport("123"), 40 Clock: testclock.NewClock(time.Time{}), 41 } 42 } 43 44 func (s *workerFixture) newTransport(address coreraft.ServerAddress) *coreraft.InmemTransport { 45 _, transport := coreraft.NewInmemTransport(address) 46 s.AddCleanup(func(c *gc.C) { 47 c.Assert(transport.Close(), jc.ErrorIsNil) 48 }) 49 return transport 50 } 51 52 type WorkerValidationSuite struct { 53 workerFixture 54 } 55 56 var _ = gc.Suite(&WorkerValidationSuite{}) 57 58 func (s *WorkerValidationSuite) TestValidateErrors(c *gc.C) { 59 type test struct { 60 f func(*raft.Config) 61 expect string 62 } 63 tests := []test{{ 64 func(cfg *raft.Config) { cfg.FSM = nil }, 65 "nil FSM not valid", 66 }, { 67 func(cfg *raft.Config) { cfg.Logger = nil }, 68 "nil Logger not valid", 69 }, { 70 func(cfg *raft.Config) { cfg.StorageDir = "" }, 71 "empty StorageDir not valid", 72 }, { 73 func(cfg *raft.Config) { cfg.LocalID = "" }, 74 "empty LocalID not valid", 75 }, { 76 func(cfg *raft.Config) { cfg.HeartbeatTimeout = time.Millisecond }, 77 "validating raft config: Heartbeat timeout is too low", 78 }, { 79 func(cfg *raft.Config) { cfg.Transport = nil }, 80 "nil Transport not valid", 81 }, { 82 func(cfg *raft.Config) { cfg.Clock = nil }, 83 "nil Clock not valid", 84 }} 85 for i, test := range tests { 86 c.Logf("test #%d (%s)", i, test.expect) 87 s.testValidateError(c, test.f, test.expect) 88 } 89 } 90 91 func (s *WorkerValidationSuite) testValidateError(c *gc.C, f func(*raft.Config), expect string) { 92 config := s.config 93 f(&config) 94 w, err := raft.NewWorker(config) 95 if !c.Check(err, gc.NotNil) { 96 workertest.DirtyKill(c, w) 97 return 98 } 99 c.Check(w, gc.IsNil) 100 c.Check(err, gc.ErrorMatches, expect) 101 } 102 103 func (s *WorkerValidationSuite) TestBootstrapFSM(c *gc.C) { 104 s.config.Transport = nil 105 err := raft.Bootstrap(s.config) 106 c.Assert(err, gc.ErrorMatches, "non-nil FSM during Bootstrap not valid") 107 } 108 109 func (s *WorkerValidationSuite) TestBootstrapTransport(c *gc.C) { 110 s.config.FSM = nil 111 err := raft.Bootstrap(s.config) 112 c.Assert(err, gc.ErrorMatches, "non-nil Transport during Bootstrap not valid") 113 } 114 115 type WorkerSuite struct { 116 workerFixture 117 worker *raft.Worker 118 clock *testclock.Clock 119 } 120 121 var _ = gc.Suite(&WorkerSuite{}) 122 123 func (s *WorkerSuite) SetUpTest(c *gc.C) { 124 s.workerFixture.SetUpTest(c) 125 126 // Speed up the tests. 127 s.config.HeartbeatTimeout = 100 * time.Millisecond 128 s.config.ElectionTimeout = s.config.HeartbeatTimeout 129 s.config.LeaderLeaseTimeout = s.config.HeartbeatTimeout 130 131 // Bootstrap before starting the worker. 132 transport := s.config.Transport 133 fsm := s.config.FSM 134 s.config.Transport = nil 135 s.config.FSM = nil 136 err := raft.Bootstrap(s.config) 137 c.Assert(err, jc.ErrorIsNil) 138 139 // Make a new clock so the waits from the bootstrap aren't hanging 140 // around. Use time.Now() as the start so the time can be compared 141 // to raft.LastContact(), which unfortunately uses wallclock time. 142 s.clock = testclock.NewClock(time.Now()) 143 s.config.Clock = s.clock 144 s.config.NoLeaderTimeout = 4 * time.Second 145 146 s.config.Transport = transport 147 s.config.FSM = fsm 148 worker, err := raft.NewWorker(s.config) 149 c.Assert(err, jc.ErrorIsNil) 150 s.AddCleanup(func(c *gc.C) { 151 workertest.DirtyKill(c, worker) 152 }) 153 s.worker = worker.(*raft.Worker) 154 } 155 156 func (s *WorkerSuite) waitLeader(c *gc.C) *coreraft.Raft { 157 r, err := s.worker.Raft() 158 c.Assert(err, jc.ErrorIsNil) 159 c.Assert(r, gc.NotNil) 160 161 select { 162 case leader := <-r.LeaderCh(): 163 c.Assert(leader, jc.IsTrue) 164 case <-time.After(coretesting.LongWait): 165 c.Fatal("timed out waiting for leadership change") 166 } 167 return r 168 } 169 170 func (s *WorkerSuite) TestBootstrapAddress(c *gc.C) { 171 r := s.waitLeader(c) 172 173 f := r.GetConfiguration() 174 c.Assert(f.Error(), jc.ErrorIsNil) 175 c.Assert(f.Configuration().Servers, jc.DeepEquals, []coreraft.Server{{ 176 Suffrage: coreraft.Voter, 177 ID: "123", 178 Address: "localhost", 179 }}) 180 } 181 182 func (s *WorkerSuite) TestRaft(c *gc.C) { 183 r := s.waitLeader(c) 184 185 f := r.Apply([]byte("command1"), time.Minute) 186 c.Assert(f.Error(), jc.ErrorIsNil) 187 c.Assert(f.Index(), gc.Equals, uint64(3)) 188 c.Assert(f.Response(), gc.Equals, 1) 189 190 f = r.Apply([]byte("command2"), time.Minute) 191 c.Assert(f.Error(), jc.ErrorIsNil) 192 c.Assert(f.Index(), gc.Equals, uint64(4)) 193 c.Assert(f.Response(), gc.Equals, 2) 194 195 c.Assert(s.fsm.Logs(), jc.DeepEquals, [][]byte{ 196 []byte("command1"), 197 []byte("command2"), 198 }) 199 } 200 201 func (s *WorkerSuite) TestRaftWorkerStopped(c *gc.C) { 202 s.worker.Kill() 203 204 r, err := s.worker.Raft() 205 c.Assert(err, gc.Equals, raft.ErrWorkerStopped) 206 c.Assert(r, gc.IsNil) 207 } 208 209 func (s *WorkerSuite) TestRestoreSnapshot(c *gc.C) { 210 r := s.waitLeader(c) 211 212 f := r.Apply([]byte("command1"), time.Minute) 213 c.Assert(f.Error(), jc.ErrorIsNil) 214 c.Assert(f.Index(), gc.Equals, uint64(3)) 215 c.Assert(f.Response(), gc.Equals, 1) 216 217 sf := r.Snapshot() 218 c.Assert(sf.Error(), jc.ErrorIsNil) 219 meta, rc, err := sf.Open() 220 c.Assert(err, jc.ErrorIsNil) 221 defer rc.Close() 222 223 f = r.Apply([]byte("command2"), time.Minute) 224 c.Assert(f.Error(), jc.ErrorIsNil) 225 c.Assert(f.Index(), gc.Equals, uint64(4)) 226 c.Assert(f.Response(), gc.Equals, 2) 227 228 err = r.Restore(meta, rc, time.Minute) 229 c.Assert(err, jc.ErrorIsNil) 230 c.Assert(s.fsm.Logs(), jc.DeepEquals, [][]byte{ 231 []byte("command1"), 232 }) 233 } 234 235 func (s *WorkerSuite) TestStartStop(c *gc.C) { 236 workertest.CleanKill(c, s.worker) 237 } 238 239 func (s *WorkerSuite) TestShutdownRaftKillsWorker(c *gc.C) { 240 r := s.waitLeader(c) 241 c.Assert(r.Shutdown().Error(), jc.ErrorIsNil) 242 243 err := workertest.CheckKilled(c, s.worker) 244 c.Assert(err, gc.ErrorMatches, "raft shutdown") 245 } 246 247 func (s *WorkerSuite) TestLogStore(c *gc.C) { 248 _, err := s.worker.LogStore() 249 c.Assert(err, jc.ErrorIsNil) 250 } 251 252 func (s *WorkerSuite) newRaft(c *gc.C, id coreraft.ServerID) ( 253 *coreraft.Raft, *coreraft.InmemTransport, 254 ) { 255 transport := s.newTransport("") 256 store := coreraft.NewInmemStore() 257 raftConfig := coreraft.DefaultConfig() 258 raftConfig.LocalID = id 259 raftConfig.HeartbeatTimeout = 100 * time.Millisecond 260 raftConfig.ElectionTimeout = raftConfig.HeartbeatTimeout 261 raftConfig.LeaderLeaseTimeout = raftConfig.HeartbeatTimeout 262 raftConfig.Logger = log.New(&raftutil.LoggoWriter{ 263 loggo.GetLogger("juju.worker.raft_test_" + string(id)), 264 loggo.DEBUG, 265 }, "", 0) 266 r, err := coreraft.NewRaft( 267 raftConfig, 268 &raft.SimpleFSM{}, 269 store, 270 store, 271 coreraft.NewInmemSnapshotStore(), 272 transport, 273 ) 274 c.Assert(err, jc.ErrorIsNil) 275 s.AddCleanup(func(c *gc.C) { 276 c.Assert(r.Shutdown().Error(), jc.ErrorIsNil) 277 }) 278 return r, transport 279 } 280 281 func (s *WorkerSuite) TestNoLeaderTimeout(c *gc.C) { 282 // Get the raft node into a state where it has no contact with the 283 // leader by adding 2 more nodes, demoting the local one so that 284 // it isn't the leader, then stopping the other nodes. 285 transport0 := s.config.Transport.(coreraft.LoopbackTransport) 286 raft1, transport1 := s.newRaft(c, "1") 287 raft2, transport2 := s.newRaft(c, "2") 288 connectTransports(transport0, transport1, transport2) 289 290 raft0 := s.waitLeader(c) 291 f1 := raft0.AddVoter("1", transport1.LocalAddr(), 0, 0) 292 f2 := raft0.AddVoter("2", transport2.LocalAddr(), 0, 0) 293 c.Assert(f1.Error(), jc.ErrorIsNil) 294 c.Assert(f2.Error(), jc.ErrorIsNil) 295 296 rafttest.CheckConfiguration(c, raft0, []coreraft.Server{{ 297 ID: "123", 298 Address: coreraft.ServerAddress("localhost"), 299 Suffrage: coreraft.Voter, 300 }, { 301 ID: "1", 302 Address: transport1.LocalAddr(), 303 Suffrage: coreraft.Voter, 304 }, { 305 ID: "2", 306 Address: transport2.LocalAddr(), 307 Suffrage: coreraft.Voter, 308 }}) 309 310 f3 := raft0.DemoteVoter("123", 0, 0) 311 c.Assert(f3.Error(), jc.ErrorIsNil) 312 313 // Wait until raft0 isn't the leader anymore. 314 leader := true 315 for a := coretesting.LongAttempt.Start(); a.Next(); { 316 leader = raft0.Leader() == coreraft.ServerAddress("localhost") 317 if !leader { 318 break 319 } 320 } 321 c.Assert(leader, jc.IsFalse) 322 323 f4 := raft1.Shutdown() 324 f5 := raft2.Shutdown() 325 c.Assert(f4.Error(), jc.ErrorIsNil) 326 c.Assert(f5.Error(), jc.ErrorIsNil) 327 328 // Now advance time to trigger the timeout. There should be 2 329 // waits when we advance: 330 // * the loop timeout wait from starting the worker 331 // * the no leader timeout check in loop. 332 c.Assert(s.clock.WaitAdvance(10*time.Second, coretesting.LongWait, 2), jc.ErrorIsNil) 333 c.Assert(workertest.CheckKilled(c, s.worker), gc.Equals, raft.ErrNoLeaderTimeout) 334 } 335 336 // Connect the provided transport bidirectionally. 337 func connectTransports(transports ...coreraft.LoopbackTransport) { 338 for _, t1 := range transports { 339 for _, t2 := range transports { 340 if t1 == t2 { 341 continue 342 } 343 t1.Connect(t2.LocalAddr(), t2) 344 } 345 } 346 } 347 348 type WorkerTimeoutSuite struct { 349 workerFixture 350 } 351 352 var _ = gc.Suite(&WorkerTimeoutSuite{}) 353 354 func (s *WorkerTimeoutSuite) SetUpTest(c *gc.C) { 355 s.workerFixture.SetUpTest(c) 356 357 // Speed up the tests. 358 s.config.HeartbeatTimeout = 100 * time.Millisecond 359 s.config.ElectionTimeout = s.config.HeartbeatTimeout 360 s.config.LeaderLeaseTimeout = s.config.HeartbeatTimeout 361 362 // Bootstrap before starting the worker. 363 transport := s.config.Transport 364 fsm := s.config.FSM 365 s.config.Transport = nil 366 s.config.FSM = nil 367 err := raft.Bootstrap(s.config) 368 c.Assert(err, jc.ErrorIsNil) 369 370 s.config.Transport = transport 371 s.config.FSM = fsm 372 } 373 374 func (s *WorkerTimeoutSuite) TestNewWorkerTimesOut(c *gc.C) { 375 // If for some reason it takes a long time to create the Raft 376 // object we don't want to just hang - that can make it really 377 // hard to work out what's going on. Instead we should timeout if 378 // the raft loop doesn't get started. 379 testClock := testclock.NewClock(time.Time{}) 380 s.config.Clock = testClock 381 _, underlying := coreraft.NewInmemTransport("something") 382 s.config.Transport = &hangingTransport{ 383 Transport: underlying, 384 clock: testClock, 385 } 386 errChan := make(chan error) 387 go func() { 388 w, err := raft.NewWorker(s.config) 389 c.Check(w, gc.IsNil) 390 errChan <- err 391 }() 392 393 // We wait for the transport and the worker to be waiting for the 394 // clock, then we move it past the timeout. 395 err := testClock.WaitAdvance(2*raft.LoopTimeout, coretesting.LongWait, 2) 396 c.Assert(err, jc.ErrorIsNil) 397 398 select { 399 case err := <-errChan: 400 c.Assert(err, gc.ErrorMatches, "timed out waiting for worker loop") 401 case <-time.After(coretesting.LongWait): 402 c.Fatalf("timed out waiting for worker error") 403 } 404 } 405 406 type hangingTransport struct { 407 coreraft.Transport 408 clock clock.Clock 409 } 410 411 func (t *hangingTransport) LocalAddr() coreraft.ServerAddress { 412 <-t.clock.After(5 * raft.LoopTimeout) 413 return t.Transport.LocalAddr() 414 }