github.com/kaituanwang/hyperledger@v2.0.1+incompatible/orderer/consensus/etcdraft/chain_test.go

/*
Copyright IBM Corp. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package etcdraft_test

import (
	"encoding/pem"
	"fmt"
	"io/ioutil"
	"os"
	"os/user"
	"path"
	"sync"
	"time"

	"code.cloudfoundry.org/clock/fakeclock"
	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	raftprotos "github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
	"github.com/hyperledger/fabric/bccsp"
	"github.com/hyperledger/fabric/bccsp/factory"
	"github.com/hyperledger/fabric/bccsp/sw"
	"github.com/hyperledger/fabric/common/crypto/tlsgen"
	"github.com/hyperledger/fabric/common/flogging"
	"github.com/hyperledger/fabric/orderer/common/cluster"
	"github.com/hyperledger/fabric/orderer/consensus/etcdraft"
	"github.com/hyperledger/fabric/orderer/consensus/etcdraft/mocks"
	consensusmocks "github.com/hyperledger/fabric/orderer/consensus/mocks"
	mockblockcutter "github.com/hyperledger/fabric/orderer/mocks/common/blockcutter"
	"github.com/hyperledger/fabric/protoutil"
	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
	"github.com/onsi/gomega/types"
	"github.com/pkg/errors"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/raftpb"
	"go.uber.org/zap"
)

const (
	interval            = 100 * time.Millisecond
	LongEventualTimeout = 10 * time.Second

	// 10 is the default setting of ELECTION_TICK.
	// We used to have a small number here (2) to reduce the time for test - we don't
	// need to tick node 10 times to trigger election - however, we are using another
	// mechanism to trigger it now which does not depend on time: send an artificial
	// MsgTimeoutNow to node.
	ELECTION_TICK  = 10
	HEARTBEAT_TICK = 1
)

func init() {
	factory.InitFactories(nil)
}

func mockOrderer(batchTimeout time.Duration, metadata []byte) *mocks.OrdererConfig {
	orderer := &mocks.OrdererConfig{}
	orderer.BatchTimeoutReturns(batchTimeout)
	orderer.ConsensusMetadataReturns(metadata)
	return orderer
}

// for some test cases we chmod file/dir to test failures caused by exotic permissions.
// however this does not work if tests are running as root, i.e. in a container.
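// skipIfRoot is used by the tests below that chmod files/dirs to 0300 or 0500:
// root bypasses file-permission checks, so those failure paths cannot be
// exercised when the suite runs as root (e.g. inside a container).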
70 func skipIfRoot() { 71 u, err := user.Current() 72 Expect(err).NotTo(HaveOccurred()) 73 if u.Uid == "0" { 74 Skip("you are running test as root, there's no way to make files unreadable") 75 } 76 } 77 78 var _ = Describe("Chain", func() { 79 var ( 80 env *common.Envelope 81 channelID string 82 tlsCA tlsgen.CA 83 logger *flogging.FabricLogger 84 ) 85 86 BeforeEach(func() { 87 tlsCA, _ = tlsgen.NewCA() 88 channelID = "test-channel" 89 logger = flogging.NewFabricLogger(zap.NewExample()) 90 env = &common.Envelope{ 91 Payload: marshalOrPanic(&common.Payload{ 92 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 93 Data: []byte("TEST_MESSAGE"), 94 }), 95 } 96 }) 97 98 Describe("Single Raft node", func() { 99 var ( 100 configurator *mocks.FakeConfigurator 101 consenterMetadata *raftprotos.ConfigMetadata 102 consenters map[uint64]*raftprotos.Consenter 103 clock *fakeclock.FakeClock 104 opts etcdraft.Options 105 support *consensusmocks.FakeConsenterSupport 106 cutter *mockblockcutter.Receiver 107 storage *raft.MemoryStorage 108 observeC chan raft.SoftState 109 chain *etcdraft.Chain 110 dataDir string 111 walDir string 112 snapDir string 113 err error 114 fakeFields *fakeMetricsFields 115 cryptoProvider bccsp.BCCSP 116 ) 117 118 BeforeEach(func() { 119 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 120 Expect(err).NotTo(HaveOccurred()) 121 122 configurator = &mocks.FakeConfigurator{} 123 clock = fakeclock.NewFakeClock(time.Now()) 124 storage = raft.NewMemoryStorage() 125 126 dataDir, err = ioutil.TempDir("", "wal-") 127 Expect(err).NotTo(HaveOccurred()) 128 walDir = path.Join(dataDir, "wal") 129 snapDir = path.Join(dataDir, "snapshot") 130 131 observeC = make(chan raft.SoftState, 1) 132 133 support = &consensusmocks.FakeConsenterSupport{} 134 support.ChannelIDReturns(channelID) 135 consenterMetadata = createMetadata(1, tlsCA) 136 support.SharedConfigReturns(mockOrderer(time.Hour, marshalOrPanic(consenterMetadata))) 137 138 cutter = mockblockcutter.NewReceiver() 139 support.BlockCutterReturns(cutter) 140 141 // for block creator initialization 142 support.HeightReturns(1) 143 support.BlockReturns(getSeedBlock()) 144 145 meta := &raftprotos.BlockMetadata{ 146 ConsenterIds: make([]uint64, len(consenterMetadata.Consenters)), 147 NextConsenterId: 1, 148 } 149 150 for i := range meta.ConsenterIds { 151 meta.ConsenterIds[i] = meta.NextConsenterId 152 meta.NextConsenterId++ 153 } 154 155 consenters = map[uint64]*raftprotos.Consenter{} 156 for i, c := range consenterMetadata.Consenters { 157 consenters[meta.ConsenterIds[i]] = c 158 } 159 160 fakeFields = newFakeMetricsFields() 161 162 opts = etcdraft.Options{ 163 RaftID: 1, 164 Clock: clock, 165 TickInterval: interval, 166 ElectionTick: ELECTION_TICK, 167 HeartbeatTick: HEARTBEAT_TICK, 168 MaxSizePerMsg: 1024 * 1024, 169 MaxInflightBlocks: 256, 170 BlockMetadata: meta, 171 Consenters: consenters, 172 Logger: logger, 173 MemoryStorage: storage, 174 WALDir: walDir, 175 SnapDir: snapDir, 176 Metrics: newFakeMetrics(fakeFields), 177 } 178 }) 179 180 campaign := func(c *etcdraft.Chain, observeC <-chan raft.SoftState) { 181 Eventually(func() <-chan raft.SoftState { 182 c.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow})}, 0) 183 return observeC 184 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 185 } 186 187 JustBeforeEach(func() { 188 chain, err = 
etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC) 189 Expect(err).NotTo(HaveOccurred()) 190 191 chain.Start() 192 193 // When the Raft node bootstraps, it produces a ConfChange 194 // to add itself, which needs to be consumed with Ready(). 195 // If there are pending configuration changes in raft, 196 // it refuses to campaign, no matter how many ticks elapse. 197 // This is not a problem in the production code because raft.Ready 198 // will be consumed eventually, as the wall clock advances. 199 // 200 // However, this is problematic when using the fake clock and 201 // artificial ticks. Instead of ticking raft indefinitely until 202 // raft.Ready is consumed, this check is added to indirectly guarantee 203 // that the first ConfChange is actually consumed and we can safely 204 // proceed to tick the Raft FSM. 205 Eventually(func() error { 206 _, err := storage.Entries(1, 1, 1) 207 return err 208 }, LongEventualTimeout).ShouldNot(HaveOccurred()) 209 }) 210 211 AfterEach(func() { 212 chain.Halt() 213 Eventually(chain.Errored, LongEventualTimeout).Should(BeClosed()) 214 // Make sure no timer leak 215 Eventually(clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 216 os.RemoveAll(dataDir) 217 }) 218 219 Context("when a node starts up", func() { 220 It("properly configures the communication layer", func() { 221 expectedNodeConfig := nodeConfigFromMetadata(consenterMetadata) 222 Eventually(configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(1)) 223 _, arg2 := configurator.ConfigureArgsForCall(0) 224 Expect(arg2).To(Equal(expectedNodeConfig)) 225 }) 226 227 It("correctly sets the metrics labels and publishes requisite metrics", func() { 228 type withImplementers interface { 229 WithCallCount() int 230 WithArgsForCall(int) []string 231 } 232 metricsList := []withImplementers{ 233 fakeFields.fakeClusterSize, 234 fakeFields.fakeIsLeader, 235 fakeFields.fakeActiveNodes, 236 fakeFields.fakeCommittedBlockNumber, 237 fakeFields.fakeSnapshotBlockNumber, 238 fakeFields.fakeLeaderChanges, 239 fakeFields.fakeProposalFailures, 240 fakeFields.fakeDataPersistDuration, 241 fakeFields.fakeNormalProposalsReceived, 242 fakeFields.fakeConfigProposalsReceived, 243 } 244 for _, m := range metricsList { 245 Expect(m.WithCallCount()).To(Equal(1)) 246 Expect(func() string { 247 return m.WithArgsForCall(0)[1] 248 }()).To(Equal(channelID)) 249 } 250 251 Expect(fakeFields.fakeClusterSize.SetCallCount()).To(Equal(1)) 252 Expect(fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(1))) 253 Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(1)) 254 Expect(fakeFields.fakeIsLeader.SetArgsForCall(0)).To(Equal(float64(0))) 255 Expect(fakeFields.fakeActiveNodes.SetCallCount()).To(Equal(1)) 256 Expect(fakeFields.fakeActiveNodes.SetArgsForCall(0)).To(Equal(float64(0))) 257 }) 258 }) 259 260 Context("when no Raft leader is elected", func() { 261 It("fails to order envelope", func() { 262 err := chain.Order(env, 0) 263 Expect(err).To(MatchError("no Raft leader")) 264 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 265 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 266 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(0)) 267 Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1)) 268 Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1))) 269 }) 270 271 It("starts proactive campaign", func() { 272 // assert that even tick 
supplied are less than ELECTION_TIMEOUT, 273 // a leader can still be successfully elected. 274 for i := 0; i < ELECTION_TICK; i++ { 275 clock.Increment(interval) 276 time.Sleep(10 * time.Millisecond) 277 } 278 Eventually(observeC, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 279 }) 280 }) 281 282 Context("when Raft leader is elected", func() { 283 JustBeforeEach(func() { 284 campaign(chain, observeC) 285 }) 286 287 It("updates metrics upon leader election", func() { 288 Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(2)) 289 Expect(fakeFields.fakeIsLeader.SetArgsForCall(1)).To(Equal(float64(1))) 290 Expect(fakeFields.fakeLeaderChanges.AddCallCount()).To(Equal(1)) 291 Expect(fakeFields.fakeLeaderChanges.AddArgsForCall(0)).To(Equal(float64(1))) 292 }) 293 294 It("fails to order envelope if chain is halted", func() { 295 chain.Halt() 296 err := chain.Order(env, 0) 297 Expect(err).To(MatchError("chain is stopped")) 298 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 299 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 300 Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1)) 301 Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1))) 302 }) 303 304 It("produces blocks following batch rules", func() { 305 close(cutter.Block) 306 307 By("cutting next batch directly") 308 cutter.CutNext = true 309 err := chain.Order(env, 0) 310 Expect(err).NotTo(HaveOccurred()) 311 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 312 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 313 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 314 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call 315 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1))) 316 317 // There are three calls to DataPersistDuration by now corresponding to the following three 318 // arriving on the Ready channel: 319 // 1. an EntryConfChange to let this node join the Raft cluster 320 // 2. a SoftState and an associated increase of term in the HardState due to the node being elected leader 321 // 3. a block being committed 322 // The duration being emitted is zero since we don't tick the fake clock during this time 323 Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(3)) 324 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(0)).Should(Equal(float64(0))) 325 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(1)).Should(Equal(float64(0))) 326 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(2)).Should(Equal(float64(0))) 327 328 By("respecting batch timeout") 329 cutter.CutNext = false 330 timeout := time.Second 331 support.SharedConfigReturns(mockOrderer(timeout, nil)) 332 err = chain.Order(env, 0) 333 Expect(err).NotTo(HaveOccurred()) 334 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 335 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 336 337 clock.WaitForNWatchersAndIncrement(timeout, 2) 338 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 339 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. 
initial call 340 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2))) 341 Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(4)) 342 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(3)).Should(Equal(float64(0))) 343 }) 344 345 It("does not reset timer for every envelope", func() { 346 close(cutter.Block) 347 348 timeout := time.Second 349 support.SharedConfigReturns(mockOrderer(timeout, nil)) 350 351 err := chain.Order(env, 0) 352 Expect(err).NotTo(HaveOccurred()) 353 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 354 355 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 356 357 err = chain.Order(env, 0) 358 Expect(err).NotTo(HaveOccurred()) 359 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(2)) 360 361 // the second envelope should not reset the timer; it should 362 // therefore expire if we increment it by just timeout/2 363 clock.Increment(timeout / 2) 364 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 365 }) 366 367 It("does not write a block if halted before timeout", func() { 368 close(cutter.Block) 369 timeout := time.Second 370 support.SharedConfigReturns(mockOrderer(timeout, nil)) 371 372 err := chain.Order(env, 0) 373 Expect(err).NotTo(HaveOccurred()) 374 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 375 376 // wait for timer to start 377 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 378 379 chain.Halt() 380 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 381 }) 382 383 It("stops the timer if a batch is cut", func() { 384 close(cutter.Block) 385 386 timeout := time.Second 387 support.SharedConfigReturns(mockOrderer(timeout, nil)) 388 389 err := chain.Order(env, 0) 390 Expect(err).NotTo(HaveOccurred()) 391 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 392 393 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 394 395 By("force a batch to be cut before timer expires") 396 cutter.CutNext = true 397 err = chain.Order(env, 0) 398 Expect(err).NotTo(HaveOccurred()) 399 400 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 401 b, _ := support.WriteBlockArgsForCall(0) 402 Expect(b.Data.Data).To(HaveLen(2)) 403 Expect(cutter.CurBatch()).To(HaveLen(0)) 404 405 // this should start a fresh timer 406 cutter.CutNext = false 407 err = chain.Order(env, 0) 408 Expect(err).NotTo(HaveOccurred()) 409 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 410 411 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 412 Consistently(support.WriteBlockCallCount).Should(Equal(1)) 413 414 clock.Increment(timeout / 2) 415 416 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 417 b, _ = support.WriteBlockArgsForCall(1) 418 Expect(b.Data.Data).To(HaveLen(1)) 419 }) 420 421 It("cut two batches if incoming envelope does not fit into first batch", func() { 422 close(cutter.Block) 423 424 timeout := time.Second 425 support.SharedConfigReturns(mockOrderer(timeout, nil)) 426 427 err := chain.Order(env, 0) 428 Expect(err).NotTo(HaveOccurred()) 429 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 430 431 cutter.IsolatedTx = true 432 err = chain.Order(env, 0) 433 Expect(err).NotTo(HaveOccurred()) 434 435 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 436 }) 437 438 Context("revalidation", func() { 439 BeforeEach(func() { 440 close(cutter.Block) 441 442 timeout := time.Hour 443 
support.SharedConfigReturns(mockOrderer(timeout, nil)) 444 support.SequenceReturns(1) 445 }) 446 447 It("enqueue if envelope is still valid", func() { 448 support.ProcessNormalMsgReturns(1, nil) 449 450 err := chain.Order(env, 0) 451 Expect(err).NotTo(HaveOccurred()) 452 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 453 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 454 }) 455 456 It("does not enqueue if envelope is not valid", func() { 457 support.ProcessNormalMsgReturns(1, errors.Errorf("Envelope is invalid")) 458 459 err := chain.Order(env, 0) 460 Expect(err).NotTo(HaveOccurred()) 461 Consistently(cutter.CurBatch).Should(HaveLen(0)) 462 Consistently(clock.WatcherCount).Should(Equal(1)) 463 }) 464 }) 465 466 It("unblocks Errored if chain is halted", func() { 467 errorC := chain.Errored() 468 Expect(errorC).NotTo(BeClosed()) 469 chain.Halt() 470 Eventually(errorC, LongEventualTimeout).Should(BeClosed()) 471 }) 472 473 Describe("Config updates", func() { 474 var ( 475 configEnv *common.Envelope 476 configSeq uint64 477 ) 478 479 Context("when a type A config update comes", func() { 480 481 Context("for existing channel", func() { 482 483 // use to prepare the Orderer Values 484 BeforeEach(func() { 485 newValues := map[string]*common.ConfigValue{ 486 "BatchTimeout": { 487 Version: 1, 488 Value: marshalOrPanic(&orderer.BatchTimeout{ 489 Timeout: "3ms", 490 }), 491 }, 492 "ConsensusType": { 493 Version: 4, 494 }, 495 } 496 oldValues := map[string]*common.ConfigValue{ 497 "ConsensusType": { 498 Version: 4, 499 }, 500 } 501 configEnv = newConfigEnv(channelID, 502 common.HeaderType_CONFIG, 503 newConfigUpdateEnv(channelID, oldValues, newValues), 504 ) 505 configSeq = 0 506 }) // BeforeEach block 507 508 Context("without revalidation (i.e. correct config sequence)", func() { 509 510 Context("without pending normal envelope", func() { 511 It("should create a config block and no normal block", func() { 512 err := chain.Configure(configEnv, configSeq) 513 Expect(err).NotTo(HaveOccurred()) 514 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 515 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 516 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 517 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 518 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call 519 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1))) 520 }) 521 }) 522 523 Context("with pending normal envelope", func() { 524 It("should create a normal block and a config block", func() { 525 // We do not need to block the cutter from ordering in our test case and therefore close this channel. 
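// (The mock block cutter's Ordered blocks on a receive from cutter.Block; a receive
// from a closed channel returns immediately, so closing the channel lets every
// envelope ordered in these tests be appended to the batch without further coordination.)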
526 close(cutter.Block) 527 528 By("adding a normal envelope") 529 err := chain.Order(env, 0) 530 Expect(err).NotTo(HaveOccurred()) 531 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 532 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 533 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 534 535 By("adding a config envelope") 536 err = chain.Configure(configEnv, configSeq) 537 Expect(err).NotTo(HaveOccurred()) 538 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 539 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 540 541 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 542 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 543 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call 544 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2))) 545 }) 546 }) 547 }) 548 549 Context("with revalidation (i.e. incorrect config sequence)", func() { 550 551 BeforeEach(func() { 552 close(cutter.Block) 553 support.SequenceReturns(1) // this causes the revalidation 554 }) 555 556 It("should create config block upon correct revalidation", func() { 557 support.ProcessConfigMsgReturns(configEnv, 1, nil) // nil implies correct revalidation 558 559 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 560 Consistently(clock.WatcherCount).Should(Equal(1)) 561 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 562 }) 563 564 It("should not create config block upon incorrect revalidation", func() { 565 support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence")) 566 567 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 568 Consistently(clock.WatcherCount).Should(Equal(1)) 569 Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) // no call to WriteConfigBlock 570 }) 571 572 It("should not disturb current running timer upon incorrect revalidation", func() { 573 support.ProcessNormalMsgReturns(1, nil) 574 support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence")) 575 576 Expect(chain.Order(env, configSeq)).To(Succeed()) 577 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 578 579 clock.Increment(30 * time.Minute) 580 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 581 582 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 583 Consistently(clock.WatcherCount).Should(Equal(2)) 584 585 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 586 Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) 587 588 clock.Increment(30 * time.Minute) 589 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 590 }) 591 }) 592 }) 593 594 Context("for creating a new channel", func() { 595 596 // use to prepare the Orderer Values 597 BeforeEach(func() { 598 chainID := "mychannel" 599 values := make(map[string]*common.ConfigValue) 600 configEnv = newConfigEnv(chainID, 601 common.HeaderType_CONFIG, 602 newConfigUpdateEnv(chainID, nil, values), 603 ) 604 configSeq = 0 605 }) // BeforeEach block 606 607 It("should be able to create a channel", func() { 608 err := chain.Configure(configEnv, configSeq) 609 Expect(err).NotTo(HaveOccurred()) 610 Eventually(support.WriteConfigBlockCallCount, 
LongEventualTimeout).Should(Equal(1)) 611 }) 612 }) 613 }) // Context block for type A config 614 615 Context("when a type B config update comes", func() { 616 Context("updating protocol values", func() { 617 // use to prepare the Orderer Values 618 BeforeEach(func() { 619 values := map[string]*common.ConfigValue{ 620 "ConsensusType": { 621 Version: 1, 622 Value: marshalOrPanic(&orderer.ConsensusType{ 623 Metadata: marshalOrPanic(consenterMetadata), 624 }), 625 }, 626 } 627 configEnv = newConfigEnv(channelID, 628 common.HeaderType_CONFIG, 629 newConfigUpdateEnv(channelID, nil, values)) 630 configSeq = 0 631 632 }) // BeforeEach block 633 634 It("should be able to process config update of type B", func() { 635 err := chain.Configure(configEnv, configSeq) 636 Expect(err).NotTo(HaveOccurred()) 637 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 638 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 639 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 640 }) 641 }) 642 643 Context("updating consenters set by exactly one node", func() { 644 It("should be able to process config update adding single node", func() { 645 metadata := proto.Clone(consenterMetadata).(*raftprotos.ConfigMetadata) 646 metadata.Consenters = append(metadata.Consenters, &raftprotos.Consenter{ 647 Host: "localhost", 648 Port: 7050, 649 ServerTlsCert: serverTLSCert(tlsCA), 650 ClientTlsCert: clientTLSCert(tlsCA), 651 }) 652 653 values := map[string]*common.ConfigValue{ 654 "ConsensusType": { 655 Version: 1, 656 Value: marshalOrPanic(&orderer.ConsensusType{ 657 Metadata: marshalOrPanic(metadata), 658 }), 659 }, 660 } 661 configEnv = newConfigEnv(channelID, 662 common.HeaderType_CONFIG, 663 newConfigUpdateEnv(channelID, nil, values)) 664 configSeq = 0 665 666 err := chain.Configure(configEnv, configSeq) 667 Expect(err).NotTo(HaveOccurred()) 668 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 669 }) 670 671 }) 672 }) 673 }) 674 675 Describe("Crash Fault Tolerance", func() { 676 var ( 677 raftMetadata *raftprotos.BlockMetadata 678 ) 679 680 BeforeEach(func() { 681 raftMetadata = &raftprotos.BlockMetadata{ 682 ConsenterIds: []uint64{1}, 683 NextConsenterId: 2, 684 } 685 }) 686 687 Describe("when a chain is started with existing WAL", func() { 688 var ( 689 m1 *raftprotos.BlockMetadata 690 m2 *raftprotos.BlockMetadata 691 ) 692 JustBeforeEach(func() { 693 // to generate WAL data, we start a chain, 694 // order several envelopes and then halt the chain. 
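// m1 and m2 capture the Raft BlockMetadata (notably RaftIndex) persisted with
// blocks 1 and 2; the restart tests below seed raftMetadata.RaftIndex from them
// to control how many committed entries must be replayed after a restart.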
695 close(cutter.Block) 696 cutter.CutNext = true 697 698 // enque some data to be persisted on disk by raft 699 err := chain.Order(env, uint64(0)) 700 Expect(err).NotTo(HaveOccurred()) 701 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 702 703 _, metadata := support.WriteBlockArgsForCall(0) 704 m1 = &raftprotos.BlockMetadata{} 705 proto.Unmarshal(metadata, m1) 706 707 err = chain.Order(env, uint64(0)) 708 Expect(err).NotTo(HaveOccurred()) 709 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 710 711 _, metadata = support.WriteBlockArgsForCall(1) 712 m2 = &raftprotos.BlockMetadata{} 713 proto.Unmarshal(metadata, m2) 714 715 chain.Halt() 716 }) 717 718 It("replays blocks from committed entries", func() { 719 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider) 720 c.init() 721 c.Start() 722 defer c.Halt() 723 724 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 725 726 _, metadata := c.support.WriteBlockArgsForCall(0) 727 m := &raftprotos.BlockMetadata{} 728 proto.Unmarshal(metadata, m) 729 Expect(m.RaftIndex).To(Equal(m1.RaftIndex)) 730 731 _, metadata = c.support.WriteBlockArgsForCall(1) 732 m = &raftprotos.BlockMetadata{} 733 proto.Unmarshal(metadata, m) 734 Expect(m.RaftIndex).To(Equal(m2.RaftIndex)) 735 736 // chain should keep functioning 737 campaign(c.Chain, c.observe) 738 739 c.cutter.CutNext = true 740 741 err := c.Order(env, uint64(0)) 742 Expect(err).NotTo(HaveOccurred()) 743 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 744 745 }) 746 747 It("only replays blocks after Applied index", func() { 748 raftMetadata.RaftIndex = m1.RaftIndex 749 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider) 750 c.support.WriteBlock(support.WriteBlockArgsForCall(0)) 751 752 c.init() 753 c.Start() 754 defer c.Halt() 755 756 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 757 758 _, metadata := c.support.WriteBlockArgsForCall(1) 759 m := &raftprotos.BlockMetadata{} 760 proto.Unmarshal(metadata, m) 761 Expect(m.RaftIndex).To(Equal(m2.RaftIndex)) 762 763 // chain should keep functioning 764 campaign(c.Chain, c.observe) 765 766 c.cutter.CutNext = true 767 768 err := c.Order(env, uint64(0)) 769 Expect(err).NotTo(HaveOccurred()) 770 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 771 }) 772 773 It("does not replay any block if already in sync", func() { 774 raftMetadata.RaftIndex = m2.RaftIndex 775 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider) 776 c.init() 777 c.Start() 778 defer c.Halt() 779 780 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 781 782 // chain should keep functioning 783 campaign(c.Chain, c.observe) 784 785 c.cutter.CutNext = true 786 787 err := c.Order(env, uint64(0)) 788 Expect(err).NotTo(HaveOccurred()) 789 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 790 }) 791 792 Context("WAL file is not readable", func() { 793 It("fails to load wal", func() { 794 skipIfRoot() 795 796 files, err := ioutil.ReadDir(walDir) 797 Expect(err).NotTo(HaveOccurred()) 798 for _, f := range files { 799 os.Chmod(path.Join(walDir, f.Name()), 0300) 800 } 801 802 c, err := etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC) 803 Expect(c).To(BeNil()) 804 
Expect(err).To(MatchError(ContainSubstring("permission denied"))) 805 }) 806 }) 807 }) 808 809 Describe("when snapshotting is enabled (snapshot interval is not zero)", func() { 810 var ( 811 ledgerLock sync.Mutex 812 ledger map[uint64]*common.Block 813 ) 814 815 countFiles := func() int { 816 files, err := ioutil.ReadDir(snapDir) 817 Expect(err).NotTo(HaveOccurred()) 818 return len(files) 819 } 820 821 BeforeEach(func() { 822 opts.SnapshotCatchUpEntries = 2 823 824 close(cutter.Block) 825 cutter.CutNext = true 826 827 ledgerLock.Lock() 828 ledger = map[uint64]*common.Block{ 829 0: getSeedBlock(), // genesis block 830 } 831 ledgerLock.Unlock() 832 833 support.WriteBlockStub = func(block *common.Block, meta []byte) { 834 b := proto.Clone(block).(*common.Block) 835 836 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 837 Expect(err).NotTo(HaveOccurred()) 838 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 839 840 ledgerLock.Lock() 841 defer ledgerLock.Unlock() 842 ledger[b.Header.Number] = b 843 } 844 845 support.HeightStub = func() uint64 { 846 ledgerLock.Lock() 847 defer ledgerLock.Unlock() 848 return uint64(len(ledger)) 849 } 850 }) 851 852 Context("Small SnapshotInterval", func() { 853 BeforeEach(func() { 854 opts.SnapshotIntervalSize = 1 855 }) 856 857 It("writes snapshot file to snapDir", func() { 858 // Scenario: start a chain with SnapInterval = 1 byte, expect it to take 859 // one snapshot for each block 860 861 i, _ := opts.MemoryStorage.FirstIndex() 862 863 Expect(chain.Order(env, uint64(0))).To(Succeed()) 864 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 865 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 866 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 867 Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(2)) // incl. initial call 868 s, _ := opts.MemoryStorage.Snapshot() 869 b := protoutil.UnmarshalBlockOrPanic(s.Data) 870 Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(1)).To(Equal(float64(b.Header.Number))) 871 872 i, _ = opts.MemoryStorage.FirstIndex() 873 874 Expect(chain.Order(env, uint64(0))).To(Succeed()) 875 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 876 877 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 878 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 879 Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(3)) // incl. 
initial call 880 s, _ = opts.MemoryStorage.Snapshot() 881 b = protoutil.UnmarshalBlockOrPanic(s.Data) 882 Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(2)).To(Equal(float64(b.Header.Number))) 883 }) 884 885 It("pauses chain if sync is in progress", func() { 886 // Scenario: 887 // after a snapshot is taken, reboot chain with raftIndex = 0 888 // chain should attempt to sync upon reboot, and blocks on 889 // `WaitReady` API 890 891 i, _ := opts.MemoryStorage.FirstIndex() 892 893 Expect(chain.Order(env, uint64(0))).To(Succeed()) 894 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 895 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 896 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 897 898 i, _ = opts.MemoryStorage.FirstIndex() 899 900 Expect(chain.Order(env, uint64(0))).To(Succeed()) 901 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 902 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 903 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 904 905 chain.Halt() 906 907 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider) 908 c.init() 909 910 signal := make(chan struct{}) 911 912 c.puller.PullBlockStub = func(i uint64) *common.Block { 913 <-signal // blocking for assertions 914 ledgerLock.Lock() 915 defer ledgerLock.Unlock() 916 if i >= uint64(len(ledger)) { 917 return nil 918 } 919 920 return ledger[i] 921 } 922 923 err := c.WaitReady() 924 Expect(err).To(MatchError("chain is not started")) 925 926 c.Start() 927 defer c.Halt() 928 929 // pull block is called, so chain should be catching up now, WaitReady should block 930 signal <- struct{}{} 931 932 done := make(chan error) 933 go func() { 934 done <- c.WaitReady() 935 }() 936 937 Consistently(done).ShouldNot(Receive()) 938 close(signal) // unblock block puller 939 Eventually(done).Should(Receive(nil)) // WaitReady should be unblocked 940 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 941 }) 942 943 It("restores snapshot w/o extra entries", func() { 944 // Scenario: 945 // after a snapshot is taken, no more entries are appended. 946 // then node is restarted, it loads snapshot, finds its term 947 // and index. While replaying WAL to memory storage, it should 948 // not append any entry because no extra entry was appended 949 // after snapshot was taken. 
950 951 Expect(chain.Order(env, uint64(0))).To(Succeed()) 952 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 953 _, metadata := support.WriteBlockArgsForCall(0) 954 m := &raftprotos.BlockMetadata{} 955 proto.Unmarshal(metadata, m) 956 957 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 958 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1)) 959 snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created 960 Expect(err).NotTo(HaveOccurred()) 961 i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory 962 Expect(err).NotTo(HaveOccurred()) 963 964 // expect storage to preserve SnapshotCatchUpEntries entries before snapshot 965 Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1)) 966 967 chain.Halt() 968 969 raftMetadata.RaftIndex = m.RaftIndex 970 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider) 971 c.opts.SnapshotIntervalSize = 1 972 973 c.init() 974 c.Start() 975 976 // following arithmetic reflects how etcdraft MemoryStorage is implemented 977 // when no entry is appended after snapshot being loaded. 978 Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1)) 979 Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index)) 980 981 // chain keeps functioning 982 Eventually(func() <-chan raft.SoftState { 983 c.clock.Increment(interval) 984 return c.observe 985 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 986 987 c.cutter.CutNext = true 988 err = c.Order(env, uint64(0)) 989 Expect(err).NotTo(HaveOccurred()) 990 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 991 992 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 993 c.Halt() 994 995 _, metadata = c.support.WriteBlockArgsForCall(0) 996 m = &raftprotos.BlockMetadata{} 997 proto.Unmarshal(metadata, m) 998 raftMetadata.RaftIndex = m.RaftIndex 999 cx := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider) 1000 1001 cx.init() 1002 cx.Start() 1003 defer cx.Halt() 1004 1005 // chain keeps functioning 1006 Eventually(func() <-chan raft.SoftState { 1007 cx.clock.Increment(interval) 1008 return cx.observe 1009 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 1010 }) 1011 }) 1012 1013 Context("Large SnapshotInterval", func() { 1014 BeforeEach(func() { 1015 opts.SnapshotIntervalSize = 1024 1016 }) 1017 1018 It("restores snapshot w/ extra entries", func() { 1019 // Scenario: 1020 // after a snapshot is taken, more entries are appended. 1021 // then node is restarted, it loads snapshot, finds its term 1022 // and index. While replaying WAL to memory storage, it should 1023 // append some entries. 
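// With SnapshotIntervalSize set to 1024 in this Context, a single small envelope
// does not trigger a snapshot, but the two roughly 500-byte payloads ordered below
// do; the third, small envelope then appends entries to the log after the snapshot.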
	largeEnv := &common.Envelope{
		Payload: marshalOrPanic(&common.Payload{
			Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
			Data:   make([]byte, 500),
		}),
	}

	By("Ordering two large envelopes to trigger snapshot")
	Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
	Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

	Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
	Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))

	_, metadata := support.WriteBlockArgsForCall(1)
	m := &raftprotos.BlockMetadata{}
	proto.Unmarshal(metadata, m)

	// check snapshot does exist
	Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
	Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1))
	snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created
	Expect(err).NotTo(HaveOccurred())
	i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory
	Expect(err).NotTo(HaveOccurred())

	// expect storage to preserve SnapshotCatchUpEntries entries before snapshot
	Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1))

	By("Ordering another envelope to append new data to memory after snapshot")
	Expect(chain.Order(env, uint64(0))).To(Succeed())
	Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))

	lasti, _ := opts.MemoryStorage.LastIndex()

	chain.Halt()

	raftMetadata.RaftIndex = m.RaftIndex
	c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider)
	cnt := support.WriteBlockCallCount()
	for i := 0; i < cnt; i++ {
		c.support.WriteBlock(support.WriteBlockArgsForCall(i))
	}

	By("Restarting the node")
	c.init()
	c.Start()
	defer c.Halt()

	By("Checking latest index is larger than index in snapshot")
	Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1))
	Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(lasti))
})

When("local ledger is in sync with snapshot", func() {
	It("does not pull blocks and still respects snapshot interval", func() {
		// Scenario:
		// - snapshot is taken at block 2
		// - order one more envelope (block 3)
		// - reboot chain at block 2
		// - block 3 should be replayed from wal
		// - order another envelope to trigger snapshot, containing block 3 & 4
		// Assertions:
		// - block puller should NOT be called
		// - chain should keep functioning after reboot
		// - chain should respect snapshot interval to trigger next snapshot

		largeEnv := &common.Envelope{
			Payload: marshalOrPanic(&common.Payload{
				Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
				Data:   make([]byte, 500),
			}),
		}

		By("Ordering two large envelopes to trigger snapshot")
		Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
		Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

		Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
		Eventually(support.WriteBlockCallCount,
LongEventualTimeout).Should(Equal(2)) 1105 1106 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 1107 1108 _, metadata := support.WriteBlockArgsForCall(1) 1109 m := &raftprotos.BlockMetadata{} 1110 proto.Unmarshal(metadata, m) 1111 1112 By("Cutting block [3]") 1113 // order another envelope. this should not trigger snapshot 1114 err = chain.Order(largeEnv, uint64(0)) 1115 Expect(err).NotTo(HaveOccurred()) 1116 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 1117 1118 chain.Halt() 1119 1120 raftMetadata.RaftIndex = m.RaftIndex 1121 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider) 1122 // replay block 1&2 1123 c.support.WriteBlock(support.WriteBlockArgsForCall(0)) 1124 c.support.WriteBlock(support.WriteBlockArgsForCall(1)) 1125 1126 c.opts.SnapshotIntervalSize = 1024 1127 1128 By("Restarting node at block [2]") 1129 c.init() 1130 c.Start() 1131 defer c.Halt() 1132 1133 // elect leader 1134 campaign(c.Chain, c.observe) 1135 1136 By("Ordering one more block to trigger snapshot") 1137 c.cutter.CutNext = true 1138 err = c.Order(largeEnv, uint64(0)) 1139 Expect(err).NotTo(HaveOccurred()) 1140 1141 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(4)) 1142 Expect(c.puller.PullBlockCallCount()).Should(BeZero()) 1143 // old snapshot file is retained 1144 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 1145 }) 1146 }) 1147 1148 It("respects snapshot interval after reboot", func() { 1149 largeEnv := &common.Envelope{ 1150 Payload: marshalOrPanic(&common.Payload{ 1151 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 1152 Data: make([]byte, 500), 1153 }), 1154 } 1155 1156 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1157 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1158 // check no snapshot is taken 1159 Consistently(countFiles).Should(Equal(0)) 1160 1161 _, metadata := support.WriteBlockArgsForCall(0) 1162 m := &raftprotos.BlockMetadata{} 1163 proto.Unmarshal(metadata, m) 1164 1165 chain.Halt() 1166 1167 raftMetadata.RaftIndex = m.RaftIndex 1168 c1 := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider) 1169 cnt := support.WriteBlockCallCount() 1170 for i := 0; i < cnt; i++ { 1171 c1.support.WriteBlock(support.WriteBlockArgsForCall(i)) 1172 } 1173 c1.cutter.CutNext = true 1174 c1.opts.SnapshotIntervalSize = 1024 1175 1176 By("Restarting chain") 1177 c1.init() 1178 c1.Start() 1179 // chain keeps functioning 1180 campaign(c1.Chain, c1.observe) 1181 1182 Expect(c1.Order(largeEnv, uint64(0))).To(Succeed()) 1183 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1184 // check snapshot does exit 1185 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 1186 }) 1187 }) 1188 }) 1189 }) 1190 1191 Context("Invalid WAL dir", func() { 1192 var support = &consensusmocks.FakeConsenterSupport{} 1193 BeforeEach(func() { 1194 // for block creator initialization 1195 support.HeightReturns(1) 1196 support.BlockReturns(getSeedBlock()) 1197 }) 1198 1199 When("WAL dir is a file", func() { 1200 It("replaces file with fresh WAL dir", func() { 1201 f, err := ioutil.TempFile("", "wal-") 1202 Expect(err).NotTo(HaveOccurred()) 1203 defer os.RemoveAll(f.Name()) 1204 1205 chain, err := etcdraft.NewChain( 1206 support, 1207 etcdraft.Options{ 1208 WALDir: f.Name(), 1209 SnapDir: snapDir, 1210 Logger: 
logger, 1211 MemoryStorage: storage, 1212 BlockMetadata: &raftprotos.BlockMetadata{}, 1213 Metrics: newFakeMetrics(newFakeMetricsFields()), 1214 }, 1215 configurator, 1216 nil, 1217 cryptoProvider, 1218 nil, 1219 nil, 1220 observeC) 1221 Expect(chain).NotTo(BeNil()) 1222 Expect(err).NotTo(HaveOccurred()) 1223 1224 info, err := os.Stat(f.Name()) 1225 Expect(err).NotTo(HaveOccurred()) 1226 Expect(info.IsDir()).To(BeTrue()) 1227 }) 1228 }) 1229 1230 When("WAL dir is not writeable", func() { 1231 It("replace it with fresh WAL dir", func() { 1232 d, err := ioutil.TempDir("", "wal-") 1233 Expect(err).NotTo(HaveOccurred()) 1234 defer os.RemoveAll(d) 1235 1236 err = os.Chmod(d, 0500) 1237 Expect(err).NotTo(HaveOccurred()) 1238 1239 chain, err := etcdraft.NewChain( 1240 support, 1241 etcdraft.Options{ 1242 WALDir: d, 1243 SnapDir: snapDir, 1244 Logger: logger, 1245 MemoryStorage: storage, 1246 BlockMetadata: &raftprotos.BlockMetadata{}, 1247 Metrics: newFakeMetrics(newFakeMetricsFields()), 1248 }, 1249 nil, 1250 nil, 1251 cryptoProvider, 1252 noOpBlockPuller, 1253 nil, 1254 nil) 1255 Expect(chain).NotTo(BeNil()) 1256 Expect(err).NotTo(HaveOccurred()) 1257 }) 1258 }) 1259 1260 When("WAL parent dir is not writeable", func() { 1261 It("fails to bootstrap fresh raft node", func() { 1262 skipIfRoot() 1263 1264 d, err := ioutil.TempDir("", "wal-") 1265 Expect(err).NotTo(HaveOccurred()) 1266 defer os.RemoveAll(d) 1267 1268 err = os.Chmod(d, 0500) 1269 Expect(err).NotTo(HaveOccurred()) 1270 1271 chain, err := etcdraft.NewChain( 1272 support, 1273 etcdraft.Options{ 1274 WALDir: path.Join(d, "wal-dir"), 1275 SnapDir: snapDir, 1276 Logger: logger, 1277 BlockMetadata: &raftprotos.BlockMetadata{}, 1278 }, 1279 nil, 1280 nil, 1281 cryptoProvider, 1282 noOpBlockPuller, 1283 nil, 1284 nil) 1285 Expect(chain).To(BeNil()) 1286 Expect(err).To(MatchError(ContainSubstring("failed to initialize WAL: mkdir"))) 1287 }) 1288 }) 1289 }) 1290 }) 1291 }) 1292 1293 Describe("2-node Raft cluster", func() { 1294 var ( 1295 network *network 1296 channelID string 1297 timeout time.Duration 1298 dataDir string 1299 c1, c2 *chain 1300 raftMetadata *raftprotos.BlockMetadata 1301 consenters map[uint64]*raftprotos.Consenter 1302 configEnv *common.Envelope 1303 cryptoProvider bccsp.BCCSP 1304 ) 1305 BeforeEach(func() { 1306 var err error 1307 1308 channelID = "multi-node-channel" 1309 timeout = 10 * time.Second 1310 1311 dataDir, err = ioutil.TempDir("", "raft-test-") 1312 Expect(err).NotTo(HaveOccurred()) 1313 1314 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 1315 Expect(err).NotTo(HaveOccurred()) 1316 1317 raftMetadata = &raftprotos.BlockMetadata{ 1318 ConsenterIds: []uint64{1, 2}, 1319 NextConsenterId: 3, 1320 } 1321 1322 consenters = map[uint64]*raftprotos.Consenter{ 1323 1: { 1324 Host: "localhost", 1325 Port: 7051, 1326 ClientTlsCert: clientTLSCert(tlsCA), 1327 ServerTlsCert: serverTLSCert(tlsCA), 1328 }, 1329 2: { 1330 Host: "localhost", 1331 Port: 7051, 1332 ClientTlsCert: clientTLSCert(tlsCA), 1333 ServerTlsCert: serverTLSCert(tlsCA), 1334 }, 1335 } 1336 1337 metadata := &raftprotos.ConfigMetadata{ 1338 Options: &raftprotos.Options{ 1339 TickInterval: "500ms", 1340 ElectionTick: 10, 1341 HeartbeatTick: 1, 1342 MaxInflightBlocks: 5, 1343 SnapshotIntervalSize: 200, 1344 }, 1345 Consenters: []*raftprotos.Consenter{consenters[2]}, 1346 } 1347 value := map[string]*common.ConfigValue{ 1348 "ConsensusType": { 1349 Version: 1, 1350 Value: marshalOrPanic(&orderer.ConsensusType{ 1351 Metadata: 
marshalOrPanic(metadata), 1352 }), 1353 }, 1354 } 1355 // prepare config update to remove 1 1356 configEnv = newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1357 1358 network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider) 1359 c1, c2 = network.chains[1], network.chains[2] 1360 c1.cutter.CutNext = true 1361 network.init() 1362 network.start() 1363 }) 1364 1365 AfterEach(func() { 1366 network.stop() 1367 network.exec(func(c *chain) { 1368 Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 1369 }) 1370 1371 os.RemoveAll(dataDir) 1372 }) 1373 1374 It("can remove leader by reconfiguring cluster", func() { 1375 network.elect(1) 1376 1377 // trigger status dissemination 1378 Eventually(func() int { 1379 c1.clock.Increment(interval) 1380 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1381 }, LongEventualTimeout).Should(Equal(2)) 1382 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1383 1384 By("Configuring cluster to remove node") 1385 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1386 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1387 c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2) 1388 1389 Eventually(func() <-chan raft.SoftState { 1390 c2.clock.Increment(interval) 1391 return c2.observe 1392 }, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateLeader))) 1393 1394 By("Asserting leader can still serve requests as single-node cluster") 1395 c2.cutter.CutNext = true 1396 Expect(c2.Order(env, 0)).To(Succeed()) 1397 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1398 }) 1399 1400 It("can remove leader by reconfiguring cluster even if leadership transfer fails", func() { 1401 network.elect(1) 1402 1403 step1 := c1.getStepFunc() 1404 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 1405 stepMsg := &raftpb.Message{} 1406 if err := proto.Unmarshal(msg.Payload, stepMsg); err != nil { 1407 return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err) 1408 } 1409 1410 if stepMsg.Type == raftpb.MsgTimeoutNow { 1411 return nil 1412 } 1413 1414 return step1(dest, msg) 1415 }) 1416 1417 By("Configuring cluster to remove node") 1418 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1419 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1420 c2.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1421 Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1422 1423 c1.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1424 Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed()) 1425 close(c1.stopped) // mark c1 stopped in network 1426 1427 network.elect(2) 1428 1429 By("Asserting leader can still serve requests as single-node cluster") 1430 c2.cutter.CutNext = true 1431 Expect(c2.Order(env, 0)).To(Succeed()) 1432 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1433 }) 1434 1435 It("can remove follower by reconfiguring cluster", func() { 1436 network.elect(2) 1437 1438 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1439 network.exec(func(c *chain) { 1440 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1441 }) 1442 1443 Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1444 
Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed()) 1445 1446 By("Asserting leader can still serve requests as single-node cluster") 1447 c2.cutter.CutNext = true 1448 Expect(c2.Order(env, 0)).To(Succeed()) 1449 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1450 }) 1451 }) 1452 1453 Describe("3-node Raft cluster", func() { 1454 var ( 1455 network *network 1456 channelID string 1457 timeout time.Duration 1458 dataDir string 1459 c1, c2, c3 *chain 1460 raftMetadata *raftprotos.BlockMetadata 1461 consenters map[uint64]*raftprotos.Consenter 1462 cryptoProvider bccsp.BCCSP 1463 ) 1464 1465 BeforeEach(func() { 1466 var err error 1467 1468 channelID = "multi-node-channel" 1469 timeout = 10 * time.Second 1470 1471 dataDir, err = ioutil.TempDir("", "raft-test-") 1472 Expect(err).NotTo(HaveOccurred()) 1473 1474 raftMetadata = &raftprotos.BlockMetadata{ 1475 ConsenterIds: []uint64{1, 2, 3}, 1476 NextConsenterId: 4, 1477 } 1478 1479 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 1480 Expect(err).NotTo(HaveOccurred()) 1481 1482 consenters = map[uint64]*raftprotos.Consenter{ 1483 1: { 1484 Host: "localhost", 1485 Port: 7051, 1486 ClientTlsCert: clientTLSCert(tlsCA), 1487 ServerTlsCert: serverTLSCert(tlsCA), 1488 }, 1489 2: { 1490 Host: "localhost", 1491 Port: 7051, 1492 ClientTlsCert: clientTLSCert(tlsCA), 1493 ServerTlsCert: serverTLSCert(tlsCA), 1494 }, 1495 3: { 1496 Host: "localhost", 1497 Port: 7051, 1498 ClientTlsCert: clientTLSCert(tlsCA), 1499 ServerTlsCert: serverTLSCert(tlsCA), 1500 }, 1501 } 1502 1503 network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider) 1504 c1 = network.chains[1] 1505 c2 = network.chains[2] 1506 c3 = network.chains[3] 1507 }) 1508 1509 AfterEach(func() { 1510 network.stop() 1511 network.exec(func(c *chain) { 1512 Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 1513 }) 1514 1515 os.RemoveAll(dataDir) 1516 }) 1517 1518 When("2/3 nodes are running", func() { 1519 It("late node can catch up", func() { 1520 network.init() 1521 network.start(1, 2) 1522 network.elect(1) 1523 1524 // trigger status dissemination 1525 Eventually(func() int { 1526 c1.clock.Increment(interval) 1527 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1528 }, LongEventualTimeout).Should(Equal(2)) 1529 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1530 1531 c1.cutter.CutNext = true 1532 err := c1.Order(env, 0) 1533 Expect(err).NotTo(HaveOccurred()) 1534 1535 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1536 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1537 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1538 1539 network.start(3) 1540 1541 c1.clock.Increment(interval) 1542 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1543 1544 network.stop() 1545 }) 1546 1547 It("late node receives snapshot from leader", func() { 1548 c1.opts.SnapshotIntervalSize = 1 1549 c1.opts.SnapshotCatchUpEntries = 1 1550 1551 c1.cutter.CutNext = true 1552 1553 var blocksLock sync.Mutex 1554 blocks := make(map[uint64]*common.Block) // storing written blocks for block puller 1555 1556 c1.support.WriteBlockStub = func(b *common.Block, meta []byte) { 1557 blocksLock.Lock() 1558 defer blocksLock.Unlock() 1559 bytes, err 
:= proto.Marshal(&common.Metadata{Value: meta}) 1560 Expect(err).NotTo(HaveOccurred()) 1561 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 1562 blocks[b.Header.Number] = b 1563 } 1564 1565 c3.puller.PullBlockStub = func(i uint64) *common.Block { 1566 blocksLock.Lock() 1567 defer blocksLock.Unlock() 1568 b, exist := blocks[i] 1569 if !exist { 1570 return nil 1571 } 1572 1573 return b 1574 } 1575 1576 network.init() 1577 network.start(1, 2) 1578 network.elect(1) 1579 1580 err := c1.Order(env, 0) 1581 Expect(err).NotTo(HaveOccurred()) 1582 1583 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1584 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1585 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1586 1587 err = c1.Order(env, 0) 1588 Expect(err).NotTo(HaveOccurred()) 1589 1590 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1591 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1592 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1593 1594 network.start(3) 1595 1596 c1.clock.Increment(interval) 1597 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1598 1599 network.stop() 1600 }) 1601 }) 1602 1603 When("reconfiguring raft cluster", func() { 1604 const ( 1605 defaultTimeout = 5 * time.Second 1606 ) 1607 var ( 1608 options = &raftprotos.Options{ 1609 TickInterval: "500ms", 1610 ElectionTick: 10, 1611 HeartbeatTick: 1, 1612 MaxInflightBlocks: 5, 1613 SnapshotIntervalSize: 200, 1614 } 1615 updateRaftConfigValue = func(metadata *raftprotos.ConfigMetadata) map[string]*common.ConfigValue { 1616 return map[string]*common.ConfigValue{ 1617 "ConsensusType": { 1618 Version: 1, 1619 Value: marshalOrPanic(&orderer.ConsensusType{ 1620 Metadata: marshalOrPanic(metadata), 1621 }), 1622 }, 1623 } 1624 } 1625 addConsenterConfigValue = func() map[string]*common.ConfigValue { 1626 metadata := &raftprotos.ConfigMetadata{Options: options} 1627 for _, consenter := range consenters { 1628 metadata.Consenters = append(metadata.Consenters, consenter) 1629 } 1630 1631 newConsenter := &raftprotos.Consenter{ 1632 Host: "localhost", 1633 Port: 7050, 1634 ServerTlsCert: serverTLSCert(tlsCA), 1635 ClientTlsCert: clientTLSCert(tlsCA), 1636 } 1637 metadata.Consenters = append(metadata.Consenters, newConsenter) 1638 return updateRaftConfigValue(metadata) 1639 } 1640 removeConsenterConfigValue = func(id uint64) map[string]*common.ConfigValue { 1641 metadata := &raftprotos.ConfigMetadata{Options: options} 1642 for nodeID, consenter := range consenters { 1643 if nodeID == id { 1644 continue 1645 } 1646 metadata.Consenters = append(metadata.Consenters, consenter) 1647 } 1648 return updateRaftConfigValue(metadata) 1649 } 1650 createChannelEnv = func(metadata *raftprotos.ConfigMetadata) *common.Envelope { 1651 configEnv := newConfigEnv("another-channel", 1652 common.HeaderType_CONFIG, 1653 newConfigUpdateEnv(channelID, nil, updateRaftConfigValue(metadata))) 1654 1655 // Wrap config env in Orderer transaction 1656 return &common.Envelope{ 1657 Payload: marshalOrPanic(&common.Payload{ 1658 Header: &common.Header{ 1659 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 1660 Type: int32(common.HeaderType_ORDERER_TRANSACTION), 1661 
ChannelId: channelID, 1662 }), 1663 }, 1664 Data: marshalOrPanic(configEnv), 1665 }), 1666 } 1667 } 1668 ) 1669 1670 BeforeEach(func() { 1671 network.exec(func(c *chain) { 1672 c.opts.EvictionSuspicion = time.Millisecond * 100 1673 c.opts.LeaderCheckInterval = time.Millisecond * 100 1674 }) 1675 1676 network.init() 1677 network.start() 1678 network.elect(1) 1679 1680 By("Submitting first tx to cut the block") 1681 c1.cutter.CutNext = true 1682 err := c1.Order(env, 0) 1683 Expect(err).NotTo(HaveOccurred()) 1684 1685 c1.clock.Increment(interval) 1686 1687 network.exec( 1688 func(c *chain) { 1689 Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1)) 1690 }) 1691 }) 1692 1693 AfterEach(func() { 1694 network.stop() 1695 }) 1696 1697 Context("channel creation", func() { 1698 It("succeeds with valid config metadata", func() { 1699 metadata := &raftprotos.ConfigMetadata{Options: options} 1700 for _, consenter := range consenters { 1701 metadata.Consenters = append(metadata.Consenters, consenter) 1702 } 1703 1704 Expect(c1.Configure(createChannelEnv(metadata), 0)).To(Succeed()) 1705 network.exec(func(c *chain) { 1706 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1707 }) 1708 }) 1709 1710 }) 1711 1712 Context("reconfiguration", func() { 1713 It("can rotate certificate by adding and removing 1 node in one config update", func() { 1714 metadata := &raftprotos.ConfigMetadata{Options: options} 1715 for id, consenter := range consenters { 1716 if id == 2 { 1717 // remove second consenter 1718 continue 1719 } 1720 metadata.Consenters = append(metadata.Consenters, consenter) 1721 } 1722 1723 // add new consenter 1724 newConsenter := &raftprotos.Consenter{ 1725 Host: "localhost", 1726 Port: 7050, 1727 ServerTlsCert: serverTLSCert(tlsCA), 1728 ClientTlsCert: clientTLSCert(tlsCA), 1729 } 1730 metadata.Consenters = append(metadata.Consenters, newConsenter) 1731 1732 value := map[string]*common.ConfigValue{ 1733 "ConsensusType": { 1734 Version: 1, 1735 Value: marshalOrPanic(&orderer.ConsensusType{ 1736 Metadata: marshalOrPanic(metadata), 1737 }), 1738 }, 1739 } 1740 1741 By("creating new configuration with removed node and new one") 1742 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1743 c1.cutter.CutNext = true 1744 1745 By("sending config transaction") 1746 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1747 1748 network.exec(func(c *chain) { 1749 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1750 }) 1751 }) 1752 1753 It("rotates leader certificate and triggers leadership transfer", func() { 1754 metadata := &raftprotos.ConfigMetadata{Options: options} 1755 for id, consenter := range consenters { 1756 if id == 1 { 1757 // remove second consenter 1758 continue 1759 } 1760 metadata.Consenters = append(metadata.Consenters, consenter) 1761 } 1762 1763 // add new consenter 1764 newConsenter := &raftprotos.Consenter{ 1765 Host: "localhost", 1766 Port: 7050, 1767 ServerTlsCert: serverTLSCert(tlsCA), 1768 ClientTlsCert: clientTLSCert(tlsCA), 1769 } 1770 metadata.Consenters = append(metadata.Consenters, newConsenter) 1771 1772 value := map[string]*common.ConfigValue{ 1773 "ConsensusType": { 1774 Version: 1, 1775 Value: marshalOrPanic(&orderer.ConsensusType{ 1776 Metadata: marshalOrPanic(metadata), 1777 }), 1778 }, 1779 } 1780 1781 By("creating new configuration with removed node and new one") 1782 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, 
newConfigUpdateEnv(channelID, nil, value)) 1783 c1.cutter.CutNext = true 1784 1785 By("sending config transaction") 1786 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1787 1788 Eventually(c1.observe, LongEventualTimeout).Should(Receive(BeFollower())) 1789 network.exec(func(c *chain) { 1790 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1791 }) 1792 }) 1793 1794 When("Leader is disconnected after cert rotation", func() { 1795 It("still configures communication after failed leader transfer attempt", func() { 1796 metadata := &raftprotos.ConfigMetadata{Options: options} 1797 for id, consenter := range consenters { 1798 if id == 1 { 1799 // remove second consenter 1800 continue 1801 } 1802 metadata.Consenters = append(metadata.Consenters, consenter) 1803 } 1804 1805 // add new consenter 1806 newConsenter := &raftprotos.Consenter{ 1807 Host: "localhost", 1808 Port: 7050, 1809 ServerTlsCert: serverTLSCert(tlsCA), 1810 ClientTlsCert: clientTLSCert(tlsCA), 1811 } 1812 metadata.Consenters = append(metadata.Consenters, newConsenter) 1813 1814 value := map[string]*common.ConfigValue{ 1815 "ConsensusType": { 1816 Version: 1, 1817 Value: marshalOrPanic(&orderer.ConsensusType{ 1818 Metadata: marshalOrPanic(metadata), 1819 }), 1820 }, 1821 } 1822 1823 By("creating new configuration with removed node and new one") 1824 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1825 c1.cutter.CutNext = true 1826 1827 step1 := c1.getStepFunc() 1828 count := c1.rpc.SendConsensusCallCount() // record current step call count 1829 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 1830 // disconnect network after 4 MsgApp are sent by c1: 1831 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 1832 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 1833 if c1.rpc.SendConsensusCallCount() == count+4 { 1834 defer network.disconnect(1) 1835 } 1836 1837 return step1(dest, msg) 1838 }) 1839 1840 network.exec(func(c *chain) { 1841 Consistently(c.clock.WatcherCount).Should(Equal(1)) 1842 }) 1843 1844 By("sending config transaction") 1845 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1846 1847 Consistently(c1.observe).ShouldNot(Receive()) 1848 network.exec(func(c *chain) { 1849 // wait for timeout timer to start 1850 c.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1851 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1852 }) 1853 }) 1854 }) 1855 1856 When("Follower is disconnected while leader cert is being rotated", func() { 1857 It("still configures communication and transfer leader", func() { 1858 metadata := &raftprotos.ConfigMetadata{Options: options} 1859 for id, consenter := range consenters { 1860 if id == 1 { 1861 // remove second consenter 1862 continue 1863 } 1864 metadata.Consenters = append(metadata.Consenters, consenter) 1865 } 1866 1867 // add new consenter 1868 newConsenter := &raftprotos.Consenter{ 1869 Host: "localhost", 1870 Port: 7050, 1871 ServerTlsCert: serverTLSCert(tlsCA), 1872 ClientTlsCert: clientTLSCert(tlsCA), 1873 } 1874 metadata.Consenters = append(metadata.Consenters, newConsenter) 1875 1876 value := map[string]*common.ConfigValue{ 1877 "ConsensusType": { 1878 Version: 1, 1879 Value: marshalOrPanic(&orderer.ConsensusType{ 1880 Metadata: marshalOrPanic(metadata), 1881 }), 1882 }, 1883 } 1884 1885 cnt := c1.rpc.SendConsensusCallCount() 1886 network.disconnect(3) 1887 1888 // Trigger some 
heartbeats to be sent so that leader notices 1889 // failed message delivery to 3, and mark it as Paused. 1890 // This is to ensure leadership is transferred to 2. 1891 Eventually(func() int { 1892 c1.clock.Increment(interval) 1893 return c1.rpc.SendConsensusCallCount() 1894 }, LongEventualTimeout).Should(BeNumerically(">=", cnt+5)) 1895 1896 By("creating new configuration with removed node and new one") 1897 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1898 c1.cutter.CutNext = true 1899 1900 By("sending config transaction") 1901 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1902 1903 Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateFollower))) 1904 network.Lock() 1905 network.leader = 2 // manually set network leader 1906 network.Unlock() 1907 network.disconnect(1) 1908 1909 network.exec(func(c *chain) { 1910 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1911 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1912 }, 1, 2) 1913 1914 network.join(3, true) 1915 Eventually(c3.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1916 Eventually(c3.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1917 1918 By("Ordering normal transaction") 1919 c2.cutter.CutNext = true 1920 Expect(c3.Order(env, 0)).To(Succeed()) 1921 network.exec(func(c *chain) { 1922 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1923 }, 2, 3) 1924 }) 1925 }) 1926 1927 It("adding node to the cluster", func() { 1928 addConsenterUpdate := addConsenterConfigValue() 1929 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterUpdate)) 1930 c1.cutter.CutNext = true 1931 1932 By("sending config transaction") 1933 err := c1.Configure(configEnv, 0) 1934 Expect(err).NotTo(HaveOccurred()) 1935 Expect(c1.fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 1936 Expect(c1.fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 1937 1938 network.exec(func(c *chain) { 1939 Eventually(c.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1)) 1940 Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2)) 1941 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(4))) 1942 }) 1943 1944 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 1945 meta := &common.Metadata{Value: raftmetabytes} 1946 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 1947 Expect(err).NotTo(HaveOccurred()) 1948 1949 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider) 1950 // if we join a node to existing network, it MUST already obtained blocks 1951 // till the config block that adds this node to cluster. 1952 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 1953 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 1954 c4.init() 1955 1956 network.addChain(c4) 1957 c4.Start() 1958 1959 // ConfChange is applied to etcd/raft asynchronously, meaning node 4 is not added 1960 // to leader's node list right away. An immediate tick does not trigger a heartbeat 1961 // being sent to node 4. Therefore, we repeatedly tick the leader until node 4 joins 1962 // the cluster successfully. 
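// Why the polling pattern below works: Gomega re-invokes the function passed to Eventually,
// so every attempt ticks c1's fake clock once more; once raft has applied the ConfChange,
// a heartbeat reaches node 4 and it reports {Lead: 1, StateFollower} on its observe channel.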
1963 Eventually(func() <-chan raft.SoftState { 1964 c1.clock.Increment(interval) 1965 return c4.observe 1966 }, defaultTimeout).Should(Receive(Equal(raft.SoftState{Lead: 1, RaftState: raft.StateFollower}))) 1967 1968 Eventually(c4.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1)) 1969 Eventually(c4.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1)) 1970 1971 By("submitting new transaction to follower") 1972 c1.cutter.CutNext = true 1973 err = c4.Order(env, 0) 1974 Expect(err).NotTo(HaveOccurred()) 1975 Expect(c4.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 1976 Expect(c4.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 1977 1978 network.exec(func(c *chain) { 1979 Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(2)) 1980 }) 1981 }) 1982 1983 It("does not reconfigure raft cluster if it's a channel creation tx", func() { 1984 configEnv := newConfigEnv("another-channel", 1985 common.HeaderType_CONFIG, 1986 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(2))) 1987 1988 // Wrap config env in Orderer transaction 1989 channelCreationEnv := &common.Envelope{ 1990 Payload: marshalOrPanic(&common.Payload{ 1991 Header: &common.Header{ 1992 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 1993 Type: int32(common.HeaderType_ORDERER_TRANSACTION), 1994 ChannelId: channelID, 1995 }), 1996 }, 1997 Data: marshalOrPanic(configEnv), 1998 }), 1999 } 2000 2001 c1.cutter.CutNext = true 2002 2003 Expect(c1.Configure(channelCreationEnv, 0)).To(Succeed()) 2004 network.exec(func(c *chain) { 2005 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2006 }) 2007 2008 // assert c2 is not evicted 2009 Consistently(c2.Errored).ShouldNot(BeClosed()) 2010 Expect(c2.Order(env, 0)).To(Succeed()) 2011 2012 network.exec(func(c *chain) { 2013 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2014 }) 2015 }) 2016 2017 It("stop leader and continue reconfiguration failing over to new leader", func() { 2018 // Scenario: Start a replica set of 3 Raft nodes and elect node c1 as the leader. 2019 // Configure the chain support mock to disconnect c1 right after it writes the configuration block 2020 // into the ledger, to simulate failover. 2021 // Next, bootstrap a new node c4 to join the cluster by creating a config transaction and submitting 2022 // it to the leader. Once the leader writes the configuration block it fails, and leadership is transferred to 2023 // c2. 2024 // The test asserts that the new node c4 joins the cluster and that c2 handles the failover of the 2025 // re-configuration. Later we connect c1 back and make sure it is capable of catching up with the 2026 // new configuration and successfully rejoins the replica set.
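// A rough sketch of how the failover is forced (based on the step-function override that
// follows): outgoing SendConsensus calls from c1 are counted, and after the 4 MsgApp messages
// that replicate and commit the config entry have been sent, c1 is disconnected so that the
// remaining nodes must elect a new leader to finish the reconfiguration.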
2027 2028 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2029 c1.cutter.CutNext = true 2030 2031 step1 := c1.getStepFunc() 2032 count := c1.rpc.SendConsensusCallCount() // record current step call count 2033 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2034 // disconnect network after 4 MsgApp are sent by c1: 2035 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2036 // - 2 MsgApp to c2 & c3 that instruct followers to commit data 2037 if c1.rpc.SendConsensusCallCount() == count+4 { 2038 defer network.disconnect(1) 2039 } 2040 2041 return step1(dest, msg) 2042 }) 2043 2044 By("sending config transaction") 2045 err := c1.Configure(configEnv, 0) 2046 Expect(err).NotTo(HaveOccurred()) 2047 2048 // every node has written config block to the OSN ledger 2049 network.exec( 2050 func(c *chain) { 2051 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2052 }) 2053 2054 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2055 c1.setStepFunc(step1) 2056 2057 // elect node with higher index 2058 i2, _ := c2.storage.LastIndex() // err is always nil 2059 i3, _ := c3.storage.LastIndex() 2060 candidate := uint64(2) 2061 if i3 > i2 { 2062 candidate = 3 2063 } 2064 network.chains[candidate].cutter.CutNext = true 2065 network.elect(candidate) 2066 2067 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2068 meta := &common.Metadata{Value: raftmetabytes} 2069 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2070 Expect(err).NotTo(HaveOccurred()) 2071 2072 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider) 2073 // if we join a node to an existing network, it MUST have already obtained all blocks 2074 // up to and including the config block that adds this node to the cluster. 2075 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2076 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2077 c4.init() 2078 2079 network.addChain(c4) 2080 c4.start() 2081 Expect(c4.WaitReady()).To(Succeed()) 2082 network.join(4, true) 2083 2084 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2085 Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2086 2087 By("submitting new transaction to follower") 2088 err = c4.Order(env, 0) 2089 Expect(err).NotTo(HaveOccurred()) 2090 2091 // the remaining nodes are alive, including the newly added one, hence they should write 2 blocks 2092 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2093 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2094 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2095 2096 // node 1 has been stopped, so it should not write any block 2097 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2098 2099 network.join(1, true) 2100 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2101 }) 2102 2103 It("stop cluster quorum and continue reconfiguration after the restart", func() { 2104 // Scenario: Start a replica set of 3 Raft nodes and elect node c1 as the leader. 2105 // Configure the chain support mock to stop the cluster after the config block is committed. 2106 // Restart the cluster and ensure it picks up the updates and is capable of finishing the reconfiguration.
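// As in the previous test, the overridden step function below counts SendConsensus calls;
// once the config entry has been replicated and committed (4 MsgApp messages), all three
// nodes are disconnected to simulate losing the quorum, and are reconnected later so the
// cluster can resume and finish the reconfiguration.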
2107 2108 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2109 c1.cutter.CutNext = true 2110 2111 step1 := c1.getStepFunc() 2112 count := c1.rpc.SendConsensusCallCount() // record current step call count 2113 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2114 // disconnect network after 4 MsgApp are sent by c1: 2115 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2116 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 2117 if c1.rpc.SendConsensusCallCount() == count+4 { 2118 defer func() { 2119 network.disconnect(1) 2120 network.disconnect(2) 2121 network.disconnect(3) 2122 }() 2123 } 2124 2125 return step1(dest, msg) 2126 }) 2127 2128 By("sending config transaction") 2129 err := c1.Configure(configEnv, 0) 2130 Expect(err).NotTo(HaveOccurred()) 2131 2132 // every node has written config block to the OSN ledger 2133 network.exec( 2134 func(c *chain) { 2135 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2136 }) 2137 2138 // assert conf change proposals have been dropped, before proceed to reconnect network 2139 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2140 c1.setStepFunc(step1) 2141 2142 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2143 meta := &common.Metadata{Value: raftmetabytes} 2144 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2145 Expect(err).NotTo(HaveOccurred()) 2146 2147 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider) 2148 // if we join a node to existing network, it MUST already obtained blocks 2149 // till the config block that adds this node to cluster. 2150 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2151 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2152 c4.init() 2153 2154 network.addChain(c4) 2155 2156 By("reconnecting nodes back") 2157 for i := uint64(1); i < 4; i++ { 2158 network.connect(i) 2159 } 2160 2161 // elect node with higher index 2162 i2, _ := c2.storage.LastIndex() // err is always nil 2163 i3, _ := c3.storage.LastIndex() 2164 candidate := uint64(2) 2165 if i3 > i2 { 2166 candidate = 3 2167 } 2168 network.chains[candidate].cutter.CutNext = true 2169 network.elect(candidate) 2170 2171 c4.start() 2172 Expect(c4.WaitReady()).To(Succeed()) 2173 network.join(4, false) 2174 2175 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2176 Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2177 2178 By("submitting new transaction to follower") 2179 err = c4.Order(env, 0) 2180 Expect(err).NotTo(HaveOccurred()) 2181 2182 // rest nodes are alive include a newly added, hence should write 2 blocks 2183 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2184 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2185 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2186 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2187 }) 2188 2189 It("ensures that despite leader failure cluster continue to process configuration to remove the leader", func() { 2190 // Scenario: Starting replica set of 3 nodes, electing nodeID = 1 to be the leader. 2191 // Prepare config update transaction which removes leader (nodeID = 1), then leader 2192 // fails right after it commits configuration block. 
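// Note on the follow-up election below: the candidate is chosen as whichever of c2/c3 has
// the larger storage.LastIndex(), because raft only grants a vote to a candidate whose log
// is at least as up-to-date as the voter's own log.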
2193 2194 configEnv := newConfigEnv(channelID, 2195 common.HeaderType_CONFIG, 2196 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1 2197 2198 c1.cutter.CutNext = true 2199 2200 step1 := c1.getStepFunc() 2201 count := c1.rpc.SendConsensusCallCount() // record current step call count 2202 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2203 // disconnect network after 4 MsgApp are sent by c1: 2204 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2205 // - 2 MsgApp to c2 & c3 that instruct followers to commit data 2206 if c1.rpc.SendConsensusCallCount() == count+4 { 2207 defer network.disconnect(1) 2208 } 2209 2210 return step1(dest, msg) 2211 }) 2212 2213 By("sending config transaction") 2214 err := c1.Configure(configEnv, 0) 2215 Expect(err).NotTo(HaveOccurred()) 2216 2217 network.exec(func(c *chain) { 2218 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2219 }) 2220 2221 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2222 c1.setStepFunc(step1) 2223 2224 // elect node with higher index 2225 i2, _ := c2.storage.LastIndex() // err is always nil 2226 i3, _ := c3.storage.LastIndex() 2227 candidate := uint64(2) 2228 if i3 > i2 { 2229 candidate = 3 2230 } 2231 network.chains[candidate].cutter.CutNext = true 2232 network.elect(candidate) 2233 2234 By("submitting new transaction to follower") 2235 err = c3.Order(env, 0) 2236 Expect(err).NotTo(HaveOccurred()) 2237 2238 // the remaining nodes are still alive, hence they should write 2 blocks 2239 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2240 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2241 }) 2242 2243 It("removes leader from replica set", func() { 2244 // Scenario: Start a replica set of 3 nodes and elect nodeID = 1 as the leader. 2245 // Prepare a config update transaction which removes the leader (nodeID = 1), to 2246 // ensure we handle the re-configuration of node removal correctly and that the remaining two 2247 // nodes are still capable of forming a functional quorum, so Raft can make further progress. 2248 // Moreover, the test asserts that the removed node stops Rafting with the rest of the cluster, i.e. 2249 // it should not be able to get updates or forward transactions.
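// The removal is observed below through two signals: every node's cluster_size metric is set
// to 2, and c1's Errored channel closes once its fake clock has been advanced by a full
// election timeout.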
2250 2251 configEnv := newConfigEnv(channelID, 2252 common.HeaderType_CONFIG, 2253 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1 2254 2255 c1.cutter.CutNext = true 2256 2257 By("sending config transaction") 2258 err := c1.Configure(configEnv, 0) 2259 Expect(err).NotTo(HaveOccurred()) 2260 2261 // every node has written config block to the OSN ledger 2262 network.exec( 2263 func(c *chain) { 2264 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2265 Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2)) 2266 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(2))) 2267 }) 2268 2269 // Assert c1 has exited 2270 c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2) 2271 Eventually(c1.Errored, LongEventualTimeout).Should(BeClosed()) 2272 close(c1.stopped) 2273 2274 var newLeader, remainingFollower *chain 2275 for newLeader == nil || remainingFollower == nil { 2276 var state raft.SoftState 2277 select { 2278 case state = <-c2.observe: 2279 case state = <-c3.observe: 2280 case <-time.After(LongEventualTimeout): 2281 Fail("Expected a new leader to present") 2282 } 2283 2284 if state.RaftState == raft.StateLeader && state.Lead != raft.None { 2285 newLeader = network.chains[state.Lead] 2286 } 2287 2288 if state.RaftState == raft.StateFollower && state.Lead != raft.None { 2289 remainingFollower = network.chains[state.Lead] 2290 } 2291 } 2292 2293 By("submitting transaction to new leader") 2294 newLeader.cutter.CutNext = true 2295 err = newLeader.Order(env, 0) 2296 Expect(err).NotTo(HaveOccurred()) 2297 2298 Eventually(newLeader.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2299 Eventually(remainingFollower.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2300 // node 1 has been stopped should not write any block 2301 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2302 2303 By("trying to submit to new node, expected to fail") 2304 c1.cutter.CutNext = true 2305 err = c1.Order(env, 0) 2306 Expect(err).To(HaveOccurred()) 2307 2308 // number of block writes should remain the same 2309 Consistently(newLeader.support.WriteBlockCallCount).Should(Equal(2)) 2310 Consistently(remainingFollower.support.WriteBlockCallCount).Should(Equal(2)) 2311 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2312 }) 2313 2314 It("does not deadlock if leader steps down while config block is in-flight", func() { 2315 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2316 c1.cutter.CutNext = true 2317 2318 signal := make(chan struct{}) 2319 stub := c1.support.WriteConfigBlockStub 2320 c1.support.WriteConfigBlockStub = func(b *common.Block, meta []byte) { 2321 signal <- struct{}{} 2322 <-signal 2323 stub(b, meta) 2324 } 2325 2326 By("Sending config transaction") 2327 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 2328 2329 Eventually(signal, LongEventualTimeout).Should(Receive()) 2330 network.disconnect(1) 2331 2332 By("Ticking leader till it steps down") 2333 Eventually(func() raft.SoftState { 2334 c1.clock.Increment(interval) 2335 return c1.Node.Status().SoftState 2336 }, LongEventualTimeout).Should(StateEqual(0, raft.StateFollower)) 2337 2338 close(signal) 2339 2340 Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(0, raft.StateFollower))) 2341 2342 By("Re-electing 1 as leader") 2343 network.connect(1) 2344 
network.elect(1) 2345 2346 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2347 meta := &common.Metadata{Value: raftmetabytes} 2348 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2349 Expect(err).NotTo(HaveOccurred()) 2350 2351 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider) 2352 // if we join a node to existing network, it MUST already obtained blocks 2353 // till the config block that adds this node to cluster. 2354 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2355 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2356 c4.init() 2357 2358 network.addChain(c4) 2359 c4.Start() 2360 2361 Eventually(func() <-chan raft.SoftState { 2362 c1.clock.Increment(interval) 2363 return c4.observe 2364 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateFollower))) 2365 2366 By("Submitting tx to confirm network is still working") 2367 Expect(c1.Order(env, 0)).To(Succeed()) 2368 2369 network.exec(func(c *chain) { 2370 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2371 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2372 }) 2373 }) 2374 }) 2375 }) 2376 2377 When("3/3 nodes are running", func() { 2378 JustBeforeEach(func() { 2379 network.init() 2380 network.start() 2381 network.elect(1) 2382 }) 2383 2384 AfterEach(func() { 2385 network.stop() 2386 }) 2387 2388 It("correctly sets the cluster size and leadership metrics", func() { 2389 // the network should see only one leadership change 2390 network.exec(func(c *chain) { 2391 Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(1)) 2392 Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(0)).Should(Equal(float64(1))) 2393 Expect(c.fakeFields.fakeClusterSize.SetCallCount()).Should(Equal(1)) 2394 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(3))) 2395 }) 2396 // c1 should be the leader 2397 Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2)) 2398 Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1))) 2399 // c2 and c3 should continue to remain followers 2400 Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2401 Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0))) 2402 Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2403 Expect(c3.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0))) 2404 }) 2405 2406 It("orders envelope on leader", func() { 2407 By("instructed to cut next block") 2408 c1.cutter.CutNext = true 2409 err := c1.Order(env, 0) 2410 Expect(err).NotTo(HaveOccurred()) 2411 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2412 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2413 2414 network.exec( 2415 func(c *chain) { 2416 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2417 }) 2418 2419 By("respect batch timeout") 2420 c1.cutter.CutNext = false 2421 2422 err = c1.Order(env, 0) 2423 Expect(err).NotTo(HaveOccurred()) 2424 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 2425 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 2426 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 2427 2428 c1.clock.WaitForNWatchersAndIncrement(timeout, 2) 2429 network.exec( 2430 func(c *chain) { 2431 Eventually(c.support.WriteBlockCallCount, 
LongEventualTimeout).Should(Equal(2)) 2432 }) 2433 }) 2434 2435 It("orders envelope on follower", func() { 2436 By("instructed to cut next block") 2437 c1.cutter.CutNext = true 2438 err := c2.Order(env, 0) 2439 Expect(err).NotTo(HaveOccurred()) 2440 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2441 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2442 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2443 2444 network.exec( 2445 func(c *chain) { 2446 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2447 }) 2448 2449 By("respect batch timeout") 2450 c1.cutter.CutNext = false 2451 2452 err = c2.Order(env, 0) 2453 Expect(err).NotTo(HaveOccurred()) 2454 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 2455 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 2456 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2457 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 2458 2459 c1.clock.WaitForNWatchersAndIncrement(timeout, 2) 2460 network.exec( 2461 func(c *chain) { 2462 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2463 }) 2464 }) 2465 2466 When("MaxInflightBlocks is reached", func() { 2467 BeforeEach(func() { 2468 network.exec(func(c *chain) { c.opts.MaxInflightBlocks = 1 }) 2469 }) 2470 2471 It("waits for in flight blocks to be committed", func() { 2472 c1.cutter.CutNext = true 2473 // disconnect c1 to disrupt consensus 2474 network.disconnect(1) 2475 2476 Expect(c1.Order(env, 0)).To(Succeed()) 2477 2478 doneProp := make(chan struct{}) 2479 go func() { 2480 defer GinkgoRecover() 2481 Expect(c1.Order(env, 0)).To(Succeed()) 2482 close(doneProp) 2483 }() 2484 // expect second `Order` to block 2485 Consistently(doneProp).ShouldNot(BeClosed()) 2486 network.exec(func(c *chain) { 2487 Consistently(c.support.WriteBlockCallCount).Should(BeZero()) 2488 }) 2489 2490 network.connect(1) 2491 c1.clock.Increment(interval) 2492 2493 Eventually(doneProp, LongEventualTimeout).Should(BeClosed()) 2494 network.exec(func(c *chain) { 2495 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2496 }) 2497 }) 2498 2499 It("resets block in flight when steps down from leader", func() { 2500 c1.cutter.CutNext = true 2501 c2.cutter.CutNext = true 2502 // disconnect c1 to disrupt consensus 2503 network.disconnect(1) 2504 2505 Expect(c1.Order(env, 0)).To(Succeed()) 2506 2507 doneProp := make(chan struct{}) 2508 go func() { 2509 defer GinkgoRecover() 2510 2511 Expect(c1.Order(env, 0)).To(Succeed()) 2512 close(doneProp) 2513 }() 2514 // expect second `Order` to block 2515 Consistently(doneProp).ShouldNot(BeClosed()) 2516 network.exec(func(c *chain) { 2517 Consistently(c.support.WriteBlockCallCount).Should(BeZero()) 2518 }) 2519 2520 network.elect(2) 2521 Expect(c3.Order(env, 0)).To(Succeed()) 2522 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2523 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2524 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2525 2526 network.connect(1) 2527 c2.clock.Increment(interval) 2528 2529 Eventually(doneProp, LongEventualTimeout).Should(BeClosed()) 2530 network.exec(func(c *chain) { 2531 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 
2532 }) 2533 }) 2534 }) 2535 2536 When("leader is disconnected", func() { 2537 It("proactively steps down to follower", func() { 2538 network.disconnect(1) 2539 2540 By("Ticking leader until it steps down") 2541 Eventually(func() <-chan raft.SoftState { 2542 c1.clock.Increment(interval) 2543 return c1.observe 2544 }, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StateFollower}))) 2545 2546 By("Ensuring it does not accept message due to the cluster being leaderless") 2547 err := c1.Order(env, 0) 2548 Expect(err).To(MatchError("no Raft leader")) 2549 2550 network.elect(2) 2551 2552 // c1 should have lost leadership 2553 Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(3)) 2554 Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(2)).Should(Equal(float64(0))) 2555 // c2 should become the leader 2556 Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2)) 2557 Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1))) 2558 // c2 should continue to remain follower 2559 Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2560 2561 network.join(1, true) 2562 network.exec(func(c *chain) { 2563 Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(3)) 2564 Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(2)).Should(Equal(float64(1))) 2565 }) 2566 2567 err = c1.Order(env, 0) 2568 Expect(err).NotTo(HaveOccurred()) 2569 }) 2570 2571 It("does not deadlock if propose is blocked", func() { 2572 signal := make(chan struct{}) 2573 c1.cutter.CutNext = true 2574 c1.support.SequenceStub = func() uint64 { 2575 signal <- struct{}{} 2576 <-signal 2577 return 0 2578 } 2579 2580 By("Sending a normal transaction") 2581 Expect(c1.Order(env, 0)).To(Succeed()) 2582 2583 Eventually(signal).Should(Receive()) 2584 network.disconnect(1) 2585 2586 By("Ticking leader till it steps down") 2587 Eventually(func() raft.SoftState { 2588 c1.clock.Increment(interval) 2589 return c1.Node.Status().SoftState 2590 }).Should(StateEqual(0, raft.StateFollower)) 2591 2592 close(signal) 2593 2594 Eventually(c1.observe).Should(Receive(StateEqual(0, raft.StateFollower))) 2595 c1.support.SequenceStub = nil 2596 network.exec(func(c *chain) { 2597 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2598 }) 2599 2600 By("Re-electing 1 as leader") 2601 network.connect(1) 2602 network.elect(1) 2603 2604 By("Sending another normal transaction") 2605 Expect(c1.Order(env, 0)).To(Succeed()) 2606 2607 network.exec(func(c *chain) { 2608 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2609 }) 2610 }) 2611 }) 2612 2613 When("follower is disconnected", func() { 2614 It("should return error when receiving an env", func() { 2615 network.disconnect(2) 2616 2617 errorC := c2.Errored() 2618 Consistently(errorC).ShouldNot(BeClosed()) // assert that errorC is not closed 2619 2620 By("Ticking node 2 until it becomes pre-candidate") 2621 Eventually(func() <-chan raft.SoftState { 2622 c2.clock.Increment(interval) 2623 return c2.observe 2624 }, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StatePreCandidate}))) 2625 2626 Eventually(errorC).Should(BeClosed()) 2627 err := c2.Order(env, 0) 2628 Expect(err).To(HaveOccurred()) 2629 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2630 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2631 
Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2632 2633 network.connect(2) 2634 c1.clock.Increment(interval) 2635 Expect(errorC).To(BeClosed()) 2636 2637 Eventually(c2.Errored).ShouldNot(BeClosed()) 2638 }) 2639 }) 2640 2641 It("leader retransmits lost messages", func() { 2642 // This tests that heartbeats will trigger the leader to retransmit lost MsgApp 2643 2644 c1.cutter.CutNext = true 2645 2646 network.disconnect(1) // drop MsgApp 2647 2648 err := c1.Order(env, 0) 2649 Expect(err).NotTo(HaveOccurred()) 2650 2651 network.exec( 2652 func(c *chain) { 2653 Consistently(func() int { return c.support.WriteBlockCallCount() }).Should(Equal(0)) 2654 }) 2655 2656 network.connect(1) // reconnect leader 2657 2658 c1.clock.Increment(interval) // trigger a heartbeat 2659 network.exec( 2660 func(c *chain) { 2661 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2662 }) 2663 }) 2664 2665 It("allows the leader to create multiple normal blocks without having to wait for them to be written out", func() { 2666 // this ensures that the created blocks are not written out 2667 network.disconnect(1) 2668 2669 c1.cutter.CutNext = true 2670 for i := 0; i < 3; i++ { 2671 Expect(c1.Order(env, 0)).To(Succeed()) 2672 } 2673 2674 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 2675 2676 network.connect(1) 2677 2678 // After FAB-13722, the leader pauses replication to a node if it gets notified that message 2679 // delivery to that node has failed, e.g. connection refused. Replication to that 2680 // follower is resumed once the leader receives a MsgHeartbeatResp from it. 2681 // We could certainly tick the leader repeatedly to trigger a heartbeat broadcast, but we 2682 // would also risk a slow leader stepping down due to excessive ticks. 2683 // 2684 // Instead, we can simply send an artificial MsgHeartbeatResp to the leader to resume replication. 2685 m2 := &raftpb.Message{To: c1.id, From: c2.id, Type: raftpb.MsgHeartbeatResp} 2686 c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m2)}, c2.id) 2687 m3 := &raftpb.Message{To: c1.id, From: c3.id, Type: raftpb.MsgHeartbeatResp} 2688 c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m3)}, c3.id) 2689 2690 network.exec(func(c *chain) { 2691 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 2692 }) 2693 }) 2694 2695 It("new leader should wait for in-flight blocks to commit before accepting new env", func() { 2696 // Scenario: when a node is elected as the new leader and there are still in-flight blocks, 2697 // it should not immediately start accepting new envelopes; instead it should wait for 2698 // those in-flight blocks to be committed, otherwise we may create an uncle block which 2699 // forks and panics the chain. 2700 // 2701 // Steps: 2702 // - start raft cluster with three nodes and genesis block0 2703 // - order env1 on c1, which creates block1 2704 // - drop MsgApp from 1 to 3 2705 // - drop second round of MsgApp sent from 1 to 2, so that block1 is only committed on c1 2706 // - disconnect c1 and elect c2 2707 // - order env2 on c2. This env must NOT be immediately accepted, otherwise c2 would create 2708 // an uncle block1 based on block0.
2709 // - c2 commits block1 2710 // - c2 accepts env2, and creates block2 2711 // - c2 commits block2 2712 c1.cutter.CutNext = true 2713 c2.cutter.CutNext = true 2714 2715 step1 := c1.getStepFunc() 2716 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2717 stepMsg := &raftpb.Message{} 2718 Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred()) 2719 2720 if dest == 3 { 2721 return nil 2722 } 2723 2724 if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) == 0 { 2725 return nil 2726 } 2727 2728 return step1(dest, msg) 2729 }) 2730 2731 Expect(c1.Order(env, 0)).NotTo(HaveOccurred()) 2732 2733 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2734 Consistently(c2.support.WriteBlockCallCount).Should(Equal(0)) 2735 Consistently(c3.support.WriteBlockCallCount).Should(Equal(0)) 2736 2737 network.disconnect(1) 2738 2739 step2 := c2.getStepFunc() 2740 c2.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2741 stepMsg := &raftpb.Message{} 2742 Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred()) 2743 2744 if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) != 0 && dest == 3 { 2745 for _, ent := range stepMsg.Entries { 2746 if len(ent.Data) != 0 { 2747 return nil 2748 } 2749 } 2750 } 2751 return step2(dest, msg) 2752 }) 2753 2754 network.elect(2) 2755 2756 go func() { 2757 defer GinkgoRecover() 2758 Expect(c2.Order(env, 0)).NotTo(HaveOccurred()) 2759 }() 2760 2761 Consistently(c2.support.WriteBlockCallCount).Should(Equal(0)) 2762 Consistently(c3.support.WriteBlockCallCount).Should(Equal(0)) 2763 2764 c2.setStepFunc(step2) 2765 c2.clock.Increment(interval) 2766 2767 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2768 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2769 2770 b, _ := c2.support.WriteBlockArgsForCall(0) 2771 Expect(b.Header.Number).To(Equal(uint64(1))) 2772 b, _ = c2.support.WriteBlockArgsForCall(1) 2773 Expect(b.Header.Number).To(Equal(uint64(2))) 2774 }) 2775 2776 Context("handling config blocks", func() { 2777 var configEnv *common.Envelope 2778 BeforeEach(func() { 2779 values := map[string]*common.ConfigValue{ 2780 "BatchTimeout": { 2781 Version: 1, 2782 Value: marshalOrPanic(&orderer.BatchTimeout{ 2783 Timeout: "3ms", 2784 }), 2785 }, 2786 } 2787 configEnv = newConfigEnv(channelID, 2788 common.HeaderType_CONFIG, 2789 newConfigUpdateEnv(channelID, nil, values), 2790 ) 2791 }) 2792 2793 It("holds up block creation on leader once a config block has been created and not written out", func() { 2794 // this ensures that the created blocks are not written out 2795 network.disconnect(1) 2796 2797 c1.cutter.CutNext = true 2798 // config block 2799 err := c1.Order(configEnv, 0) 2800 Expect(err).NotTo(HaveOccurred()) 2801 2802 // to avoid data races since we are accessing these within a goroutine 2803 tempEnv := env 2804 tempC1 := c1 2805 2806 done := make(chan struct{}) 2807 2808 // normal block 2809 go func() { 2810 defer GinkgoRecover() 2811 2812 // This should be blocked if config block is not committed 2813 err := tempC1.Order(tempEnv, 0) 2814 Expect(err).NotTo(HaveOccurred()) 2815 2816 close(done) 2817 }() 2818 2819 Consistently(done).ShouldNot(BeClosed()) 2820 2821 network.connect(1) 2822 c1.clock.Increment(interval) 2823 2824 network.exec( 2825 func(c *chain) { 2826 Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2827 }) 2828 2829 network.exec( 2830 
func(c *chain) { 2831 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2832 }) 2833 }) 2834 2835 It("continues creating blocks on leader after a config block has been successfully written out", func() { 2836 c1.cutter.CutNext = true 2837 // config block 2838 err := c1.Configure(configEnv, 0) 2839 Expect(err).NotTo(HaveOccurred()) 2840 network.exec( 2841 func(c *chain) { 2842 Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2843 }) 2844 2845 // normal block following config block 2846 err = c1.Order(env, 0) 2847 Expect(err).NotTo(HaveOccurred()) 2848 network.exec( 2849 func(c *chain) { 2850 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2851 }) 2852 }) 2853 }) 2854 2855 When("Snapshotting is enabled", func() { 2856 BeforeEach(func() { 2857 c1.opts.SnapshotIntervalSize = 1 2858 c1.opts.SnapshotCatchUpEntries = 1 2859 }) 2860 2861 It("keeps running if some entries in memory are purged", func() { 2862 // Scenario: snapshotting is enabled on node 1 and it purges memory storage 2863 // per every snapshot. Cluster should be correctly functioning. 2864 2865 i, err := c1.opts.MemoryStorage.FirstIndex() 2866 Expect(err).NotTo(HaveOccurred()) 2867 Expect(i).To(Equal(uint64(1))) 2868 2869 c1.cutter.CutNext = true 2870 2871 err = c1.Order(env, 0) 2872 Expect(err).NotTo(HaveOccurred()) 2873 2874 network.exec( 2875 func(c *chain) { 2876 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2877 }) 2878 2879 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2880 i, err = c1.opts.MemoryStorage.FirstIndex() 2881 Expect(err).NotTo(HaveOccurred()) 2882 2883 err = c1.Order(env, 0) 2884 Expect(err).NotTo(HaveOccurred()) 2885 2886 network.exec( 2887 func(c *chain) { 2888 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2889 }) 2890 2891 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2892 i, err = c1.opts.MemoryStorage.FirstIndex() 2893 Expect(err).NotTo(HaveOccurred()) 2894 2895 err = c1.Order(env, 0) 2896 Expect(err).NotTo(HaveOccurred()) 2897 2898 network.exec( 2899 func(c *chain) { 2900 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 2901 }) 2902 2903 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2904 }) 2905 2906 It("lagged node can catch up using snapshot", func() { 2907 network.disconnect(2) 2908 c1.cutter.CutNext = true 2909 2910 c2Lasti, _ := c2.opts.MemoryStorage.LastIndex() 2911 var blockCnt int 2912 // Order blocks until first index of c1 memory is greater than last index of c2, 2913 // so a snapshot will be sent to c2 when it rejoins network 2914 Eventually(func() bool { 2915 c1Firsti, _ := c1.opts.MemoryStorage.FirstIndex() 2916 if c1Firsti > c2Lasti+1 { 2917 return true 2918 } 2919 2920 Expect(c1.Order(env, 0)).To(Succeed()) 2921 blockCnt++ 2922 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 2923 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 2924 return false 2925 }, LongEventualTimeout).Should(BeTrue()) 2926 2927 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2928 2929 network.join(2, false) 2930 2931 Eventually(c2.support.WriteBlockCallCount, 
LongEventualTimeout).Should(Equal(blockCnt)) 2932 indices := etcdraft.ListSnapshots(logger, c2.opts.SnapDir) 2933 Expect(indices).To(HaveLen(1)) 2934 gap := indices[0] - c2Lasti 2935 2936 // TODO In theory, "equal" is the accurate behavior we expect. However, the eviction suspector, 2937 // which calls the block puller, is still relying on the real clock, and sometimes increments the puller 2938 // call count. Therefore we are being more lenient here until the suspector starts using the fake clock, 2939 // so we have more deterministic control over it. 2940 Expect(c2.puller.PullBlockCallCount()).To(BeNumerically(">=", int(gap))) 2941 2942 // the chain should keep functioning 2943 Expect(c2.Order(env, 0)).To(Succeed()) 2944 2945 network.exec( 2946 func(c *chain) { 2947 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(blockCnt + 1)) 2948 }) 2949 }) 2950 }) 2951 2952 Context("failover", func() { 2953 It("follower should step up as leader upon failover", func() { 2954 network.stop(1) 2955 network.elect(2) 2956 2957 By("order envelope on new leader") 2958 c2.cutter.CutNext = true 2959 err := c2.Order(env, 0) 2960 Expect(err).NotTo(HaveOccurred()) 2961 2962 // block should not be produced on chain 1 2963 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2964 2965 // block should be produced on chain 2 & 3 2966 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2967 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2968 2969 By("order envelope on follower") 2970 err = c3.Order(env, 0) 2971 Expect(err).NotTo(HaveOccurred()) 2972 2973 // block should not be produced on chain 1 2974 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2975 2976 // block should be produced on chain 2 & 3 2977 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2978 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2979 }) 2980 2981 It("follower cannot be elected if its log is not up-to-date", func() { 2982 network.disconnect(2) 2983 2984 c1.cutter.CutNext = true 2985 err := c1.Order(env, 0) 2986 Expect(err).NotTo(HaveOccurred()) 2987 2988 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2989 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2990 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2991 2992 network.disconnect(1) 2993 network.connect(2) 2994 2995 // node 2 has not caught up with other nodes 2996 for tick := 0; tick < 2*ELECTION_TICK-1; tick++ { 2997 c2.clock.Increment(interval) 2998 Consistently(c2.observe).ShouldNot(Receive(Equal(2))) 2999 } 3000 3001 // When PreVote is enabled, node 2 would fail to collect enough 3002 // PreVotes because its index is not up-to-date. Therefore, it 3003 // does not cause a leader change on the other nodes.
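// (Background on PreVote in etcd/raft: a node first runs a pre-election without incrementing
// its term, and peers only grant pre-votes to a candidate whose log is sufficiently up-to-date,
// so a lagging node cannot bump terms and depose an established leader.)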
3004 Consistently(c3.observe).ShouldNot(Receive()) 3005 network.elect(3) // node 3 has newest logs among 2&3, so it can be elected 3006 }) 3007 3008 It("PreVote prevents reconnected node from disturbing network", func() { 3009 network.disconnect(2) 3010 3011 c1.cutter.CutNext = true 3012 err := c1.Order(env, 0) 3013 Expect(err).NotTo(HaveOccurred()) 3014 3015 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3016 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3017 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3018 3019 network.connect(2) 3020 3021 for tick := 0; tick < 2*ELECTION_TICK-1; tick++ { 3022 c2.clock.Increment(interval) 3023 Consistently(c2.observe).ShouldNot(Receive(Equal(2))) 3024 } 3025 3026 Consistently(c1.observe).ShouldNot(Receive()) 3027 Consistently(c3.observe).ShouldNot(Receive()) 3028 }) 3029 3030 It("follower can catch up and then campaign with success", func() { 3031 network.disconnect(2) 3032 3033 c1.cutter.CutNext = true 3034 for i := 0; i < 10; i++ { 3035 err := c1.Order(env, 0) 3036 Expect(err).NotTo(HaveOccurred()) 3037 } 3038 3039 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3040 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3041 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3042 3043 network.join(2, false) 3044 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3045 3046 network.disconnect(1) 3047 network.elect(2) 3048 }) 3049 3050 It("purges blockcutter, stops timer and discards created blocks if leadership is lost", func() { 3051 // enqueue one transaction into 1's blockcutter to test for purging of block cutter 3052 c1.cutter.CutNext = false 3053 err := c1.Order(env, 0) 3054 Expect(err).NotTo(HaveOccurred()) 3055 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 3056 3057 // no block should be written because env is not cut into block yet 3058 c1.clock.WaitForNWatchersAndIncrement(interval, 2) 3059 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 3060 3061 network.disconnect(1) 3062 network.elect(2) 3063 network.join(1, true) 3064 3065 Eventually(c1.clock.WatcherCount, LongEventualTimeout).Should(Equal(1)) // blockcutter time is stopped 3066 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(0)) 3067 // the created block should be discarded since there is a leadership change 3068 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 3069 3070 network.disconnect(2) 3071 network.elect(1) 3072 3073 err = c1.Order(env, 0) 3074 Expect(err).NotTo(HaveOccurred()) 3075 3076 // The following group of assertions is redundant - it's here for completeness. 3077 // If the blockcutter has not been reset, fast-forwarding 1's clock to 'timeout', should result in the blockcutter firing. 3078 // If the blockcucter has been reset, fast-forwarding won't do anything. 
3079 // 3080 // Put differently: 3081 // 3082 // correct: 3083 // stop start fire 3084 // |--------------|---------------------------| 3085 // n*intervals timeout 3086 // (advanced in election) 3087 // 3088 // wrong: 3089 // unstop fire 3090 // |---------------------------| 3091 // timeout 3092 // 3093 // timeout-n*interval n*interval 3094 // |-----------|----------------| 3095 // ^ ^ 3096 // at this point of time it should fire 3097 // timer should not fire at this point 3098 3099 c1.clock.WaitForNWatchersAndIncrement(timeout-interval, 2) 3100 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 3101 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 3102 3103 c1.clock.Increment(interval) 3104 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3105 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3106 }) 3107 3108 It("stale leader should not be able to propose block because of lagged term", func() { 3109 network.disconnect(1) 3110 network.elect(2) 3111 network.connect(1) 3112 3113 c1.cutter.CutNext = true 3114 err := c1.Order(env, 0) 3115 Expect(err).NotTo(HaveOccurred()) 3116 3117 network.exec( 3118 func(c *chain) { 3119 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 3120 }) 3121 }) 3122 3123 It("aborts waiting for block to be committed upon leadership lost", func() { 3124 network.disconnect(1) 3125 3126 c1.cutter.CutNext = true 3127 err := c1.Order(env, 0) 3128 Expect(err).NotTo(HaveOccurred()) 3129 3130 network.exec( 3131 func(c *chain) { 3132 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 3133 }) 3134 3135 network.elect(2) 3136 network.connect(1) 3137 3138 c2.clock.Increment(interval) 3139 // this check guarantees that signal on resignC is consumed in commitBatches method. 
3140 Eventually(c1.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 2, RaftState: raft.StateFollower}))) 3141 }) 3142 }) 3143 }) 3144 }) 3145 }) 3146 3147 func nodeConfigFromMetadata(consenterMetadata *raftprotos.ConfigMetadata) []cluster.RemoteNode { 3148 var nodes []cluster.RemoteNode 3149 for i, consenter := range consenterMetadata.Consenters { 3150 // For now, skip ourselves 3151 if i == 0 { 3152 continue 3153 } 3154 serverDER, _ := pem.Decode(consenter.ServerTlsCert) 3155 clientDER, _ := pem.Decode(consenter.ClientTlsCert) 3156 node := cluster.RemoteNode{ 3157 ID: uint64(i + 1), 3158 Endpoint: "localhost:7050", 3159 ServerTLSCert: serverDER.Bytes, 3160 ClientTLSCert: clientDER.Bytes, 3161 } 3162 nodes = append(nodes, node) 3163 } 3164 return nodes 3165 } 3166 3167 func createMetadata(nodeCount int, tlsCA tlsgen.CA) *raftprotos.ConfigMetadata { 3168 md := &raftprotos.ConfigMetadata{Options: &raftprotos.Options{ 3169 TickInterval: time.Duration(interval).String(), 3170 ElectionTick: ELECTION_TICK, 3171 HeartbeatTick: HEARTBEAT_TICK, 3172 MaxInflightBlocks: 5, 3173 }} 3174 for i := 0; i < nodeCount; i++ { 3175 md.Consenters = append(md.Consenters, &raftprotos.Consenter{ 3176 Host: "localhost", 3177 Port: 7050, 3178 ServerTlsCert: serverTLSCert(tlsCA), 3179 ClientTlsCert: clientTLSCert(tlsCA), 3180 }) 3181 } 3182 return md 3183 } 3184 3185 func serverTLSCert(tlsCA tlsgen.CA) []byte { 3186 cert, err := tlsCA.NewServerCertKeyPair("localhost") 3187 if err != nil { 3188 panic(err) 3189 } 3190 return cert.Cert 3191 } 3192 3193 func clientTLSCert(tlsCA tlsgen.CA) []byte { 3194 cert, err := tlsCA.NewClientCertKeyPair() 3195 if err != nil { 3196 panic(err) 3197 } 3198 return cert.Cert 3199 } 3200 3201 // marshalOrPanic serializes a protobuf message and panics if this 3202 // operation fails 3203 func marshalOrPanic(pb proto.Message) []byte { 3204 data, err := proto.Marshal(pb) 3205 if err != nil { 3206 panic(err) 3207 } 3208 return data 3209 } 3210 3211 // helpers to facilitate tests 3212 type stepFunc func(dest uint64, msg *orderer.ConsensusRequest) error 3213 3214 type chain struct { 3215 id uint64 3216 3217 stepLock sync.Mutex 3218 step stepFunc 3219 3220 support *consensusmocks.FakeConsenterSupport 3221 cutter *mockblockcutter.Receiver 3222 configurator *mocks.FakeConfigurator 3223 rpc *mocks.FakeRPC 3224 storage *raft.MemoryStorage 3225 clock *fakeclock.FakeClock 3226 opts etcdraft.Options 3227 puller *mocks.FakeBlockPuller 3228 3229 // store written blocks to be returned by mock block puller 3230 ledgerLock sync.RWMutex 3231 ledger map[uint64]*common.Block 3232 ledgerHeight uint64 3233 lastConfigBlockNumber uint64 3234 3235 observe chan raft.SoftState 3236 unstarted chan struct{} 3237 stopped chan struct{} 3238 3239 fakeFields *fakeMetricsFields 3240 3241 *etcdraft.Chain 3242 3243 cryptoProvider bccsp.BCCSP 3244 } 3245 3246 func newChain( 3247 timeout time.Duration, 3248 channel, dataDir string, 3249 id uint64, 3250 raftMetadata *raftprotos.BlockMetadata, 3251 consenters map[uint64]*raftprotos.Consenter, 3252 cryptoProvider bccsp.BCCSP, 3253 ) *chain { 3254 rpc := &mocks.FakeRPC{} 3255 clock := fakeclock.NewFakeClock(time.Now()) 3256 storage := raft.NewMemoryStorage() 3257 3258 fakeFields := newFakeMetricsFields() 3259 3260 opts := etcdraft.Options{ 3261 RaftID: uint64(id), 3262 Clock: clock, 3263 TickInterval: interval, 3264 ElectionTick: ELECTION_TICK, 3265 HeartbeatTick: HEARTBEAT_TICK, 3266 MaxSizePerMsg: 1024 * 1024, 3267 MaxInflightBlocks: 256, 3268 
// helpers to facilitate tests
type stepFunc func(dest uint64, msg *orderer.ConsensusRequest) error

type chain struct {
    id uint64

    stepLock sync.Mutex
    step     stepFunc

    support      *consensusmocks.FakeConsenterSupport
    cutter       *mockblockcutter.Receiver
    configurator *mocks.FakeConfigurator
    rpc          *mocks.FakeRPC
    storage      *raft.MemoryStorage
    clock        *fakeclock.FakeClock
    opts         etcdraft.Options
    puller       *mocks.FakeBlockPuller

    // store written blocks to be returned by the mock block puller
    ledgerLock            sync.RWMutex
    ledger                map[uint64]*common.Block
    ledgerHeight          uint64
    lastConfigBlockNumber uint64

    observe   chan raft.SoftState
    unstarted chan struct{}
    stopped   chan struct{}

    fakeFields *fakeMetricsFields

    *etcdraft.Chain

    cryptoProvider bccsp.BCCSP
}

func newChain(
    timeout time.Duration,
    channel, dataDir string,
    id uint64,
    raftMetadata *raftprotos.BlockMetadata,
    consenters map[uint64]*raftprotos.Consenter,
    cryptoProvider bccsp.BCCSP,
) *chain {
    rpc := &mocks.FakeRPC{}
    clock := fakeclock.NewFakeClock(time.Now())
    storage := raft.NewMemoryStorage()

    fakeFields := newFakeMetricsFields()

    opts := etcdraft.Options{
        RaftID:              uint64(id),
        Clock:               clock,
        TickInterval:        interval,
        ElectionTick:        ELECTION_TICK,
        HeartbeatTick:       HEARTBEAT_TICK,
        MaxSizePerMsg:       1024 * 1024,
        MaxInflightBlocks:   256,
        BlockMetadata:       raftMetadata,
        LeaderCheckInterval: 500 * time.Millisecond,
        Consenters:          consenters,
        Logger:              flogging.NewFabricLogger(zap.NewExample()),
        MemoryStorage:       storage,
        WALDir:              path.Join(dataDir, "wal"),
        SnapDir:             path.Join(dataDir, "snapshot"),
        Metrics:             newFakeMetrics(fakeFields),
    }

    support := &consensusmocks.FakeConsenterSupport{}
    support.ChannelIDReturns(channel)
    support.SharedConfigReturns(mockOrderer(timeout, nil))

    cutter := mockblockcutter.NewReceiver()
    close(cutter.Block)
    support.BlockCutterReturns(cutter)

    // Upon leader change, lead is first reset to 0 and then set to the actual
    // new leader, i.e. 1 -> 0 -> 2. Therefore two values are sent on this
    // channel, so its buffer size needs to be 2.
    observe := make(chan raft.SoftState, 2)

    configurator := &mocks.FakeConfigurator{}
    puller := &mocks.FakeBlockPuller{}

    ch := make(chan struct{})
    close(ch)

    c := &chain{
        id:           id,
        support:      support,
        cutter:       cutter,
        rpc:          rpc,
        storage:      storage,
        observe:      observe,
        clock:        clock,
        opts:         opts,
        unstarted:    ch,
        stopped:      make(chan struct{}),
        configurator: configurator,
        puller:       puller,
        ledger: map[uint64]*common.Block{
            0: getSeedBlock(), // Very first block
        },
        ledgerHeight:   1,
        fakeFields:     fakeFields,
        cryptoProvider: cryptoProvider,
    }

    // appendNormalBlockToLedger receives normal blocks and their metadata and
    // appends them to the in-memory ledger to simulate write behaviour.
    appendNormalBlockToLedger := func(b *common.Block, meta []byte) {
        c.ledgerLock.Lock()
        defer c.ledgerLock.Unlock()

        b = proto.Clone(b).(*common.Block)
        bytes, err := proto.Marshal(&common.Metadata{Value: meta})
        Expect(err).NotTo(HaveOccurred())
        b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes

        lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber})
        b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{
            Value: lastConfigValue,
        })

        c.ledger[b.Header.Number] = b
        if c.ledgerHeight < b.Header.Number+1 {
            c.ledgerHeight = b.Header.Number + 1
        }
    }

    // appendConfigBlockToLedger receives config blocks and their metadata and
    // appends them to the in-memory ledger to simulate write behaviour.
    appendConfigBlockToLedger := func(b *common.Block, meta []byte) {
        c.ledgerLock.Lock()
        defer c.ledgerLock.Unlock()

        b = proto.Clone(b).(*common.Block)
        bytes, err := proto.Marshal(&common.Metadata{Value: meta})
        Expect(err).NotTo(HaveOccurred())
        b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes

        c.lastConfigBlockNumber = b.Header.Number

        lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber})
        b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{
            Value: lastConfigValue,
        })

        c.ledger[b.Header.Number] = b
        if c.ledgerHeight < b.Header.Number+1 {
            c.ledgerHeight = b.Header.Number + 1
        }
    }

    c.support.WriteBlockStub = appendNormalBlockToLedger
    c.support.WriteConfigBlockStub = appendConfigBlockToLedger

    // returns the current ledger height
    c.support.HeightStub = func() uint64 {
        c.ledgerLock.RLock()
        defer c.ledgerLock.RUnlock()
        return c.ledgerHeight
    }

    // reads a block from the ledger
    c.support.BlockStub = func(number uint64) *common.Block {
        c.ledgerLock.RLock()
        defer c.ledgerLock.RUnlock()
        return c.ledger[number]
    }

    return c
}
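
// Illustrative sketch, not part of the original suite: the typical lifecycle of a
// single mock chain as the specs above use it. The function name, the channel name,
// the one-second batch timeout and the single consenter entry are assumptions for
// the example; it is meant to run inside a Ginkgo spec, since the harness uses
// Gomega assertions internally.
func exampleSingleChainLifecycle(dataDir string, tlsCA tlsgen.CA, cryptoProvider bccsp.BCCSP) {
    meta := &raftprotos.BlockMetadata{ConsenterIds: []uint64{1}, NextConsenterId: 2}
    consenters := map[uint64]*raftprotos.Consenter{
        1: {Host: "localhost", Port: 7050, ServerTlsCert: serverTLSCert(tlsCA), ClientTlsCert: clientTLSCert(tlsCA)},
    }

    c := newChain(time.Second, "example-channel", dataDir, 1, meta, consenters, cryptoProvider)
    c.init()  // builds the underlying *etcdraft.Chain from c.opts
    c.start() // clears the unstarted marker and starts the raft node

    c.cutter.CutNext = true            // cut a block for every ordered envelope
    _ = c.Order(&common.Envelope{}, 0) // rejected until the node becomes leader
    c.Halt()
}
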
func (c *chain) init() {
    ch, err := etcdraft.NewChain(
        c.support,
        c.opts,
        c.configurator,
        c.rpc,
        c.cryptoProvider,
        func() (etcdraft.BlockPuller, error) { return c.puller, nil },
        nil,
        c.observe,
    )
    Expect(err).NotTo(HaveOccurred())
    c.Chain = ch
}

func (c *chain) start() {
    c.unstarted = nil
    c.Start()
}

func (c *chain) setStepFunc(f stepFunc) {
    c.stepLock.Lock()
    c.step = f
    c.stepLock.Unlock()
}

func (c *chain) getStepFunc() stepFunc {
    c.stepLock.Lock()
    defer c.stepLock.Unlock()
    return c.step
}

type network struct {
    sync.RWMutex

    leader uint64
    chains map[uint64]*chain

    // links simulates the communication layer configuration (a link is bi-directional):
    // if links[left][right] == true, right can send messages to left.
    links map[uint64]map[uint64]bool
    // connectivity determines whether a node is connected to the network;
    // tests use it to simulate network partitions.
    connectivity map[uint64]bool
}

func (n *network) link(from []uint64, to uint64) {
    links := make(map[uint64]bool)
    for _, id := range from {
        links[id] = true
    }

    n.Lock()
    defer n.Unlock()

    n.links[to] = links
}

func (n *network) linked(from, to uint64) bool {
    n.RLock()
    defer n.RUnlock()

    return n.links[to][from]
}

func (n *network) connect(id uint64) {
    n.Lock()
    defer n.Unlock()

    n.connectivity[id] = true
}

func (n *network) disconnect(id uint64) {
    n.Lock()
    defer n.Unlock()

    n.connectivity[id] = false
}

func (n *network) connected(id uint64) bool {
    n.RLock()
    defer n.RUnlock()

    return n.connectivity[id]
}
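
// Illustrative sketch, not part of the original suite: the delivery rule the fake
// transport enforces for every consensus and submit message, as implemented by the
// stubs installed in addChain below. A message is delivered only if the recipient's
// link set contains the sender and both endpoints are connected; tests break either
// condition to simulate a network partition. The function name is an assumption for
// the example.
func exampleCanDeliver(n *network, from, to uint64) bool {
    return n.linked(from, to) && n.connected(from) && n.connected(to)
}
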
func (n *network) addChain(c *chain) {
    n.connect(c.id) // chain is connected by default

    c.step = func(dest uint64, msg *orderer.ConsensusRequest) error {
        if !n.linked(c.id, dest) {
            return errors.Errorf("connection refused")
        }

        if !n.connected(c.id) || !n.connected(dest) {
            return errors.Errorf("connection lost")
        }

        n.RLock()
        target := n.chains[dest]
        n.RUnlock()
        go func() {
            defer GinkgoRecover()
            target.Consensus(msg, c.id)
        }()
        return nil
    }

    c.rpc.SendConsensusStub = func(dest uint64, msg *orderer.ConsensusRequest) error {
        c.stepLock.Lock()
        defer c.stepLock.Unlock()
        return c.step(dest, msg)
    }

    c.rpc.SendSubmitStub = func(dest uint64, msg *orderer.SubmitRequest) error {
        if !n.linked(c.id, dest) {
            return errors.Errorf("connection refused")
        }

        if !n.connected(c.id) || !n.connected(dest) {
            return errors.Errorf("connection lost")
        }

        n.RLock()
        target := n.chains[dest]
        n.RUnlock()
        go func() {
            defer GinkgoRecover()
            target.Submit(msg, c.id)
        }()
        return nil
    }

    c.puller.PullBlockStub = func(i uint64) *common.Block {
        n.RLock()
        leaderChain := n.chains[n.leader]
        n.RUnlock()

        leaderChain.ledgerLock.RLock()
        defer leaderChain.ledgerLock.RUnlock()
        block := leaderChain.ledger[i]
        return block
    }

    c.puller.HeightsByEndpointsStub = func() (map[string]uint64, error) {
        n.RLock()
        leader := n.chains[n.leader]
        n.RUnlock()

        if leader == nil {
            return nil, errors.Errorf("ledger not available")
        }

        leader.ledgerLock.RLock()
        defer leader.ledgerLock.RUnlock()
        return map[string]uint64{"leader": leader.ledgerHeight}, nil
    }

    c.configurator.ConfigureCalls(func(channel string, nodes []cluster.RemoteNode) {
        var ids []uint64
        for _, node := range nodes {
            ids = append(ids, node.ID)
        }
        n.link(ids, c.id)
    })

    n.Lock()
    defer n.Unlock()
    n.chains[c.id] = c
}

func createNetwork(
    timeout time.Duration,
    channel, dataDir string,
    raftMetadata *raftprotos.BlockMetadata,
    consenters map[uint64]*raftprotos.Consenter,
    cryptoProvider bccsp.BCCSP,
) *network {
    n := &network{
        chains:       make(map[uint64]*chain),
        connectivity: make(map[uint64]bool),
        links:        make(map[uint64]map[uint64]bool),
    }

    for _, nodeID := range raftMetadata.ConsenterIds {
        dir, err := ioutil.TempDir(dataDir, fmt.Sprintf("node-%d-", nodeID))
        Expect(err).NotTo(HaveOccurred())

        m := proto.Clone(raftMetadata).(*raftprotos.BlockMetadata)
        n.addChain(newChain(timeout, channel, dir, nodeID, m, consenters, cryptoProvider))
    }

    return n
}

// init is kept separate from createNetwork so that tests can alter the
// configuration of a chain before the chain is created.
func (n *network) init() {
    n.exec(func(c *chain) { c.init() })
}

func (n *network) start(ids ...uint64) {
    nodes := ids
    if len(nodes) == 0 {
        for i := range n.chains {
            nodes = append(nodes, i)
        }
    }

    for _, id := range nodes {
        n.chains[id].start()

        // When the Raft node bootstraps, it produces a ConfChange
        // to add itself, which needs to be consumed with Ready().
        // If there are pending configuration changes in raft,
        // it refuses to campaign, no matter how many ticks are supplied.
        // This is not a problem in production code because eventually
        // raft.Ready will be consumed as real time goes by.
        //
        // However, this is problematic when using a fake clock and artificial
        // ticks. Instead of ticking raft indefinitely until raft.Ready is
        // consumed, this check is added to indirectly guarantee
        // that the first ConfChange is actually consumed and we can safely
        // proceed to tick raft.
        Eventually(func() error {
            _, err := n.chains[id].storage.Entries(1, 1, 1)
            return err
        }, LongEventualTimeout).ShouldNot(HaveOccurred())
        Eventually(n.chains[id].WaitReady, LongEventualTimeout).ShouldNot(HaveOccurred())
    }
}

func (n *network) stop(ids ...uint64) {
    nodes := ids
    if len(nodes) == 0 {
        for i := range n.chains {
            nodes = append(nodes, i)
        }
    }

    for _, id := range nodes {
        c := n.chains[id]
        c.Halt()
        Eventually(c.Errored).Should(BeClosed())
        select {
        case <-c.stopped:
        default:
            close(c.stopped)
        }
    }
}

func (n *network) exec(f func(c *chain), ids ...uint64) {
    if len(ids) == 0 {
        for _, c := range n.chains {
            f(c)
        }

        return
    }

    for _, i := range ids {
        f(n.chains[i])
    }
}
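
// Illustrative sketch, not part of the original suite: the lifecycle the multi-node
// specs in this file follow. The metadata, consenter map and data directory are
// built in BeforeEach blocks in the real specs; here they are assumed to be supplied
// by the caller, and the function name is an assumption for the example.
func exampleNetworkLifecycle(dataDir string, meta *raftprotos.BlockMetadata, consenters map[uint64]*raftprotos.Consenter, cryptoProvider bccsp.BCCSP) {
    network := createNetwork(time.Second, "example-channel", dataDir, meta, consenters, cryptoProvider)
    network.init()   // construct the etcdraft.Chain for every node
    network.start()  // start all nodes and wait until raft is ready to be ticked
    network.elect(1) // deterministically make node 1 the leader

    network.exec(func(c *chain) {
        c.cutter.CutNext = true // every envelope becomes its own block on every node
    })

    network.stop()
}
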
// join connects a node to the network and ticks the leader to trigger a
// heartbeat, so the newly joined node can detect the leader.
//
// expectLeaderChange controls whether a leader change should be observed
// on the newly joined node:
//   - it should be true if the newly joined node was the leader
//   - it should be false if the newly joined node was a follower and
//     already knows the leader.
func (n *network) join(id uint64, expectLeaderChange bool) {
    n.connect(id)

    n.RLock()
    leader, follower := n.chains[n.leader], n.chains[id]
    n.RUnlock()

    step := leader.getStepFunc()
    signal := make(chan struct{})
    leader.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
        if dest == id {
            // close the signal channel when a message targeting the newly
            // joined node is observed on the wire.
            select {
            case <-signal:
            default:
                close(signal)
            }
        }

        return step(dest, msg)
    })

    // Tick the leader so it sends out a heartbeat to the new node.
    // One tick _may_ not be enough because the leader might be busy
    // and this tick could be dropped on the floor.
    Eventually(func() <-chan struct{} {
        leader.clock.Increment(interval)
        return signal
    }, LongEventualTimeout, 100*time.Millisecond).Should(BeClosed())

    leader.setStepFunc(step)

    if expectLeaderChange {
        Eventually(follower.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: n.leader, RaftState: raft.StateFollower})))
    }

    // wait for the newly joined node to catch up with the leader
    i, err := n.chains[n.leader].opts.MemoryStorage.LastIndex()
    Expect(err).NotTo(HaveOccurred())
    Eventually(n.chains[id].opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(i))
}

// elect deterministically elects a node as leader
func (n *network) elect(id uint64) {
    n.RLock()
    // Skip observing the leader change on followers if the same leader is elected
    // as the previous one, because this may happen too quickly from a slow
    // follower's point of view, and the 0 -> X transition may not be observed at all.
    observeFollowers := id != n.leader
    candidate := n.chains[id]
    var followers []*chain
    for _, c := range n.chains {
        if c.id != id {
            followers = append(followers, c)
        }
    }
    n.RUnlock()

    // Send the node an artificial MsgTimeoutNow to emulate leadership transfer.
    fmt.Fprintf(GinkgoWriter, "Send artificial MsgTimeoutNow to elect node %d\n", id)
    candidate.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow})}, 0)
    Eventually(candidate.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateLeader)))

    n.Lock()
    n.leader = id
    n.Unlock()

    if !observeFollowers {
        return
    }

    // now observe the leader change on the other nodes
    for _, c := range followers {
        if c.id == id {
            continue
        }

        select {
        case <-c.stopped: // skip the check if the node is stopped
        case <-c.unstarted: // skip the check if the node is not started yet
        default:
            if n.linked(c.id, id) && n.connected(c.id) {
                Eventually(c.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateFollower)))
            }
        }
    }
}

// newConfigEnv wraps the given config update in a config transaction envelope
// for the channel.
func newConfigEnv(chainID string, headerType common.HeaderType, configUpdateEnv *common.ConfigUpdateEnvelope) *common.Envelope {
    return &common.Envelope{
        Payload: marshalOrPanic(&common.Payload{
            Header: &common.Header{
                ChannelHeader: marshalOrPanic(&common.ChannelHeader{
                    Type:      int32(headerType),
                    ChannelId: chainID,
                }),
            },
            Data: marshalOrPanic(&common.ConfigEnvelope{
                LastUpdate: &common.Envelope{
                    Payload: marshalOrPanic(&common.Payload{
                        Header: &common.Header{
                            ChannelHeader: marshalOrPanic(&common.ChannelHeader{
                                Type:      int32(common.HeaderType_CONFIG_UPDATE),
                                ChannelId: chainID,
                            }),
                        },
                        Data: marshalOrPanic(configUpdateEnv),
                    }), // common.Payload
                }, // LastUpdate
            }),
        }),
    }
}

func newConfigUpdateEnv(chainID string, oldValues, newValues map[string]*common.ConfigValue) *common.ConfigUpdateEnvelope {
    return &common.ConfigUpdateEnvelope{
        ConfigUpdate: marshalOrPanic(&common.ConfigUpdate{
            ChannelId: chainID,
            ReadSet: &common.ConfigGroup{
                Groups: map[string]*common.ConfigGroup{
                    "Orderer": {
                        Values: oldValues,
                    },
                },
            },
            WriteSet: &common.ConfigGroup{
                Groups: map[string]*common.ConfigGroup{
                    "Orderer": {
                        Values: newValues,
                    },
                },
            }, // WriteSet
        }),
    }
}

func getSeedBlock() *common.Block {
    return &common.Block{
        Header:   &common.BlockHeader{},
        Data:     &common.BlockData{Data: [][]byte{[]byte("foo")}},
        Metadata: &common.BlockMetadata{Metadata: make([][]byte, 4)},
    }
}
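
// Illustrative sketch, not part of the original suite: composing the two config
// helpers above into a config transaction envelope for a channel. The "BatchSize"
// key and the empty ConfigValue are placeholders for the example; real specs put
// actual orderer configuration values here.
func exampleConfigEnv(chainID string) *common.Envelope {
    newValues := map[string]*common.ConfigValue{
        "BatchSize": {}, // placeholder value, for illustration only
    }
    configUpdate := newConfigUpdateEnv(chainID, nil, newValues)
    return newConfigEnv(chainID, common.HeaderType_CONFIG, configUpdate)
}
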
func StateEqual(lead uint64, state raft.StateType) types.GomegaMatcher {
    return Equal(raft.SoftState{Lead: lead, RaftState: state})
}

func BeLeader() types.GomegaMatcher {
    return &StateMatcher{expect: raft.StateLeader}
}

func BeFollower() types.GomegaMatcher {
    return &StateMatcher{expect: raft.StateFollower}
}

type StateMatcher struct {
    expect raft.StateType
}

func (stmatcher *StateMatcher) Match(actual interface{}) (success bool, err error) {
    state, ok := actual.(raft.SoftState)
    if !ok {
        return false, errors.Errorf("StateMatcher expects a raft SoftState")
    }

    return state.RaftState == stmatcher.expect, nil
}

func (stmatcher *StateMatcher) FailureMessage(actual interface{}) (message string) {
    state, ok := actual.(raft.SoftState)
    if !ok {
        return "StateMatcher expects a raft SoftState"
    }

    return fmt.Sprintf("Expected %s to be %s", state.RaftState, stmatcher.expect)
}

func (stmatcher *StateMatcher) NegatedFailureMessage(actual interface{}) (message string) {
    state, ok := actual.(raft.SoftState)
    if !ok {
        return "StateMatcher expects a raft SoftState"
    }

    return fmt.Sprintf("Expected %s not to be %s", state.RaftState, stmatcher.expect)
}

func noOpBlockPuller() (etcdraft.BlockPuller, error) {
    bp := &mocks.FakeBlockPuller{}
    return bp, nil
}
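
// Illustrative sketch, not part of the original suite: how the matchers above are
// typically combined with a chain's observe channel inside a spec. The concrete
// chain argument and the expectations are assumptions for the example.
func exampleObserveLeadership(c *chain) {
    // Wait for the node to report a leader with ID 1 and itself in leader state ...
    Eventually(c.observe, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
    // ... or, when the concrete leader ID does not matter:
    Eventually(c.observe, LongEventualTimeout).Should(Receive(BeLeader()))
    // And assert that no follower state is reported for a while.
    Consistently(c.observe).ShouldNot(Receive(BeFollower()))
}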