github.com/hechain20/hechain@v0.0.0-20220316014945-b544036ba106/orderer/consensus/etcdraft/chain_test.go (about) 1 /* 2 Copyright hechain. All Rights Reserved. 3 4 SPDX-License-Identifier: Apache-2.0 5 */ 6 7 package etcdraft_test 8 9 import ( 10 "encoding/pem" 11 "fmt" 12 "io/ioutil" 13 "os" 14 "os/user" 15 "path" 16 "sync" 17 "time" 18 19 "code.cloudfoundry.org/clock/fakeclock" 20 "github.com/golang/protobuf/proto" 21 "github.com/hechain20/hechain/bccsp" 22 "github.com/hechain20/hechain/bccsp/factory" 23 "github.com/hechain20/hechain/bccsp/sw" 24 "github.com/hechain20/hechain/common/channelconfig" 25 "github.com/hechain20/hechain/common/crypto/tlsgen" 26 "github.com/hechain20/hechain/common/flogging" 27 "github.com/hechain20/hechain/orderer/common/cluster" 28 orderer_types "github.com/hechain20/hechain/orderer/common/types" 29 "github.com/hechain20/hechain/orderer/consensus/etcdraft" 30 "github.com/hechain20/hechain/orderer/consensus/etcdraft/mocks" 31 consensusmocks "github.com/hechain20/hechain/orderer/consensus/mocks" 32 mockblockcutter "github.com/hechain20/hechain/orderer/mocks/common/blockcutter" 33 "github.com/hechain20/hechain/protoutil" 34 "github.com/hyperledger/fabric-protos-go/common" 35 "github.com/hyperledger/fabric-protos-go/orderer" 36 raftprotos "github.com/hyperledger/fabric-protos-go/orderer/etcdraft" 37 . "github.com/onsi/ginkgo" 38 . "github.com/onsi/gomega" 39 "github.com/onsi/gomega/types" 40 "github.com/pkg/errors" 41 "go.etcd.io/etcd/raft" 42 "go.etcd.io/etcd/raft/raftpb" 43 "go.uber.org/zap" 44 ) 45 46 const ( 47 interval = 100 * time.Millisecond 48 LongEventualTimeout = 10 * time.Second 49 50 // 10 is the default setting of ELECTION_TICK. 51 // We used to have a small number here (2) to reduce the time for test - we don't 52 // need to tick node 10 times to trigger election - however, we are using another 53 // mechanism to trigger it now which does not depend on time: send an artificial 54 // MsgTimeoutNow to node. 55 ELECTION_TICK = 10 56 HEARTBEAT_TICK = 1 57 ) 58 59 //go:generate counterfeiter -o mocks/halt_callbacker.go --fake-name HaltCallbacker . haltCallbacker 60 type haltCallbacker interface { 61 HaltCallback() 62 } 63 64 func init() { 65 factory.InitFactories(nil) 66 } 67 68 func mockOrderer(metadata []byte) *mocks.OrdererConfig { 69 return mockOrdererWithBatchTimeout(time.Second, metadata) 70 } 71 72 func mockOrdererWithBatchTimeout(batchTimeout time.Duration, metadata []byte) *mocks.OrdererConfig { 73 mockOrderer := &mocks.OrdererConfig{} 74 mockOrderer.BatchTimeoutReturns(batchTimeout) 75 mockOrderer.ConsensusMetadataReturns(metadata) 76 return mockOrderer 77 } 78 79 func mockOrdererWithTLSRootCert(batchTimeout time.Duration, metadata []byte, tlsCA tlsgen.CA) *mocks.OrdererConfig { 80 mockOrderer := mockOrdererWithBatchTimeout(batchTimeout, metadata) 81 mockOrg := &mocks.OrdererOrg{} 82 mockMSP := &mocks.MSP{} 83 mockMSP.GetTLSRootCertsReturns([][]byte{tlsCA.CertBytes()}) 84 mockOrg.MSPReturns(mockMSP) 85 mockOrderer.OrganizationsReturns(map[string]channelconfig.OrdererOrg{ 86 "fake-org": mockOrg, 87 }) 88 return mockOrderer 89 } 90 91 // for some test cases we chmod file/dir to test failures caused by exotic permissions. 92 // however this does not work if tests are running as root, i.e. in a container. 
93 func skipIfRoot() { 94 u, err := user.Current() 95 Expect(err).NotTo(HaveOccurred()) 96 if u.Uid == "0" { 97 Skip("you are running test as root, there's no way to make files unreadable") 98 } 99 } 100 101 var _ = Describe("Chain", func() { 102 var ( 103 env *common.Envelope 104 channelID string 105 tlsCA tlsgen.CA 106 logger *flogging.FabricLogger 107 ) 108 109 BeforeEach(func() { 110 tlsCA, _ = tlsgen.NewCA() 111 channelID = "test-channel" 112 logger = flogging.NewFabricLogger(zap.NewExample()) 113 env = &common.Envelope{ 114 Payload: marshalOrPanic(&common.Payload{ 115 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 116 Data: []byte("TEST_MESSAGE"), 117 }), 118 } 119 }) 120 121 Describe("Single Raft node", func() { 122 var ( 123 configurator *mocks.FakeConfigurator 124 consenterMetadata *raftprotos.ConfigMetadata 125 consenters map[uint64]*raftprotos.Consenter 126 clock *fakeclock.FakeClock 127 opts etcdraft.Options 128 support *consensusmocks.FakeConsenterSupport 129 cutter *mockblockcutter.Receiver 130 storage *raft.MemoryStorage 131 observeC chan raft.SoftState 132 chain *etcdraft.Chain 133 dataDir string 134 walDir string 135 snapDir string 136 err error 137 fakeFields *fakeMetricsFields 138 cryptoProvider bccsp.BCCSP 139 fakeHaltCallbacker *mocks.HaltCallbacker 140 ) 141 142 BeforeEach(func() { 143 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 144 Expect(err).NotTo(HaveOccurred()) 145 146 configurator = &mocks.FakeConfigurator{} 147 clock = fakeclock.NewFakeClock(time.Now()) 148 storage = raft.NewMemoryStorage() 149 150 dataDir, err = ioutil.TempDir("", "wal-") 151 Expect(err).NotTo(HaveOccurred()) 152 walDir = path.Join(dataDir, "wal") 153 snapDir = path.Join(dataDir, "snapshot") 154 155 observeC = make(chan raft.SoftState, 1) 156 157 support = &consensusmocks.FakeConsenterSupport{} 158 support.ChannelIDReturns(channelID) 159 consenterMetadata = createMetadata(1, tlsCA) 160 support.SharedConfigReturns(mockOrdererWithTLSRootCert(time.Hour, marshalOrPanic(consenterMetadata), tlsCA)) 161 162 cutter = mockblockcutter.NewReceiver() 163 support.BlockCutterReturns(cutter) 164 165 // for block creator initialization 166 support.HeightReturns(1) 167 support.BlockReturns(getSeedBlock()) 168 169 meta := &raftprotos.BlockMetadata{ 170 ConsenterIds: make([]uint64, len(consenterMetadata.Consenters)), 171 NextConsenterId: 1, 172 } 173 174 for i := range meta.ConsenterIds { 175 meta.ConsenterIds[i] = meta.NextConsenterId 176 meta.NextConsenterId++ 177 } 178 179 consenters = map[uint64]*raftprotos.Consenter{} 180 for i, c := range consenterMetadata.Consenters { 181 consenters[meta.ConsenterIds[i]] = c 182 } 183 184 fakeFields = newFakeMetricsFields() 185 186 opts = etcdraft.Options{ 187 RPCTimeout: time.Second * 5, 188 RaftID: 1, 189 Clock: clock, 190 TickInterval: interval, 191 ElectionTick: ELECTION_TICK, 192 HeartbeatTick: HEARTBEAT_TICK, 193 MaxSizePerMsg: 1024 * 1024, 194 MaxInflightBlocks: 256, 195 BlockMetadata: meta, 196 Consenters: consenters, 197 Logger: logger, 198 MemoryStorage: storage, 199 WALDir: walDir, 200 SnapDir: snapDir, 201 Metrics: newFakeMetrics(fakeFields), 202 } 203 204 fakeHaltCallbacker = &mocks.HaltCallbacker{} 205 }) 206 207 campaign := func(c *etcdraft.Chain, observeC <-chan raft.SoftState) { 208 Eventually(func() <-chan raft.SoftState { 209 c.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: 
raftpb.MsgTimeoutNow, To: 1})}, 0) 210 return observeC 211 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 212 } 213 214 JustBeforeEach(func() { 215 chain, err = etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, fakeHaltCallbacker.HaltCallback, observeC) 216 Expect(err).NotTo(HaveOccurred()) 217 218 chain.Start() 219 cRel, status := chain.StatusReport() 220 Expect(cRel).To(Equal(orderer_types.ConsensusRelationConsenter)) 221 Expect(status).To(Equal(orderer_types.StatusActive)) 222 223 // When the Raft node bootstraps, it produces a ConfChange 224 // to add itself, which needs to be consumed with Ready(). 225 // If there are pending configuration changes in raft, 226 // it refuses to campaign, no matter how many ticks elapse. 227 // This is not a problem in the production code because raft.Ready 228 // will be consumed eventually, as the wall clock advances. 229 // 230 // However, this is problematic when using the fake clock and 231 // artificial ticks. Instead of ticking raft indefinitely until 232 // raft.Ready is consumed, this check is added to indirectly guarantee 233 // that the first ConfChange is actually consumed and we can safely 234 // proceed to tick the Raft FSM. 235 Eventually(func() error { 236 _, err := storage.Entries(1, 1, 1) 237 return err 238 }, LongEventualTimeout).ShouldNot(HaveOccurred()) 239 }) 240 241 AfterEach(func() { 242 chain.Halt() 243 Eventually(chain.Errored, LongEventualTimeout).Should(BeClosed()) 244 // Make sure no timer leak 245 Eventually(clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 246 os.RemoveAll(dataDir) 247 }) 248 249 Context("when a node starts up", func() { 250 It("properly configures the communication layer", func() { 251 expectedNodeConfig := nodeConfigFromMetadata(consenterMetadata) 252 Eventually(configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(1)) 253 _, arg2 := configurator.ConfigureArgsForCall(0) 254 Expect(arg2).To(Equal(expectedNodeConfig)) 255 }) 256 257 It("correctly sets the metrics labels and publishes requisite metrics", func() { 258 type withImplementers interface { 259 WithCallCount() int 260 WithArgsForCall(int) []string 261 } 262 metricsList := []withImplementers{ 263 fakeFields.fakeClusterSize, 264 fakeFields.fakeIsLeader, 265 fakeFields.fakeActiveNodes, 266 fakeFields.fakeCommittedBlockNumber, 267 fakeFields.fakeSnapshotBlockNumber, 268 fakeFields.fakeLeaderChanges, 269 fakeFields.fakeProposalFailures, 270 fakeFields.fakeDataPersistDuration, 271 fakeFields.fakeNormalProposalsReceived, 272 fakeFields.fakeConfigProposalsReceived, 273 } 274 for _, m := range metricsList { 275 Expect(m.WithCallCount()).To(Equal(1)) 276 Expect(func() string { 277 return m.WithArgsForCall(0)[1] 278 }()).To(Equal(channelID)) 279 } 280 281 Expect(fakeFields.fakeClusterSize.SetCallCount()).To(Equal(1)) 282 Expect(fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(1))) 283 Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(1)) 284 Expect(fakeFields.fakeIsLeader.SetArgsForCall(0)).To(Equal(float64(0))) 285 Expect(fakeFields.fakeActiveNodes.SetCallCount()).To(Equal(1)) 286 Expect(fakeFields.fakeActiveNodes.SetArgsForCall(0)).To(Equal(float64(0))) 287 }) 288 }) 289 290 Context("when no Raft leader is elected", func() { 291 It("fails to order envelope", func() { 292 err := chain.Order(env, 0) 293 Expect(err).To(MatchError("no Raft leader")) 294 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 295 
				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
				Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(0))
				Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1)))
			})

			It("starts proactive campaign", func() {
				// assert that even when the ticks supplied are fewer than the election
				// timeout, a leader can still be successfully elected.
				for i := 0; i < ELECTION_TICK; i++ {
					clock.Increment(interval)
					time.Sleep(10 * time.Millisecond)
				}
				Eventually(observeC, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
			})
		})

		Context("when Raft leader is elected", func() {
			JustBeforeEach(func() {
				campaign(chain, observeC)
			})

			It("updates metrics upon leader election", func() {
				Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(2))
				Expect(fakeFields.fakeIsLeader.SetArgsForCall(1)).To(Equal(float64(1)))
				Expect(fakeFields.fakeLeaderChanges.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeLeaderChanges.AddArgsForCall(0)).To(Equal(float64(1)))
			})

			It("fails to order envelope if chain is halted", func() {
				chain.Halt()
				err := chain.Order(env, 0)
				Expect(err).To(MatchError("chain is stopped"))
				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
				Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1)))
			})

			It("produces blocks following batch rules", func() {
				close(cutter.Block)

				By("cutting next batch directly")
				cutter.CutNext = true
				err := chain.Order(env, 0)
				Expect(err).NotTo(HaveOccurred())
				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
				Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call
				Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1)))

				// There are three calls to DataPersistDuration by now, corresponding to the
				// following three items arriving on the Ready channel:
				// 1. an EntryConfChange to let this node join the Raft cluster
				// 2. a SoftState and an associated increase of term in the HardState due to the node being elected leader
				// 3.
a block being committed 352 // The duration being emitted is zero since we don't tick the fake clock during this time 353 Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(3)) 354 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(0)).Should(Equal(float64(0))) 355 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(1)).Should(Equal(float64(0))) 356 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(2)).Should(Equal(float64(0))) 357 358 By("respecting batch timeout") 359 cutter.CutNext = false 360 timeout := time.Second 361 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 362 err = chain.Order(env, 0) 363 Expect(err).NotTo(HaveOccurred()) 364 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 365 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 366 367 clock.WaitForNWatchersAndIncrement(timeout, 2) 368 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 369 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call 370 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2))) 371 Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(4)) 372 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(3)).Should(Equal(float64(0))) 373 }) 374 375 It("does not reset timer for every envelope", func() { 376 close(cutter.Block) 377 378 timeout := time.Second 379 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 380 381 err := chain.Order(env, 0) 382 Expect(err).NotTo(HaveOccurred()) 383 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 384 385 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 386 387 err = chain.Order(env, 0) 388 Expect(err).NotTo(HaveOccurred()) 389 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(2)) 390 391 // the second envelope should not reset the timer; it should 392 // therefore expire if we increment it by just timeout/2 393 clock.Increment(timeout / 2) 394 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 395 }) 396 397 It("does not write a block if halted before timeout", func() { 398 close(cutter.Block) 399 timeout := time.Second 400 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 401 402 err := chain.Order(env, 0) 403 Expect(err).NotTo(HaveOccurred()) 404 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 405 406 // wait for timer to start 407 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 408 409 chain.Halt() 410 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 411 }) 412 413 It("stops the timer if a batch is cut", func() { 414 close(cutter.Block) 415 416 timeout := time.Second 417 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 418 419 err := chain.Order(env, 0) 420 Expect(err).NotTo(HaveOccurred()) 421 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 422 423 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 424 425 By("force a batch to be cut before timer expires") 426 cutter.CutNext = true 427 err = chain.Order(env, 0) 428 Expect(err).NotTo(HaveOccurred()) 429 430 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 431 b, _ := support.WriteBlockArgsForCall(0) 432 Expect(b.Data.Data).To(HaveLen(2)) 433 Expect(cutter.CurBatch()).To(HaveLen(0)) 434 435 // this should start a fresh timer 436 
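				// (the previous timer was stopped when the batch above was cut, so ordering
				// this envelope is expected to arm a brand-new, full-length timeout rather
				// than resume the half-elapsed one)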
cutter.CutNext = false 437 err = chain.Order(env, 0) 438 Expect(err).NotTo(HaveOccurred()) 439 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 440 441 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 442 Consistently(support.WriteBlockCallCount).Should(Equal(1)) 443 444 clock.Increment(timeout / 2) 445 446 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 447 b, _ = support.WriteBlockArgsForCall(1) 448 Expect(b.Data.Data).To(HaveLen(1)) 449 }) 450 451 It("cut two batches if incoming envelope does not fit into first batch", func() { 452 close(cutter.Block) 453 454 timeout := time.Second 455 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 456 457 err := chain.Order(env, 0) 458 Expect(err).NotTo(HaveOccurred()) 459 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 460 461 cutter.IsolatedTx = true 462 err = chain.Order(env, 0) 463 Expect(err).NotTo(HaveOccurred()) 464 465 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 466 }) 467 468 Context("revalidation", func() { 469 BeforeEach(func() { 470 close(cutter.Block) 471 472 timeout := time.Hour 473 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 474 support.SequenceReturns(1) 475 }) 476 477 It("enqueue if envelope is still valid", func() { 478 support.ProcessNormalMsgReturns(1, nil) 479 480 err := chain.Order(env, 0) 481 Expect(err).NotTo(HaveOccurred()) 482 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 483 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 484 }) 485 486 It("does not enqueue if envelope is not valid", func() { 487 support.ProcessNormalMsgReturns(1, errors.Errorf("Envelope is invalid")) 488 489 err := chain.Order(env, 0) 490 Expect(err).NotTo(HaveOccurred()) 491 Consistently(cutter.CurBatch).Should(HaveLen(0)) 492 Consistently(clock.WatcherCount).Should(Equal(1)) 493 }) 494 }) 495 496 It("unblocks Errored if chain is halted", func() { 497 errorC := chain.Errored() 498 Expect(errorC).NotTo(BeClosed()) 499 chain.Halt() 500 Eventually(errorC, LongEventualTimeout).Should(BeClosed()) 501 }) 502 503 It("does not call the halt callback function when halting externally", func() { 504 chain.Halt() 505 Consistently(fakeHaltCallbacker.HaltCallbackCallCount).Should(Equal(0)) 506 }) 507 508 Describe("Config updates", func() { 509 var ( 510 configEnv *common.Envelope 511 configSeq uint64 512 ) 513 514 Context("when a type A config update comes", func() { 515 Context("for existing channel", func() { 516 // use to prepare the Orderer Values 517 BeforeEach(func() { 518 newValues := map[string]*common.ConfigValue{ 519 "BatchTimeout": { 520 Version: 1, 521 Value: marshalOrPanic(&orderer.BatchTimeout{ 522 Timeout: "3ms", 523 }), 524 }, 525 "ConsensusType": { 526 Version: 4, 527 }, 528 } 529 oldValues := map[string]*common.ConfigValue{ 530 "ConsensusType": { 531 Version: 4, 532 }, 533 } 534 configEnv = newConfigEnv(channelID, 535 common.HeaderType_CONFIG, 536 newConfigUpdateEnv(channelID, oldValues, newValues), 537 ) 538 configSeq = 0 539 }) // BeforeEach block 540 541 Context("without revalidation (i.e. 
correct config sequence)", func() { 542 Context("without pending normal envelope", func() { 543 It("should create a config block and no normal block", func() { 544 err := chain.Configure(configEnv, configSeq) 545 Expect(err).NotTo(HaveOccurred()) 546 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 547 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 548 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 549 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 550 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call 551 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1))) 552 }) 553 }) 554 555 Context("with pending normal envelope", func() { 556 It("should create a normal block and a config block", func() { 557 // We do not need to block the cutter from ordering in our test case and therefore close this channel. 558 close(cutter.Block) 559 560 By("adding a normal envelope") 561 err := chain.Order(env, 0) 562 Expect(err).NotTo(HaveOccurred()) 563 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 564 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 565 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 566 567 By("adding a config envelope") 568 err = chain.Configure(configEnv, configSeq) 569 Expect(err).NotTo(HaveOccurred()) 570 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 571 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 572 573 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 574 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 575 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call 576 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2))) 577 }) 578 }) 579 }) 580 581 Context("with revalidation (i.e. 
incorrect config sequence)", func() { 582 BeforeEach(func() { 583 close(cutter.Block) 584 support.SequenceReturns(1) // this causes the revalidation 585 }) 586 587 It("should create config block upon correct revalidation", func() { 588 support.ProcessConfigMsgReturns(configEnv, 1, nil) // nil implies correct revalidation 589 590 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 591 Consistently(clock.WatcherCount).Should(Equal(1)) 592 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 593 }) 594 595 It("should not create config block upon incorrect revalidation", func() { 596 support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence")) 597 598 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 599 Consistently(clock.WatcherCount).Should(Equal(1)) 600 Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) // no call to WriteConfigBlock 601 }) 602 603 It("should not disturb current running timer upon incorrect revalidation", func() { 604 support.ProcessNormalMsgReturns(1, nil) 605 support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence")) 606 607 Expect(chain.Order(env, configSeq)).To(Succeed()) 608 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 609 610 clock.Increment(30 * time.Minute) 611 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 612 613 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 614 Consistently(clock.WatcherCount).Should(Equal(2)) 615 616 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 617 Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) 618 619 clock.Increment(30 * time.Minute) 620 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 621 }) 622 }) 623 }) 624 625 Context("for creating a new channel", func() { 626 // use to prepare the Orderer Values 627 BeforeEach(func() { 628 chainID := "mychannel" 629 values := make(map[string]*common.ConfigValue) 630 configEnv = newConfigEnv(chainID, 631 common.HeaderType_CONFIG, 632 newConfigUpdateEnv(chainID, nil, values), 633 ) 634 configSeq = 0 635 }) // BeforeEach block 636 637 It("should be able to create a channel", func() { 638 err := chain.Configure(configEnv, configSeq) 639 Expect(err).NotTo(HaveOccurred()) 640 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 641 }) 642 }) 643 }) // Context block for type A config 644 645 Context("when a type B config update comes", func() { 646 Context("updating protocol values", func() { 647 // use to prepare the Orderer Values 648 BeforeEach(func() { 649 values := map[string]*common.ConfigValue{ 650 "ConsensusType": { 651 Version: 1, 652 Value: marshalOrPanic(&orderer.ConsensusType{ 653 Metadata: marshalOrPanic(consenterMetadata), 654 }), 655 }, 656 } 657 configEnv = newConfigEnv(channelID, 658 common.HeaderType_CONFIG, 659 newConfigUpdateEnv(channelID, nil, values)) 660 configSeq = 0 661 }) // BeforeEach block 662 663 It("should be able to process config update of type B", func() { 664 err := chain.Configure(configEnv, configSeq) 665 Expect(err).NotTo(HaveOccurred()) 666 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 667 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 668 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 669 }) 670 }) 671 672 Context("updating consenters set by exactly one node", 
func() { 673 It("should be able to process config update adding single node", func() { 674 metadata := proto.Clone(consenterMetadata).(*raftprotos.ConfigMetadata) 675 metadata.Consenters = append(metadata.Consenters, &raftprotos.Consenter{ 676 Host: "localhost", 677 Port: 7050, 678 ServerTlsCert: serverTLSCert(tlsCA), 679 ClientTlsCert: clientTLSCert(tlsCA), 680 }) 681 682 values := map[string]*common.ConfigValue{ 683 "ConsensusType": { 684 Version: 1, 685 Value: marshalOrPanic(&orderer.ConsensusType{ 686 Metadata: marshalOrPanic(metadata), 687 }), 688 }, 689 } 690 configEnv = newConfigEnv(channelID, 691 common.HeaderType_CONFIG, 692 newConfigUpdateEnv(channelID, nil, values)) 693 configSeq = 0 694 695 err := chain.Configure(configEnv, configSeq) 696 Expect(err).NotTo(HaveOccurred()) 697 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 698 }) 699 }) 700 }) 701 }) 702 703 Describe("Crash Fault Tolerance", func() { 704 var raftMetadata *raftprotos.BlockMetadata 705 706 BeforeEach(func() { 707 raftMetadata = &raftprotos.BlockMetadata{ 708 ConsenterIds: []uint64{1}, 709 NextConsenterId: 2, 710 } 711 }) 712 713 Describe("when a chain is started with existing WAL", func() { 714 var ( 715 m1 *raftprotos.BlockMetadata 716 m2 *raftprotos.BlockMetadata 717 ) 718 JustBeforeEach(func() { 719 // to generate WAL data, we start a chain, 720 // order several envelopes and then halt the chain. 721 close(cutter.Block) 722 cutter.CutNext = true 723 724 // enque some data to be persisted on disk by raft 725 err := chain.Order(env, uint64(0)) 726 Expect(err).NotTo(HaveOccurred()) 727 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 728 729 _, metadata := support.WriteBlockArgsForCall(0) 730 m1 = &raftprotos.BlockMetadata{} 731 proto.Unmarshal(metadata, m1) 732 733 err = chain.Order(env, uint64(0)) 734 Expect(err).NotTo(HaveOccurred()) 735 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 736 737 _, metadata = support.WriteBlockArgsForCall(1) 738 m2 = &raftprotos.BlockMetadata{} 739 proto.Unmarshal(metadata, m2) 740 741 chain.Halt() 742 }) 743 744 It("replays blocks from committed entries", func() { 745 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil) 746 c.init() 747 c.Start() 748 defer c.Halt() 749 750 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 751 752 _, metadata := c.support.WriteBlockArgsForCall(0) 753 m := &raftprotos.BlockMetadata{} 754 proto.Unmarshal(metadata, m) 755 Expect(m.RaftIndex).To(Equal(m1.RaftIndex)) 756 757 _, metadata = c.support.WriteBlockArgsForCall(1) 758 m = &raftprotos.BlockMetadata{} 759 proto.Unmarshal(metadata, m) 760 Expect(m.RaftIndex).To(Equal(m2.RaftIndex)) 761 762 // chain should keep functioning 763 campaign(c.Chain, c.observe) 764 765 c.cutter.CutNext = true 766 767 err := c.Order(env, uint64(0)) 768 Expect(err).NotTo(HaveOccurred()) 769 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 770 }) 771 772 It("only replays blocks after Applied index", func() { 773 raftMetadata.RaftIndex = m1.RaftIndex 774 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil) 775 c.support.WriteBlock(support.WriteBlockArgsForCall(0)) 776 777 c.init() 778 c.Start() 779 defer c.Halt() 780 781 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 782 783 _, metadata := c.support.WriteBlockArgsForCall(1) 784 m := 
&raftprotos.BlockMetadata{} 785 proto.Unmarshal(metadata, m) 786 Expect(m.RaftIndex).To(Equal(m2.RaftIndex)) 787 788 // chain should keep functioning 789 campaign(c.Chain, c.observe) 790 791 c.cutter.CutNext = true 792 793 err := c.Order(env, uint64(0)) 794 Expect(err).NotTo(HaveOccurred()) 795 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 796 }) 797 798 It("does not replay any block if already in sync", func() { 799 raftMetadata.RaftIndex = m2.RaftIndex 800 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil) 801 c.init() 802 c.Start() 803 defer c.Halt() 804 805 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 806 807 // chain should keep functioning 808 campaign(c.Chain, c.observe) 809 810 c.cutter.CutNext = true 811 812 err := c.Order(env, uint64(0)) 813 Expect(err).NotTo(HaveOccurred()) 814 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 815 }) 816 817 Context("WAL file is not readable", func() { 818 It("fails to load wal", func() { 819 skipIfRoot() 820 821 files, err := ioutil.ReadDir(walDir) 822 Expect(err).NotTo(HaveOccurred()) 823 for _, f := range files { 824 os.Chmod(path.Join(walDir, f.Name()), 0o300) 825 } 826 827 c, err := etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC) 828 Expect(c).To(BeNil()) 829 Expect(err).To(MatchError(ContainSubstring("permission denied"))) 830 }) 831 }) 832 }) 833 834 Describe("when snapshotting is enabled (snapshot interval is not zero)", func() { 835 var ( 836 ledgerLock sync.Mutex 837 ledger map[uint64]*common.Block 838 ) 839 840 countFiles := func() int { 841 files, err := ioutil.ReadDir(snapDir) 842 Expect(err).NotTo(HaveOccurred()) 843 return len(files) 844 } 845 846 BeforeEach(func() { 847 opts.SnapshotCatchUpEntries = 2 848 849 close(cutter.Block) 850 cutter.CutNext = true 851 852 ledgerLock.Lock() 853 ledger = map[uint64]*common.Block{ 854 0: getSeedBlock(), // genesis block 855 } 856 ledgerLock.Unlock() 857 858 support.WriteBlockStub = func(block *common.Block, meta []byte) { 859 b := proto.Clone(block).(*common.Block) 860 861 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 862 Expect(err).NotTo(HaveOccurred()) 863 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 864 865 ledgerLock.Lock() 866 defer ledgerLock.Unlock() 867 ledger[b.Header.Number] = b 868 } 869 870 support.HeightStub = func() uint64 { 871 ledgerLock.Lock() 872 defer ledgerLock.Unlock() 873 return uint64(len(ledger)) 874 } 875 }) 876 877 Context("Small SnapshotInterval", func() { 878 BeforeEach(func() { 879 opts.SnapshotIntervalSize = 1 880 }) 881 882 It("writes snapshot file to snapDir", func() { 883 // Scenario: start a chain with SnapInterval = 1 byte, expect it to take 884 // one snapshot for each block 885 886 i, _ := opts.MemoryStorage.FirstIndex() 887 888 Expect(chain.Order(env, uint64(0))).To(Succeed()) 889 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 890 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 891 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 892 Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(2)) // incl. 
initial call
					s, _ := opts.MemoryStorage.Snapshot()
					b := protoutil.UnmarshalBlockOrPanic(s.Data)
					Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(1)).To(Equal(float64(b.Header.Number)))

					i, _ = opts.MemoryStorage.FirstIndex()

					Expect(chain.Order(env, uint64(0))).To(Succeed())
					Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))

					Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
					Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
					Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(3)) // incl. initial call
					s, _ = opts.MemoryStorage.Snapshot()
					b = protoutil.UnmarshalBlockOrPanic(s.Data)
					Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(2)).To(Equal(float64(b.Header.Number)))
				})

				It("pauses chain if sync is in progress", func() {
					// Scenario:
					// after a snapshot is taken, reboot chain with raftIndex = 0
					// chain should attempt to sync upon reboot, and blocks on
					// the `WaitReady` API

					i, _ := opts.MemoryStorage.FirstIndex()

					Expect(chain.Order(env, uint64(0))).To(Succeed())
					Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
					Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
					Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))

					i, _ = opts.MemoryStorage.FirstIndex()

					Expect(chain.Order(env, uint64(0))).To(Succeed())
					Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
					Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
					Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))

					chain.Halt()

					c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
					c.init()

					signal := make(chan struct{})

					c.puller.PullBlockStub = func(i uint64) *common.Block {
						<-signal // blocking for assertions
						ledgerLock.Lock()
						defer ledgerLock.Unlock()
						if i >= uint64(len(ledger)) {
							return nil
						}

						// This is a false assumption - a single node shouldn't be able to pull blocks from anywhere.
						// However, this test is mainly here to assert that the chain attempts catchup upon start,
						// so we can live with it.
						return ledger[i]
					}

					err := c.WaitReady()
					Expect(err).To(MatchError("chain is not started"))

					c.Start()
					defer c.Halt()

					// pull block is called, so the chain should be catching up now and WaitReady should block
					signal <- struct{}{}

					done := make(chan error)
					go func() {
						done <- c.WaitReady()
					}()

					Consistently(done).ShouldNot(Receive())
					close(signal)                         // unblock block puller
					Eventually(done).Should(Receive(nil)) // WaitReady should be unblocked
					Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
				})

				It("commits block from snapshot if it's missing from ledger", func() {
					// Scenario:
					// A single node exits right after a snapshot is taken, while the block
					// in it hasn't yet been successfully persisted into the ledger (there can
					// be one async block write in-flight). Then the node is restarted, and
					// catches up using the block in the snapshot.
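					// (the assertion at the end of this test therefore expects the restarted
					// chain to issue exactly one WriteBlock call against its mock ledger)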

					Expect(chain.Order(env, uint64(0))).To(Succeed())
					Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
					Eventually(countFiles, LongEventualTimeout).Should(Equal(1))

					chain.Halt()

					c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
					c.init()
					c.Start()
					defer c.Halt()

					Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
				})

				It("restores snapshot w/o extra entries", func() {
					// Scenario:
					// after a snapshot is taken, no more entries are appended.
					// then node is restarted, it loads snapshot, finds its term
					// and index. While replaying WAL to memory storage, it should
					// not append any entry because no extra entry was appended
					// after snapshot was taken.

					Expect(chain.Order(env, uint64(0))).To(Succeed())
					Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
					_, metadata := support.WriteBlockArgsForCall(0)
					m := &raftprotos.BlockMetadata{}
					proto.Unmarshal(metadata, m)

					Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
					Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1))
					snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created
					Expect(err).NotTo(HaveOccurred())
					i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory
					Expect(err).NotTo(HaveOccurred())

					// expect storage to preserve SnapshotCatchUpEntries entries before snapshot
					Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1))

					chain.Halt()

					raftMetadata.RaftIndex = m.RaftIndex
					c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
					c.opts.SnapshotIntervalSize = 1

					c.init()
					c.Start()

					// following arithmetic reflects how etcdraft MemoryStorage is implemented
					// when no entry is appended after snapshot being loaded.
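					// A sketch of the expected state, assuming etcd/raft's MemoryStorage
					// semantics (ApplySnapshot resets the log to a single dummy entry at
					// snapshot.Metadata.Index):
					//   FirstIndex() == snapshot.Metadata.Index + 1
					//   LastIndex()  == snapshot.Metadata.Index
					// i.e. the storage holds no real entries until something new is appended.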
					Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1))
					Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index))

					// chain keeps functioning
					Eventually(func() <-chan raft.SoftState {
						c.clock.Increment(interval)
						return c.observe
					}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))

					c.cutter.CutNext = true
					err = c.Order(env, uint64(0))
					Expect(err).NotTo(HaveOccurred())
					Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

					Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
					c.Halt()

					_, metadata = c.support.WriteBlockArgsForCall(0)
					m = &raftprotos.BlockMetadata{}
					proto.Unmarshal(metadata, m)
					raftMetadata.RaftIndex = m.RaftIndex
					cx := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)

					cx.init()
					cx.Start()
					defer cx.Halt()

					// chain keeps functioning
					Eventually(func() <-chan raft.SoftState {
						cx.clock.Increment(interval)
						return cx.observe
					}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
				})
			})

			Context("Large SnapshotInterval", func() {
				BeforeEach(func() {
					opts.SnapshotIntervalSize = 1024
				})

				It("restores snapshot w/ extra entries", func() {
					// Scenario:
					// after a snapshot is taken, more entries are appended.
					// then node is restarted, it loads snapshot, finds its term
					// and index. While replaying WAL to memory storage, it should
					// append some entries.

					largeEnv := &common.Envelope{
						Payload: marshalOrPanic(&common.Payload{
							Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
							Data:   make([]byte, 500),
						}),
					}

					By("Ordering two large envelopes to trigger snapshot")
					Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
					Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

					Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
					Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))

					_, metadata := support.WriteBlockArgsForCall(1)
					m := &raftprotos.BlockMetadata{}
					proto.Unmarshal(metadata, m)

					// check snapshot does exist
					Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
					Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1))
					snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created
					Expect(err).NotTo(HaveOccurred())
					i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory
					Expect(err).NotTo(HaveOccurred())

					// expect storage to preserve SnapshotCatchUpEntries entries before snapshot
					Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1))

					By("Ordering another envelope to append new data to memory after snapshot")
					Expect(chain.Order(env, uint64(0))).To(Succeed())
					Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))

					lasti, _ := opts.MemoryStorage.LastIndex()

					chain.Halt()

					raftMetadata.RaftIndex = m.RaftIndex
					c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
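					// hand the blocks written by the halted chain back to the restarted chain's
					// mock support (see the loop below), simulating a ledger that already
					// contains them before the node comes back up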
1113 cnt := support.WriteBlockCallCount() 1114 for i := 0; i < cnt; i++ { 1115 c.support.WriteBlock(support.WriteBlockArgsForCall(i)) 1116 } 1117 1118 By("Restarting the node") 1119 c.init() 1120 c.Start() 1121 defer c.Halt() 1122 1123 By("Checking latest index is larger than index in snapshot") 1124 Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1)) 1125 Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(lasti)) 1126 }) 1127 1128 When("local ledger is in sync with snapshot", func() { 1129 It("does not pull blocks and still respects snapshot interval", func() { 1130 // Scenario: 1131 // - snapshot is taken at block 2 1132 // - order one more envelope (block 3) 1133 // - reboot chain at block 2 1134 // - block 3 should be replayed from wal 1135 // - order another envelope to trigger snapshot, containing block 3 & 4 1136 // Assertions: 1137 // - block puller should NOT be called 1138 // - chain should keep functioning after reboot 1139 // - chain should respect snapshot interval to trigger next snapshot 1140 1141 largeEnv := &common.Envelope{ 1142 Payload: marshalOrPanic(&common.Payload{ 1143 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 1144 Data: make([]byte, 500), 1145 }), 1146 } 1147 1148 By("Ordering two large envelopes to trigger snapshot") 1149 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1150 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1151 1152 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1153 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1154 1155 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 1156 1157 _, metadata := support.WriteBlockArgsForCall(1) 1158 m := &raftprotos.BlockMetadata{} 1159 proto.Unmarshal(metadata, m) 1160 1161 By("Cutting block [3]") 1162 // order another envelope. 
this should not trigger snapshot 1163 err = chain.Order(largeEnv, uint64(0)) 1164 Expect(err).NotTo(HaveOccurred()) 1165 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 1166 1167 chain.Halt() 1168 1169 raftMetadata.RaftIndex = m.RaftIndex 1170 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil) 1171 // replay block 1&2 1172 c.support.WriteBlock(support.WriteBlockArgsForCall(0)) 1173 c.support.WriteBlock(support.WriteBlockArgsForCall(1)) 1174 1175 c.opts.SnapshotIntervalSize = 1024 1176 1177 By("Restarting node at block [2]") 1178 c.init() 1179 c.Start() 1180 defer c.Halt() 1181 1182 // elect leader 1183 campaign(c.Chain, c.observe) 1184 1185 By("Ordering one more block to trigger snapshot") 1186 c.cutter.CutNext = true 1187 err = c.Order(largeEnv, uint64(0)) 1188 Expect(err).NotTo(HaveOccurred()) 1189 1190 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(4)) 1191 Expect(c.puller.PullBlockCallCount()).Should(BeZero()) 1192 // old snapshot file is retained 1193 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 1194 }) 1195 }) 1196 1197 It("respects snapshot interval after reboot", func() { 1198 largeEnv := &common.Envelope{ 1199 Payload: marshalOrPanic(&common.Payload{ 1200 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 1201 Data: make([]byte, 500), 1202 }), 1203 } 1204 1205 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1206 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1207 // check no snapshot is taken 1208 Consistently(countFiles).Should(Equal(0)) 1209 1210 _, metadata := support.WriteBlockArgsForCall(0) 1211 m := &raftprotos.BlockMetadata{} 1212 proto.Unmarshal(metadata, m) 1213 1214 chain.Halt() 1215 1216 raftMetadata.RaftIndex = m.RaftIndex 1217 c1 := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil) 1218 cnt := support.WriteBlockCallCount() 1219 for i := 0; i < cnt; i++ { 1220 c1.support.WriteBlock(support.WriteBlockArgsForCall(i)) 1221 } 1222 c1.cutter.CutNext = true 1223 c1.opts.SnapshotIntervalSize = 1024 1224 1225 By("Restarting chain") 1226 c1.init() 1227 c1.Start() 1228 // chain keeps functioning 1229 campaign(c1.Chain, c1.observe) 1230 1231 Expect(c1.Order(largeEnv, uint64(0))).To(Succeed()) 1232 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1233 // check snapshot does exit 1234 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 1235 }) 1236 }) 1237 }) 1238 }) 1239 1240 Context("Invalid WAL dir", func() { 1241 support := &consensusmocks.FakeConsenterSupport{} 1242 BeforeEach(func() { 1243 // for block creator initialization 1244 support.HeightReturns(1) 1245 support.BlockReturns(getSeedBlock()) 1246 }) 1247 1248 When("WAL dir is a file", func() { 1249 It("replaces file with fresh WAL dir", func() { 1250 f, err := ioutil.TempFile("", "wal-") 1251 Expect(err).NotTo(HaveOccurred()) 1252 defer os.RemoveAll(f.Name()) 1253 1254 chain, err := etcdraft.NewChain( 1255 support, 1256 etcdraft.Options{ 1257 WALDir: f.Name(), 1258 SnapDir: snapDir, 1259 Logger: logger, 1260 MemoryStorage: storage, 1261 BlockMetadata: &raftprotos.BlockMetadata{}, 1262 Metrics: newFakeMetrics(newFakeMetricsFields()), 1263 }, 1264 configurator, 1265 nil, 1266 cryptoProvider, 1267 nil, 1268 nil, 1269 observeC) 1270 Expect(chain).NotTo(BeNil()) 1271 
Expect(err).NotTo(HaveOccurred()) 1272 1273 info, err := os.Stat(f.Name()) 1274 Expect(err).NotTo(HaveOccurred()) 1275 Expect(info.IsDir()).To(BeTrue()) 1276 }) 1277 }) 1278 1279 When("WAL dir is not writeable", func() { 1280 It("replace it with fresh WAL dir", func() { 1281 d, err := ioutil.TempDir("", "wal-") 1282 Expect(err).NotTo(HaveOccurred()) 1283 defer os.RemoveAll(d) 1284 1285 err = os.Chmod(d, 0o500) 1286 Expect(err).NotTo(HaveOccurred()) 1287 1288 chain, err := etcdraft.NewChain( 1289 support, 1290 etcdraft.Options{ 1291 WALDir: d, 1292 SnapDir: snapDir, 1293 Logger: logger, 1294 MemoryStorage: storage, 1295 BlockMetadata: &raftprotos.BlockMetadata{}, 1296 Metrics: newFakeMetrics(newFakeMetricsFields()), 1297 }, 1298 nil, 1299 nil, 1300 cryptoProvider, 1301 noOpBlockPuller, 1302 nil, 1303 nil) 1304 Expect(chain).NotTo(BeNil()) 1305 Expect(err).NotTo(HaveOccurred()) 1306 }) 1307 }) 1308 1309 When("WAL parent dir is not writeable", func() { 1310 It("fails to bootstrap fresh raft node", func() { 1311 skipIfRoot() 1312 1313 d, err := ioutil.TempDir("", "wal-") 1314 Expect(err).NotTo(HaveOccurred()) 1315 defer os.RemoveAll(d) 1316 1317 err = os.Chmod(d, 0o500) 1318 Expect(err).NotTo(HaveOccurred()) 1319 1320 chain, err := etcdraft.NewChain( 1321 support, 1322 etcdraft.Options{ 1323 WALDir: path.Join(d, "wal-dir"), 1324 SnapDir: snapDir, 1325 Logger: logger, 1326 BlockMetadata: &raftprotos.BlockMetadata{}, 1327 }, 1328 nil, 1329 nil, 1330 cryptoProvider, 1331 noOpBlockPuller, 1332 nil, 1333 nil) 1334 Expect(chain).To(BeNil()) 1335 Expect(err).To(MatchError(ContainSubstring("failed to initialize WAL: mkdir"))) 1336 }) 1337 }) 1338 }) 1339 }) 1340 }) 1341 1342 Describe("2-node Raft cluster", func() { 1343 var ( 1344 network *network 1345 channelID string 1346 timeout time.Duration 1347 dataDir string 1348 c1, c2 *chain 1349 raftMetadata *raftprotos.BlockMetadata 1350 consenters map[uint64]*raftprotos.Consenter 1351 configEnv *common.Envelope 1352 cryptoProvider bccsp.BCCSP 1353 fakeHaltCallbacker *mocks.HaltCallbacker 1354 ) 1355 BeforeEach(func() { 1356 var err error 1357 1358 channelID = "multi-node-channel" 1359 timeout = 10 * time.Second 1360 1361 dataDir, err = ioutil.TempDir("", "raft-test-") 1362 Expect(err).NotTo(HaveOccurred()) 1363 1364 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 1365 Expect(err).NotTo(HaveOccurred()) 1366 1367 raftMetadata = &raftprotos.BlockMetadata{ 1368 ConsenterIds: []uint64{1, 2}, 1369 NextConsenterId: 3, 1370 } 1371 1372 consenters = map[uint64]*raftprotos.Consenter{ 1373 1: { 1374 Host: "localhost", 1375 Port: 7051, 1376 ClientTlsCert: clientTLSCert(tlsCA), 1377 ServerTlsCert: serverTLSCert(tlsCA), 1378 }, 1379 2: { 1380 Host: "localhost", 1381 Port: 7051, 1382 ClientTlsCert: clientTLSCert(tlsCA), 1383 ServerTlsCert: serverTLSCert(tlsCA), 1384 }, 1385 } 1386 1387 metadata := &raftprotos.ConfigMetadata{ 1388 Options: &raftprotos.Options{ 1389 TickInterval: "500ms", 1390 ElectionTick: 10, 1391 HeartbeatTick: 1, 1392 MaxInflightBlocks: 5, 1393 SnapshotIntervalSize: 200, 1394 }, 1395 Consenters: []*raftprotos.Consenter{consenters[2]}, 1396 } 1397 value := map[string]*common.ConfigValue{ 1398 "ConsensusType": { 1399 Version: 1, 1400 Value: marshalOrPanic(&orderer.ConsensusType{ 1401 Metadata: marshalOrPanic(metadata), 1402 }), 1403 }, 1404 } 1405 // prepare config update to remove 1 1406 configEnv = newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1407 1408 
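		// (the ConsensusType metadata above lists only consenters[2], so committing this
		// config update removes node 1 from the consenter set; the eviction tests below
		// rely on that)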
fakeHaltCallbacker = &mocks.HaltCallbacker{} 1409 network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA, fakeHaltCallbacker.HaltCallback) 1410 c1, c2 = network.chains[1], network.chains[2] 1411 c1.cutter.CutNext = true 1412 network.init() 1413 network.start() 1414 }) 1415 1416 AfterEach(func() { 1417 network.stop() 1418 network.exec(func(c *chain) { 1419 Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 1420 }) 1421 1422 os.RemoveAll(dataDir) 1423 }) 1424 1425 It("can remove leader by reconfiguring cluster", func() { 1426 network.elect(1) 1427 1428 // trigger status dissemination 1429 Eventually(func() int { 1430 c1.clock.Increment(interval) 1431 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1432 }, LongEventualTimeout).Should(Equal(2)) 1433 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1434 1435 By("Configuring cluster to remove node") 1436 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1437 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1438 c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2) 1439 1440 Eventually(func() <-chan raft.SoftState { 1441 c2.clock.Increment(interval) 1442 return c2.observe 1443 }, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateLeader))) 1444 1445 By("Asserting the haltCallback is called when the node is removed from the replica set") 1446 Eventually(fakeHaltCallbacker.HaltCallbackCallCount).Should(Equal(1)) 1447 By("Asserting the StatusReport responds correctly after eviction") 1448 Eventually( 1449 func() orderer_types.ConsensusRelation { 1450 cRel, _ := c1.StatusReport() 1451 return cRel 1452 }, 1453 ).Should(Equal(orderer_types.ConsensusRelationConfigTracker)) 1454 _, status := c1.StatusReport() 1455 Expect(status).To(Equal(orderer_types.StatusInactive)) 1456 1457 By("Asserting leader can still serve requests as single-node cluster") 1458 c2.cutter.CutNext = true 1459 Expect(c2.Order(env, 0)).To(Succeed()) 1460 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1461 }) 1462 1463 It("remove leader by reconfiguring cluster, but Halt before eviction", func() { 1464 network.elect(1) 1465 1466 // trigger status dissemination 1467 Eventually(func() int { 1468 c1.clock.Increment(interval) 1469 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1470 }, LongEventualTimeout).Should(Equal(2)) 1471 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1472 1473 By("Configuring cluster to remove node") 1474 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1475 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1476 c1.clock.WaitForNWatchersAndIncrement((ELECTION_TICK-1)*interval, 2) 1477 c1.Halt() 1478 1479 Eventually(func() <-chan raft.SoftState { 1480 c2.clock.Increment(interval) 1481 return c2.observe 1482 }, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateLeader))) 1483 1484 By("Asserting leader can still serve requests as single-node cluster") 1485 c2.cutter.CutNext = true 1486 Expect(c2.Order(env, 0)).To(Succeed()) 1487 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1488 1489 By("Asserting the haltCallback is not called when Halt is called before eviction") 1490 c1.clock.Increment(interval) 1491 Eventually(fakeHaltCallbacker.HaltCallbackCallCount).Should(Equal(0)) 1492 By("Asserting the StatusReport responds correctly if the haltCallback is not called") 1493 
Eventually( 1494 func() orderer_types.Status { 1495 _, status := c1.StatusReport() 1496 return status 1497 }, 1498 ).Should(Equal(orderer_types.StatusInactive)) 1499 cRel, _ := c1.StatusReport() 1500 Expect(cRel).To(Equal(orderer_types.ConsensusRelationConsenter)) 1501 }) 1502 1503 It("can remove leader by reconfiguring cluster even if leadership transfer fails", func() { 1504 network.elect(1) 1505 1506 step1 := c1.getStepFunc() 1507 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 1508 stepMsg := &raftpb.Message{} 1509 if err := proto.Unmarshal(msg.Payload, stepMsg); err != nil { 1510 return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err) 1511 } 1512 1513 if stepMsg.Type == raftpb.MsgTimeoutNow { 1514 return nil 1515 } 1516 1517 return step1(dest, msg) 1518 }) 1519 1520 By("Configuring cluster to remove node") 1521 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1522 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1523 c2.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1524 Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1525 1526 c1.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1527 Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed()) 1528 close(c1.stopped) // mark c1 stopped in network 1529 1530 network.elect(2) 1531 1532 By("Asserting leader can still serve requests as single-node cluster") 1533 c2.cutter.CutNext = true 1534 Expect(c2.Order(env, 0)).To(Succeed()) 1535 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1536 }) 1537 1538 It("can remove follower by reconfiguring cluster", func() { 1539 network.elect(2) 1540 1541 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1542 network.exec(func(c *chain) { 1543 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1544 }) 1545 1546 Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1547 Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed()) 1548 1549 By("Asserting leader can still serve requests as single-node cluster") 1550 c2.cutter.CutNext = true 1551 Expect(c2.Order(env, 0)).To(Succeed()) 1552 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1553 }) 1554 }) 1555 1556 Describe("3-node Raft cluster", func() { 1557 var ( 1558 network *network 1559 channelID string 1560 timeout time.Duration 1561 dataDir string 1562 c1, c2, c3 *chain 1563 raftMetadata *raftprotos.BlockMetadata 1564 consenters map[uint64]*raftprotos.Consenter 1565 cryptoProvider bccsp.BCCSP 1566 ) 1567 1568 BeforeEach(func() { 1569 var err error 1570 1571 channelID = "multi-node-channel" 1572 timeout = 10 * time.Second 1573 1574 dataDir, err = ioutil.TempDir("", "raft-test-") 1575 Expect(err).NotTo(HaveOccurred()) 1576 1577 raftMetadata = &raftprotos.BlockMetadata{ 1578 ConsenterIds: []uint64{1, 2, 3}, 1579 NextConsenterId: 4, 1580 } 1581 1582 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 1583 Expect(err).NotTo(HaveOccurred()) 1584 1585 consenters = map[uint64]*raftprotos.Consenter{ 1586 1: { 1587 Host: "localhost", 1588 Port: 7051, 1589 ClientTlsCert: clientTLSCert(tlsCA), 1590 ServerTlsCert: serverTLSCert(tlsCA), 1591 }, 1592 2: { 1593 Host: "localhost", 1594 Port: 7051, 1595 ClientTlsCert: clientTLSCert(tlsCA), 1596 ServerTlsCert: serverTLSCert(tlsCA), 1597 }, 1598 3: { 1599 Host: 
"localhost", 1600 Port: 7051, 1601 ClientTlsCert: clientTLSCert(tlsCA), 1602 ServerTlsCert: serverTLSCert(tlsCA), 1603 }, 1604 } 1605 1606 network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA, nil) 1607 c1 = network.chains[1] 1608 c2 = network.chains[2] 1609 c3 = network.chains[3] 1610 }) 1611 1612 AfterEach(func() { 1613 network.stop() 1614 network.exec(func(c *chain) { 1615 Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 1616 }) 1617 1618 os.RemoveAll(dataDir) 1619 }) 1620 1621 When("2/3 nodes are running", func() { 1622 It("late node can catch up", func() { 1623 network.init() 1624 network.start(1, 2) 1625 network.elect(1) 1626 1627 // trigger status dissemination 1628 Eventually(func() int { 1629 c1.clock.Increment(interval) 1630 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1631 }, LongEventualTimeout).Should(Equal(2)) 1632 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1633 1634 c1.cutter.CutNext = true 1635 err := c1.Order(env, 0) 1636 Expect(err).NotTo(HaveOccurred()) 1637 1638 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1639 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1640 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1641 1642 network.start(3) 1643 1644 c1.clock.Increment(interval) 1645 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1646 1647 network.stop() 1648 }) 1649 1650 It("late node receives snapshot from leader", func() { 1651 c1.opts.SnapshotIntervalSize = 1 1652 c1.opts.SnapshotCatchUpEntries = 1 1653 1654 c1.cutter.CutNext = true 1655 1656 var blocksLock sync.Mutex 1657 blocks := make(map[uint64]*common.Block) // storing written blocks for block puller 1658 1659 c1.support.WriteBlockStub = func(b *common.Block, meta []byte) { 1660 blocksLock.Lock() 1661 defer blocksLock.Unlock() 1662 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 1663 Expect(err).NotTo(HaveOccurred()) 1664 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 1665 blocks[b.Header.Number] = b 1666 } 1667 1668 c3.puller.PullBlockStub = func(i uint64) *common.Block { 1669 blocksLock.Lock() 1670 defer blocksLock.Unlock() 1671 b, exist := blocks[i] 1672 if !exist { 1673 return nil 1674 } 1675 1676 return b 1677 } 1678 1679 network.init() 1680 network.start(1, 2) 1681 network.elect(1) 1682 1683 err := c1.Order(env, 0) 1684 Expect(err).NotTo(HaveOccurred()) 1685 1686 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1687 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1688 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1689 1690 err = c1.Order(env, 0) 1691 Expect(err).NotTo(HaveOccurred()) 1692 1693 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1694 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1695 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1696 1697 network.start(3) 1698 1699 c1.clock.Increment(interval) 1700 Eventually(func() int { return c3.support.WriteBlockCallCount() }, 
LongEventualTimeout).Should(Equal(2)) 1701 1702 network.stop() 1703 }) 1704 }) 1705 1706 When("reconfiguring raft cluster", func() { 1707 const ( 1708 defaultTimeout = 5 * time.Second 1709 ) 1710 var ( 1711 options = &raftprotos.Options{ 1712 TickInterval: "500ms", 1713 ElectionTick: 10, 1714 HeartbeatTick: 1, 1715 MaxInflightBlocks: 5, 1716 SnapshotIntervalSize: 200, 1717 } 1718 updateRaftConfigValue = func(metadata *raftprotos.ConfigMetadata) map[string]*common.ConfigValue { 1719 return map[string]*common.ConfigValue{ 1720 "ConsensusType": { 1721 Version: 1, 1722 Value: marshalOrPanic(&orderer.ConsensusType{ 1723 Metadata: marshalOrPanic(metadata), 1724 }), 1725 }, 1726 } 1727 } 1728 addConsenterConfigValue = func() map[string]*common.ConfigValue { 1729 metadata := &raftprotos.ConfigMetadata{Options: options} 1730 for _, consenter := range consenters { 1731 metadata.Consenters = append(metadata.Consenters, consenter) 1732 } 1733 1734 newConsenter := &raftprotos.Consenter{ 1735 Host: "localhost", 1736 Port: 7050, 1737 ServerTlsCert: serverTLSCert(tlsCA), 1738 ClientTlsCert: clientTLSCert(tlsCA), 1739 } 1740 metadata.Consenters = append(metadata.Consenters, newConsenter) 1741 return updateRaftConfigValue(metadata) 1742 } 1743 removeConsenterConfigValue = func(id uint64) map[string]*common.ConfigValue { 1744 metadata := &raftprotos.ConfigMetadata{Options: options} 1745 for nodeID, consenter := range consenters { 1746 if nodeID == id { 1747 continue 1748 } 1749 metadata.Consenters = append(metadata.Consenters, consenter) 1750 } 1751 return updateRaftConfigValue(metadata) 1752 } 1753 createChannelEnv = func(metadata *raftprotos.ConfigMetadata) *common.Envelope { 1754 configEnv := newConfigEnv("another-channel", 1755 common.HeaderType_CONFIG, 1756 newConfigUpdateEnv(channelID, nil, updateRaftConfigValue(metadata))) 1757 1758 // Wrap config env in Orderer transaction 1759 return &common.Envelope{ 1760 Payload: marshalOrPanic(&common.Payload{ 1761 Header: &common.Header{ 1762 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 1763 Type: int32(common.HeaderType_ORDERER_TRANSACTION), 1764 ChannelId: channelID, 1765 }), 1766 }, 1767 Data: marshalOrPanic(configEnv), 1768 }), 1769 } 1770 } 1771 ) 1772 1773 BeforeEach(func() { 1774 network.exec(func(c *chain) { 1775 c.opts.EvictionSuspicion = time.Millisecond * 100 1776 c.opts.LeaderCheckInterval = time.Millisecond * 100 1777 }) 1778 1779 network.init() 1780 network.start() 1781 network.elect(1) 1782 1783 By("Submitting first tx to cut the block") 1784 c1.cutter.CutNext = true 1785 err := c1.Order(env, 0) 1786 Expect(err).NotTo(HaveOccurred()) 1787 1788 c1.clock.Increment(interval) 1789 1790 network.exec( 1791 func(c *chain) { 1792 Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1)) 1793 }) 1794 }) 1795 1796 AfterEach(func() { 1797 network.stop() 1798 }) 1799 1800 Context("channel creation", func() { 1801 It("succeeds with valid config metadata", func() { 1802 metadata := &raftprotos.ConfigMetadata{Options: options} 1803 for _, consenter := range consenters { 1804 metadata.Consenters = append(metadata.Consenters, consenter) 1805 } 1806 1807 Expect(c1.Configure(createChannelEnv(metadata), 0)).To(Succeed()) 1808 network.exec(func(c *chain) { 1809 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1810 }) 1811 }) 1812 }) 1813 1814 Context("reconfiguration", func() { 1815 It("can rotate certificate by adding and removing 1 node in one config update", func() { 1816 metadata := 
&raftprotos.ConfigMetadata{Options: options} 1817 for id, consenter := range consenters { 1818 if id == 2 { 1819 // remove second consenter 1820 continue 1821 } 1822 metadata.Consenters = append(metadata.Consenters, consenter) 1823 } 1824 1825 // add new consenter 1826 newConsenter := &raftprotos.Consenter{ 1827 Host: "localhost", 1828 Port: 7050, 1829 ServerTlsCert: serverTLSCert(tlsCA), 1830 ClientTlsCert: clientTLSCert(tlsCA), 1831 } 1832 metadata.Consenters = append(metadata.Consenters, newConsenter) 1833 1834 value := map[string]*common.ConfigValue{ 1835 "ConsensusType": { 1836 Version: 1, 1837 Value: marshalOrPanic(&orderer.ConsensusType{ 1838 Metadata: marshalOrPanic(metadata), 1839 }), 1840 }, 1841 } 1842 1843 By("creating new configuration with removed node and new one") 1844 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1845 c1.cutter.CutNext = true 1846 1847 By("sending config transaction") 1848 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1849 1850 network.exec(func(c *chain) { 1851 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1852 }) 1853 }) 1854 1855 It("rotates leader certificate and triggers leadership transfer", func() { 1856 metadata := &raftprotos.ConfigMetadata{Options: options} 1857 for id, consenter := range consenters { 1858 if id == 1 { 1859 // remove consenter 1, the leader 1860 continue 1861 } 1862 metadata.Consenters = append(metadata.Consenters, consenter) 1863 } 1864 1865 // add new consenter 1866 newConsenter := &raftprotos.Consenter{ 1867 Host: "localhost", 1868 Port: 7050, 1869 ServerTlsCert: serverTLSCert(tlsCA), 1870 ClientTlsCert: clientTLSCert(tlsCA), 1871 } 1872 metadata.Consenters = append(metadata.Consenters, newConsenter) 1873 1874 value := map[string]*common.ConfigValue{ 1875 "ConsensusType": { 1876 Version: 1, 1877 Value: marshalOrPanic(&orderer.ConsensusType{ 1878 Metadata: marshalOrPanic(metadata), 1879 }), 1880 }, 1881 } 1882 1883 By("creating new configuration with removed node and new one") 1884 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1885 c1.cutter.CutNext = true 1886 1887 By("sending config transaction") 1888 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1889 1890 Eventually(c1.observe, LongEventualTimeout).Should(Receive(BeFollower())) 1891 network.exec(func(c *chain) { 1892 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1893 }) 1894 }) 1895 1896 When("Leader is disconnected after cert rotation", func() { 1897 It("still configures communication after failed leader transfer attempt", func() { 1898 metadata := &raftprotos.ConfigMetadata{Options: options} 1899 for id, consenter := range consenters { 1900 if id == 1 { 1901 // remove consenter 1, the leader 1902 continue 1903 } 1904 metadata.Consenters = append(metadata.Consenters, consenter) 1905 } 1906 1907 // add new consenter 1908 newConsenter := &raftprotos.Consenter{ 1909 Host: "localhost", 1910 Port: 7050, 1911 ServerTlsCert: serverTLSCert(tlsCA), 1912 ClientTlsCert: clientTLSCert(tlsCA), 1913 } 1914 metadata.Consenters = append(metadata.Consenters, newConsenter) 1915 1916 value := map[string]*common.ConfigValue{ 1917 "ConsensusType": { 1918 Version: 1, 1919 Value: marshalOrPanic(&orderer.ConsensusType{ 1920 Metadata: marshalOrPanic(metadata), 1921 }), 1922 }, 1923 } 1924 1925 By("creating new configuration with removed node and new one") 1926 configEnv := newConfigEnv(channelID, 
common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1927 c1.cutter.CutNext = true 1928 1929 step1 := c1.getStepFunc() 1930 count := c1.rpc.SendConsensusCallCount() // record current step call count 1931 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 1932 // disconnect network after 4 MsgApp are sent by c1: 1933 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 1934 // - 2 MsgApp to c2 & c3 that instruct followers to commit data 1935 if c1.rpc.SendConsensusCallCount() == count+4 { 1936 defer network.disconnect(1) 1937 } 1938 1939 return step1(dest, msg) 1940 }) 1941 1942 network.exec(func(c *chain) { 1943 Consistently(c.clock.WatcherCount).Should(Equal(1)) 1944 }) 1945 1946 By("sending config transaction") 1947 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1948 1949 Consistently(c1.observe).ShouldNot(Receive()) 1950 network.exec(func(c *chain) { 1951 // wait for timeout timer to start 1952 c.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1953 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1954 }) 1955 }) 1956 }) 1957 1958 When("Follower is disconnected while leader cert is being rotated", func() { 1959 It("still configures communication and transfers leadership", func() { 1960 metadata := &raftprotos.ConfigMetadata{Options: options} 1961 for id, consenter := range consenters { 1962 if id == 1 { 1963 // remove consenter 1, the leader 1964 continue 1965 } 1966 metadata.Consenters = append(metadata.Consenters, consenter) 1967 } 1968 1969 // add new consenter 1970 newConsenter := &raftprotos.Consenter{ 1971 Host: "localhost", 1972 Port: 7050, 1973 ServerTlsCert: serverTLSCert(tlsCA), 1974 ClientTlsCert: clientTLSCert(tlsCA), 1975 } 1976 metadata.Consenters = append(metadata.Consenters, newConsenter) 1977 1978 value := map[string]*common.ConfigValue{ 1979 "ConsensusType": { 1980 Version: 1, 1981 Value: marshalOrPanic(&orderer.ConsensusType{ 1982 Metadata: marshalOrPanic(metadata), 1983 }), 1984 }, 1985 } 1986 1987 cnt := c1.rpc.SendConsensusCallCount() 1988 network.disconnect(3) 1989 1990 // Trigger some heartbeats to be sent so that the leader notices the 1991 // failed message delivery to 3 and marks it as Paused. 1992 // This is to ensure leadership is transferred to 2. 
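// Each tick below causes the leader to fan heartbeats out to the other consenters, so waiting for the SendConsensus call count to grow by several calls ensures c1 has attempted (and failed) delivery to the disconnected node 3 a few times before the config transaction is submitted.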
1993 Eventually(func() int { 1994 c1.clock.Increment(interval) 1995 return c1.rpc.SendConsensusCallCount() 1996 }, LongEventualTimeout).Should(BeNumerically(">=", cnt+5)) 1997 1998 By("creating new configuration with removed node and new one") 1999 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 2000 c1.cutter.CutNext = true 2001 2002 By("sending config transaction") 2003 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 2004 2005 Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateFollower))) 2006 network.Lock() 2007 network.leader = 2 // manually set network leader 2008 network.Unlock() 2009 network.disconnect(1) 2010 2011 network.exec(func(c *chain) { 2012 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2013 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 2014 }, 1, 2) 2015 2016 network.join(3, true) 2017 Eventually(c3.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2018 Eventually(c3.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 2019 2020 By("Ordering normal transaction") 2021 c2.cutter.CutNext = true 2022 Expect(c3.Order(env, 0)).To(Succeed()) 2023 network.exec(func(c *chain) { 2024 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2025 }, 2, 3) 2026 }) 2027 }) 2028 2029 It("adding node to the cluster", func() { 2030 addConsenterUpdate := addConsenterConfigValue() 2031 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterUpdate)) 2032 c1.cutter.CutNext = true 2033 2034 By("sending config transaction") 2035 err := c1.Configure(configEnv, 0) 2036 Expect(err).NotTo(HaveOccurred()) 2037 Expect(c1.fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 2038 Expect(c1.fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2039 2040 network.exec(func(c *chain) { 2041 Eventually(c.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1)) 2042 Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2)) 2043 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(4))) 2044 }) 2045 2046 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2047 meta := &common.Metadata{Value: raftmetabytes} 2048 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2049 Expect(err).NotTo(HaveOccurred()) 2050 2051 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil, nil) 2052 // if we join a node to existing network, it MUST already obtained blocks 2053 // till the config block that adds this node to cluster. 2054 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2055 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2056 c4.init() 2057 2058 network.addChain(c4) 2059 c4.Start() 2060 2061 // ConfChange is applied to etcd/raft asynchronously, meaning node 4 is not added 2062 // to leader's node list right away. An immediate tick does not trigger a heartbeat 2063 // being sent to node 4. Therefore, we repeatedly tick the leader until node 4 joins 2064 // the cluster successfully. 
2065 Eventually(func() <-chan raft.SoftState { 2066 c1.clock.Increment(interval) 2067 return c4.observe 2068 }, defaultTimeout).Should(Receive(Equal(raft.SoftState{Lead: 1, RaftState: raft.StateFollower}))) 2069 2070 Eventually(c4.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1)) 2071 Eventually(c4.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1)) 2072 2073 By("submitting new transaction to follower") 2074 c1.cutter.CutNext = true 2075 err = c4.Order(env, 0) 2076 Expect(err).NotTo(HaveOccurred()) 2077 Expect(c4.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2078 Expect(c4.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2079 2080 network.exec(func(c *chain) { 2081 Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(2)) 2082 }) 2083 }) 2084 2085 It("does not reconfigure raft cluster if it's a channel creation tx", func() { 2086 configEnv := newConfigEnv("another-channel", 2087 common.HeaderType_CONFIG, 2088 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(2))) 2089 2090 // Wrap config env in Orderer transaction 2091 channelCreationEnv := &common.Envelope{ 2092 Payload: marshalOrPanic(&common.Payload{ 2093 Header: &common.Header{ 2094 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 2095 Type: int32(common.HeaderType_ORDERER_TRANSACTION), 2096 ChannelId: channelID, 2097 }), 2098 }, 2099 Data: marshalOrPanic(configEnv), 2100 }), 2101 } 2102 2103 c1.cutter.CutNext = true 2104 2105 Expect(c1.Configure(channelCreationEnv, 0)).To(Succeed()) 2106 network.exec(func(c *chain) { 2107 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2108 }) 2109 2110 // assert c2 is not evicted 2111 Consistently(c2.Errored).ShouldNot(BeClosed()) 2112 Expect(c2.Order(env, 0)).To(Succeed()) 2113 2114 network.exec(func(c *chain) { 2115 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2116 }) 2117 }) 2118 2119 It("stop leader and continue reconfiguration failing over to new leader", func() { 2120 // Scenario: Start a replica set of 3 Raft nodes and elect node c1 as the leader. 2121 // Configure the chain support mock to disconnect c1 right after it writes the configuration block 2122 // into the ledger; this simulates a failover. 2123 // Next, bootstrap a new node c4 to join the cluster by creating a config transaction and submitting 2124 // it to the leader. Once the leader writes the configuration block it fails, and leadership is transferred to 2125 // another node. 2126 // The test asserts that the new node c4 will join the cluster and the new leader will handle the failover of the 2127 // re-configuration. Later we connect c1 back and make sure it is capable of catching up with the 2128 // new configuration and successfully rejoins the replica set. 
2129 2130 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2131 c1.cutter.CutNext = true 2132 2133 step1 := c1.getStepFunc() 2134 count := c1.rpc.SendConsensusCallCount() // record current step call count 2135 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2136 // disconnect network after 4 MsgApp are sent by c1: 2137 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2138 // - 2 MsgApp to c2 & c3 that instruct followers to commit data 2139 if c1.rpc.SendConsensusCallCount() == count+4 { 2140 defer network.disconnect(1) 2141 } 2142 2143 return step1(dest, msg) 2144 }) 2145 2146 By("sending config transaction") 2147 err := c1.Configure(configEnv, 0) 2148 Expect(err).NotTo(HaveOccurred()) 2149 2150 // every node has written config block to the OSN ledger 2151 network.exec( 2152 func(c *chain) { 2153 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2154 }) 2155 2156 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2157 c1.setStepFunc(step1) 2158 2159 // elect node with higher index 2160 i2, _ := c2.storage.LastIndex() // err is always nil 2161 i3, _ := c3.storage.LastIndex() 2162 candidate := uint64(2) 2163 if i3 > i2 { 2164 candidate = 3 2165 } 2166 network.chains[candidate].cutter.CutNext = true 2167 network.elect(candidate) 2168 2169 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2170 meta := &common.Metadata{Value: raftmetabytes} 2171 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2172 Expect(err).NotTo(HaveOccurred()) 2173 2174 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil, nil) 2175 // a node joining an existing network MUST have already obtained the blocks 2176 // up to and including the config block that adds it to the cluster. 2177 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2178 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2179 c4.init() 2180 2181 network.addChain(c4) 2182 c4.start() 2183 Expect(c4.WaitReady()).To(Succeed()) 2184 network.join(4, true) 2185 2186 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2187 Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2188 2189 By("submitting new transaction to follower") 2190 err = c4.Order(env, 0) 2191 Expect(err).NotTo(HaveOccurred()) 2192 2193 // the remaining nodes, including the newly added one, are alive and should each write 2 blocks 2194 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2195 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2196 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2197 2198 // node 1 has been stopped and should not write any more blocks 2199 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2200 2201 network.join(1, true) 2202 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2203 }) 2204 2205 It("stop cluster quorum and continue reconfiguration after the restart", func() { 2206 // Scenario: Start a replica set of 3 Raft nodes and elect node c1 as the leader. 2207 // Configure the chain support mock to stop the cluster after the config block is committed, 2208 // then restart the cluster and ensure it picks up the updates and is able to finish the reconfiguration. 
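// The intercepted step function below lets the config block replicate and commit, then drops connectivity to all three nodes, simulating a full outage before the ConfChange is applied; the nodes are reconnected and a new leader is elected further down so the reconfiguration can complete.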
2209 2210 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2211 c1.cutter.CutNext = true 2212 2213 step1 := c1.getStepFunc() 2214 count := c1.rpc.SendConsensusCallCount() // record current step call count 2215 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2216 // disconnect network after 4 MsgApp are sent by c1: 2217 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2218 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 2219 if c1.rpc.SendConsensusCallCount() == count+4 { 2220 defer func() { 2221 network.disconnect(1) 2222 network.disconnect(2) 2223 network.disconnect(3) 2224 }() 2225 } 2226 2227 return step1(dest, msg) 2228 }) 2229 2230 By("sending config transaction") 2231 err := c1.Configure(configEnv, 0) 2232 Expect(err).NotTo(HaveOccurred()) 2233 2234 // every node has written config block to the OSN ledger 2235 network.exec( 2236 func(c *chain) { 2237 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2238 }) 2239 2240 // assert conf change proposals have been dropped, before proceed to reconnect network 2241 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2242 c1.setStepFunc(step1) 2243 2244 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2245 meta := &common.Metadata{Value: raftmetabytes} 2246 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2247 Expect(err).NotTo(HaveOccurred()) 2248 2249 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil, nil) 2250 // if we join a node to existing network, it MUST already obtained blocks 2251 // till the config block that adds this node to cluster. 2252 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2253 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2254 c4.init() 2255 2256 network.addChain(c4) 2257 2258 By("reconnecting nodes back") 2259 for i := uint64(1); i < 4; i++ { 2260 network.connect(i) 2261 } 2262 2263 // elect node with higher index 2264 i2, _ := c2.storage.LastIndex() // err is always nil 2265 i3, _ := c3.storage.LastIndex() 2266 candidate := uint64(2) 2267 if i3 > i2 { 2268 candidate = 3 2269 } 2270 network.chains[candidate].cutter.CutNext = true 2271 network.elect(candidate) 2272 2273 c4.start() 2274 Expect(c4.WaitReady()).To(Succeed()) 2275 network.join(4, false) 2276 2277 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2278 Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2279 2280 By("submitting new transaction to follower") 2281 err = c4.Order(env, 0) 2282 Expect(err).NotTo(HaveOccurred()) 2283 2284 // rest nodes are alive include a newly added, hence should write 2 blocks 2285 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2286 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2287 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2288 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2289 }) 2290 2291 It("ensures that despite leader failure cluster continue to process configuration to remove the leader", func() { 2292 // Scenario: Starting replica set of 3 nodes, electing nodeID = 1 to be the leader. 
// Prepare a config update transaction which removes the leader (nodeID = 1); the leader then 2294 // fails right after it commits the configuration block. 2295 2296 configEnv := newConfigEnv(channelID, 2297 common.HeaderType_CONFIG, 2298 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1 2299 2300 c1.cutter.CutNext = true 2301 2302 step1 := c1.getStepFunc() 2303 count := c1.rpc.SendConsensusCallCount() // record current step call count 2304 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2305 // disconnect network after 4 MsgApp are sent by c1: 2306 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2307 // - 2 MsgApp to c2 & c3 that instruct followers to commit data 2308 if c1.rpc.SendConsensusCallCount() == count+4 { 2309 defer network.disconnect(1) 2310 } 2311 2312 return step1(dest, msg) 2313 }) 2314 2315 By("sending config transaction") 2316 err := c1.Configure(configEnv, 0) 2317 Expect(err).NotTo(HaveOccurred()) 2318 2319 network.exec(func(c *chain) { 2320 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2321 }) 2322 2323 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2324 c1.setStepFunc(step1) 2325 2326 // elect node with higher index 2327 i2, _ := c2.storage.LastIndex() // err is always nil 2328 i3, _ := c3.storage.LastIndex() 2329 candidate := uint64(2) 2330 if i3 > i2 { 2331 candidate = 3 2332 } 2333 network.chains[candidate].cutter.CutNext = true 2334 network.elect(candidate) 2335 2336 By("submitting new transaction to follower") 2337 err = c3.Order(env, 0) 2338 Expect(err).NotTo(HaveOccurred()) 2339 2340 // the remaining nodes are alive and should each write 2 blocks 2341 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2342 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2343 }) 2344 2345 It("removes leader from replica set", func() { 2346 // Scenario: Start a replica set of 3 nodes and elect nodeID = 1 as the leader. 2347 // Prepare a config update transaction which removes the leader (nodeID = 1), to 2348 // ensure we handle the re-configuration of node removal correctly and that the remaining two 2349 // nodes are still capable of forming a functional quorum so Raft can make further progress. 2350 // Moreover, the test asserts that the removed node stops Rafting with the rest of the cluster, i.e. 2351 // it should no longer be able to get updates or forward transactions. 
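// Expected outcome: the remaining two nodes elect a new leader and keep committing blocks, while the removed node's Errored channel closes and any further Order call on it fails.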
2352 2353 configEnv := newConfigEnv(channelID, 2354 common.HeaderType_CONFIG, 2355 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1 2356 2357 c1.cutter.CutNext = true 2358 2359 By("sending config transaction") 2360 err := c1.Configure(configEnv, 0) 2361 Expect(err).NotTo(HaveOccurred()) 2362 2363 // every node has written config block to the OSN ledger 2364 network.exec( 2365 func(c *chain) { 2366 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2367 Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2)) 2368 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(2))) 2369 }) 2370 2371 // Assert c1 has exited 2372 c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2) 2373 Eventually(c1.Errored, LongEventualTimeout).Should(BeClosed()) 2374 close(c1.stopped) 2375 2376 var newLeader, remainingFollower *chain 2377 for newLeader == nil || remainingFollower == nil { 2378 var state raft.SoftState 2379 select { 2380 case state = <-c2.observe: 2381 case state = <-c3.observe: 2382 case <-time.After(LongEventualTimeout): 2383 Fail("Expected a new leader to present") 2384 } 2385 2386 if state.RaftState == raft.StateLeader && state.Lead != raft.None { 2387 newLeader = network.chains[state.Lead] 2388 } 2389 2390 if state.RaftState == raft.StateFollower && state.Lead != raft.None { 2391 remainingFollower = network.chains[state.Lead] 2392 } 2393 } 2394 2395 By("submitting transaction to new leader") 2396 newLeader.cutter.CutNext = true 2397 err = newLeader.Order(env, 0) 2398 Expect(err).NotTo(HaveOccurred()) 2399 2400 Eventually(newLeader.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2401 Eventually(remainingFollower.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2402 // node 1 has been stopped should not write any block 2403 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2404 2405 By("trying to submit to new node, expected to fail") 2406 c1.cutter.CutNext = true 2407 err = c1.Order(env, 0) 2408 Expect(err).To(HaveOccurred()) 2409 2410 // number of block writes should remain the same 2411 Consistently(newLeader.support.WriteBlockCallCount).Should(Equal(2)) 2412 Consistently(remainingFollower.support.WriteBlockCallCount).Should(Equal(2)) 2413 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2414 }) 2415 2416 It("does not deadlock if leader steps down while config block is in-flight", func() { 2417 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2418 c1.cutter.CutNext = true 2419 2420 signal := make(chan struct{}) 2421 stub := c1.support.WriteConfigBlockStub 2422 c1.support.WriteConfigBlockStub = func(b *common.Block, meta []byte) { 2423 signal <- struct{}{} 2424 <-signal 2425 stub(b, meta) 2426 } 2427 2428 By("Sending config transaction") 2429 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 2430 2431 Eventually(signal, LongEventualTimeout).Should(Receive()) 2432 network.disconnect(1) 2433 2434 By("Ticking leader till it steps down") 2435 Eventually(func() raft.SoftState { 2436 c1.clock.Increment(interval) 2437 return c1.Node.Status().SoftState 2438 }, LongEventualTimeout).Should(StateEqual(0, raft.StateFollower)) 2439 2440 close(signal) 2441 2442 Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(0, raft.StateFollower))) 2443 2444 By("Re-electing 1 as leader") 2445 network.connect(1) 2446 
network.elect(1) 2447 2448 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2449 meta := &common.Metadata{Value: raftmetabytes} 2450 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2451 Expect(err).NotTo(HaveOccurred()) 2452 2453 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil, nil) 2454 // if we join a node to existing network, it MUST already obtained blocks 2455 // till the config block that adds this node to cluster. 2456 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2457 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2458 c4.init() 2459 2460 network.addChain(c4) 2461 c4.Start() 2462 2463 Eventually(func() <-chan raft.SoftState { 2464 c1.clock.Increment(interval) 2465 return c4.observe 2466 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateFollower))) 2467 2468 By("Submitting tx to confirm network is still working") 2469 Expect(c1.Order(env, 0)).To(Succeed()) 2470 2471 network.exec(func(c *chain) { 2472 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2473 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2474 }) 2475 }) 2476 }) 2477 }) 2478 2479 When("3/3 nodes are running", func() { 2480 JustBeforeEach(func() { 2481 network.init() 2482 network.start() 2483 network.elect(1) 2484 }) 2485 2486 AfterEach(func() { 2487 network.stop() 2488 }) 2489 2490 It("correctly sets the cluster size and leadership metrics", func() { 2491 // the network should see only one leadership change 2492 network.exec(func(c *chain) { 2493 Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(1)) 2494 Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(0)).Should(Equal(float64(1))) 2495 Expect(c.fakeFields.fakeClusterSize.SetCallCount()).Should(Equal(1)) 2496 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(3))) 2497 }) 2498 // c1 should be the leader 2499 Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2)) 2500 Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1))) 2501 // c2 and c3 should continue to remain followers 2502 Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2503 Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0))) 2504 Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2505 Expect(c3.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0))) 2506 }) 2507 2508 It("orders envelope on leader", func() { 2509 By("instructed to cut next block") 2510 c1.cutter.CutNext = true 2511 err := c1.Order(env, 0) 2512 Expect(err).NotTo(HaveOccurred()) 2513 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2514 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2515 2516 network.exec( 2517 func(c *chain) { 2518 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2519 }) 2520 2521 By("respect batch timeout") 2522 c1.cutter.CutNext = false 2523 2524 err = c1.Order(env, 0) 2525 Expect(err).NotTo(HaveOccurred()) 2526 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 2527 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 2528 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 2529 2530 c1.clock.WaitForNWatchersAndIncrement(timeout, 2) 2531 network.exec( 2532 func(c *chain) { 2533 Eventually(c.support.WriteBlockCallCount, 
LongEventualTimeout).Should(Equal(2)) 2534 }) 2535 }) 2536 2537 It("orders envelope on follower", func() { 2538 By("instructed to cut next block") 2539 c1.cutter.CutNext = true 2540 err := c2.Order(env, 0) 2541 Expect(err).NotTo(HaveOccurred()) 2542 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2543 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2544 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2545 2546 network.exec( 2547 func(c *chain) { 2548 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2549 }) 2550 2551 By("respect batch timeout") 2552 c1.cutter.CutNext = false 2553 2554 err = c2.Order(env, 0) 2555 Expect(err).NotTo(HaveOccurred()) 2556 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 2557 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 2558 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2559 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 2560 2561 c1.clock.WaitForNWatchersAndIncrement(timeout, 2) 2562 network.exec( 2563 func(c *chain) { 2564 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2565 }) 2566 }) 2567 2568 When("MaxInflightBlocks is reached", func() { 2569 BeforeEach(func() { 2570 network.exec(func(c *chain) { c.opts.MaxInflightBlocks = 1 }) 2571 }) 2572 2573 It("waits for in flight blocks to be committed", func() { 2574 c1.cutter.CutNext = true 2575 // disconnect c1 to disrupt consensus 2576 network.disconnect(1) 2577 2578 Expect(c1.Order(env, 0)).To(Succeed()) 2579 2580 doneProp := make(chan struct{}) 2581 go func() { 2582 defer GinkgoRecover() 2583 Expect(c1.Order(env, 0)).To(Succeed()) 2584 close(doneProp) 2585 }() 2586 // expect second `Order` to block 2587 Consistently(doneProp).ShouldNot(BeClosed()) 2588 network.exec(func(c *chain) { 2589 Consistently(c.support.WriteBlockCallCount).Should(BeZero()) 2590 }) 2591 2592 network.connect(1) 2593 c1.clock.Increment(interval) 2594 2595 Eventually(doneProp, LongEventualTimeout).Should(BeClosed()) 2596 network.exec(func(c *chain) { 2597 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2598 }) 2599 }) 2600 2601 It("resets block in flight when steps down from leader", func() { 2602 c1.cutter.CutNext = true 2603 c2.cutter.CutNext = true 2604 // disconnect c1 to disrupt consensus 2605 network.disconnect(1) 2606 2607 Expect(c1.Order(env, 0)).To(Succeed()) 2608 2609 doneProp := make(chan struct{}) 2610 go func() { 2611 defer GinkgoRecover() 2612 2613 Expect(c1.Order(env, 0)).To(Succeed()) 2614 close(doneProp) 2615 }() 2616 // expect second `Order` to block 2617 Consistently(doneProp).ShouldNot(BeClosed()) 2618 network.exec(func(c *chain) { 2619 Consistently(c.support.WriteBlockCallCount).Should(BeZero()) 2620 }) 2621 2622 network.elect(2) 2623 Expect(c3.Order(env, 0)).To(Succeed()) 2624 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2625 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2626 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2627 2628 network.connect(1) 2629 c2.clock.Increment(interval) 2630 2631 Eventually(doneProp, LongEventualTimeout).Should(BeClosed()) 2632 network.exec(func(c *chain) { 2633 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 
2634 }) 2635 }) 2636 }) 2637 2638 When("gRPC stream to leader is stuck", func() { 2639 BeforeEach(func() { 2640 c2.opts.RPCTimeout = time.Second 2641 network.Lock() 2642 network.delayWG.Add(1) 2643 network.Unlock() 2644 }) 2645 It("correctly times out", func() { 2646 err := c2.Order(env, 0) 2647 Expect(err).To(MatchError("timed out (1s) waiting on forwarding to 1")) 2648 network.delayWG.Done() 2649 }) 2650 }) 2651 2652 When("leader is disconnected", func() { 2653 It("correctly returns a failure to the client when forwarding from a follower", func() { 2654 network.disconnect(1) 2655 2656 err := c2.Order(env, 0) 2657 Expect(err).To(MatchError("connection lost")) 2658 }) 2659 2660 It("proactively steps down to follower", func() { 2661 network.disconnect(1) 2662 2663 By("Ticking leader until it steps down") 2664 Eventually(func() <-chan raft.SoftState { 2665 c1.clock.Increment(interval) 2666 return c1.observe 2667 }, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StateFollower}))) 2668 2669 By("Ensuring it does not accept message due to the cluster being leaderless") 2670 err := c1.Order(env, 0) 2671 Expect(err).To(MatchError("no Raft leader")) 2672 2673 network.elect(2) 2674 2675 // c1 should have lost leadership 2676 Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(3)) 2677 Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(2)).Should(Equal(float64(0))) 2678 // c2 should become the leader 2679 Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2)) 2680 Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1))) 2681 // c3 should continue to remain a follower 2682 Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2683 2684 network.join(1, true) 2685 network.exec(func(c *chain) { 2686 Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(3)) 2687 Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(2)).Should(Equal(float64(1))) 2688 }) 2689 2690 err = c1.Order(env, 0) 2691 Expect(err).NotTo(HaveOccurred()) 2692 }) 2693 2694 It("does not deadlock if propose is blocked", func() { 2695 signal := make(chan struct{}) 2696 c1.cutter.CutNext = true 2697 c1.support.SequenceStub = func() uint64 { 2698 signal <- struct{}{} 2699 <-signal 2700 return 0 2701 } 2702 2703 By("Sending a normal transaction") 2704 Expect(c1.Order(env, 0)).To(Succeed()) 2705 2706 Eventually(signal).Should(Receive()) 2707 network.disconnect(1) 2708 2709 By("Ticking leader till it steps down") 2710 Eventually(func() raft.SoftState { 2711 c1.clock.Increment(interval) 2712 return c1.Node.Status().SoftState 2713 }).Should(StateEqual(0, raft.StateFollower)) 2714 2715 close(signal) 2716 2717 Eventually(c1.observe).Should(Receive(StateEqual(0, raft.StateFollower))) 2718 c1.support.SequenceStub = nil 2719 network.exec(func(c *chain) { 2720 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2721 }) 2722 2723 By("Re-electing 1 as leader") 2724 network.connect(1) 2725 network.elect(1) 2726 2727 By("Sending another normal transaction") 2728 Expect(c1.Order(env, 0)).To(Succeed()) 2729 2730 network.exec(func(c *chain) { 2731 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2732 }) 2733 }) 2734 }) 2735 2736 When("follower is disconnected", func() { 2737 It("should return error when receiving an env", func() { 2738 network.disconnect(2) 2739 2740 errorC := c2.Errored() 2741 Consistently(errorC).ShouldNot(BeClosed()) // assert that errorC is not closed 2742 2743 By("Ticking node 2 
until it becomes pre-candidate") 2744 Eventually(func() <-chan raft.SoftState { 2745 c2.clock.Increment(interval) 2746 return c2.observe 2747 }, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StatePreCandidate}))) 2748 2749 Eventually(errorC).Should(BeClosed()) 2750 err := c2.Order(env, 0) 2751 Expect(err).To(HaveOccurred()) 2752 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2753 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2754 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2755 2756 network.connect(2) 2757 c1.clock.Increment(interval) 2758 Expect(errorC).To(BeClosed()) 2759 2760 Eventually(c2.Errored).ShouldNot(BeClosed()) 2761 }) 2762 }) 2763 2764 It("leader retransmits lost messages", func() { 2765 // This tests that heartbeats will trigger leader to retransmit lost MsgApp 2766 2767 c1.cutter.CutNext = true 2768 2769 network.disconnect(1) // drop MsgApp 2770 2771 err := c1.Order(env, 0) 2772 Expect(err).NotTo(HaveOccurred()) 2773 2774 network.exec( 2775 func(c *chain) { 2776 Consistently(func() int { return c.support.WriteBlockCallCount() }).Should(Equal(0)) 2777 }) 2778 2779 network.connect(1) // reconnect leader 2780 2781 c1.clock.Increment(interval) // trigger a heartbeat 2782 network.exec( 2783 func(c *chain) { 2784 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2785 }) 2786 }) 2787 2788 It("allows the leader to create multiple normal blocks without having to wait for them to be written out", func() { 2789 // this ensures that the created blocks are not written out 2790 network.disconnect(1) 2791 2792 c1.cutter.CutNext = true 2793 for i := 0; i < 3; i++ { 2794 Expect(c1.Order(env, 0)).To(Succeed()) 2795 } 2796 2797 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 2798 2799 network.connect(1) 2800 2801 // After FAB-13722, the leader pauses replication if it is notified that message 2802 // delivery to a certain node has failed, e.g. connection refused. Replication to that 2803 // follower is resumed once the leader receives a MsgHeartbeatResp from it. 2804 // We could certainly tick the leader repeatedly to trigger a heartbeat broadcast, but we 2805 // would also risk a slow leader stepping down due to excessive ticks. 2806 // 2807 // Instead, we simply send an artificial MsgHeartbeatResp to the leader to resume replication. 2808 m2 := &raftpb.Message{To: c1.id, From: c2.id, Type: raftpb.MsgHeartbeatResp} 2809 c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m2)}, c2.id) 2810 m3 := &raftpb.Message{To: c1.id, From: c3.id, Type: raftpb.MsgHeartbeatResp} 2811 c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m3)}, c3.id) 2812 2813 network.exec(func(c *chain) { 2814 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 2815 }) 2816 }) 2817 2818 It("new leader should wait for in-flight blocks to commit before accepting new env", func() { 2819 // Scenario: when a node is elected as the new leader and there are still in-flight blocks, 2820 // it should not immediately start accepting new envelopes; instead it should wait for 2821 // those in-flight blocks to be committed, otherwise we may create an uncle block which 2822 // forks and panics the chain. 
2823 // 2824 // Steps: 2825 // - start raft cluster with three nodes and genesis block0 2826 // - order env1 on c1, which creates block1 2827 // - drop MsgApp from 1 to 3 2828 // - drop second round of MsgApp sent from 1 to 2, so that block1 is only committed on c1 2829 // - disconnect c1 and elect c2 2830 // - order env2 on c2. This env must NOT be immediately accepted, otherwise c2 would create 2831 // an uncle block1 based on block0. 2832 // - c2 commits block1 2833 // - c2 accepts env2, and creates block2 2834 // - c2 commits block2 2835 c1.cutter.CutNext = true 2836 c2.cutter.CutNext = true 2837 2838 step1 := c1.getStepFunc() 2839 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2840 stepMsg := &raftpb.Message{} 2841 Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred()) 2842 2843 if dest == 3 { 2844 return nil 2845 } 2846 2847 if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) == 0 { 2848 return nil 2849 } 2850 2851 return step1(dest, msg) 2852 }) 2853 2854 Expect(c1.Order(env, 0)).NotTo(HaveOccurred()) 2855 2856 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2857 Consistently(c2.support.WriteBlockCallCount).Should(Equal(0)) 2858 Consistently(c3.support.WriteBlockCallCount).Should(Equal(0)) 2859 2860 network.disconnect(1) 2861 2862 step2 := c2.getStepFunc() 2863 c2.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2864 stepMsg := &raftpb.Message{} 2865 Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred()) 2866 2867 if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) != 0 && dest == 3 { 2868 for _, ent := range stepMsg.Entries { 2869 if len(ent.Data) != 0 { 2870 return nil 2871 } 2872 } 2873 } 2874 return step2(dest, msg) 2875 }) 2876 2877 network.elect(2) 2878 2879 go func() { 2880 defer GinkgoRecover() 2881 Expect(c2.Order(env, 0)).NotTo(HaveOccurred()) 2882 }() 2883 2884 Consistently(c2.support.WriteBlockCallCount).Should(Equal(0)) 2885 Consistently(c3.support.WriteBlockCallCount).Should(Equal(0)) 2886 2887 c2.setStepFunc(step2) 2888 c2.clock.Increment(interval) 2889 2890 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2891 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2892 2893 b, _ := c2.support.WriteBlockArgsForCall(0) 2894 Expect(b.Header.Number).To(Equal(uint64(1))) 2895 b, _ = c2.support.WriteBlockArgsForCall(1) 2896 Expect(b.Header.Number).To(Equal(uint64(2))) 2897 }) 2898 2899 Context("handling config blocks", func() { 2900 var configEnv *common.Envelope 2901 BeforeEach(func() { 2902 values := map[string]*common.ConfigValue{ 2903 "BatchTimeout": { 2904 Version: 1, 2905 Value: marshalOrPanic(&orderer.BatchTimeout{ 2906 Timeout: "3ms", 2907 }), 2908 }, 2909 } 2910 configEnv = newConfigEnv(channelID, 2911 common.HeaderType_CONFIG, 2912 newConfigUpdateEnv(channelID, nil, values), 2913 ) 2914 }) 2915 2916 It("holds up block creation on leader once a config block has been created and not written out", func() { 2917 // this ensures that the created blocks are not written out 2918 network.disconnect(1) 2919 2920 c1.cutter.CutNext = true 2921 // config block 2922 err := c1.Order(configEnv, 0) 2923 Expect(err).NotTo(HaveOccurred()) 2924 2925 // to avoid data races since we are accessing these within a goroutine 2926 tempEnv := env 2927 tempC1 := c1 2928 2929 done := make(chan struct{}) 2930 2931 // normal block 2932 go func() { 2933 defer GinkgoRecover() 2934 2935 // This should be blocked if config block 
is not committed 2936 err := tempC1.Order(tempEnv, 0) 2937 Expect(err).NotTo(HaveOccurred()) 2938 2939 close(done) 2940 }() 2941 2942 Consistently(done).ShouldNot(BeClosed()) 2943 2944 network.connect(1) 2945 c1.clock.Increment(interval) 2946 2947 network.exec( 2948 func(c *chain) { 2949 Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2950 }) 2951 2952 network.exec( 2953 func(c *chain) { 2954 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2955 }) 2956 }) 2957 2958 It("continues creating blocks on leader after a config block has been successfully written out", func() { 2959 c1.cutter.CutNext = true 2960 // config block 2961 err := c1.Configure(configEnv, 0) 2962 Expect(err).NotTo(HaveOccurred()) 2963 network.exec( 2964 func(c *chain) { 2965 Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2966 }) 2967 2968 // normal block following config block 2969 err = c1.Order(env, 0) 2970 Expect(err).NotTo(HaveOccurred()) 2971 network.exec( 2972 func(c *chain) { 2973 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2974 }) 2975 }) 2976 }) 2977 2978 When("Snapshotting is enabled", func() { 2979 BeforeEach(func() { 2980 c1.opts.SnapshotIntervalSize = 1 2981 c1.opts.SnapshotCatchUpEntries = 1 2982 }) 2983 2984 It("keeps running if some entries in memory are purged", func() { 2985 // Scenario: snapshotting is enabled on node 1 and it purges memory storage 2986 // per every snapshot. Cluster should be correctly functioning. 2987 2988 i, err := c1.opts.MemoryStorage.FirstIndex() 2989 Expect(err).NotTo(HaveOccurred()) 2990 Expect(i).To(Equal(uint64(1))) 2991 2992 c1.cutter.CutNext = true 2993 2994 err = c1.Order(env, 0) 2995 Expect(err).NotTo(HaveOccurred()) 2996 2997 network.exec( 2998 func(c *chain) { 2999 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3000 }) 3001 3002 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 3003 i, err = c1.opts.MemoryStorage.FirstIndex() 3004 Expect(err).NotTo(HaveOccurred()) 3005 3006 err = c1.Order(env, 0) 3007 Expect(err).NotTo(HaveOccurred()) 3008 3009 network.exec( 3010 func(c *chain) { 3011 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 3012 }) 3013 3014 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 3015 i, err = c1.opts.MemoryStorage.FirstIndex() 3016 Expect(err).NotTo(HaveOccurred()) 3017 3018 err = c1.Order(env, 0) 3019 Expect(err).NotTo(HaveOccurred()) 3020 3021 network.exec( 3022 func(c *chain) { 3023 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 3024 }) 3025 3026 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 3027 }) 3028 3029 It("lagged node can catch up using snapshot", func() { 3030 network.disconnect(2) 3031 c1.cutter.CutNext = true 3032 3033 c2Lasti, _ := c2.opts.MemoryStorage.LastIndex() 3034 var blockCnt int 3035 // Order blocks until first index of c1 memory is greater than last index of c2, 3036 // so a snapshot will be sent to c2 when it rejoins network 3037 Eventually(func() bool { 3038 c1Firsti, _ := c1.opts.MemoryStorage.FirstIndex() 3039 if c1Firsti > c2Lasti+1 { 3040 return true 3041 } 3042 3043 Expect(c1.Order(env, 0)).To(Succeed()) 3044 blockCnt++ 3045 
Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 3046 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 3047 return false 3048 }, LongEventualTimeout).Should(BeTrue()) 3049 3050 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3051 3052 network.join(2, false) 3053 3054 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 3055 indices := etcdraft.ListSnapshots(logger, c2.opts.SnapDir) 3056 Expect(indices).To(HaveLen(1)) 3057 gap := indices[0] - c2Lasti 3058 3059 // TODO In theory, "equal" is the accurate behavior we expect. However, the eviction suspector, 3060 // which calls the block puller, is still relying on the real clock, and sometimes increments the puller 3061 // call count. Therefore we are being more lenient here until the suspector starts using the fake clock 3062 // so we have more deterministic control over it. 3063 Expect(c2.puller.PullBlockCallCount()).To(BeNumerically(">=", int(gap))) 3064 3065 // chain should keep functioning 3066 Expect(c2.Order(env, 0)).To(Succeed()) 3067 3068 network.exec( 3069 func(c *chain) { 3070 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(blockCnt + 1)) 3071 }) 3072 }) 3073 }) 3074 3075 Context("failover", func() { 3076 It("follower should step up as leader upon failover", func() { 3077 network.stop(1) 3078 network.elect(2) 3079 3080 By("order envelope on new leader") 3081 c2.cutter.CutNext = true 3082 err := c2.Order(env, 0) 3083 Expect(err).NotTo(HaveOccurred()) 3084 3085 // block should not be produced on chain 1 3086 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3087 3088 // block should be produced on chain 2 & 3 3089 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3090 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3091 3092 By("order envelope on follower") 3093 err = c3.Order(env, 0) 3094 Expect(err).NotTo(HaveOccurred()) 3095 3096 // block should not be produced on chain 1 3097 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3098 3099 // block should be produced on chain 2 & 3 3100 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 3101 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 3102 }) 3103 3104 It("follower cannot be elected if its log is not up-to-date", func() { 3105 network.disconnect(2) 3106 3107 c1.cutter.CutNext = true 3108 err := c1.Order(env, 0) 3109 Expect(err).NotTo(HaveOccurred()) 3110 3111 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3112 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3113 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3114 3115 network.disconnect(1) 3116 network.connect(2) 3117 3118 // node 2 has not caught up with other nodes 3119 for tick := 0; tick < 2*ELECTION_TICK-1; tick++ { 3120 c2.clock.Increment(interval) 3121 Consistently(c2.observe).ShouldNot(Receive(Equal(2))) 3122 } 3123 3124 // When PreVote is enabled, node 2 fails to collect enough 3125 // PreVotes because its index is not up-to-date. Therefore, it 3126 // does not cause a leader change on other nodes. 
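// Node 3 should not observe any leadership change while the lagging node 2 keeps pre-campaigning; node 3's own log is up to date, so it can still win a real election below.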
3127 Consistently(c3.observe).ShouldNot(Receive()) 3128 network.elect(3) // node 3 has newest logs among 2&3, so it can be elected 3129 }) 3130 3131 It("PreVote prevents reconnected node from disturbing network", func() { 3132 network.disconnect(2) 3133 3134 c1.cutter.CutNext = true 3135 err := c1.Order(env, 0) 3136 Expect(err).NotTo(HaveOccurred()) 3137 3138 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3139 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3140 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3141 3142 network.connect(2) 3143 3144 for tick := 0; tick < 2*ELECTION_TICK-1; tick++ { 3145 c2.clock.Increment(interval) 3146 Consistently(c2.observe).ShouldNot(Receive(Equal(2))) 3147 } 3148 3149 Consistently(c1.observe).ShouldNot(Receive()) 3150 Consistently(c3.observe).ShouldNot(Receive()) 3151 }) 3152 3153 It("follower can catch up and then campaign with success", func() { 3154 network.disconnect(2) 3155 3156 c1.cutter.CutNext = true 3157 for i := 0; i < 10; i++ { 3158 err := c1.Order(env, 0) 3159 Expect(err).NotTo(HaveOccurred()) 3160 } 3161 3162 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3163 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3164 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3165 3166 network.join(2, false) 3167 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3168 3169 network.disconnect(1) 3170 network.elect(2) 3171 }) 3172 3173 It("purges blockcutter, stops timer and discards created blocks if leadership is lost", func() { 3174 // enqueue one transaction into 1's blockcutter to test for purging of block cutter 3175 c1.cutter.CutNext = false 3176 err := c1.Order(env, 0) 3177 Expect(err).NotTo(HaveOccurred()) 3178 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 3179 3180 // no block should be written because the env has not been cut into a block yet 3181 c1.clock.WaitForNWatchersAndIncrement(interval, 2) 3182 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 3183 3184 network.disconnect(1) 3185 network.elect(2) 3186 network.join(1, true) 3187 3188 Eventually(c1.clock.WatcherCount, LongEventualTimeout).Should(Equal(1)) // blockcutter timer is stopped 3189 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(0)) 3190 // the created block should be discarded since there is a leadership change 3191 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 3192 3193 network.disconnect(2) 3194 network.elect(1) 3195 3196 err = c1.Order(env, 0) 3197 Expect(err).NotTo(HaveOccurred()) 3198 3199 // The following group of assertions is redundant - it's here for completeness. 3200 // If the blockcutter has not been reset, fast-forwarding 1's clock to 'timeout' should result in the blockcutter timer firing. 3201 // If the blockcutter has been reset, fast-forwarding won't do anything. 
3202 // 3203 // Put differently: 3204 // 3205 // correct: 3206 // stop start fire 3207 // |--------------|---------------------------| 3208 // n*intervals timeout 3209 // (advanced in election) 3210 // 3211 // wrong: 3212 // unstop fire 3213 // |---------------------------| 3214 // timeout 3215 // 3216 // timeout-n*interval n*interval 3217 // |-----------|----------------| 3218 // ^ ^ 3219 // at this point of time it should fire 3220 // timer should not fire at this point 3221 3222 c1.clock.WaitForNWatchersAndIncrement(timeout-interval, 2) 3223 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 3224 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 3225 3226 c1.clock.Increment(interval) 3227 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3228 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3229 }) 3230 3231 It("stale leader should not be able to propose block because of lagged term", func() { 3232 network.disconnect(1) 3233 network.elect(2) 3234 network.connect(1) 3235 3236 c1.cutter.CutNext = true 3237 err := c1.Order(env, 0) 3238 Expect(err).NotTo(HaveOccurred()) 3239 3240 network.exec( 3241 func(c *chain) { 3242 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 3243 }) 3244 }) 3245 3246 It("aborts waiting for block to be committed upon leadership lost", func() { 3247 network.disconnect(1) 3248 3249 c1.cutter.CutNext = true 3250 err := c1.Order(env, 0) 3251 Expect(err).NotTo(HaveOccurred()) 3252 3253 network.exec( 3254 func(c *chain) { 3255 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 3256 }) 3257 3258 network.elect(2) 3259 network.connect(1) 3260 3261 c2.clock.Increment(interval) 3262 // this check guarantees that signal on resignC is consumed in commitBatches method. 
3263 Eventually(c1.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 2, RaftState: raft.StateFollower}))) 3264 }) 3265 }) 3266 }) 3267 }) 3268 }) 3269 3270 func nodeConfigFromMetadata(consenterMetadata *raftprotos.ConfigMetadata) []cluster.RemoteNode { 3271 var nodes []cluster.RemoteNode 3272 for i, consenter := range consenterMetadata.Consenters { 3273 // For now, skip ourselves 3274 if i == 0 { 3275 continue 3276 } 3277 serverDER, _ := pem.Decode(consenter.ServerTlsCert) 3278 clientDER, _ := pem.Decode(consenter.ClientTlsCert) 3279 node := cluster.RemoteNode{ 3280 ID: uint64(i + 1), 3281 Endpoint: "localhost:7050", 3282 ServerTLSCert: serverDER.Bytes, 3283 ClientTLSCert: clientDER.Bytes, 3284 } 3285 nodes = append(nodes, node) 3286 } 3287 return nodes 3288 } 3289 3290 func createMetadata(nodeCount int, tlsCA tlsgen.CA) *raftprotos.ConfigMetadata { 3291 md := &raftprotos.ConfigMetadata{Options: &raftprotos.Options{ 3292 TickInterval: time.Duration(interval).String(), 3293 ElectionTick: ELECTION_TICK, 3294 HeartbeatTick: HEARTBEAT_TICK, 3295 MaxInflightBlocks: 5, 3296 }} 3297 for i := 0; i < nodeCount; i++ { 3298 md.Consenters = append(md.Consenters, &raftprotos.Consenter{ 3299 Host: "localhost", 3300 Port: 7050, 3301 ServerTlsCert: serverTLSCert(tlsCA), 3302 ClientTlsCert: clientTLSCert(tlsCA), 3303 }) 3304 } 3305 return md 3306 } 3307 3308 func serverTLSCert(tlsCA tlsgen.CA) []byte { 3309 cert, err := tlsCA.NewServerCertKeyPair("localhost") 3310 if err != nil { 3311 panic(err) 3312 } 3313 return cert.Cert 3314 } 3315 3316 func clientTLSCert(tlsCA tlsgen.CA) []byte { 3317 cert, err := tlsCA.NewClientCertKeyPair() 3318 if err != nil { 3319 panic(err) 3320 } 3321 return cert.Cert 3322 } 3323 3324 // marshalOrPanic serializes a protobuf message and panics if this 3325 // operation fails 3326 func marshalOrPanic(pb proto.Message) []byte { 3327 data, err := proto.Marshal(pb) 3328 if err != nil { 3329 panic(err) 3330 } 3331 return data 3332 } 3333 3334 // helpers to facilitate tests 3335 type stepFunc func(dest uint64, msg *orderer.ConsensusRequest) error 3336 3337 type chain struct { 3338 id uint64 3339 3340 stepLock sync.Mutex 3341 step stepFunc 3342 3343 // msgBuffer serializes ingress messages for a chain 3344 // so they are delivered in the same order 3345 msgBuffer chan *msg 3346 3347 support *consensusmocks.FakeConsenterSupport 3348 cutter *mockblockcutter.Receiver 3349 configurator *mocks.FakeConfigurator 3350 rpc *mocks.FakeRPC 3351 storage *raft.MemoryStorage 3352 clock *fakeclock.FakeClock 3353 opts etcdraft.Options 3354 puller *mocks.FakeBlockPuller 3355 3356 // store written blocks to be returned by mock block puller 3357 ledgerLock sync.RWMutex 3358 ledger map[uint64]*common.Block 3359 ledgerHeight uint64 3360 lastConfigBlockNumber uint64 3361 3362 observe chan raft.SoftState 3363 unstarted chan struct{} 3364 stopped chan struct{} 3365 haltCallback func() 3366 3367 fakeFields *fakeMetricsFields 3368 3369 *etcdraft.Chain 3370 3371 cryptoProvider bccsp.BCCSP 3372 } 3373 3374 type msg struct { 3375 req *orderer.ConsensusRequest 3376 sender uint64 3377 } 3378 3379 func newChain( 3380 timeout time.Duration, 3381 channel, dataDir string, 3382 id uint64, 3383 raftMetadata *raftprotos.BlockMetadata, 3384 consenters map[uint64]*raftprotos.Consenter, 3385 cryptoProvider bccsp.BCCSP, 3386 support *consensusmocks.FakeConsenterSupport, 3387 haltCallback func(), 3388 ) *chain { 3389 rpc := &mocks.FakeRPC{} 3390 clock := fakeclock.NewFakeClock(time.Now()) 3391 storage := 
raft.NewMemoryStorage() 3392 3393 fakeFields := newFakeMetricsFields() 3394 3395 opts := etcdraft.Options{ 3396 RPCTimeout: timeout, 3397 RaftID: uint64(id), 3398 Clock: clock, 3399 TickInterval: interval, 3400 ElectionTick: ELECTION_TICK, 3401 HeartbeatTick: HEARTBEAT_TICK, 3402 MaxSizePerMsg: 1024 * 1024, 3403 MaxInflightBlocks: 256, 3404 BlockMetadata: raftMetadata, 3405 LeaderCheckInterval: 500 * time.Millisecond, 3406 Consenters: consenters, 3407 Logger: flogging.NewFabricLogger(zap.NewExample()), 3408 MemoryStorage: storage, 3409 WALDir: path.Join(dataDir, "wal"), 3410 SnapDir: path.Join(dataDir, "snapshot"), 3411 Metrics: newFakeMetrics(fakeFields), 3412 } 3413 3414 if support == nil { 3415 support = &consensusmocks.FakeConsenterSupport{} 3416 support.ChannelIDReturns(channel) 3417 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 3418 } 3419 cutter := mockblockcutter.NewReceiver() 3420 close(cutter.Block) 3421 support.BlockCutterReturns(cutter) 3422 3423 // upon leader change, lead is reset to 0 before being set to the actual 3424 // new leader, i.e. 1 -> 0 -> 2. Therefore 2 values will be 3425 // sent on this chan, so the buffer size needs to be 2 3426 observe := make(chan raft.SoftState, 2) 3427 3428 configurator := &mocks.FakeConfigurator{} 3429 puller := &mocks.FakeBlockPuller{} 3430 3431 ch := make(chan struct{}) 3432 close(ch) 3433 3434 c := &chain{ 3435 id: id, 3436 support: support, 3437 cutter: cutter, 3438 rpc: rpc, 3439 storage: storage, 3440 observe: observe, 3441 clock: clock, 3442 opts: opts, 3443 unstarted: ch, 3444 stopped: make(chan struct{}), 3445 configurator: configurator, 3446 puller: puller, 3447 ledger: map[uint64]*common.Block{ 3448 0: getSeedBlock(), // Very first block 3449 }, 3450 ledgerHeight: 1, 3451 fakeFields: fakeFields, 3452 cryptoProvider: cryptoProvider, 3453 msgBuffer: make(chan *msg, 500), 3454 haltCallback: haltCallback, 3455 } 3456 3457 // receives normal blocks and metadata and appends them to 3458 // the in-memory ledger map to simulate write behaviour 3459 appendNormalBlockToLedger := func(b *common.Block, meta []byte) { 3460 c.ledgerLock.Lock() 3461 defer c.ledgerLock.Unlock() 3462 3463 b = proto.Clone(b).(*common.Block) 3464 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 3465 Expect(err).NotTo(HaveOccurred()) 3466 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 3467 3468 lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber}) 3469 b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{ 3470 Value: lastConfigValue, 3471 }) 3472 3473 c.ledger[b.Header.Number] = b 3474 if c.ledgerHeight < b.Header.Number+1 { 3475 c.ledgerHeight = b.Header.Number + 1 3476 } 3477 } 3478 3479 // receives config blocks and metadata and appends them to 3480 // the in-memory ledger map to simulate write behaviour 3481 appendConfigBlockToLedger := func(b *common.Block, meta []byte) { 3482 c.ledgerLock.Lock() 3483 defer c.ledgerLock.Unlock() 3484 3485 b = proto.Clone(b).(*common.Block) 3486 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 3487 Expect(err).NotTo(HaveOccurred()) 3488 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 3489 3490 c.lastConfigBlockNumber = b.Header.Number 3491 3492 lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber}) 3493 b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{ 3494 Value: lastConfigValue, 3495 }) 3496 3497
c.ledger[b.Header.Number] = b 3498 if c.ledgerHeight < b.Header.Number+1 { 3499 c.ledgerHeight = b.Header.Number + 1 3500 } 3501 } 3502 3503 c.support.WriteBlockStub = appendNormalBlockToLedger 3504 c.support.WriteConfigBlockStub = appendConfigBlockToLedger 3505 3506 // returns current ledger height 3507 c.support.HeightStub = func() uint64 { 3508 c.ledgerLock.RLock() 3509 defer c.ledgerLock.RUnlock() 3510 return c.ledgerHeight 3511 } 3512 3513 // reads block from the ledger 3514 c.support.BlockStub = func(number uint64) *common.Block { 3515 c.ledgerLock.RLock() 3516 defer c.ledgerLock.RUnlock() 3517 return c.ledger[number] 3518 } 3519 3520 // consume ingress messages for chain 3521 go func() { 3522 for msg := range c.msgBuffer { 3523 c.Consensus(msg.req, msg.sender) 3524 } 3525 }() 3526 3527 return c 3528 } 3529 3530 func (c *chain) init() { 3531 ch, err := etcdraft.NewChain( 3532 c.support, 3533 c.opts, 3534 c.configurator, 3535 c.rpc, 3536 c.cryptoProvider, 3537 func() (etcdraft.BlockPuller, error) { return c.puller, nil }, 3538 c.haltCallback, 3539 c.observe, 3540 ) 3541 Expect(err).NotTo(HaveOccurred()) 3542 c.Chain = ch 3543 } 3544 3545 func (c *chain) start() { 3546 c.unstarted = nil 3547 c.Start() 3548 } 3549 3550 func (c *chain) setStepFunc(f stepFunc) { 3551 c.stepLock.Lock() 3552 c.step = f 3553 c.stepLock.Unlock() 3554 } 3555 3556 func (c *chain) getStepFunc() stepFunc { 3557 c.stepLock.Lock() 3558 defer c.stepLock.Unlock() 3559 return c.step 3560 } 3561 3562 type network struct { 3563 delayWG sync.WaitGroup 3564 sync.RWMutex 3565 3566 leader uint64 3567 chains map[uint64]*chain 3568 3569 // links simulates the configuration of comm layer (link is bi-directional). 3570 // if links[left][right] == true, right can send msg to left. 3571 links map[uint64]map[uint64]bool 3572 // connectivity determines if a node is connected to network. This is used for tests 3573 // to simulate network partition. 
3574 connectivity map[uint64]bool 3575 } 3576 3577 func (n *network) link(from []uint64, to uint64) { 3578 links := make(map[uint64]bool) 3579 for _, id := range from { 3580 links[id] = true 3581 } 3582 3583 n.Lock() 3584 defer n.Unlock() 3585 3586 n.links[to] = links 3587 } 3588 3589 func (n *network) linked(from, to uint64) bool { 3590 n.RLock() 3591 defer n.RUnlock() 3592 3593 return n.links[to][from] 3594 } 3595 3596 func (n *network) connect(id uint64) { 3597 n.Lock() 3598 defer n.Unlock() 3599 3600 n.connectivity[id] = true 3601 } 3602 3603 func (n *network) disconnect(id uint64) { 3604 n.Lock() 3605 defer n.Unlock() 3606 3607 n.connectivity[id] = false 3608 } 3609 3610 func (n *network) connected(id uint64) bool { 3611 n.RLock() 3612 defer n.RUnlock() 3613 3614 return n.connectivity[id] 3615 } 3616 3617 func (n *network) addChain(c *chain) { 3618 n.connect(c.id) // chain is connected by default 3619 3620 c.step = func(dest uint64, req *orderer.ConsensusRequest) error { 3621 if !n.linked(c.id, dest) { 3622 return errors.Errorf("connection refused") 3623 } 3624 3625 if !n.connected(c.id) || !n.connected(dest) { 3626 return errors.Errorf("connection lost") 3627 } 3628 3629 n.RLock() 3630 target := n.chains[dest] 3631 n.RUnlock() 3632 target.msgBuffer <- &msg{req: req, sender: c.id} 3633 return nil 3634 } 3635 3636 c.rpc.SendConsensusStub = func(dest uint64, msg *orderer.ConsensusRequest) error { 3637 c.stepLock.Lock() 3638 defer c.stepLock.Unlock() 3639 return c.step(dest, msg) 3640 } 3641 3642 c.rpc.SendSubmitStub = func(dest uint64, msg *orderer.SubmitRequest, f func(error)) error { 3643 if !n.linked(c.id, dest) { 3644 err := errors.Errorf("connection refused") 3645 f(err) 3646 return err 3647 } 3648 3649 if !n.connected(c.id) || !n.connected(dest) { 3650 err := errors.Errorf("connection lost") 3651 f(err) 3652 return err 3653 } 3654 3655 n.RLock() 3656 target := n.chains[dest] 3657 n.RUnlock() 3658 go func() { 3659 n.Lock() 3660 n.delayWG.Wait() 3661 n.Unlock() 3662 3663 defer GinkgoRecover() 3664 target.Submit(msg, c.id) 3665 f(nil) 3666 }() 3667 return nil 3668 } 3669 3670 c.puller.PullBlockStub = func(i uint64) *common.Block { 3671 n.RLock() 3672 leaderChain := n.chains[n.leader] 3673 n.RUnlock() 3674 3675 leaderChain.ledgerLock.RLock() 3676 defer leaderChain.ledgerLock.RUnlock() 3677 block := leaderChain.ledger[i] 3678 return block 3679 } 3680 3681 c.puller.HeightsByEndpointsStub = func() (map[string]uint64, error) { 3682 n.RLock() 3683 leader := n.chains[n.leader] 3684 n.RUnlock() 3685 3686 if leader == nil { 3687 return nil, errors.Errorf("ledger not available") 3688 } 3689 3690 leader.ledgerLock.RLock() 3691 defer leader.ledgerLock.RUnlock() 3692 return map[string]uint64{"leader": leader.ledgerHeight}, nil 3693 } 3694 3695 c.configurator.ConfigureCalls(func(channel string, nodes []cluster.RemoteNode) { 3696 var ids []uint64 3697 for _, node := range nodes { 3698 ids = append(ids, node.ID) 3699 } 3700 n.link(ids, c.id) 3701 }) 3702 3703 n.Lock() 3704 defer n.Unlock() 3705 n.chains[c.id] = c 3706 } 3707 3708 func createNetwork( 3709 timeout time.Duration, 3710 channel, dataDir string, 3711 raftMetadata *raftprotos.BlockMetadata, 3712 consenters map[uint64]*raftprotos.Consenter, 3713 cryptoProvider bccsp.BCCSP, 3714 tlsCA tlsgen.CA, 3715 haltCallback func(), 3716 ) *network { 3717 n := &network{ 3718 chains: make(map[uint64]*chain), 3719 connectivity: make(map[uint64]bool), 3720 links: make(map[uint64]map[uint64]bool), 3721 } 3722 3723 for _, nodeID := range 
raftMetadata.ConsenterIds { 3724 dir, err := ioutil.TempDir(dataDir, fmt.Sprintf("node-%d-", nodeID)) 3725 Expect(err).NotTo(HaveOccurred()) 3726 3727 m := proto.Clone(raftMetadata).(*raftprotos.BlockMetadata) 3728 support := &consensusmocks.FakeConsenterSupport{} 3729 support.ChannelIDReturns(channel) 3730 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 3731 mockOrdererConfig := mockOrdererWithTLSRootCert(timeout, nil, tlsCA) 3732 support.SharedConfigReturns(mockOrdererConfig) 3733 n.addChain(newChain(timeout, channel, dir, nodeID, m, consenters, cryptoProvider, support, haltCallback)) 3734 } 3735 3736 return n 3737 } 3738 3739 // tests could alter the configuration of a chain before creating it 3740 func (n *network) init() { 3741 n.exec(func(c *chain) { c.init() }) 3742 } 3743 3744 func (n *network) start(ids ...uint64) { 3745 nodes := ids 3746 if len(nodes) == 0 { 3747 for i := range n.chains { 3748 nodes = append(nodes, i) 3749 } 3750 } 3751 3752 for _, id := range nodes { 3753 n.chains[id].start() 3754 3755 // When the Raft node bootstraps, it produces a ConfChange 3756 // to add itself, which needs to be consumed with Ready(). 3757 // If there are pending configuration changes in raft, 3758 // it refuses to campaign, no matter how many ticks are supplied. 3759 // This is not a problem in production code because eventually 3760 // raft.Ready will be consumed as real time goes by. 3761 // 3762 // However, this is problematic when using a fake clock and artificial 3763 // ticks. Instead of ticking raft indefinitely until raft.Ready is 3764 // consumed, this check is added to indirectly guarantee 3765 // that the first ConfChange is actually consumed and we can safely 3766 // proceed to tick raft. 3767 Eventually(func() error { 3768 _, err := n.chains[id].storage.Entries(1, 1, 1) 3769 return err 3770 }, LongEventualTimeout).ShouldNot(HaveOccurred()) 3771 Eventually(n.chains[id].WaitReady, LongEventualTimeout).ShouldNot(HaveOccurred()) 3772 } 3773 } 3774 3775 func (n *network) stop(ids ...uint64) { 3776 nodes := ids 3777 if len(nodes) == 0 { 3778 for i := range n.chains { 3779 nodes = append(nodes, i) 3780 } 3781 } 3782 3783 for _, id := range nodes { 3784 c := n.chains[id] 3785 c.Halt() 3786 Eventually(c.Errored).Should(BeClosed()) 3787 select { 3788 case <-c.stopped: 3789 default: 3790 close(c.stopped) 3791 } 3792 } 3793 } 3794 3795 func (n *network) exec(f func(c *chain), ids ...uint64) { 3796 if len(ids) == 0 { 3797 for _, c := range n.chains { 3798 f(c) 3799 } 3800 3801 return 3802 } 3803 3804 for _, i := range ids { 3805 f(n.chains[i]) 3806 } 3807 } 3808 3809 // connect a node to the network and tick the leader to trigger 3810 // a heartbeat so the newly joined node can detect the leader. 3811 // 3812 // expectLeaderChange controls whether a leader change should 3813 // be observed on the newly joined node. 3814 // - it should be true if the newly joined node was the leader 3815 // - it should be false if the newly joined node was a follower and 3816 // already knows the leader. 3817 func (n *network) join(id uint64, expectLeaderChange bool) { 3818 n.connect(id) 3819 3820 n.RLock() 3821 leader, follower := n.chains[n.leader], n.chains[id] 3822 n.RUnlock() 3823 3824 step := leader.getStepFunc() 3825 signal := make(chan struct{}) 3826 leader.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 3827 if dest == id { 3828 // close signal channel when a message targeting the newly 3829 // joined node is observed on the wire.
3830 select { 3831 case <-signal: 3832 default: 3833 close(signal) 3834 } 3835 } 3836 3837 return step(dest, msg) 3838 }) 3839 3840 // Tick the leader so it sends out a heartbeat to the new node. 3841 // One tick _may_ not be enough because the leader might be busy 3842 // and this tick is dropped on the floor. 3843 Eventually(func() <-chan struct{} { 3844 leader.clock.Increment(interval) 3845 return signal 3846 }, LongEventualTimeout, 100*time.Millisecond).Should(BeClosed()) 3847 3848 leader.setStepFunc(step) 3849 3850 if expectLeaderChange { 3851 Eventually(follower.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: n.leader, RaftState: raft.StateFollower}))) 3852 } 3853 3854 // wait for the newly joined node to catch up with the leader 3855 i, err := n.chains[n.leader].opts.MemoryStorage.LastIndex() 3856 Expect(err).NotTo(HaveOccurred()) 3857 Eventually(n.chains[id].opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(i)) 3858 } 3859 3860 // elect deterministically elects a node as leader 3861 func (n *network) elect(id uint64) { 3862 n.RLock() 3863 // skip observing leader change on followers if the same leader is elected as the previous one, 3864 // because this may happen too quickly from a slow follower's point of view, and the 0 -> X transition 3865 // may not be emitted at all. 3866 observeFollowers := id != n.leader 3867 candidate := n.chains[id] 3868 var followers []*chain 3869 for _, c := range n.chains { 3870 if c.id != id { 3871 followers = append(followers, c) 3872 } 3873 } 3874 n.RUnlock() 3875 3876 // Send node an artificial MsgTimeoutNow to emulate leadership transfer. 3877 fmt.Fprintf(GinkgoWriter, "Send artificial MsgTimeoutNow to elect node %d\n", id) 3878 candidate.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow, To: id})}, 0) 3879 Eventually(candidate.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateLeader))) 3880 3881 n.Lock() 3882 n.leader = id 3883 n.Unlock() 3884 3885 if !observeFollowers { 3886 return 3887 } 3888 3889 // now observe leader change on other nodes 3890 for _, c := range followers { 3891 if c.id == id { 3892 continue 3893 } 3894 3895 select { 3896 case <-c.stopped: // skip check if node is stopped 3897 case <-c.unstarted: // skip check if node is not started yet 3898 default: 3899 if n.linked(c.id, id) && n.connected(c.id) { 3900 Eventually(c.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateFollower))) 3901 } 3902 } 3903 } 3904 } 3905 3906 // newConfigEnv creates a config envelope of the given header type wrapping configUpdateEnv 3907 func newConfigEnv(chainID string, headerType common.HeaderType, configUpdateEnv *common.ConfigUpdateEnvelope) *common.Envelope { 3908 return &common.Envelope{ 3909 Payload: marshalOrPanic(&common.Payload{ 3910 Header: &common.Header{ 3911 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 3912 Type: int32(headerType), 3913 ChannelId: chainID, 3914 }), 3915 }, 3916 Data: marshalOrPanic(&common.ConfigEnvelope{ 3917 LastUpdate: &common.Envelope{ 3918 Payload: marshalOrPanic(&common.Payload{ 3919 Header: &common.Header{ 3920 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 3921 Type: int32(common.HeaderType_CONFIG_UPDATE), 3922 ChannelId: chainID, 3923 }), 3924 }, 3925 Data: marshalOrPanic(configUpdateEnv), 3926 }), // common.Payload 3927 }, // LastUpdate 3928 }), 3929 }), 3930 } 3931 } 3932 3933 func newConfigUpdateEnv(chainID string, oldValues, newValues map[string]*common.ConfigValue) *common.ConfigUpdateEnvelope { 3934 return
&common.ConfigUpdateEnvelope{ 3935 ConfigUpdate: marshalOrPanic(&common.ConfigUpdate{ 3936 ChannelId: chainID, 3937 ReadSet: &common.ConfigGroup{ 3938 Groups: map[string]*common.ConfigGroup{ 3939 "Orderer": { 3940 Values: oldValues, 3941 }, 3942 }, 3943 }, 3944 WriteSet: &common.ConfigGroup{ 3945 Groups: map[string]*common.ConfigGroup{ 3946 "Orderer": { 3947 Values: newValues, 3948 }, 3949 }, 3950 }, // WriteSet 3951 }), 3952 } 3953 } 3954 3955 func getSeedBlock() *common.Block { 3956 return &common.Block{ 3957 Header: &common.BlockHeader{}, 3958 Data: &common.BlockData{Data: [][]byte{[]byte("foo")}}, 3959 Metadata: &common.BlockMetadata{Metadata: make([][]byte, 4)}, 3960 } 3961 } 3962 3963 func StateEqual(lead uint64, state raft.StateType) types.GomegaMatcher { 3964 return Equal(raft.SoftState{Lead: lead, RaftState: state}) 3965 } 3966 3967 func BeFollower() types.GomegaMatcher { 3968 return &StateMatcher{expect: raft.StateFollower} 3969 } 3970 3971 type StateMatcher struct { 3972 expect raft.StateType 3973 } 3974 3975 func (stmatcher *StateMatcher) Match(actual interface{}) (success bool, err error) { 3976 state, ok := actual.(raft.SoftState) 3977 if !ok { 3978 return false, errors.Errorf("StateMatcher expects a raft SoftState") 3979 } 3980 3981 return state.RaftState == stmatcher.expect, nil 3982 } 3983 3984 func (stmatcher *StateMatcher) FailureMessage(actual interface{}) (message string) { 3985 state, ok := actual.(raft.SoftState) 3986 if !ok { 3987 return "StateMatcher expects a raft SoftState" 3988 } 3989 3990 return fmt.Sprintf("Expected %s to be %s", state.RaftState, stmatcher.expect) 3991 } 3992 3993 func (stmatcher *StateMatcher) NegatedFailureMessage(actual interface{}) (message string) { 3994 state, ok := actual.(raft.SoftState) 3995 if !ok { 3996 return "StateMatcher expects a raft SoftState" 3997 } 3998 3999 return fmt.Sprintf("Expected %s not to be %s", state.RaftState, stmatcher.expect) 4000 } 4001 4002 func noOpBlockPuller() (etcdraft.BlockPuller, error) { 4003 bp := &mocks.FakeBlockPuller{} 4004 return bp, nil 4005 }
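
// The function below is an illustrative, uninvoked sketch -- it is not part of the
// test suite and is never called -- showing how the harness defined above
// (createNetwork, network.init/start/elect/exec and the per-chain fakes) is
// typically combined by the multi-node specs earlier in this file. The timeout,
// channel name and node IDs are assumptions chosen only for readability.
func exampleNetworkUsage(dataDir string, raftMetadata *raftprotos.BlockMetadata, consenters map[uint64]*raftprotos.Consenter, cryptoProvider bccsp.BCCSP, tlsCA tlsgen.CA, env *common.Envelope) {
	// Build a network of chains backed by fakes, create the underlying
	// etcdraft.Chain instances, and boot the raft nodes.
	network := createNetwork(time.Second, "test-channel", dataDir, raftMetadata, consenters, cryptoProvider, tlsCA, nil)
	network.init()
	network.start()
	defer network.stop()

	// Deterministically make node 1 the leader, then order a single envelope on it.
	network.elect(1)
	c1 := network.chains[1]
	c1.cutter.CutNext = true
	Expect(c1.Order(env, 0)).To(Succeed())

	// Every connected node should eventually commit the resulting block.
	network.exec(func(c *chain) {
		Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
	})
}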
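
// Another uninvoked sketch: how the Gomega helpers defined above are meant to be
// used when observing raft state transitions. StateEqual matches a specific
// (leader, role) pair, while BeFollower only checks the role and ignores who the
// leader is. The two channels and the id parameter are placeholders for illustration.
func exampleStateMatchers(leaderObserve, followerObserve <-chan raft.SoftState, id uint64) {
	// The elected node should eventually report itself as the leader with the given id...
	Eventually(leaderObserve, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateLeader)))
	// ...while a peer only needs to end up a follower; BeFollower ignores the leader id.
	Eventually(followerObserve, LongEventualTimeout).Should(Receive(BeFollower()))
}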