github.com/osdi23p228/fabric@v0.0.0-20221218062954-77808885f5db/orderer/consensus/etcdraft/chain_test.go (about) 1 /* 2 Copyright IBM Corp. All Rights Reserved. 3 4 SPDX-License-Identifier: Apache-2.0 5 */ 6 7 package etcdraft_test 8 9 import ( 10 "encoding/pem" 11 "fmt" 12 "io/ioutil" 13 "os" 14 "os/user" 15 "path" 16 "sync" 17 "time" 18 19 "code.cloudfoundry.org/clock/fakeclock" 20 "github.com/golang/protobuf/proto" 21 "github.com/hyperledger/fabric-protos-go/common" 22 "github.com/hyperledger/fabric-protos-go/orderer" 23 raftprotos "github.com/hyperledger/fabric-protos-go/orderer/etcdraft" 24 "github.com/osdi23p228/fabric/bccsp" 25 "github.com/osdi23p228/fabric/bccsp/factory" 26 "github.com/osdi23p228/fabric/bccsp/sw" 27 "github.com/osdi23p228/fabric/common/channelconfig" 28 "github.com/osdi23p228/fabric/common/crypto/tlsgen" 29 "github.com/osdi23p228/fabric/common/flogging" 30 "github.com/osdi23p228/fabric/orderer/common/cluster" 31 orderer_types "github.com/osdi23p228/fabric/orderer/common/types" 32 "github.com/osdi23p228/fabric/orderer/consensus/etcdraft" 33 "github.com/osdi23p228/fabric/orderer/consensus/etcdraft/mocks" 34 consensusmocks "github.com/osdi23p228/fabric/orderer/consensus/mocks" 35 mockblockcutter "github.com/osdi23p228/fabric/orderer/mocks/common/blockcutter" 36 "github.com/osdi23p228/fabric/protoutil" 37 . "github.com/onsi/ginkgo" 38 . "github.com/onsi/gomega" 39 "github.com/onsi/gomega/types" 40 "github.com/pkg/errors" 41 "go.etcd.io/etcd/raft" 42 "go.etcd.io/etcd/raft/raftpb" 43 "go.uber.org/zap" 44 ) 45 46 const ( 47 interval = 100 * time.Millisecond 48 LongEventualTimeout = 10 * time.Second 49 50 // 10 is the default setting of ELECTION_TICK. 51 // We used to have a small number here (2) to reduce the time for test - we don't 52 // need to tick node 10 times to trigger election - however, we are using another 53 // mechanism to trigger it now which does not depend on time: send an artificial 54 // MsgTimeoutNow to node. 55 ELECTION_TICK = 10 56 HEARTBEAT_TICK = 1 57 ) 58 59 func init() { 60 factory.InitFactories(nil) 61 } 62 63 func mockOrderer(metadata []byte) *mocks.OrdererConfig { 64 return mockOrdererWithBatchTimeout(time.Second, metadata) 65 } 66 67 func mockOrdererWithBatchTimeout(batchTimeout time.Duration, metadata []byte) *mocks.OrdererConfig { 68 mockOrderer := &mocks.OrdererConfig{} 69 mockOrderer.BatchTimeoutReturns(batchTimeout) 70 mockOrderer.ConsensusMetadataReturns(metadata) 71 return mockOrderer 72 } 73 74 func mockOrdererWithTLSRootCert(batchTimeout time.Duration, metadata []byte, tlsCA tlsgen.CA) *mocks.OrdererConfig { 75 mockOrderer := mockOrdererWithBatchTimeout(batchTimeout, metadata) 76 mockOrg := &mocks.OrdererOrg{} 77 mockMSP := &mocks.MSP{} 78 mockMSP.GetTLSRootCertsReturns([][]byte{tlsCA.CertBytes()}) 79 mockOrg.MSPReturns(mockMSP) 80 mockOrderer.OrganizationsReturns(map[string]channelconfig.OrdererOrg{ 81 "fake-org": mockOrg, 82 }) 83 return mockOrderer 84 } 85 86 // for some test cases we chmod file/dir to test failures caused by exotic permissions. 87 // however this does not work if tests are running as root, i.e. in a container. 
88 func skipIfRoot() { 89 u, err := user.Current() 90 Expect(err).NotTo(HaveOccurred()) 91 if u.Uid == "0" { 92 Skip("you are running test as root, there's no way to make files unreadable") 93 } 94 } 95 96 var _ = Describe("Chain", func() { 97 var ( 98 env *common.Envelope 99 channelID string 100 tlsCA tlsgen.CA 101 logger *flogging.FabricLogger 102 ) 103 104 BeforeEach(func() { 105 tlsCA, _ = tlsgen.NewCA() 106 channelID = "test-channel" 107 logger = flogging.NewFabricLogger(zap.NewExample()) 108 env = &common.Envelope{ 109 Payload: marshalOrPanic(&common.Payload{ 110 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 111 Data: []byte("TEST_MESSAGE"), 112 }), 113 } 114 }) 115 116 Describe("Single Raft node", func() { 117 var ( 118 configurator *mocks.FakeConfigurator 119 consenterMetadata *raftprotos.ConfigMetadata 120 consenters map[uint64]*raftprotos.Consenter 121 clock *fakeclock.FakeClock 122 opts etcdraft.Options 123 support *consensusmocks.FakeConsenterSupport 124 cutter *mockblockcutter.Receiver 125 storage *raft.MemoryStorage 126 observeC chan raft.SoftState 127 chain *etcdraft.Chain 128 dataDir string 129 walDir string 130 snapDir string 131 err error 132 fakeFields *fakeMetricsFields 133 cryptoProvider bccsp.BCCSP 134 ) 135 136 BeforeEach(func() { 137 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 138 Expect(err).NotTo(HaveOccurred()) 139 140 configurator = &mocks.FakeConfigurator{} 141 clock = fakeclock.NewFakeClock(time.Now()) 142 storage = raft.NewMemoryStorage() 143 144 dataDir, err = ioutil.TempDir("", "wal-") 145 Expect(err).NotTo(HaveOccurred()) 146 walDir = path.Join(dataDir, "wal") 147 snapDir = path.Join(dataDir, "snapshot") 148 149 observeC = make(chan raft.SoftState, 1) 150 151 support = &consensusmocks.FakeConsenterSupport{} 152 support.ChannelIDReturns(channelID) 153 consenterMetadata = createMetadata(1, tlsCA) 154 support.SharedConfigReturns(mockOrdererWithTLSRootCert(time.Hour, marshalOrPanic(consenterMetadata), tlsCA)) 155 156 cutter = mockblockcutter.NewReceiver() 157 support.BlockCutterReturns(cutter) 158 159 // for block creator initialization 160 support.HeightReturns(1) 161 support.BlockReturns(getSeedBlock()) 162 163 meta := &raftprotos.BlockMetadata{ 164 ConsenterIds: make([]uint64, len(consenterMetadata.Consenters)), 165 NextConsenterId: 1, 166 } 167 168 for i := range meta.ConsenterIds { 169 meta.ConsenterIds[i] = meta.NextConsenterId 170 meta.NextConsenterId++ 171 } 172 173 consenters = map[uint64]*raftprotos.Consenter{} 174 for i, c := range consenterMetadata.Consenters { 175 consenters[meta.ConsenterIds[i]] = c 176 } 177 178 fakeFields = newFakeMetricsFields() 179 180 opts = etcdraft.Options{ 181 RaftID: 1, 182 Clock: clock, 183 TickInterval: interval, 184 ElectionTick: ELECTION_TICK, 185 HeartbeatTick: HEARTBEAT_TICK, 186 MaxSizePerMsg: 1024 * 1024, 187 MaxInflightBlocks: 256, 188 BlockMetadata: meta, 189 Consenters: consenters, 190 Logger: logger, 191 MemoryStorage: storage, 192 WALDir: walDir, 193 SnapDir: snapDir, 194 Metrics: newFakeMetrics(fakeFields), 195 } 196 }) 197 198 campaign := func(c *etcdraft.Chain, observeC <-chan raft.SoftState) { 199 Eventually(func() <-chan raft.SoftState { 200 c.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow, To: 1})}, 0) 201 return observeC 202 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 203 } 
204 205 JustBeforeEach(func() { 206 chain, err = etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC) 207 Expect(err).NotTo(HaveOccurred()) 208 209 chain.Start() 210 cRel, status := chain.StatusReport() 211 Expect(cRel).To(Equal(orderer_types.ClusterRelationMember)) 212 Expect(status).To(Equal(orderer_types.StatusActive)) 213 214 // When the Raft node bootstraps, it produces a ConfChange 215 // to add itself, which needs to be consumed with Ready(). 216 // If there are pending configuration changes in raft, 217 // it refuses to campaign, no matter how many ticks elapse. 218 // This is not a problem in the production code because raft.Ready 219 // will be consumed eventually, as the wall clock advances. 220 // 221 // However, this is problematic when using the fake clock and 222 // artificial ticks. Instead of ticking raft indefinitely until 223 // raft.Ready is consumed, this check is added to indirectly guarantee 224 // that the first ConfChange is actually consumed and we can safely 225 // proceed to tick the Raft FSM. 226 Eventually(func() error { 227 _, err := storage.Entries(1, 1, 1) 228 return err 229 }, LongEventualTimeout).ShouldNot(HaveOccurred()) 230 }) 231 232 AfterEach(func() { 233 chain.Halt() 234 Eventually(chain.Errored, LongEventualTimeout).Should(BeClosed()) 235 // Make sure no timer leak 236 Eventually(clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 237 os.RemoveAll(dataDir) 238 }) 239 240 Context("when a node starts up", func() { 241 It("properly configures the communication layer", func() { 242 expectedNodeConfig := nodeConfigFromMetadata(consenterMetadata) 243 Eventually(configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(1)) 244 _, arg2 := configurator.ConfigureArgsForCall(0) 245 Expect(arg2).To(Equal(expectedNodeConfig)) 246 }) 247 248 It("correctly sets the metrics labels and publishes requisite metrics", func() { 249 type withImplementers interface { 250 WithCallCount() int 251 WithArgsForCall(int) []string 252 } 253 metricsList := []withImplementers{ 254 fakeFields.fakeClusterSize, 255 fakeFields.fakeIsLeader, 256 fakeFields.fakeActiveNodes, 257 fakeFields.fakeCommittedBlockNumber, 258 fakeFields.fakeSnapshotBlockNumber, 259 fakeFields.fakeLeaderChanges, 260 fakeFields.fakeProposalFailures, 261 fakeFields.fakeDataPersistDuration, 262 fakeFields.fakeNormalProposalsReceived, 263 fakeFields.fakeConfigProposalsReceived, 264 } 265 for _, m := range metricsList { 266 Expect(m.WithCallCount()).To(Equal(1)) 267 Expect(func() string { 268 return m.WithArgsForCall(0)[1] 269 }()).To(Equal(channelID)) 270 } 271 272 Expect(fakeFields.fakeClusterSize.SetCallCount()).To(Equal(1)) 273 Expect(fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(1))) 274 Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(1)) 275 Expect(fakeFields.fakeIsLeader.SetArgsForCall(0)).To(Equal(float64(0))) 276 Expect(fakeFields.fakeActiveNodes.SetCallCount()).To(Equal(1)) 277 Expect(fakeFields.fakeActiveNodes.SetArgsForCall(0)).To(Equal(float64(0))) 278 }) 279 }) 280 281 Context("when no Raft leader is elected", func() { 282 It("fails to order envelope", func() { 283 err := chain.Order(env, 0) 284 Expect(err).To(MatchError("no Raft leader")) 285 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 286 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 287 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(0)) 288 
Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1)) 289 Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1))) 290 }) 291 292 It("starts proactive campaign", func() { 293 // assert that even tick supplied are less than ELECTION_TIMEOUT, 294 // a leader can still be successfully elected. 295 for i := 0; i < ELECTION_TICK; i++ { 296 clock.Increment(interval) 297 time.Sleep(10 * time.Millisecond) 298 } 299 Eventually(observeC, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 300 }) 301 }) 302 303 Context("when Raft leader is elected", func() { 304 JustBeforeEach(func() { 305 campaign(chain, observeC) 306 }) 307 308 It("updates metrics upon leader election", func() { 309 Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(2)) 310 Expect(fakeFields.fakeIsLeader.SetArgsForCall(1)).To(Equal(float64(1))) 311 Expect(fakeFields.fakeLeaderChanges.AddCallCount()).To(Equal(1)) 312 Expect(fakeFields.fakeLeaderChanges.AddArgsForCall(0)).To(Equal(float64(1))) 313 }) 314 315 It("fails to order envelope if chain is halted", func() { 316 chain.Halt() 317 err := chain.Order(env, 0) 318 Expect(err).To(MatchError("chain is stopped")) 319 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 320 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 321 Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1)) 322 Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1))) 323 }) 324 325 It("produces blocks following batch rules", func() { 326 close(cutter.Block) 327 328 By("cutting next batch directly") 329 cutter.CutNext = true 330 err := chain.Order(env, 0) 331 Expect(err).NotTo(HaveOccurred()) 332 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 333 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 334 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 335 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call 336 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1))) 337 338 // There are three calls to DataPersistDuration by now corresponding to the following three 339 // arriving on the Ready channel: 340 // 1. an EntryConfChange to let this node join the Raft cluster 341 // 2. a SoftState and an associated increase of term in the HardState due to the node being elected leader 342 // 3. 
a block being committed 343 // The duration being emitted is zero since we don't tick the fake clock during this time 344 Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(3)) 345 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(0)).Should(Equal(float64(0))) 346 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(1)).Should(Equal(float64(0))) 347 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(2)).Should(Equal(float64(0))) 348 349 By("respecting batch timeout") 350 cutter.CutNext = false 351 timeout := time.Second 352 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 353 err = chain.Order(env, 0) 354 Expect(err).NotTo(HaveOccurred()) 355 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 356 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 357 358 clock.WaitForNWatchersAndIncrement(timeout, 2) 359 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 360 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call 361 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2))) 362 Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(4)) 363 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(3)).Should(Equal(float64(0))) 364 }) 365 366 It("does not reset timer for every envelope", func() { 367 close(cutter.Block) 368 369 timeout := time.Second 370 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 371 372 err := chain.Order(env, 0) 373 Expect(err).NotTo(HaveOccurred()) 374 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 375 376 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 377 378 err = chain.Order(env, 0) 379 Expect(err).NotTo(HaveOccurred()) 380 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(2)) 381 382 // the second envelope should not reset the timer; it should 383 // therefore expire if we increment it by just timeout/2 384 clock.Increment(timeout / 2) 385 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 386 }) 387 388 It("does not write a block if halted before timeout", func() { 389 close(cutter.Block) 390 timeout := time.Second 391 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 392 393 err := chain.Order(env, 0) 394 Expect(err).NotTo(HaveOccurred()) 395 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 396 397 // wait for timer to start 398 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 399 400 chain.Halt() 401 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 402 }) 403 404 It("stops the timer if a batch is cut", func() { 405 close(cutter.Block) 406 407 timeout := time.Second 408 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 409 410 err := chain.Order(env, 0) 411 Expect(err).NotTo(HaveOccurred()) 412 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 413 414 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 415 416 By("force a batch to be cut before timer expires") 417 cutter.CutNext = true 418 err = chain.Order(env, 0) 419 Expect(err).NotTo(HaveOccurred()) 420 421 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 422 b, _ := support.WriteBlockArgsForCall(0) 423 Expect(b.Data.Data).To(HaveLen(2)) 424 Expect(cutter.CurBatch()).To(HaveLen(0)) 425 426 // this should start a fresh timer 427 
cutter.CutNext = false 428 err = chain.Order(env, 0) 429 Expect(err).NotTo(HaveOccurred()) 430 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 431 432 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 433 Consistently(support.WriteBlockCallCount).Should(Equal(1)) 434 435 clock.Increment(timeout / 2) 436 437 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 438 b, _ = support.WriteBlockArgsForCall(1) 439 Expect(b.Data.Data).To(HaveLen(1)) 440 }) 441 442 It("cut two batches if incoming envelope does not fit into first batch", func() { 443 close(cutter.Block) 444 445 timeout := time.Second 446 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 447 448 err := chain.Order(env, 0) 449 Expect(err).NotTo(HaveOccurred()) 450 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 451 452 cutter.IsolatedTx = true 453 err = chain.Order(env, 0) 454 Expect(err).NotTo(HaveOccurred()) 455 456 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 457 }) 458 459 Context("revalidation", func() { 460 BeforeEach(func() { 461 close(cutter.Block) 462 463 timeout := time.Hour 464 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 465 support.SequenceReturns(1) 466 }) 467 468 It("enqueue if envelope is still valid", func() { 469 support.ProcessNormalMsgReturns(1, nil) 470 471 err := chain.Order(env, 0) 472 Expect(err).NotTo(HaveOccurred()) 473 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 474 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 475 }) 476 477 It("does not enqueue if envelope is not valid", func() { 478 support.ProcessNormalMsgReturns(1, errors.Errorf("Envelope is invalid")) 479 480 err := chain.Order(env, 0) 481 Expect(err).NotTo(HaveOccurred()) 482 Consistently(cutter.CurBatch).Should(HaveLen(0)) 483 Consistently(clock.WatcherCount).Should(Equal(1)) 484 }) 485 }) 486 487 It("unblocks Errored if chain is halted", func() { 488 errorC := chain.Errored() 489 Expect(errorC).NotTo(BeClosed()) 490 chain.Halt() 491 Eventually(errorC, LongEventualTimeout).Should(BeClosed()) 492 }) 493 494 Describe("Config updates", func() { 495 var ( 496 configEnv *common.Envelope 497 configSeq uint64 498 ) 499 500 Context("when a type A config update comes", func() { 501 Context("for existing channel", func() { 502 // use to prepare the Orderer Values 503 BeforeEach(func() { 504 newValues := map[string]*common.ConfigValue{ 505 "BatchTimeout": { 506 Version: 1, 507 Value: marshalOrPanic(&orderer.BatchTimeout{ 508 Timeout: "3ms", 509 }), 510 }, 511 "ConsensusType": { 512 Version: 4, 513 }, 514 } 515 oldValues := map[string]*common.ConfigValue{ 516 "ConsensusType": { 517 Version: 4, 518 }, 519 } 520 configEnv = newConfigEnv(channelID, 521 common.HeaderType_CONFIG, 522 newConfigUpdateEnv(channelID, oldValues, newValues), 523 ) 524 configSeq = 0 525 }) // BeforeEach block 526 527 Context("without revalidation (i.e. 
correct config sequence)", func() { 528 Context("without pending normal envelope", func() { 529 It("should create a config block and no normal block", func() { 530 err := chain.Configure(configEnv, configSeq) 531 Expect(err).NotTo(HaveOccurred()) 532 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 533 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 534 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 535 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 536 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call 537 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1))) 538 }) 539 }) 540 541 Context("with pending normal envelope", func() { 542 It("should create a normal block and a config block", func() { 543 // We do not need to block the cutter from ordering in our test case and therefore close this channel. 544 close(cutter.Block) 545 546 By("adding a normal envelope") 547 err := chain.Order(env, 0) 548 Expect(err).NotTo(HaveOccurred()) 549 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 550 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 551 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 552 553 By("adding a config envelope") 554 err = chain.Configure(configEnv, configSeq) 555 Expect(err).NotTo(HaveOccurred()) 556 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 557 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 558 559 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 560 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 561 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call 562 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2))) 563 }) 564 }) 565 }) 566 567 Context("with revalidation (i.e. 
incorrect config sequence)", func() { 568 BeforeEach(func() { 569 close(cutter.Block) 570 support.SequenceReturns(1) // this causes the revalidation 571 }) 572 573 It("should create config block upon correct revalidation", func() { 574 support.ProcessConfigMsgReturns(configEnv, 1, nil) // nil implies correct revalidation 575 576 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 577 Consistently(clock.WatcherCount).Should(Equal(1)) 578 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 579 }) 580 581 It("should not create config block upon incorrect revalidation", func() { 582 support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence")) 583 584 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 585 Consistently(clock.WatcherCount).Should(Equal(1)) 586 Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) // no call to WriteConfigBlock 587 }) 588 589 It("should not disturb current running timer upon incorrect revalidation", func() { 590 support.ProcessNormalMsgReturns(1, nil) 591 support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence")) 592 593 Expect(chain.Order(env, configSeq)).To(Succeed()) 594 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 595 596 clock.Increment(30 * time.Minute) 597 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 598 599 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 600 Consistently(clock.WatcherCount).Should(Equal(2)) 601 602 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 603 Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) 604 605 clock.Increment(30 * time.Minute) 606 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 607 }) 608 }) 609 }) 610 611 Context("for creating a new channel", func() { 612 // use to prepare the Orderer Values 613 BeforeEach(func() { 614 chainID := "mychannel" 615 values := make(map[string]*common.ConfigValue) 616 configEnv = newConfigEnv(chainID, 617 common.HeaderType_CONFIG, 618 newConfigUpdateEnv(chainID, nil, values), 619 ) 620 configSeq = 0 621 }) // BeforeEach block 622 623 It("should be able to create a channel", func() { 624 err := chain.Configure(configEnv, configSeq) 625 Expect(err).NotTo(HaveOccurred()) 626 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 627 }) 628 }) 629 }) // Context block for type A config 630 631 Context("when a type B config update comes", func() { 632 Context("updating protocol values", func() { 633 // use to prepare the Orderer Values 634 BeforeEach(func() { 635 values := map[string]*common.ConfigValue{ 636 "ConsensusType": { 637 Version: 1, 638 Value: marshalOrPanic(&orderer.ConsensusType{ 639 Metadata: marshalOrPanic(consenterMetadata), 640 }), 641 }, 642 } 643 configEnv = newConfigEnv(channelID, 644 common.HeaderType_CONFIG, 645 newConfigUpdateEnv(channelID, nil, values)) 646 configSeq = 0 647 648 }) // BeforeEach block 649 650 It("should be able to process config update of type B", func() { 651 err := chain.Configure(configEnv, configSeq) 652 Expect(err).NotTo(HaveOccurred()) 653 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 654 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 655 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 656 }) 657 }) 658 659 Context("updating consenters set by exactly one 
node", func() { 660 It("should be able to process config update adding single node", func() { 661 metadata := proto.Clone(consenterMetadata).(*raftprotos.ConfigMetadata) 662 metadata.Consenters = append(metadata.Consenters, &raftprotos.Consenter{ 663 Host: "localhost", 664 Port: 7050, 665 ServerTlsCert: serverTLSCert(tlsCA), 666 ClientTlsCert: clientTLSCert(tlsCA), 667 }) 668 669 values := map[string]*common.ConfigValue{ 670 "ConsensusType": { 671 Version: 1, 672 Value: marshalOrPanic(&orderer.ConsensusType{ 673 Metadata: marshalOrPanic(metadata), 674 }), 675 }, 676 } 677 configEnv = newConfigEnv(channelID, 678 common.HeaderType_CONFIG, 679 newConfigUpdateEnv(channelID, nil, values)) 680 configSeq = 0 681 682 err := chain.Configure(configEnv, configSeq) 683 Expect(err).NotTo(HaveOccurred()) 684 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 685 }) 686 687 }) 688 }) 689 }) 690 691 Describe("Crash Fault Tolerance", func() { 692 var ( 693 raftMetadata *raftprotos.BlockMetadata 694 ) 695 696 BeforeEach(func() { 697 raftMetadata = &raftprotos.BlockMetadata{ 698 ConsenterIds: []uint64{1}, 699 NextConsenterId: 2, 700 } 701 }) 702 703 Describe("when a chain is started with existing WAL", func() { 704 var ( 705 m1 *raftprotos.BlockMetadata 706 m2 *raftprotos.BlockMetadata 707 ) 708 JustBeforeEach(func() { 709 // to generate WAL data, we start a chain, 710 // order several envelopes and then halt the chain. 711 close(cutter.Block) 712 cutter.CutNext = true 713 714 // enque some data to be persisted on disk by raft 715 err := chain.Order(env, uint64(0)) 716 Expect(err).NotTo(HaveOccurred()) 717 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 718 719 _, metadata := support.WriteBlockArgsForCall(0) 720 m1 = &raftprotos.BlockMetadata{} 721 proto.Unmarshal(metadata, m1) 722 723 err = chain.Order(env, uint64(0)) 724 Expect(err).NotTo(HaveOccurred()) 725 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 726 727 _, metadata = support.WriteBlockArgsForCall(1) 728 m2 = &raftprotos.BlockMetadata{} 729 proto.Unmarshal(metadata, m2) 730 731 chain.Halt() 732 }) 733 734 It("replays blocks from committed entries", func() { 735 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 736 c.init() 737 c.Start() 738 defer c.Halt() 739 740 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 741 742 _, metadata := c.support.WriteBlockArgsForCall(0) 743 m := &raftprotos.BlockMetadata{} 744 proto.Unmarshal(metadata, m) 745 Expect(m.RaftIndex).To(Equal(m1.RaftIndex)) 746 747 _, metadata = c.support.WriteBlockArgsForCall(1) 748 m = &raftprotos.BlockMetadata{} 749 proto.Unmarshal(metadata, m) 750 Expect(m.RaftIndex).To(Equal(m2.RaftIndex)) 751 752 // chain should keep functioning 753 campaign(c.Chain, c.observe) 754 755 c.cutter.CutNext = true 756 757 err := c.Order(env, uint64(0)) 758 Expect(err).NotTo(HaveOccurred()) 759 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 760 761 }) 762 763 It("only replays blocks after Applied index", func() { 764 raftMetadata.RaftIndex = m1.RaftIndex 765 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 766 c.support.WriteBlock(support.WriteBlockArgsForCall(0)) 767 768 c.init() 769 c.Start() 770 defer c.Halt() 771 772 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 773 774 _, metadata := 
c.support.WriteBlockArgsForCall(1) 775 m := &raftprotos.BlockMetadata{} 776 proto.Unmarshal(metadata, m) 777 Expect(m.RaftIndex).To(Equal(m2.RaftIndex)) 778 779 // chain should keep functioning 780 campaign(c.Chain, c.observe) 781 782 c.cutter.CutNext = true 783 784 err := c.Order(env, uint64(0)) 785 Expect(err).NotTo(HaveOccurred()) 786 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 787 }) 788 789 It("does not replay any block if already in sync", func() { 790 raftMetadata.RaftIndex = m2.RaftIndex 791 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 792 c.init() 793 c.Start() 794 defer c.Halt() 795 796 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 797 798 // chain should keep functioning 799 campaign(c.Chain, c.observe) 800 801 c.cutter.CutNext = true 802 803 err := c.Order(env, uint64(0)) 804 Expect(err).NotTo(HaveOccurred()) 805 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 806 }) 807 808 Context("WAL file is not readable", func() { 809 It("fails to load wal", func() { 810 skipIfRoot() 811 812 files, err := ioutil.ReadDir(walDir) 813 Expect(err).NotTo(HaveOccurred()) 814 for _, f := range files { 815 os.Chmod(path.Join(walDir, f.Name()), 0300) 816 } 817 818 c, err := etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC) 819 Expect(c).To(BeNil()) 820 Expect(err).To(MatchError(ContainSubstring("permission denied"))) 821 }) 822 }) 823 }) 824 825 Describe("when snapshotting is enabled (snapshot interval is not zero)", func() { 826 var ( 827 ledgerLock sync.Mutex 828 ledger map[uint64]*common.Block 829 ) 830 831 countFiles := func() int { 832 files, err := ioutil.ReadDir(snapDir) 833 Expect(err).NotTo(HaveOccurred()) 834 return len(files) 835 } 836 837 BeforeEach(func() { 838 opts.SnapshotCatchUpEntries = 2 839 840 close(cutter.Block) 841 cutter.CutNext = true 842 843 ledgerLock.Lock() 844 ledger = map[uint64]*common.Block{ 845 0: getSeedBlock(), // genesis block 846 } 847 ledgerLock.Unlock() 848 849 support.WriteBlockStub = func(block *common.Block, meta []byte) { 850 b := proto.Clone(block).(*common.Block) 851 852 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 853 Expect(err).NotTo(HaveOccurred()) 854 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 855 856 ledgerLock.Lock() 857 defer ledgerLock.Unlock() 858 ledger[b.Header.Number] = b 859 } 860 861 support.HeightStub = func() uint64 { 862 ledgerLock.Lock() 863 defer ledgerLock.Unlock() 864 return uint64(len(ledger)) 865 } 866 }) 867 868 Context("Small SnapshotInterval", func() { 869 BeforeEach(func() { 870 opts.SnapshotIntervalSize = 1 871 }) 872 873 It("writes snapshot file to snapDir", func() { 874 // Scenario: start a chain with SnapInterval = 1 byte, expect it to take 875 // one snapshot for each block 876 877 i, _ := opts.MemoryStorage.FirstIndex() 878 879 Expect(chain.Order(env, uint64(0))).To(Succeed()) 880 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 881 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 882 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 883 Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(2)) // incl. 
initial call 884 s, _ := opts.MemoryStorage.Snapshot() 885 b := protoutil.UnmarshalBlockOrPanic(s.Data) 886 Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(1)).To(Equal(float64(b.Header.Number))) 887 888 i, _ = opts.MemoryStorage.FirstIndex() 889 890 Expect(chain.Order(env, uint64(0))).To(Succeed()) 891 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 892 893 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 894 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 895 Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(3)) // incl. initial call 896 s, _ = opts.MemoryStorage.Snapshot() 897 b = protoutil.UnmarshalBlockOrPanic(s.Data) 898 Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(2)).To(Equal(float64(b.Header.Number))) 899 }) 900 901 It("pauses chain if sync is in progress", func() { 902 // Scenario: 903 // after a snapshot is taken, reboot chain with raftIndex = 0 904 // chain should attempt to sync upon reboot, and blocks on 905 // `WaitReady` API 906 907 i, _ := opts.MemoryStorage.FirstIndex() 908 909 Expect(chain.Order(env, uint64(0))).To(Succeed()) 910 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 911 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 912 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 913 914 i, _ = opts.MemoryStorage.FirstIndex() 915 916 Expect(chain.Order(env, uint64(0))).To(Succeed()) 917 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 918 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 919 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 920 921 chain.Halt() 922 923 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 924 c.init() 925 926 signal := make(chan struct{}) 927 928 c.puller.PullBlockStub = func(i uint64) *common.Block { 929 <-signal // blocking for assertions 930 ledgerLock.Lock() 931 defer ledgerLock.Unlock() 932 if i >= uint64(len(ledger)) { 933 return nil 934 } 935 936 // This is a false assumption - single node shouldn't be able to pull block from anywhere. 937 // However, this test is mainly to assert that chain should attempt catchup upon start, 938 // so we could live with it. 939 return ledger[i] 940 } 941 942 err := c.WaitReady() 943 Expect(err).To(MatchError("chain is not started")) 944 945 c.Start() 946 defer c.Halt() 947 948 // pull block is called, so chain should be catching up now, WaitReady should block 949 signal <- struct{}{} 950 951 done := make(chan error) 952 go func() { 953 done <- c.WaitReady() 954 }() 955 956 Consistently(done).ShouldNot(Receive()) 957 close(signal) // unblock block puller 958 Eventually(done).Should(Receive(nil)) // WaitReady should be unblocked 959 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 960 }) 961 962 It("commits block from snapshot if it's missing from ledger", func() { 963 // Scenario: 964 // Single node exists right after a snapshot is taken, while the block 965 // in it hasn't been successfully persisted into ledger (there can be one 966 // async block write in-flight). Then the node is restarted, and catches 967 // up using the block in snapshot. 
968 969 Expect(chain.Order(env, uint64(0))).To(Succeed()) 970 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 971 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 972 973 chain.Halt() 974 975 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 976 c.init() 977 c.Start() 978 defer c.Halt() 979 980 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 981 }) 982 983 It("restores snapshot w/o extra entries", func() { 984 // Scenario: 985 // after a snapshot is taken, no more entries are appended. 986 // then node is restarted, it loads snapshot, finds its term 987 // and index. While replaying WAL to memory storage, it should 988 // not append any entry because no extra entry was appended 989 // after snapshot was taken. 990 991 Expect(chain.Order(env, uint64(0))).To(Succeed()) 992 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 993 _, metadata := support.WriteBlockArgsForCall(0) 994 m := &raftprotos.BlockMetadata{} 995 proto.Unmarshal(metadata, m) 996 997 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 998 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1)) 999 snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created 1000 Expect(err).NotTo(HaveOccurred()) 1001 i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory 1002 Expect(err).NotTo(HaveOccurred()) 1003 1004 // expect storage to preserve SnapshotCatchUpEntries entries before snapshot 1005 Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1)) 1006 1007 chain.Halt() 1008 1009 raftMetadata.RaftIndex = m.RaftIndex 1010 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 1011 c.opts.SnapshotIntervalSize = 1 1012 1013 c.init() 1014 c.Start() 1015 1016 // following arithmetic reflects how etcdraft MemoryStorage is implemented 1017 // when no entry is appended after snapshot being loaded. 
1018 Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1)) 1019 Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index)) 1020 1021 // chain keeps functioning 1022 Eventually(func() <-chan raft.SoftState { 1023 c.clock.Increment(interval) 1024 return c.observe 1025 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 1026 1027 c.cutter.CutNext = true 1028 err = c.Order(env, uint64(0)) 1029 Expect(err).NotTo(HaveOccurred()) 1030 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1031 1032 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 1033 c.Halt() 1034 1035 _, metadata = c.support.WriteBlockArgsForCall(0) 1036 m = &raftprotos.BlockMetadata{} 1037 proto.Unmarshal(metadata, m) 1038 raftMetadata.RaftIndex = m.RaftIndex 1039 cx := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 1040 1041 cx.init() 1042 cx.Start() 1043 defer cx.Halt() 1044 1045 // chain keeps functioning 1046 Eventually(func() <-chan raft.SoftState { 1047 cx.clock.Increment(interval) 1048 return cx.observe 1049 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 1050 }) 1051 }) 1052 1053 Context("Large SnapshotInterval", func() { 1054 BeforeEach(func() { 1055 opts.SnapshotIntervalSize = 1024 1056 }) 1057 1058 It("restores snapshot w/ extra entries", func() { 1059 // Scenario: 1060 // after a snapshot is taken, more entries are appended. 1061 // then node is restarted, it loads snapshot, finds its term 1062 // and index. While replaying WAL to memory storage, it should 1063 // append some entries. 1064 1065 largeEnv := &common.Envelope{ 1066 Payload: marshalOrPanic(&common.Payload{ 1067 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 1068 Data: make([]byte, 500), 1069 }), 1070 } 1071 1072 By("Ordering two large envelopes to trigger snapshot") 1073 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1074 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1075 1076 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1077 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1078 1079 _, metadata := support.WriteBlockArgsForCall(1) 1080 m := &raftprotos.BlockMetadata{} 1081 proto.Unmarshal(metadata, m) 1082 1083 // check snapshot does exit 1084 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 1085 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1)) 1086 snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created 1087 Expect(err).NotTo(HaveOccurred()) 1088 i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory 1089 Expect(err).NotTo(HaveOccurred()) 1090 1091 // expect storage to preserve SnapshotCatchUpEntries entries before snapshot 1092 Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1)) 1093 1094 By("Ordering another envlope to append new data to memory after snaphost") 1095 Expect(chain.Order(env, uint64(0))).To(Succeed()) 1096 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 1097 1098 lasti, _ := opts.MemoryStorage.LastIndex() 1099 1100 chain.Halt() 1101 1102 raftMetadata.RaftIndex = m.RaftIndex 1103 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 1104 cnt := 
support.WriteBlockCallCount() 1105 for i := 0; i < cnt; i++ { 1106 c.support.WriteBlock(support.WriteBlockArgsForCall(i)) 1107 } 1108 1109 By("Restarting the node") 1110 c.init() 1111 c.Start() 1112 defer c.Halt() 1113 1114 By("Checking latest index is larger than index in snapshot") 1115 Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1)) 1116 Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(lasti)) 1117 }) 1118 1119 When("local ledger is in sync with snapshot", func() { 1120 It("does not pull blocks and still respects snapshot interval", func() { 1121 // Scenario: 1122 // - snapshot is taken at block 2 1123 // - order one more envelope (block 3) 1124 // - reboot chain at block 2 1125 // - block 3 should be replayed from wal 1126 // - order another envelope to trigger snapshot, containing block 3 & 4 1127 // Assertions: 1128 // - block puller should NOT be called 1129 // - chain should keep functioning after reboot 1130 // - chain should respect snapshot interval to trigger next snapshot 1131 1132 largeEnv := &common.Envelope{ 1133 Payload: marshalOrPanic(&common.Payload{ 1134 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 1135 Data: make([]byte, 500), 1136 }), 1137 } 1138 1139 By("Ordering two large envelopes to trigger snapshot") 1140 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1141 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1142 1143 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1144 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1145 1146 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 1147 1148 _, metadata := support.WriteBlockArgsForCall(1) 1149 m := &raftprotos.BlockMetadata{} 1150 proto.Unmarshal(metadata, m) 1151 1152 By("Cutting block [3]") 1153 // order another envelope. 
this should not trigger snapshot 1154 err = chain.Order(largeEnv, uint64(0)) 1155 Expect(err).NotTo(HaveOccurred()) 1156 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 1157 1158 chain.Halt() 1159 1160 raftMetadata.RaftIndex = m.RaftIndex 1161 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 1162 // replay block 1&2 1163 c.support.WriteBlock(support.WriteBlockArgsForCall(0)) 1164 c.support.WriteBlock(support.WriteBlockArgsForCall(1)) 1165 1166 c.opts.SnapshotIntervalSize = 1024 1167 1168 By("Restarting node at block [2]") 1169 c.init() 1170 c.Start() 1171 defer c.Halt() 1172 1173 // elect leader 1174 campaign(c.Chain, c.observe) 1175 1176 By("Ordering one more block to trigger snapshot") 1177 c.cutter.CutNext = true 1178 err = c.Order(largeEnv, uint64(0)) 1179 Expect(err).NotTo(HaveOccurred()) 1180 1181 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(4)) 1182 Expect(c.puller.PullBlockCallCount()).Should(BeZero()) 1183 // old snapshot file is retained 1184 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 1185 }) 1186 }) 1187 1188 It("respects snapshot interval after reboot", func() { 1189 largeEnv := &common.Envelope{ 1190 Payload: marshalOrPanic(&common.Payload{ 1191 Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})}, 1192 Data: make([]byte, 500), 1193 }), 1194 } 1195 1196 Expect(chain.Order(largeEnv, uint64(0))).To(Succeed()) 1197 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1198 // check no snapshot is taken 1199 Consistently(countFiles).Should(Equal(0)) 1200 1201 _, metadata := support.WriteBlockArgsForCall(0) 1202 m := &raftprotos.BlockMetadata{} 1203 proto.Unmarshal(metadata, m) 1204 1205 chain.Halt() 1206 1207 raftMetadata.RaftIndex = m.RaftIndex 1208 c1 := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 1209 cnt := support.WriteBlockCallCount() 1210 for i := 0; i < cnt; i++ { 1211 c1.support.WriteBlock(support.WriteBlockArgsForCall(i)) 1212 } 1213 c1.cutter.CutNext = true 1214 c1.opts.SnapshotIntervalSize = 1024 1215 1216 By("Restarting chain") 1217 c1.init() 1218 c1.Start() 1219 // chain keeps functioning 1220 campaign(c1.Chain, c1.observe) 1221 1222 Expect(c1.Order(largeEnv, uint64(0))).To(Succeed()) 1223 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1224 // check snapshot does exit 1225 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 1226 }) 1227 }) 1228 }) 1229 }) 1230 1231 Context("Invalid WAL dir", func() { 1232 var support = &consensusmocks.FakeConsenterSupport{} 1233 BeforeEach(func() { 1234 // for block creator initialization 1235 support.HeightReturns(1) 1236 support.BlockReturns(getSeedBlock()) 1237 }) 1238 1239 When("WAL dir is a file", func() { 1240 It("replaces file with fresh WAL dir", func() { 1241 f, err := ioutil.TempFile("", "wal-") 1242 Expect(err).NotTo(HaveOccurred()) 1243 defer os.RemoveAll(f.Name()) 1244 1245 chain, err := etcdraft.NewChain( 1246 support, 1247 etcdraft.Options{ 1248 WALDir: f.Name(), 1249 SnapDir: snapDir, 1250 Logger: logger, 1251 MemoryStorage: storage, 1252 BlockMetadata: &raftprotos.BlockMetadata{}, 1253 Metrics: newFakeMetrics(newFakeMetricsFields()), 1254 }, 1255 configurator, 1256 nil, 1257 cryptoProvider, 1258 nil, 1259 nil, 1260 observeC) 1261 Expect(chain).NotTo(BeNil()) 1262 
Expect(err).NotTo(HaveOccurred()) 1263 1264 info, err := os.Stat(f.Name()) 1265 Expect(err).NotTo(HaveOccurred()) 1266 Expect(info.IsDir()).To(BeTrue()) 1267 }) 1268 }) 1269 1270 When("WAL dir is not writeable", func() { 1271 It("replace it with fresh WAL dir", func() { 1272 d, err := ioutil.TempDir("", "wal-") 1273 Expect(err).NotTo(HaveOccurred()) 1274 defer os.RemoveAll(d) 1275 1276 err = os.Chmod(d, 0500) 1277 Expect(err).NotTo(HaveOccurred()) 1278 1279 chain, err := etcdraft.NewChain( 1280 support, 1281 etcdraft.Options{ 1282 WALDir: d, 1283 SnapDir: snapDir, 1284 Logger: logger, 1285 MemoryStorage: storage, 1286 BlockMetadata: &raftprotos.BlockMetadata{}, 1287 Metrics: newFakeMetrics(newFakeMetricsFields()), 1288 }, 1289 nil, 1290 nil, 1291 cryptoProvider, 1292 noOpBlockPuller, 1293 nil, 1294 nil) 1295 Expect(chain).NotTo(BeNil()) 1296 Expect(err).NotTo(HaveOccurred()) 1297 }) 1298 }) 1299 1300 When("WAL parent dir is not writeable", func() { 1301 It("fails to bootstrap fresh raft node", func() { 1302 skipIfRoot() 1303 1304 d, err := ioutil.TempDir("", "wal-") 1305 Expect(err).NotTo(HaveOccurred()) 1306 defer os.RemoveAll(d) 1307 1308 err = os.Chmod(d, 0500) 1309 Expect(err).NotTo(HaveOccurred()) 1310 1311 chain, err := etcdraft.NewChain( 1312 support, 1313 etcdraft.Options{ 1314 WALDir: path.Join(d, "wal-dir"), 1315 SnapDir: snapDir, 1316 Logger: logger, 1317 BlockMetadata: &raftprotos.BlockMetadata{}, 1318 }, 1319 nil, 1320 nil, 1321 cryptoProvider, 1322 noOpBlockPuller, 1323 nil, 1324 nil) 1325 Expect(chain).To(BeNil()) 1326 Expect(err).To(MatchError(ContainSubstring("failed to initialize WAL: mkdir"))) 1327 }) 1328 }) 1329 }) 1330 }) 1331 }) 1332 1333 Describe("2-node Raft cluster", func() { 1334 var ( 1335 network *network 1336 channelID string 1337 timeout time.Duration 1338 dataDir string 1339 c1, c2 *chain 1340 raftMetadata *raftprotos.BlockMetadata 1341 consenters map[uint64]*raftprotos.Consenter 1342 configEnv *common.Envelope 1343 cryptoProvider bccsp.BCCSP 1344 ) 1345 BeforeEach(func() { 1346 var err error 1347 1348 channelID = "multi-node-channel" 1349 timeout = 10 * time.Second 1350 1351 dataDir, err = ioutil.TempDir("", "raft-test-") 1352 Expect(err).NotTo(HaveOccurred()) 1353 1354 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 1355 Expect(err).NotTo(HaveOccurred()) 1356 1357 raftMetadata = &raftprotos.BlockMetadata{ 1358 ConsenterIds: []uint64{1, 2}, 1359 NextConsenterId: 3, 1360 } 1361 1362 consenters = map[uint64]*raftprotos.Consenter{ 1363 1: { 1364 Host: "localhost", 1365 Port: 7051, 1366 ClientTlsCert: clientTLSCert(tlsCA), 1367 ServerTlsCert: serverTLSCert(tlsCA), 1368 }, 1369 2: { 1370 Host: "localhost", 1371 Port: 7051, 1372 ClientTlsCert: clientTLSCert(tlsCA), 1373 ServerTlsCert: serverTLSCert(tlsCA), 1374 }, 1375 } 1376 1377 metadata := &raftprotos.ConfigMetadata{ 1378 Options: &raftprotos.Options{ 1379 TickInterval: "500ms", 1380 ElectionTick: 10, 1381 HeartbeatTick: 1, 1382 MaxInflightBlocks: 5, 1383 SnapshotIntervalSize: 200, 1384 }, 1385 Consenters: []*raftprotos.Consenter{consenters[2]}, 1386 } 1387 value := map[string]*common.ConfigValue{ 1388 "ConsensusType": { 1389 Version: 1, 1390 Value: marshalOrPanic(&orderer.ConsensusType{ 1391 Metadata: marshalOrPanic(metadata), 1392 }), 1393 }, 1394 } 1395 // prepare config update to remove 1 1396 configEnv = newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1397 1398 network = createNetwork(timeout, channelID, dataDir, 
raftMetadata, consenters, cryptoProvider, tlsCA) 1399 c1, c2 = network.chains[1], network.chains[2] 1400 c1.cutter.CutNext = true 1401 network.init() 1402 network.start() 1403 }) 1404 1405 AfterEach(func() { 1406 network.stop() 1407 network.exec(func(c *chain) { 1408 Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 1409 }) 1410 1411 os.RemoveAll(dataDir) 1412 }) 1413 1414 It("can remove leader by reconfiguring cluster", func() { 1415 network.elect(1) 1416 1417 // trigger status dissemination 1418 Eventually(func() int { 1419 c1.clock.Increment(interval) 1420 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1421 }, LongEventualTimeout).Should(Equal(2)) 1422 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1423 1424 By("Configuring cluster to remove node") 1425 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1426 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1427 c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2) 1428 1429 Eventually(func() <-chan raft.SoftState { 1430 c2.clock.Increment(interval) 1431 return c2.observe 1432 }, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateLeader))) 1433 1434 By("Asserting leader can still serve requests as single-node cluster") 1435 c2.cutter.CutNext = true 1436 Expect(c2.Order(env, 0)).To(Succeed()) 1437 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1438 }) 1439 1440 It("can remove leader by reconfiguring cluster even if leadership transfer fails", func() { 1441 network.elect(1) 1442 1443 step1 := c1.getStepFunc() 1444 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 1445 stepMsg := &raftpb.Message{} 1446 if err := proto.Unmarshal(msg.Payload, stepMsg); err != nil { 1447 return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err) 1448 } 1449 1450 if stepMsg.Type == raftpb.MsgTimeoutNow { 1451 return nil 1452 } 1453 1454 return step1(dest, msg) 1455 }) 1456 1457 By("Configuring cluster to remove node") 1458 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1459 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1460 c2.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1461 Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1462 1463 c1.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1464 Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed()) 1465 close(c1.stopped) // mark c1 stopped in network 1466 1467 network.elect(2) 1468 1469 By("Asserting leader can still serve requests as single-node cluster") 1470 c2.cutter.CutNext = true 1471 Expect(c2.Order(env, 0)).To(Succeed()) 1472 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1473 }) 1474 1475 It("can remove follower by reconfiguring cluster", func() { 1476 network.elect(2) 1477 1478 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1479 network.exec(func(c *chain) { 1480 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1481 }) 1482 1483 Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1484 Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed()) 1485 1486 By("Asserting leader can still serve requests as single-node cluster") 1487 c2.cutter.CutNext = true 1488 Expect(c2.Order(env, 0)).To(Succeed()) 1489 Eventually(c2.support.WriteBlockCallCount, 
LongEventualTimeout).Should(Equal(1)) 1490 }) 1491 }) 1492 1493 Describe("3-node Raft cluster", func() { 1494 var ( 1495 network *network 1496 channelID string 1497 timeout time.Duration 1498 dataDir string 1499 c1, c2, c3 *chain 1500 raftMetadata *raftprotos.BlockMetadata 1501 consenters map[uint64]*raftprotos.Consenter 1502 cryptoProvider bccsp.BCCSP 1503 ) 1504 1505 BeforeEach(func() { 1506 var err error 1507 1508 channelID = "multi-node-channel" 1509 timeout = 10 * time.Second 1510 1511 dataDir, err = ioutil.TempDir("", "raft-test-") 1512 Expect(err).NotTo(HaveOccurred()) 1513 1514 raftMetadata = &raftprotos.BlockMetadata{ 1515 ConsenterIds: []uint64{1, 2, 3}, 1516 NextConsenterId: 4, 1517 } 1518 1519 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 1520 Expect(err).NotTo(HaveOccurred()) 1521 1522 consenters = map[uint64]*raftprotos.Consenter{ 1523 1: { 1524 Host: "localhost", 1525 Port: 7051, 1526 ClientTlsCert: clientTLSCert(tlsCA), 1527 ServerTlsCert: serverTLSCert(tlsCA), 1528 }, 1529 2: { 1530 Host: "localhost", 1531 Port: 7051, 1532 ClientTlsCert: clientTLSCert(tlsCA), 1533 ServerTlsCert: serverTLSCert(tlsCA), 1534 }, 1535 3: { 1536 Host: "localhost", 1537 Port: 7051, 1538 ClientTlsCert: clientTLSCert(tlsCA), 1539 ServerTlsCert: serverTLSCert(tlsCA), 1540 }, 1541 } 1542 1543 network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA) 1544 c1 = network.chains[1] 1545 c2 = network.chains[2] 1546 c3 = network.chains[3] 1547 }) 1548 1549 AfterEach(func() { 1550 network.stop() 1551 network.exec(func(c *chain) { 1552 Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 1553 }) 1554 1555 os.RemoveAll(dataDir) 1556 }) 1557 1558 When("2/3 nodes are running", func() { 1559 It("late node can catch up", func() { 1560 network.init() 1561 network.start(1, 2) 1562 network.elect(1) 1563 1564 // trigger status dissemination 1565 Eventually(func() int { 1566 c1.clock.Increment(interval) 1567 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1568 }, LongEventualTimeout).Should(Equal(2)) 1569 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1570 1571 c1.cutter.CutNext = true 1572 err := c1.Order(env, 0) 1573 Expect(err).NotTo(HaveOccurred()) 1574 1575 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1576 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1577 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1578 1579 network.start(3) 1580 1581 c1.clock.Increment(interval) 1582 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1583 1584 network.stop() 1585 }) 1586 1587 It("late node receives snapshot from leader", func() { 1588 c1.opts.SnapshotIntervalSize = 1 1589 c1.opts.SnapshotCatchUpEntries = 1 1590 1591 c1.cutter.CutNext = true 1592 1593 var blocksLock sync.Mutex 1594 blocks := make(map[uint64]*common.Block) // storing written blocks for block puller 1595 1596 c1.support.WriteBlockStub = func(b *common.Block, meta []byte) { 1597 blocksLock.Lock() 1598 defer blocksLock.Unlock() 1599 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 1600 Expect(err).NotTo(HaveOccurred()) 1601 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 1602 blocks[b.Header.Number] = b 1603 } 1604 1605 c3.puller.PullBlockStub = func(i uint64) *common.Block { 1606 
blocksLock.Lock() 1607 defer blocksLock.Unlock() 1608 b, exist := blocks[i] 1609 if !exist { 1610 return nil 1611 } 1612 1613 return b 1614 } 1615 1616 network.init() 1617 network.start(1, 2) 1618 network.elect(1) 1619 1620 err := c1.Order(env, 0) 1621 Expect(err).NotTo(HaveOccurred()) 1622 1623 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1624 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1625 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1626 1627 err = c1.Order(env, 0) 1628 Expect(err).NotTo(HaveOccurred()) 1629 1630 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1631 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1632 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1633 1634 network.start(3) 1635 1636 c1.clock.Increment(interval) 1637 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1638 1639 network.stop() 1640 }) 1641 }) 1642 1643 When("reconfiguring raft cluster", func() { 1644 const ( 1645 defaultTimeout = 5 * time.Second 1646 ) 1647 var ( 1648 options = &raftprotos.Options{ 1649 TickInterval: "500ms", 1650 ElectionTick: 10, 1651 HeartbeatTick: 1, 1652 MaxInflightBlocks: 5, 1653 SnapshotIntervalSize: 200, 1654 } 1655 updateRaftConfigValue = func(metadata *raftprotos.ConfigMetadata) map[string]*common.ConfigValue { 1656 return map[string]*common.ConfigValue{ 1657 "ConsensusType": { 1658 Version: 1, 1659 Value: marshalOrPanic(&orderer.ConsensusType{ 1660 Metadata: marshalOrPanic(metadata), 1661 }), 1662 }, 1663 } 1664 } 1665 addConsenterConfigValue = func() map[string]*common.ConfigValue { 1666 metadata := &raftprotos.ConfigMetadata{Options: options} 1667 for _, consenter := range consenters { 1668 metadata.Consenters = append(metadata.Consenters, consenter) 1669 } 1670 1671 newConsenter := &raftprotos.Consenter{ 1672 Host: "localhost", 1673 Port: 7050, 1674 ServerTlsCert: serverTLSCert(tlsCA), 1675 ClientTlsCert: clientTLSCert(tlsCA), 1676 } 1677 metadata.Consenters = append(metadata.Consenters, newConsenter) 1678 return updateRaftConfigValue(metadata) 1679 } 1680 removeConsenterConfigValue = func(id uint64) map[string]*common.ConfigValue { 1681 metadata := &raftprotos.ConfigMetadata{Options: options} 1682 for nodeID, consenter := range consenters { 1683 if nodeID == id { 1684 continue 1685 } 1686 metadata.Consenters = append(metadata.Consenters, consenter) 1687 } 1688 return updateRaftConfigValue(metadata) 1689 } 1690 createChannelEnv = func(metadata *raftprotos.ConfigMetadata) *common.Envelope { 1691 configEnv := newConfigEnv("another-channel", 1692 common.HeaderType_CONFIG, 1693 newConfigUpdateEnv(channelID, nil, updateRaftConfigValue(metadata))) 1694 1695 // Wrap config env in Orderer transaction 1696 return &common.Envelope{ 1697 Payload: marshalOrPanic(&common.Payload{ 1698 Header: &common.Header{ 1699 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 1700 Type: int32(common.HeaderType_ORDERER_TRANSACTION), 1701 ChannelId: channelID, 1702 }), 1703 }, 1704 Data: marshalOrPanic(configEnv), 1705 }), 1706 } 1707 } 1708 ) 1709 1710 BeforeEach(func() { 1711 network.exec(func(c *chain) { 1712 c.opts.EvictionSuspicion = time.Millisecond * 100 1713 c.opts.LeaderCheckInterval = 
time.Millisecond * 100 1714 }) 1715 1716 network.init() 1717 network.start() 1718 network.elect(1) 1719 1720 By("Submitting first tx to cut the block") 1721 c1.cutter.CutNext = true 1722 err := c1.Order(env, 0) 1723 Expect(err).NotTo(HaveOccurred()) 1724 1725 c1.clock.Increment(interval) 1726 1727 network.exec( 1728 func(c *chain) { 1729 Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1)) 1730 }) 1731 }) 1732 1733 AfterEach(func() { 1734 network.stop() 1735 }) 1736 1737 Context("channel creation", func() { 1738 It("succeeds with valid config metadata", func() { 1739 metadata := &raftprotos.ConfigMetadata{Options: options} 1740 for _, consenter := range consenters { 1741 metadata.Consenters = append(metadata.Consenters, consenter) 1742 } 1743 1744 Expect(c1.Configure(createChannelEnv(metadata), 0)).To(Succeed()) 1745 network.exec(func(c *chain) { 1746 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1747 }) 1748 }) 1749 1750 }) 1751 1752 Context("reconfiguration", func() { 1753 It("can rotate certificate by adding and removing 1 node in one config update", func() { 1754 metadata := &raftprotos.ConfigMetadata{Options: options} 1755 for id, consenter := range consenters { 1756 if id == 2 { 1757 // remove second consenter 1758 continue 1759 } 1760 metadata.Consenters = append(metadata.Consenters, consenter) 1761 } 1762 1763 // add new consenter 1764 newConsenter := &raftprotos.Consenter{ 1765 Host: "localhost", 1766 Port: 7050, 1767 ServerTlsCert: serverTLSCert(tlsCA), 1768 ClientTlsCert: clientTLSCert(tlsCA), 1769 } 1770 metadata.Consenters = append(metadata.Consenters, newConsenter) 1771 1772 value := map[string]*common.ConfigValue{ 1773 "ConsensusType": { 1774 Version: 1, 1775 Value: marshalOrPanic(&orderer.ConsensusType{ 1776 Metadata: marshalOrPanic(metadata), 1777 }), 1778 }, 1779 } 1780 1781 By("creating new configuration with removed node and new one") 1782 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1783 c1.cutter.CutNext = true 1784 1785 By("sending config transaction") 1786 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1787 1788 network.exec(func(c *chain) { 1789 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1790 }) 1791 }) 1792 1793 It("rotates leader certificate and triggers leadership transfer", func() { 1794 metadata := &raftprotos.ConfigMetadata{Options: options} 1795 for id, consenter := range consenters { 1796 if id == 1 { 1797 // remove second consenter 1798 continue 1799 } 1800 metadata.Consenters = append(metadata.Consenters, consenter) 1801 } 1802 1803 // add new consenter 1804 newConsenter := &raftprotos.Consenter{ 1805 Host: "localhost", 1806 Port: 7050, 1807 ServerTlsCert: serverTLSCert(tlsCA), 1808 ClientTlsCert: clientTLSCert(tlsCA), 1809 } 1810 metadata.Consenters = append(metadata.Consenters, newConsenter) 1811 1812 value := map[string]*common.ConfigValue{ 1813 "ConsensusType": { 1814 Version: 1, 1815 Value: marshalOrPanic(&orderer.ConsensusType{ 1816 Metadata: marshalOrPanic(metadata), 1817 }), 1818 }, 1819 } 1820 1821 By("creating new configuration with removed node and new one") 1822 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1823 c1.cutter.CutNext = true 1824 1825 By("sending config transaction") 1826 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1827 1828 Eventually(c1.observe, LongEventualTimeout).Should(Receive(BeFollower())) 1829 
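// c1, whose certificate was rotated out, is expected to hand over leadership; every node
// should then reconfigure its communication layer a second time (the first Configure call
// presumably happened when the chain was started).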
network.exec(func(c *chain) { 1830 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1831 }) 1832 }) 1833 1834 When("Leader is disconnected after cert rotation", func() { 1835 It("still configures communication after failed leader transfer attempt", func() { 1836 metadata := &raftprotos.ConfigMetadata{Options: options} 1837 for id, consenter := range consenters { 1838 if id == 1 { 1839 // remove second consenter 1840 continue 1841 } 1842 metadata.Consenters = append(metadata.Consenters, consenter) 1843 } 1844 1845 // add new consenter 1846 newConsenter := &raftprotos.Consenter{ 1847 Host: "localhost", 1848 Port: 7050, 1849 ServerTlsCert: serverTLSCert(tlsCA), 1850 ClientTlsCert: clientTLSCert(tlsCA), 1851 } 1852 metadata.Consenters = append(metadata.Consenters, newConsenter) 1853 1854 value := map[string]*common.ConfigValue{ 1855 "ConsensusType": { 1856 Version: 1, 1857 Value: marshalOrPanic(&orderer.ConsensusType{ 1858 Metadata: marshalOrPanic(metadata), 1859 }), 1860 }, 1861 } 1862 1863 By("creating new configuration with removed node and new one") 1864 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1865 c1.cutter.CutNext = true 1866 1867 step1 := c1.getStepFunc() 1868 count := c1.rpc.SendConsensusCallCount() // record current step call count 1869 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 1870 // disconnect network after 4 MsgApp are sent by c1: 1871 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 1872 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 1873 if c1.rpc.SendConsensusCallCount() == count+4 { 1874 defer network.disconnect(1) 1875 } 1876 1877 return step1(dest, msg) 1878 }) 1879 1880 network.exec(func(c *chain) { 1881 Consistently(c.clock.WatcherCount).Should(Equal(1)) 1882 }) 1883 1884 By("sending config transaction") 1885 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1886 1887 Consistently(c1.observe).ShouldNot(Receive()) 1888 network.exec(func(c *chain) { 1889 // wait for timeout timer to start 1890 c.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1891 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1892 }) 1893 }) 1894 }) 1895 1896 When("Follower is disconnected while leader cert is being rotated", func() { 1897 It("still configures communication and transfer leader", func() { 1898 metadata := &raftprotos.ConfigMetadata{Options: options} 1899 for id, consenter := range consenters { 1900 if id == 1 { 1901 // remove second consenter 1902 continue 1903 } 1904 metadata.Consenters = append(metadata.Consenters, consenter) 1905 } 1906 1907 // add new consenter 1908 newConsenter := &raftprotos.Consenter{ 1909 Host: "localhost", 1910 Port: 7050, 1911 ServerTlsCert: serverTLSCert(tlsCA), 1912 ClientTlsCert: clientTLSCert(tlsCA), 1913 } 1914 metadata.Consenters = append(metadata.Consenters, newConsenter) 1915 1916 value := map[string]*common.ConfigValue{ 1917 "ConsensusType": { 1918 Version: 1, 1919 Value: marshalOrPanic(&orderer.ConsensusType{ 1920 Metadata: marshalOrPanic(metadata), 1921 }), 1922 }, 1923 } 1924 1925 cnt := c1.rpc.SendConsensusCallCount() 1926 network.disconnect(3) 1927 1928 // Trigger some heartbeats to be sent so that leader notices 1929 // failed message delivery to 3, and mark it as Paused. 1930 // This is to ensure leadership is transferred to 2. 
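// Each clock tick below broadcasts heartbeats; keep ticking until the leader has sent
// several more consensus messages to its peers.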
1931 Eventually(func() int { 1932 c1.clock.Increment(interval) 1933 return c1.rpc.SendConsensusCallCount() 1934 }, LongEventualTimeout).Should(BeNumerically(">=", cnt+5)) 1935 1936 By("creating new configuration with removed node and new one") 1937 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1938 c1.cutter.CutNext = true 1939 1940 By("sending config transaction") 1941 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1942 1943 Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateFollower))) 1944 network.Lock() 1945 network.leader = 2 // manually set network leader 1946 network.Unlock() 1947 network.disconnect(1) 1948 1949 network.exec(func(c *chain) { 1950 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1951 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1952 }, 1, 2) 1953 1954 network.join(3, true) 1955 Eventually(c3.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1956 Eventually(c3.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1957 1958 By("Ordering normal transaction") 1959 c2.cutter.CutNext = true 1960 Expect(c3.Order(env, 0)).To(Succeed()) 1961 network.exec(func(c *chain) { 1962 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1963 }, 2, 3) 1964 }) 1965 }) 1966 1967 It("adding node to the cluster", func() { 1968 addConsenterUpdate := addConsenterConfigValue() 1969 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterUpdate)) 1970 c1.cutter.CutNext = true 1971 1972 By("sending config transaction") 1973 err := c1.Configure(configEnv, 0) 1974 Expect(err).NotTo(HaveOccurred()) 1975 Expect(c1.fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 1976 Expect(c1.fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 1977 1978 network.exec(func(c *chain) { 1979 Eventually(c.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1)) 1980 Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2)) 1981 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(4))) 1982 }) 1983 1984 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 1985 meta := &common.Metadata{Value: raftmetabytes} 1986 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 1987 Expect(err).NotTo(HaveOccurred()) 1988 1989 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil) 1990 // if we join a node to existing network, it MUST already obtained blocks 1991 // till the config block that adds this node to cluster. 1992 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 1993 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 1994 c4.init() 1995 1996 network.addChain(c4) 1997 c4.Start() 1998 1999 // ConfChange is applied to etcd/raft asynchronously, meaning node 4 is not added 2000 // to leader's node list right away. An immediate tick does not trigger a heartbeat 2001 // being sent to node 4. Therefore, we repeatedly tick the leader until node 4 joins 2002 // the cluster successfully. 
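// Receiving a SoftState with Lead: 1 on c4's observer channel is the signal that the
// new node has caught up and now follows the existing leader.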
	Eventually(func() <-chan raft.SoftState {
		c1.clock.Increment(interval)
		return c4.observe
	}, defaultTimeout).Should(Receive(Equal(raft.SoftState{Lead: 1, RaftState: raft.StateFollower})))

	Eventually(c4.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1))
	Eventually(c4.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1))

	By("submitting new transaction to follower")
	c1.cutter.CutNext = true
	err = c4.Order(env, 0)
	Expect(err).NotTo(HaveOccurred())
	Expect(c4.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
	Expect(c4.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))

	network.exec(func(c *chain) {
		Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(2))
	})
})

It("does not reconfigure raft cluster if it's a channel creation tx", func() {
	configEnv := newConfigEnv("another-channel",
		common.HeaderType_CONFIG,
		newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(2)))

	// Wrap config env in Orderer transaction
	channelCreationEnv := &common.Envelope{
		Payload: marshalOrPanic(&common.Payload{
			Header: &common.Header{
				ChannelHeader: marshalOrPanic(&common.ChannelHeader{
					Type:      int32(common.HeaderType_ORDERER_TRANSACTION),
					ChannelId: channelID,
				}),
			},
			Data: marshalOrPanic(configEnv),
		}),
	}

	c1.cutter.CutNext = true

	Expect(c1.Configure(channelCreationEnv, 0)).To(Succeed())
	network.exec(func(c *chain) {
		Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
	})

	// assert c2 is not evicted
	Consistently(c2.Errored).ShouldNot(BeClosed())
	Expect(c2.Order(env, 0)).To(Succeed())

	network.exec(func(c *chain) {
		Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
	})
})

It("stop leader and continue reconfiguration failing over to new leader", func() {
	// Scenario: start a replica set of 3 Raft nodes and elect node c1 as the leader.
	// Configure the chain support mock to disconnect c1 right after it writes the configuration
	// block into the ledger; this simulates a failover.
	// Next, bootstrap a new node c4 to join the cluster, create a config transaction and submit
	// it to the leader. Once the leader writes the configuration block it fails, and leadership is
	// transferred to c2.
	// The test asserts that the new node c4 joins the cluster and that c2 handles the failover of
	// the re-configuration. Later we connect c1 back and make sure it is capable of catching up
	// with the new configuration and successfully rejoins the replica set.
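	// The wrapped step function below intercepts c1's outbound consensus traffic so the test
	// can disconnect the leader at the exact point where the config block has been replicated.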
2067 2068 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2069 c1.cutter.CutNext = true 2070 2071 step1 := c1.getStepFunc() 2072 count := c1.rpc.SendConsensusCallCount() // record current step call count 2073 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2074 // disconnect network after 4 MsgApp are sent by c1: 2075 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2076 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 2077 if c1.rpc.SendConsensusCallCount() == count+4 { 2078 defer network.disconnect(1) 2079 } 2080 2081 return step1(dest, msg) 2082 }) 2083 2084 By("sending config transaction") 2085 err := c1.Configure(configEnv, 0) 2086 Expect(err).NotTo(HaveOccurred()) 2087 2088 // every node has written config block to the OSN ledger 2089 network.exec( 2090 func(c *chain) { 2091 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2092 }) 2093 2094 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2095 c1.setStepFunc(step1) 2096 2097 // elect node with higher index 2098 i2, _ := c2.storage.LastIndex() // err is always nil 2099 i3, _ := c3.storage.LastIndex() 2100 candidate := uint64(2) 2101 if i3 > i2 { 2102 candidate = 3 2103 } 2104 network.chains[candidate].cutter.CutNext = true 2105 network.elect(candidate) 2106 2107 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2108 meta := &common.Metadata{Value: raftmetabytes} 2109 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2110 Expect(err).NotTo(HaveOccurred()) 2111 2112 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil) 2113 // if we join a node to existing network, it MUST already obtained blocks 2114 // till the config block that adds this node to cluster. 2115 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2116 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2117 c4.init() 2118 2119 network.addChain(c4) 2120 c4.start() 2121 Expect(c4.WaitReady()).To(Succeed()) 2122 network.join(4, true) 2123 2124 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2125 Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2126 2127 By("submitting new transaction to follower") 2128 err = c4.Order(env, 0) 2129 Expect(err).NotTo(HaveOccurred()) 2130 2131 // rest nodes are alive include a newly added, hence should write 2 blocks 2132 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2133 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2134 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2135 2136 // node 1 has been stopped should not write any block 2137 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2138 2139 network.join(1, true) 2140 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2141 }) 2142 2143 It("stop cluster quorum and continue reconfiguration after the restart", func() { 2144 // Scenario: Starting replica set of 3 Raft nodes, electing node c1 to be a leader 2145 // configure chain support mock to stop cluster after config block is committed. 2146 // Restart the cluster and ensure it picks up updates and capable to finish reconfiguration. 
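// As in the previous test, a wrapped step function counts outbound MsgApp so that all three
// nodes can be disconnected right after the config block has been replicated.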
2147 2148 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2149 c1.cutter.CutNext = true 2150 2151 step1 := c1.getStepFunc() 2152 count := c1.rpc.SendConsensusCallCount() // record current step call count 2153 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2154 // disconnect network after 4 MsgApp are sent by c1: 2155 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2156 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 2157 if c1.rpc.SendConsensusCallCount() == count+4 { 2158 defer func() { 2159 network.disconnect(1) 2160 network.disconnect(2) 2161 network.disconnect(3) 2162 }() 2163 } 2164 2165 return step1(dest, msg) 2166 }) 2167 2168 By("sending config transaction") 2169 err := c1.Configure(configEnv, 0) 2170 Expect(err).NotTo(HaveOccurred()) 2171 2172 // every node has written config block to the OSN ledger 2173 network.exec( 2174 func(c *chain) { 2175 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2176 }) 2177 2178 // assert conf change proposals have been dropped, before proceed to reconnect network 2179 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2180 c1.setStepFunc(step1) 2181 2182 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2183 meta := &common.Metadata{Value: raftmetabytes} 2184 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2185 Expect(err).NotTo(HaveOccurred()) 2186 2187 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil) 2188 // if we join a node to existing network, it MUST already obtained blocks 2189 // till the config block that adds this node to cluster. 2190 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2191 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2192 c4.init() 2193 2194 network.addChain(c4) 2195 2196 By("reconnecting nodes back") 2197 for i := uint64(1); i < 4; i++ { 2198 network.connect(i) 2199 } 2200 2201 // elect node with higher index 2202 i2, _ := c2.storage.LastIndex() // err is always nil 2203 i3, _ := c3.storage.LastIndex() 2204 candidate := uint64(2) 2205 if i3 > i2 { 2206 candidate = 3 2207 } 2208 network.chains[candidate].cutter.CutNext = true 2209 network.elect(candidate) 2210 2211 c4.start() 2212 Expect(c4.WaitReady()).To(Succeed()) 2213 network.join(4, false) 2214 2215 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2216 Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2217 2218 By("submitting new transaction to follower") 2219 err = c4.Order(env, 0) 2220 Expect(err).NotTo(HaveOccurred()) 2221 2222 // rest nodes are alive include a newly added, hence should write 2 blocks 2223 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2224 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2225 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2226 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2227 }) 2228 2229 It("ensures that despite leader failure cluster continue to process configuration to remove the leader", func() { 2230 // Scenario: Starting replica set of 3 nodes, electing nodeID = 1 to be the leader. 
	// Prepare a config update transaction which removes the leader (nodeID = 1); the leader
	// then fails right after it commits the configuration block.

	configEnv := newConfigEnv(channelID,
		common.HeaderType_CONFIG,
		newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1

	c1.cutter.CutNext = true

	step1 := c1.getStepFunc()
	count := c1.rpc.SendConsensusCallCount() // record current step call count
	c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
		// disconnect network after 4 MsgApp are sent by c1:
		// - 2 MsgApp to c2 & c3 that replicate data to raft followers
		// - 2 MsgApp to c2 & c3 that instruct followers to commit data
		if c1.rpc.SendConsensusCallCount() == count+4 {
			defer network.disconnect(1)
		}

		return step1(dest, msg)
	})

	By("sending config transaction")
	err := c1.Configure(configEnv, 0)
	Expect(err).NotTo(HaveOccurred())

	network.exec(func(c *chain) {
		Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
	})

	Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6))
	c1.setStepFunc(step1)

	// elect the node with the higher index
	i2, _ := c2.storage.LastIndex() // err is always nil
	i3, _ := c3.storage.LastIndex()
	candidate := uint64(2)
	if i3 > i2 {
		candidate = 3
	}
	network.chains[candidate].cutter.CutNext = true
	network.elect(candidate)

	By("submitting new transaction to follower")
	err = c3.Order(env, 0)
	Expect(err).NotTo(HaveOccurred())

	// the remaining nodes are alive and hence should write 2 blocks
	Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
	Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
})

It("removes leader from replica set", func() {
	// Scenario: start a replica set of 3 nodes and elect nodeID = 1 as the leader.
	// Prepare a config update transaction which removes the leader (nodeID = 1); this is to
	// ensure we handle the re-configuration for node removal correctly and that the remaining
	// two nodes are still capable of forming a functional quorum, with Raft able to make
	// further progress.
	// Moreover, the test asserts that the removed node stops Rafting with the rest of the
	// cluster, i.e. it should not be able to get updates or forward transactions.
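	// Unlike the previous test, the leader is not disconnected here; it is expected to detect
	// its own eviction and shut itself down.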

	configEnv := newConfigEnv(channelID,
		common.HeaderType_CONFIG,
		newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1

	c1.cutter.CutNext = true

	By("sending config transaction")
	err := c1.Configure(configEnv, 0)
	Expect(err).NotTo(HaveOccurred())

	// every node has written the config block to the OSN ledger
	network.exec(
		func(c *chain) {
			Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
			Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2))
			Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(2)))
		})

	// Assert c1 has exited
	c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2)
	Eventually(c1.Errored, LongEventualTimeout).Should(BeClosed())
	close(c1.stopped)

	var newLeader, remainingFollower *chain
	for newLeader == nil || remainingFollower == nil {
		var state raft.SoftState
		select {
		case state = <-c2.observe:
		case state = <-c3.observe:
		case <-time.After(LongEventualTimeout):
			Fail("Expected a new leader to be present")
		}

		if state.RaftState == raft.StateLeader && state.Lead != raft.None {
			newLeader = network.chains[state.Lead]
		}

		if state.RaftState == raft.StateFollower && state.Lead != raft.None {
			remainingFollower = network.chains[state.Lead]
		}
	}

	By("submitting transaction to new leader")
	newLeader.cutter.CutNext = true
	err = newLeader.Order(env, 0)
	Expect(err).NotTo(HaveOccurred())

	Eventually(newLeader.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
	Eventually(remainingFollower.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
	// node 1 has been stopped and should not write any block
	Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))

	By("trying to submit to the removed node, expected to fail")
	c1.cutter.CutNext = true
	err = c1.Order(env, 0)
	Expect(err).To(HaveOccurred())

	// the number of block writes should remain the same
	Consistently(newLeader.support.WriteBlockCallCount).Should(Equal(2))
	Consistently(remainingFollower.support.WriteBlockCallCount).Should(Equal(2))
	Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))
})

It("does not deadlock if leader steps down while config block is in-flight", func() {
	configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue()))
	c1.cutter.CutNext = true

	signal := make(chan struct{})
	stub := c1.support.WriteConfigBlockStub
	c1.support.WriteConfigBlockStub = func(b *common.Block, meta []byte) {
		signal <- struct{}{}
		<-signal
		stub(b, meta)
	}

	By("Sending config transaction")
	Expect(c1.Configure(configEnv, 0)).To(Succeed())

	Eventually(signal, LongEventualTimeout).Should(Receive())
	network.disconnect(1)

	By("Ticking leader till it steps down")
	Eventually(func() raft.SoftState {
		c1.clock.Increment(interval)
		return c1.Node.Status().SoftState
	}, LongEventualTimeout).Should(StateEqual(0, raft.StateFollower))

	close(signal)

	Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(0, raft.StateFollower)))

	By("Re-electing 1 as leader")
	network.connect(1)
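	// Once node 1 is re-elected, the raft metadata captured from the written config block is
	// used below to bootstrap the new node c4 and join it to the network.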
network.elect(1) 2385 2386 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2387 meta := &common.Metadata{Value: raftmetabytes} 2388 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2389 Expect(err).NotTo(HaveOccurred()) 2390 2391 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil) 2392 // if we join a node to existing network, it MUST already obtained blocks 2393 // till the config block that adds this node to cluster. 2394 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2395 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2396 c4.init() 2397 2398 network.addChain(c4) 2399 c4.Start() 2400 2401 Eventually(func() <-chan raft.SoftState { 2402 c1.clock.Increment(interval) 2403 return c4.observe 2404 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateFollower))) 2405 2406 By("Submitting tx to confirm network is still working") 2407 Expect(c1.Order(env, 0)).To(Succeed()) 2408 2409 network.exec(func(c *chain) { 2410 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2411 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2412 }) 2413 }) 2414 }) 2415 }) 2416 2417 When("3/3 nodes are running", func() { 2418 JustBeforeEach(func() { 2419 network.init() 2420 network.start() 2421 network.elect(1) 2422 }) 2423 2424 AfterEach(func() { 2425 network.stop() 2426 }) 2427 2428 It("correctly sets the cluster size and leadership metrics", func() { 2429 // the network should see only one leadership change 2430 network.exec(func(c *chain) { 2431 Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(1)) 2432 Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(0)).Should(Equal(float64(1))) 2433 Expect(c.fakeFields.fakeClusterSize.SetCallCount()).Should(Equal(1)) 2434 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(3))) 2435 }) 2436 // c1 should be the leader 2437 Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2)) 2438 Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1))) 2439 // c2 and c3 should continue to remain followers 2440 Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2441 Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0))) 2442 Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2443 Expect(c3.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0))) 2444 }) 2445 2446 It("orders envelope on leader", func() { 2447 By("instructed to cut next block") 2448 c1.cutter.CutNext = true 2449 err := c1.Order(env, 0) 2450 Expect(err).NotTo(HaveOccurred()) 2451 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2452 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2453 2454 network.exec( 2455 func(c *chain) { 2456 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2457 }) 2458 2459 By("respect batch timeout") 2460 c1.cutter.CutNext = false 2461 2462 err = c1.Order(env, 0) 2463 Expect(err).NotTo(HaveOccurred()) 2464 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 2465 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 2466 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 2467 2468 c1.clock.WaitForNWatchersAndIncrement(timeout, 2) 2469 network.exec( 2470 func(c *chain) { 2471 Eventually(c.support.WriteBlockCallCount, 
LongEventualTimeout).Should(Equal(2)) 2472 }) 2473 }) 2474 2475 It("orders envelope on follower", func() { 2476 By("instructed to cut next block") 2477 c1.cutter.CutNext = true 2478 err := c2.Order(env, 0) 2479 Expect(err).NotTo(HaveOccurred()) 2480 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2481 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2482 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2483 2484 network.exec( 2485 func(c *chain) { 2486 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2487 }) 2488 2489 By("respect batch timeout") 2490 c1.cutter.CutNext = false 2491 2492 err = c2.Order(env, 0) 2493 Expect(err).NotTo(HaveOccurred()) 2494 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 2495 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 2496 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2497 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 2498 2499 c1.clock.WaitForNWatchersAndIncrement(timeout, 2) 2500 network.exec( 2501 func(c *chain) { 2502 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2503 }) 2504 }) 2505 2506 When("MaxInflightBlocks is reached", func() { 2507 BeforeEach(func() { 2508 network.exec(func(c *chain) { c.opts.MaxInflightBlocks = 1 }) 2509 }) 2510 2511 It("waits for in flight blocks to be committed", func() { 2512 c1.cutter.CutNext = true 2513 // disconnect c1 to disrupt consensus 2514 network.disconnect(1) 2515 2516 Expect(c1.Order(env, 0)).To(Succeed()) 2517 2518 doneProp := make(chan struct{}) 2519 go func() { 2520 defer GinkgoRecover() 2521 Expect(c1.Order(env, 0)).To(Succeed()) 2522 close(doneProp) 2523 }() 2524 // expect second `Order` to block 2525 Consistently(doneProp).ShouldNot(BeClosed()) 2526 network.exec(func(c *chain) { 2527 Consistently(c.support.WriteBlockCallCount).Should(BeZero()) 2528 }) 2529 2530 network.connect(1) 2531 c1.clock.Increment(interval) 2532 2533 Eventually(doneProp, LongEventualTimeout).Should(BeClosed()) 2534 network.exec(func(c *chain) { 2535 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2536 }) 2537 }) 2538 2539 It("resets block in flight when steps down from leader", func() { 2540 c1.cutter.CutNext = true 2541 c2.cutter.CutNext = true 2542 // disconnect c1 to disrupt consensus 2543 network.disconnect(1) 2544 2545 Expect(c1.Order(env, 0)).To(Succeed()) 2546 2547 doneProp := make(chan struct{}) 2548 go func() { 2549 defer GinkgoRecover() 2550 2551 Expect(c1.Order(env, 0)).To(Succeed()) 2552 close(doneProp) 2553 }() 2554 // expect second `Order` to block 2555 Consistently(doneProp).ShouldNot(BeClosed()) 2556 network.exec(func(c *chain) { 2557 Consistently(c.support.WriteBlockCallCount).Should(BeZero()) 2558 }) 2559 2560 network.elect(2) 2561 Expect(c3.Order(env, 0)).To(Succeed()) 2562 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2563 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2564 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2565 2566 network.connect(1) 2567 c2.clock.Increment(interval) 2568 2569 Eventually(doneProp, LongEventualTimeout).Should(BeClosed()) 2570 network.exec(func(c *chain) { 2571 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 
2572 }) 2573 }) 2574 }) 2575 2576 When("leader is disconnected", func() { 2577 It("proactively steps down to follower", func() { 2578 network.disconnect(1) 2579 2580 By("Ticking leader until it steps down") 2581 Eventually(func() <-chan raft.SoftState { 2582 c1.clock.Increment(interval) 2583 return c1.observe 2584 }, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StateFollower}))) 2585 2586 By("Ensuring it does not accept message due to the cluster being leaderless") 2587 err := c1.Order(env, 0) 2588 Expect(err).To(MatchError("no Raft leader")) 2589 2590 network.elect(2) 2591 2592 // c1 should have lost leadership 2593 Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(3)) 2594 Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(2)).Should(Equal(float64(0))) 2595 // c2 should become the leader 2596 Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2)) 2597 Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1))) 2598 // c2 should continue to remain follower 2599 Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2600 2601 network.join(1, true) 2602 network.exec(func(c *chain) { 2603 Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(3)) 2604 Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(2)).Should(Equal(float64(1))) 2605 }) 2606 2607 err = c1.Order(env, 0) 2608 Expect(err).NotTo(HaveOccurred()) 2609 }) 2610 2611 It("does not deadlock if propose is blocked", func() { 2612 signal := make(chan struct{}) 2613 c1.cutter.CutNext = true 2614 c1.support.SequenceStub = func() uint64 { 2615 signal <- struct{}{} 2616 <-signal 2617 return 0 2618 } 2619 2620 By("Sending a normal transaction") 2621 Expect(c1.Order(env, 0)).To(Succeed()) 2622 2623 Eventually(signal).Should(Receive()) 2624 network.disconnect(1) 2625 2626 By("Ticking leader till it steps down") 2627 Eventually(func() raft.SoftState { 2628 c1.clock.Increment(interval) 2629 return c1.Node.Status().SoftState 2630 }).Should(StateEqual(0, raft.StateFollower)) 2631 2632 close(signal) 2633 2634 Eventually(c1.observe).Should(Receive(StateEqual(0, raft.StateFollower))) 2635 c1.support.SequenceStub = nil 2636 network.exec(func(c *chain) { 2637 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2638 }) 2639 2640 By("Re-electing 1 as leader") 2641 network.connect(1) 2642 network.elect(1) 2643 2644 By("Sending another normal transaction") 2645 Expect(c1.Order(env, 0)).To(Succeed()) 2646 2647 network.exec(func(c *chain) { 2648 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2649 }) 2650 }) 2651 }) 2652 2653 When("follower is disconnected", func() { 2654 It("should return error when receiving an env", func() { 2655 network.disconnect(2) 2656 2657 errorC := c2.Errored() 2658 Consistently(errorC).ShouldNot(BeClosed()) // assert that errorC is not closed 2659 2660 By("Ticking node 2 until it becomes pre-candidate") 2661 Eventually(func() <-chan raft.SoftState { 2662 c2.clock.Increment(interval) 2663 return c2.observe 2664 }, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StatePreCandidate}))) 2665 2666 Eventually(errorC).Should(BeClosed()) 2667 err := c2.Order(env, 0) 2668 Expect(err).To(HaveOccurred()) 2669 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2670 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2671 
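	// The leader never saw the proposal because the submitting follower was disconnected.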
	Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0))

	network.connect(2)
	c1.clock.Increment(interval)
	Expect(errorC).To(BeClosed())

	Eventually(c2.Errored).ShouldNot(BeClosed())
})
})

It("leader retransmits lost messages", func() {
	// This tests that heartbeats will trigger the leader to retransmit lost MsgApp

	c1.cutter.CutNext = true

	network.disconnect(1) // drop MsgApp

	err := c1.Order(env, 0)
	Expect(err).NotTo(HaveOccurred())

	network.exec(
		func(c *chain) {
			Consistently(func() int { return c.support.WriteBlockCallCount() }).Should(Equal(0))
		})

	network.connect(1) // reconnect leader

	c1.clock.Increment(interval) // trigger a heartbeat
	network.exec(
		func(c *chain) {
			Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
		})
})

It("allows the leader to create multiple normal blocks without having to wait for them to be written out", func() {
	// this ensures that the created blocks are not written out
	network.disconnect(1)

	c1.cutter.CutNext = true
	for i := 0; i < 3; i++ {
		Expect(c1.Order(env, 0)).To(Succeed())
	}

	Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))

	network.connect(1)

	// After FAB-13722, the leader pauses replication if it gets notified that message
	// delivery to a certain node has failed, e.g. connection refused. Replication to that
	// follower is resumed once the leader receives a MsgHeartbeatResp from it.
	// We could certainly tick the leader repeatedly to trigger heartbeat broadcasts, but we
	// would also risk a slow leader stepping down due to excessive ticks.
	//
	// Instead, we simply send an artificial MsgHeartbeatResp to the leader to resume replication.
	m2 := &raftpb.Message{To: c1.id, From: c2.id, Type: raftpb.MsgHeartbeatResp}
	c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m2)}, c2.id)
	m3 := &raftpb.Message{To: c1.id, From: c3.id, Type: raftpb.MsgHeartbeatResp}
	c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m3)}, c3.id)

	network.exec(func(c *chain) {
		Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
	})
})

It("new leader should wait for in-flight blocks to commit before accepting new env", func() {
	// Scenario: when a node is elected as the new leader and there are still in-flight blocks,
	// it should not immediately start accepting new envelopes; instead it should wait for
	// those in-flight blocks to be committed, otherwise we may create an uncle block which
	// forks and panics the chain.
	//
	// Steps:
	// - start raft cluster with three nodes and genesis block0
	// - order env1 on c1, which creates block1
	// - drop MsgApp from 1 to 3
	// - drop second round of MsgApp sent from 1 to 2, so that block1 is only committed on c1
	// - disconnect c1 and elect c2
	// - order env2 on c2. This env must NOT be immediately accepted, otherwise c2 would create
	//   an uncle block1 based on block0.
2749 // - c2 commits block1 2750 // - c2 accepts env2, and creates block2 2751 // - c2 commits block2 2752 c1.cutter.CutNext = true 2753 c2.cutter.CutNext = true 2754 2755 step1 := c1.getStepFunc() 2756 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2757 stepMsg := &raftpb.Message{} 2758 Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred()) 2759 2760 if dest == 3 { 2761 return nil 2762 } 2763 2764 if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) == 0 { 2765 return nil 2766 } 2767 2768 return step1(dest, msg) 2769 }) 2770 2771 Expect(c1.Order(env, 0)).NotTo(HaveOccurred()) 2772 2773 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2774 Consistently(c2.support.WriteBlockCallCount).Should(Equal(0)) 2775 Consistently(c3.support.WriteBlockCallCount).Should(Equal(0)) 2776 2777 network.disconnect(1) 2778 2779 step2 := c2.getStepFunc() 2780 c2.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2781 stepMsg := &raftpb.Message{} 2782 Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred()) 2783 2784 if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) != 0 && dest == 3 { 2785 for _, ent := range stepMsg.Entries { 2786 if len(ent.Data) != 0 { 2787 return nil 2788 } 2789 } 2790 } 2791 return step2(dest, msg) 2792 }) 2793 2794 network.elect(2) 2795 2796 go func() { 2797 defer GinkgoRecover() 2798 Expect(c2.Order(env, 0)).NotTo(HaveOccurred()) 2799 }() 2800 2801 Consistently(c2.support.WriteBlockCallCount).Should(Equal(0)) 2802 Consistently(c3.support.WriteBlockCallCount).Should(Equal(0)) 2803 2804 c2.setStepFunc(step2) 2805 c2.clock.Increment(interval) 2806 2807 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2808 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2809 2810 b, _ := c2.support.WriteBlockArgsForCall(0) 2811 Expect(b.Header.Number).To(Equal(uint64(1))) 2812 b, _ = c2.support.WriteBlockArgsForCall(1) 2813 Expect(b.Header.Number).To(Equal(uint64(2))) 2814 }) 2815 2816 Context("handling config blocks", func() { 2817 var configEnv *common.Envelope 2818 BeforeEach(func() { 2819 values := map[string]*common.ConfigValue{ 2820 "BatchTimeout": { 2821 Version: 1, 2822 Value: marshalOrPanic(&orderer.BatchTimeout{ 2823 Timeout: "3ms", 2824 }), 2825 }, 2826 } 2827 configEnv = newConfigEnv(channelID, 2828 common.HeaderType_CONFIG, 2829 newConfigUpdateEnv(channelID, nil, values), 2830 ) 2831 }) 2832 2833 It("holds up block creation on leader once a config block has been created and not written out", func() { 2834 // this ensures that the created blocks are not written out 2835 network.disconnect(1) 2836 2837 c1.cutter.CutNext = true 2838 // config block 2839 err := c1.Order(configEnv, 0) 2840 Expect(err).NotTo(HaveOccurred()) 2841 2842 // to avoid data races since we are accessing these within a goroutine 2843 tempEnv := env 2844 tempC1 := c1 2845 2846 done := make(chan struct{}) 2847 2848 // normal block 2849 go func() { 2850 defer GinkgoRecover() 2851 2852 // This should be blocked if config block is not committed 2853 err := tempC1.Order(tempEnv, 0) 2854 Expect(err).NotTo(HaveOccurred()) 2855 2856 close(done) 2857 }() 2858 2859 Consistently(done).ShouldNot(BeClosed()) 2860 2861 network.connect(1) 2862 c1.clock.Increment(interval) 2863 2864 network.exec( 2865 func(c *chain) { 2866 Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2867 }) 2868 2869 network.exec( 2870 
func(c *chain) { 2871 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2872 }) 2873 }) 2874 2875 It("continues creating blocks on leader after a config block has been successfully written out", func() { 2876 c1.cutter.CutNext = true 2877 // config block 2878 err := c1.Configure(configEnv, 0) 2879 Expect(err).NotTo(HaveOccurred()) 2880 network.exec( 2881 func(c *chain) { 2882 Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2883 }) 2884 2885 // normal block following config block 2886 err = c1.Order(env, 0) 2887 Expect(err).NotTo(HaveOccurred()) 2888 network.exec( 2889 func(c *chain) { 2890 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2891 }) 2892 }) 2893 }) 2894 2895 When("Snapshotting is enabled", func() { 2896 BeforeEach(func() { 2897 c1.opts.SnapshotIntervalSize = 1 2898 c1.opts.SnapshotCatchUpEntries = 1 2899 }) 2900 2901 It("keeps running if some entries in memory are purged", func() { 2902 // Scenario: snapshotting is enabled on node 1 and it purges memory storage 2903 // per every snapshot. Cluster should be correctly functioning. 2904 2905 i, err := c1.opts.MemoryStorage.FirstIndex() 2906 Expect(err).NotTo(HaveOccurred()) 2907 Expect(i).To(Equal(uint64(1))) 2908 2909 c1.cutter.CutNext = true 2910 2911 err = c1.Order(env, 0) 2912 Expect(err).NotTo(HaveOccurred()) 2913 2914 network.exec( 2915 func(c *chain) { 2916 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2917 }) 2918 2919 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2920 i, err = c1.opts.MemoryStorage.FirstIndex() 2921 Expect(err).NotTo(HaveOccurred()) 2922 2923 err = c1.Order(env, 0) 2924 Expect(err).NotTo(HaveOccurred()) 2925 2926 network.exec( 2927 func(c *chain) { 2928 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2929 }) 2930 2931 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2932 i, err = c1.opts.MemoryStorage.FirstIndex() 2933 Expect(err).NotTo(HaveOccurred()) 2934 2935 err = c1.Order(env, 0) 2936 Expect(err).NotTo(HaveOccurred()) 2937 2938 network.exec( 2939 func(c *chain) { 2940 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 2941 }) 2942 2943 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2944 }) 2945 2946 It("lagged node can catch up using snapshot", func() { 2947 network.disconnect(2) 2948 c1.cutter.CutNext = true 2949 2950 c2Lasti, _ := c2.opts.MemoryStorage.LastIndex() 2951 var blockCnt int 2952 // Order blocks until first index of c1 memory is greater than last index of c2, 2953 // so a snapshot will be sent to c2 when it rejoins network 2954 Eventually(func() bool { 2955 c1Firsti, _ := c1.opts.MemoryStorage.FirstIndex() 2956 if c1Firsti > c2Lasti+1 { 2957 return true 2958 } 2959 2960 Expect(c1.Order(env, 0)).To(Succeed()) 2961 blockCnt++ 2962 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 2963 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 2964 return false 2965 }, LongEventualTimeout).Should(BeTrue()) 2966 2967 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2968 2969 network.join(2, false) 2970 2971 Eventually(c2.support.WriteBlockCallCount, 
		LongEventualTimeout).Should(Equal(blockCnt))
	indices := etcdraft.ListSnapshots(logger, c2.opts.SnapDir)
	Expect(indices).To(HaveLen(1))
	gap := indices[0] - c2Lasti

	// TODO In theory, "equal" is the accurate behaviour we expect. However, the eviction
	// suspector, which calls the block puller, still relies on the real clock and sometimes
	// increments the puller call count. Therefore we are being more lenient here until the
	// suspector starts using the fake clock, so that we have more deterministic control over it.
	Expect(c2.puller.PullBlockCallCount()).To(BeNumerically(">=", int(gap)))

	// the chain should keep functioning
	Expect(c2.Order(env, 0)).To(Succeed())

	network.exec(
		func(c *chain) {
			Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(blockCnt + 1))
		})
})
})

Context("failover", func() {
	It("follower should step up as leader upon failover", func() {
		network.stop(1)
		network.elect(2)

		By("order envelope on new leader")
		c2.cutter.CutNext = true
		err := c2.Order(env, 0)
		Expect(err).NotTo(HaveOccurred())

		// block should not be produced on chain 1
		Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))

		// block should be produced on chain 2 & 3
		Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
		Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

		By("order envelope on follower")
		err = c3.Order(env, 0)
		Expect(err).NotTo(HaveOccurred())

		// block should not be produced on chain 1
		Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))

		// block should be produced on chain 2 & 3
		Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
		Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
	})

	It("follower cannot be elected if its log is not up-to-date", func() {
		network.disconnect(2)

		c1.cutter.CutNext = true
		err := c1.Order(env, 0)
		Expect(err).NotTo(HaveOccurred())

		Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
		Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
		Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

		network.disconnect(1)
		network.connect(2)

		// node 2 has not caught up with other nodes
		for tick := 0; tick < 2*ELECTION_TICK-1; tick++ {
			c2.clock.Increment(interval)
			Consistently(c2.observe).ShouldNot(Receive(Equal(2)))
		}

		// When PreVote is enabled, node 2 would fail to collect enough
		// PreVotes because its index is not up-to-date. Therefore, it
		// does not cause a leader change on the other nodes.
		Consistently(c3.observe).ShouldNot(Receive())
		network.elect(3) // node 3 has newest logs among 2&3, so it can be elected
	})

	It("PreVote prevents reconnected node from disturbing network", func() {
		network.disconnect(2)

		c1.cutter.CutNext = true
		err := c1.Order(env, 0)
		Expect(err).NotTo(HaveOccurred())

		Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
		Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
		Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

		network.connect(2)

		for tick := 0; tick < 2*ELECTION_TICK-1; tick++ {
			c2.clock.Increment(interval)
			Consistently(c2.observe).ShouldNot(Receive(Equal(2)))
		}

		Consistently(c1.observe).ShouldNot(Receive())
		Consistently(c3.observe).ShouldNot(Receive())
	})

	It("follower can catch up and then campaign with success", func() {
		network.disconnect(2)

		c1.cutter.CutNext = true
		for i := 0; i < 10; i++ {
			err := c1.Order(env, 0)
			Expect(err).NotTo(HaveOccurred())
		}

		Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))
		Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
		Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))

		network.join(2, false)
		Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))

		network.disconnect(1)
		network.elect(2)
	})

	It("purges blockcutter, stops timer and discards created blocks if leadership is lost", func() {
		// enqueue one transaction into 1's blockcutter to test for purging of block cutter
		c1.cutter.CutNext = false
		err := c1.Order(env, 0)
		Expect(err).NotTo(HaveOccurred())
		Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))

		// no block should be written because env is not cut into block yet
		c1.clock.WaitForNWatchersAndIncrement(interval, 2)
		Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))

		network.disconnect(1)
		network.elect(2)
		network.join(1, true)

		Eventually(c1.clock.WatcherCount, LongEventualTimeout).Should(Equal(1)) // blockcutter timer is stopped
		Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(0))
		// the created block should be discarded since there is a leadership change
		Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))

		network.disconnect(2)
		network.elect(1)

		err = c1.Order(env, 0)
		Expect(err).NotTo(HaveOccurred())

		// The following group of assertions is redundant - it's here for completeness.
		// If the blockcutter has not been reset, fast-forwarding 1's clock to 'timeout' should result in the blockcutter firing.
		// If the blockcutter has been reset, fast-forwarding won't do anything.
3119 // 3120 // Put differently: 3121 // 3122 // correct: 3123 // stop start fire 3124 // |--------------|---------------------------| 3125 // n*intervals timeout 3126 // (advanced in election) 3127 // 3128 // wrong: 3129 // unstop fire 3130 // |---------------------------| 3131 // timeout 3132 // 3133 // timeout-n*interval n*interval 3134 // |-----------|----------------| 3135 // ^ ^ 3136 // at this point of time it should fire 3137 // timer should not fire at this point 3138 3139 c1.clock.WaitForNWatchersAndIncrement(timeout-interval, 2) 3140 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 3141 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 3142 3143 c1.clock.Increment(interval) 3144 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3145 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3146 }) 3147 3148 It("stale leader should not be able to propose block because of lagged term", func() { 3149 network.disconnect(1) 3150 network.elect(2) 3151 network.connect(1) 3152 3153 c1.cutter.CutNext = true 3154 err := c1.Order(env, 0) 3155 Expect(err).NotTo(HaveOccurred()) 3156 3157 network.exec( 3158 func(c *chain) { 3159 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 3160 }) 3161 }) 3162 3163 It("aborts waiting for block to be committed upon leadership lost", func() { 3164 network.disconnect(1) 3165 3166 c1.cutter.CutNext = true 3167 err := c1.Order(env, 0) 3168 Expect(err).NotTo(HaveOccurred()) 3169 3170 network.exec( 3171 func(c *chain) { 3172 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 3173 }) 3174 3175 network.elect(2) 3176 network.connect(1) 3177 3178 c2.clock.Increment(interval) 3179 // this check guarantees that signal on resignC is consumed in commitBatches method. 
3180 Eventually(c1.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 2, RaftState: raft.StateFollower}))) 3181 }) 3182 }) 3183 }) 3184 }) 3185 }) 3186 3187 func nodeConfigFromMetadata(consenterMetadata *raftprotos.ConfigMetadata) []cluster.RemoteNode { 3188 var nodes []cluster.RemoteNode 3189 for i, consenter := range consenterMetadata.Consenters { 3190 // For now, skip ourselves 3191 if i == 0 { 3192 continue 3193 } 3194 serverDER, _ := pem.Decode(consenter.ServerTlsCert) 3195 clientDER, _ := pem.Decode(consenter.ClientTlsCert) 3196 node := cluster.RemoteNode{ 3197 ID: uint64(i + 1), 3198 Endpoint: "localhost:7050", 3199 ServerTLSCert: serverDER.Bytes, 3200 ClientTLSCert: clientDER.Bytes, 3201 } 3202 nodes = append(nodes, node) 3203 } 3204 return nodes 3205 } 3206 3207 func createMetadata(nodeCount int, tlsCA tlsgen.CA) *raftprotos.ConfigMetadata { 3208 md := &raftprotos.ConfigMetadata{Options: &raftprotos.Options{ 3209 TickInterval: time.Duration(interval).String(), 3210 ElectionTick: ELECTION_TICK, 3211 HeartbeatTick: HEARTBEAT_TICK, 3212 MaxInflightBlocks: 5, 3213 }} 3214 for i := 0; i < nodeCount; i++ { 3215 md.Consenters = append(md.Consenters, &raftprotos.Consenter{ 3216 Host: "localhost", 3217 Port: 7050, 3218 ServerTlsCert: serverTLSCert(tlsCA), 3219 ClientTlsCert: clientTLSCert(tlsCA), 3220 }) 3221 } 3222 return md 3223 } 3224 3225 func serverTLSCert(tlsCA tlsgen.CA) []byte { 3226 cert, err := tlsCA.NewServerCertKeyPair("localhost") 3227 if err != nil { 3228 panic(err) 3229 } 3230 return cert.Cert 3231 } 3232 3233 func clientTLSCert(tlsCA tlsgen.CA) []byte { 3234 cert, err := tlsCA.NewClientCertKeyPair() 3235 if err != nil { 3236 panic(err) 3237 } 3238 return cert.Cert 3239 } 3240 3241 // marshalOrPanic serializes a protobuf message and panics if this 3242 // operation fails 3243 func marshalOrPanic(pb proto.Message) []byte { 3244 data, err := proto.Marshal(pb) 3245 if err != nil { 3246 panic(err) 3247 } 3248 return data 3249 } 3250 3251 // helpers to facilitate tests 3252 type stepFunc func(dest uint64, msg *orderer.ConsensusRequest) error 3253 3254 type chain struct { 3255 id uint64 3256 3257 stepLock sync.Mutex 3258 step stepFunc 3259 3260 // msgBuffer serializes ingress messages for a chain 3261 // so they are delivered in the same order 3262 msgBuffer chan *msg 3263 3264 support *consensusmocks.FakeConsenterSupport 3265 cutter *mockblockcutter.Receiver 3266 configurator *mocks.FakeConfigurator 3267 rpc *mocks.FakeRPC 3268 storage *raft.MemoryStorage 3269 clock *fakeclock.FakeClock 3270 opts etcdraft.Options 3271 puller *mocks.FakeBlockPuller 3272 3273 // store written blocks to be returned by mock block puller 3274 ledgerLock sync.RWMutex 3275 ledger map[uint64]*common.Block 3276 ledgerHeight uint64 3277 lastConfigBlockNumber uint64 3278 3279 observe chan raft.SoftState 3280 unstarted chan struct{} 3281 stopped chan struct{} 3282 3283 fakeFields *fakeMetricsFields 3284 3285 *etcdraft.Chain 3286 3287 cryptoProvider bccsp.BCCSP 3288 } 3289 3290 type msg struct { 3291 req *orderer.ConsensusRequest 3292 sender uint64 3293 } 3294 3295 func newChain( 3296 timeout time.Duration, 3297 channel, dataDir string, 3298 id uint64, 3299 raftMetadata *raftprotos.BlockMetadata, 3300 consenters map[uint64]*raftprotos.Consenter, 3301 cryptoProvider bccsp.BCCSP, 3302 support *consensusmocks.FakeConsenterSupport, 3303 ) *chain { 3304 rpc := &mocks.FakeRPC{} 3305 clock := fakeclock.NewFakeClock(time.Now()) 3306 storage := raft.NewMemoryStorage() 3307 3308 fakeFields := 
newFakeMetricsFields() 3309 3310 opts := etcdraft.Options{ 3311 RaftID: uint64(id), 3312 Clock: clock, 3313 TickInterval: interval, 3314 ElectionTick: ELECTION_TICK, 3315 HeartbeatTick: HEARTBEAT_TICK, 3316 MaxSizePerMsg: 1024 * 1024, 3317 MaxInflightBlocks: 256, 3318 BlockMetadata: raftMetadata, 3319 LeaderCheckInterval: 500 * time.Millisecond, 3320 Consenters: consenters, 3321 Logger: flogging.NewFabricLogger(zap.NewExample()), 3322 MemoryStorage: storage, 3323 WALDir: path.Join(dataDir, "wal"), 3324 SnapDir: path.Join(dataDir, "snapshot"), 3325 Metrics: newFakeMetrics(fakeFields), 3326 } 3327 3328 if support == nil { 3329 support = &consensusmocks.FakeConsenterSupport{} 3330 support.ChannelIDReturns(channel) 3331 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil)) 3332 } 3333 cutter := mockblockcutter.NewReceiver() 3334 close(cutter.Block) 3335 support.BlockCutterReturns(cutter) 3336 3337 // upon leader change, lead is reset to 0 before set to actual 3338 // new leader, i.e. 1 -> 0 -> 2. Therefore 2 numbers will be 3339 // sent on this chan, so we need size to be 2 3340 observe := make(chan raft.SoftState, 2) 3341 3342 configurator := &mocks.FakeConfigurator{} 3343 puller := &mocks.FakeBlockPuller{} 3344 3345 ch := make(chan struct{}) 3346 close(ch) 3347 3348 c := &chain{ 3349 id: id, 3350 support: support, 3351 cutter: cutter, 3352 rpc: rpc, 3353 storage: storage, 3354 observe: observe, 3355 clock: clock, 3356 opts: opts, 3357 unstarted: ch, 3358 stopped: make(chan struct{}), 3359 configurator: configurator, 3360 puller: puller, 3361 ledger: map[uint64]*common.Block{ 3362 0: getSeedBlock(), // Very first block 3363 }, 3364 ledgerHeight: 1, 3365 fakeFields: fakeFields, 3366 cryptoProvider: cryptoProvider, 3367 msgBuffer: make(chan *msg, 500), 3368 } 3369 3370 // receives normal blocks and metadata and appends it into 3371 // the ledger struct to simulate write behaviour 3372 appendNormalBlockToLedger := func(b *common.Block, meta []byte) { 3373 c.ledgerLock.Lock() 3374 defer c.ledgerLock.Unlock() 3375 3376 b = proto.Clone(b).(*common.Block) 3377 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 3378 Expect(err).NotTo(HaveOccurred()) 3379 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 3380 3381 lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber}) 3382 b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{ 3383 Value: lastConfigValue, 3384 }) 3385 3386 c.ledger[b.Header.Number] = b 3387 if c.ledgerHeight < b.Header.Number+1 { 3388 c.ledgerHeight = b.Header.Number + 1 3389 } 3390 } 3391 3392 // receives config blocks and metadata and appends it into 3393 // the ledger struct to simulate write behaviour 3394 appendConfigBlockToLedger := func(b *common.Block, meta []byte) { 3395 c.ledgerLock.Lock() 3396 defer c.ledgerLock.Unlock() 3397 3398 b = proto.Clone(b).(*common.Block) 3399 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 3400 Expect(err).NotTo(HaveOccurred()) 3401 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 3402 3403 c.lastConfigBlockNumber = b.Header.Number 3404 3405 lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber}) 3406 b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{ 3407 Value: lastConfigValue, 3408 }) 3409 3410 c.ledger[b.Header.Number] = b 3411 if c.ledgerHeight < b.Header.Number+1 { 3412 c.ledgerHeight = 
b.Header.Number + 1 3413 } 3414 } 3415 3416 c.support.WriteBlockStub = appendNormalBlockToLedger 3417 c.support.WriteConfigBlockStub = appendConfigBlockToLedger 3418 3419 // returns current ledger height 3420 c.support.HeightStub = func() uint64 { 3421 c.ledgerLock.RLock() 3422 defer c.ledgerLock.RUnlock() 3423 return c.ledgerHeight 3424 } 3425 3426 // reads block from the ledger 3427 c.support.BlockStub = func(number uint64) *common.Block { 3428 c.ledgerLock.RLock() 3429 defer c.ledgerLock.RUnlock() 3430 return c.ledger[number] 3431 } 3432 3433 // consume ingress messages for chain 3434 go func() { 3435 for msg := range c.msgBuffer { 3436 c.Consensus(msg.req, msg.sender) 3437 } 3438 }() 3439 3440 return c 3441 } 3442 3443 func (c *chain) init() { 3444 ch, err := etcdraft.NewChain( 3445 c.support, 3446 c.opts, 3447 c.configurator, 3448 c.rpc, 3449 c.cryptoProvider, 3450 func() (etcdraft.BlockPuller, error) { return c.puller, nil }, 3451 nil, 3452 c.observe, 3453 ) 3454 Expect(err).NotTo(HaveOccurred()) 3455 c.Chain = ch 3456 } 3457 3458 func (c *chain) start() { 3459 c.unstarted = nil 3460 c.Start() 3461 } 3462 3463 func (c *chain) setStepFunc(f stepFunc) { 3464 c.stepLock.Lock() 3465 c.step = f 3466 c.stepLock.Unlock() 3467 } 3468 3469 func (c *chain) getStepFunc() stepFunc { 3470 c.stepLock.Lock() 3471 defer c.stepLock.Unlock() 3472 return c.step 3473 } 3474 3475 type network struct { 3476 sync.RWMutex 3477 3478 leader uint64 3479 chains map[uint64]*chain 3480 3481 // links simulates the configuration of comm layer (link is bi-directional). 3482 // if links[left][right] == true, right can send msg to left. 3483 links map[uint64]map[uint64]bool 3484 // connectivity determines if a node is connected to network. This is used for tests 3485 // to simulate network partition. 
3486 connectivity map[uint64]bool 3487 } 3488 3489 func (n *network) link(from []uint64, to uint64) { 3490 links := make(map[uint64]bool) 3491 for _, id := range from { 3492 links[id] = true 3493 } 3494 3495 n.Lock() 3496 defer n.Unlock() 3497 3498 n.links[to] = links 3499 } 3500 3501 func (n *network) linked(from, to uint64) bool { 3502 n.RLock() 3503 defer n.RUnlock() 3504 3505 return n.links[to][from] 3506 } 3507 3508 func (n *network) connect(id uint64) { 3509 n.Lock() 3510 defer n.Unlock() 3511 3512 n.connectivity[id] = true 3513 } 3514 3515 func (n *network) disconnect(id uint64) { 3516 n.Lock() 3517 defer n.Unlock() 3518 3519 n.connectivity[id] = false 3520 } 3521 3522 func (n *network) connected(id uint64) bool { 3523 n.RLock() 3524 defer n.RUnlock() 3525 3526 return n.connectivity[id] 3527 } 3528 3529 func (n *network) addChain(c *chain) { 3530 n.connect(c.id) // chain is connected by default 3531 3532 c.step = func(dest uint64, req *orderer.ConsensusRequest) error { 3533 if !n.linked(c.id, dest) { 3534 return errors.Errorf("connection refused") 3535 } 3536 3537 if !n.connected(c.id) || !n.connected(dest) { 3538 return errors.Errorf("connection lost") 3539 } 3540 3541 n.RLock() 3542 target := n.chains[dest] 3543 n.RUnlock() 3544 target.msgBuffer <- &msg{req: req, sender: c.id} 3545 return nil 3546 } 3547 3548 c.rpc.SendConsensusStub = func(dest uint64, msg *orderer.ConsensusRequest) error { 3549 c.stepLock.Lock() 3550 defer c.stepLock.Unlock() 3551 return c.step(dest, msg) 3552 } 3553 3554 c.rpc.SendSubmitStub = func(dest uint64, msg *orderer.SubmitRequest) error { 3555 if !n.linked(c.id, dest) { 3556 return errors.Errorf("connection refused") 3557 } 3558 3559 if !n.connected(c.id) || !n.connected(dest) { 3560 return errors.Errorf("connection lost") 3561 } 3562 3563 n.RLock() 3564 target := n.chains[dest] 3565 n.RUnlock() 3566 go func() { 3567 defer GinkgoRecover() 3568 target.Submit(msg, c.id) 3569 }() 3570 return nil 3571 } 3572 3573 c.puller.PullBlockStub = func(i uint64) *common.Block { 3574 n.RLock() 3575 leaderChain := n.chains[n.leader] 3576 n.RUnlock() 3577 3578 leaderChain.ledgerLock.RLock() 3579 defer leaderChain.ledgerLock.RUnlock() 3580 block := leaderChain.ledger[i] 3581 return block 3582 } 3583 3584 c.puller.HeightsByEndpointsStub = func() (map[string]uint64, error) { 3585 n.RLock() 3586 leader := n.chains[n.leader] 3587 n.RUnlock() 3588 3589 if leader == nil { 3590 return nil, errors.Errorf("ledger not available") 3591 } 3592 3593 leader.ledgerLock.RLock() 3594 defer leader.ledgerLock.RUnlock() 3595 return map[string]uint64{"leader": leader.ledgerHeight}, nil 3596 } 3597 3598 c.configurator.ConfigureCalls(func(channel string, nodes []cluster.RemoteNode) { 3599 var ids []uint64 3600 for _, node := range nodes { 3601 ids = append(ids, node.ID) 3602 } 3603 n.link(ids, c.id) 3604 }) 3605 3606 n.Lock() 3607 defer n.Unlock() 3608 n.chains[c.id] = c 3609 } 3610 3611 func createNetwork( 3612 timeout time.Duration, 3613 channel, dataDir string, 3614 raftMetadata *raftprotos.BlockMetadata, 3615 consenters map[uint64]*raftprotos.Consenter, 3616 cryptoProvider bccsp.BCCSP, 3617 tlsCA tlsgen.CA, 3618 ) *network { 3619 n := &network{ 3620 chains: make(map[uint64]*chain), 3621 connectivity: make(map[uint64]bool), 3622 links: make(map[uint64]map[uint64]bool), 3623 } 3624 3625 for _, nodeID := range raftMetadata.ConsenterIds { 3626 dir, err := ioutil.TempDir(dataDir, fmt.Sprintf("node-%d-", nodeID)) 3627 Expect(err).NotTo(HaveOccurred()) 3628 3629 m := 
proto.Clone(raftMetadata).(*raftprotos.BlockMetadata)
3630 support := &consensusmocks.FakeConsenterSupport{}
3631 support.ChannelIDReturns(channel)
3632 support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
3633 mockOrdererConfig := mockOrdererWithTLSRootCert(timeout, nil, tlsCA)
3634 support.SharedConfigReturns(mockOrdererConfig)
3635 n.addChain(newChain(timeout, channel, dir, nodeID, m, consenters, cryptoProvider, support))
3636 }
3637 
3638 return n
3639 }
3640 
3641 // tests could alter the configuration of a chain before creating it
3642 func (n *network) init() {
3643 n.exec(func(c *chain) { c.init() })
3644 }
3645 
3646 func (n *network) start(ids ...uint64) {
3647 nodes := ids
3648 if len(nodes) == 0 {
3649 for i := range n.chains {
3650 nodes = append(nodes, i)
3651 }
3652 }
3653 
3654 for _, id := range nodes {
3655 n.chains[id].start()
3656 
3657 // When the Raft node bootstraps, it produces a ConfChange
3658 // to add itself, which needs to be consumed with Ready().
3659 // If there are pending configuration changes in raft,
3660 // it refuses to campaign, no matter how many ticks are supplied.
3661 // This is not a problem in production code because eventually
3662 // raft.Ready will be consumed as real time goes by.
3663 //
3664 // However, this is problematic when using a fake clock and artificial
3665 // ticks. Instead of ticking raft indefinitely until raft.Ready is
3666 // consumed, this check is added to indirectly guarantee
3667 // that the first ConfChange is actually consumed and we can safely
3668 // proceed to tick raft.
3669 Eventually(func() error {
3670 _, err := n.chains[id].storage.Entries(1, 1, 1)
3671 return err
3672 }, LongEventualTimeout).ShouldNot(HaveOccurred())
3673 Eventually(n.chains[id].WaitReady, LongEventualTimeout).ShouldNot(HaveOccurred())
3674 }
3675 }
3676 
3677 func (n *network) stop(ids ...uint64) {
3678 nodes := ids
3679 if len(nodes) == 0 {
3680 for i := range n.chains {
3681 nodes = append(nodes, i)
3682 }
3683 }
3684 
3685 for _, id := range nodes {
3686 c := n.chains[id]
3687 c.Halt()
3688 Eventually(c.Errored).Should(BeClosed())
3689 select {
3690 case <-c.stopped:
3691 default:
3692 close(c.stopped)
3693 }
3694 }
3695 }
3696 
3697 func (n *network) exec(f func(c *chain), ids ...uint64) {
3698 if len(ids) == 0 {
3699 for _, c := range n.chains {
3700 f(c)
3701 }
3702 
3703 return
3704 }
3705 
3706 for _, i := range ids {
3707 f(n.chains[i])
3708 }
3709 }
3710 
3711 // connect a node to the network and tick the leader to trigger
3712 // a heartbeat so the newly joined node can detect the leader.
3713 //
3714 // expectLeaderChange controls whether a leader change should
3715 // be observed on the newly joined node.
3716 // - it should be true if the newly joined node was the leader
3717 // - it should be false if the newly joined node was a follower and
3718 // already knows the leader.
3719 func (n *network) join(id uint64, expectLeaderChange bool) {
3720 n.connect(id)
3721 
3722 n.RLock()
3723 leader, follower := n.chains[n.leader], n.chains[id]
3724 n.RUnlock()
3725 
3726 step := leader.getStepFunc()
3727 signal := make(chan struct{})
3728 leader.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
3729 if dest == id {
3730 // close the signal channel when a message targeting the newly
3731 // joined node is observed on the wire.
3732 select {
3733 case <-signal:
3734 default:
3735 close(signal)
3736 }
3737 }
3738 
3739 return step(dest, msg)
3740 })
3741 
3742 // Tick the leader so it sends out a heartbeat to the new node.
3743 // One tick _may_ not be enough because the leader might be busy
3744 // and this tick is dropped on the floor.
3745 Eventually(func() <-chan struct{} {
3746 leader.clock.Increment(interval)
3747 return signal
3748 }, LongEventualTimeout, 100*time.Millisecond).Should(BeClosed())
3749 
3750 leader.setStepFunc(step)
3751 
3752 if expectLeaderChange {
3753 Eventually(follower.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: n.leader, RaftState: raft.StateFollower})))
3754 }
3755 
3756 // wait for the newly joined node to catch up with the leader
3757 i, err := n.chains[n.leader].opts.MemoryStorage.LastIndex()
3758 Expect(err).NotTo(HaveOccurred())
3759 Eventually(n.chains[id].opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(i))
3760 }
3761 
3762 // elect deterministically elects a node as leader
3763 func (n *network) elect(id uint64) {
3764 n.RLock()
3765 // skip observing leader change on followers if the same leader is elected as the previous one,
3766 // because this may happen too quickly from a slow follower's point of view, and the 0 -> X transition
3767 // may not be emitted at all.
3768 observeFollowers := id != n.leader
3769 candidate := n.chains[id]
3770 var followers []*chain
3771 for _, c := range n.chains {
3772 if c.id != id {
3773 followers = append(followers, c)
3774 }
3775 }
3776 n.RUnlock()
3777 
3778 // Send the node an artificial MsgTimeoutNow to emulate leadership transfer.
3779 fmt.Fprintf(GinkgoWriter, "Send artificial MsgTimeoutNow to elect node %d\n", id)
3780 candidate.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow, To: id})}, 0)
3781 Eventually(candidate.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateLeader)))
3782 
3783 n.Lock()
3784 n.leader = id
3785 n.Unlock()
3786 
3787 if !observeFollowers {
3788 return
3789 }
3790 
3791 // now observe leader change on other nodes
3792 for _, c := range followers {
3793 if c.id == id {
3794 continue
3795 }
3796 
3797 select {
3798 case <-c.stopped: // skip check if the node is stopped
3799 case <-c.unstarted: // skip check if the node is not started yet
3800 default:
3801 if n.linked(c.id, id) && n.connected(c.id) {
3802 Eventually(c.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateFollower)))
3803 }
3804 }
3805 }
3806 
3807 }
3808 
3809 // newConfigEnv wraps configUpdateEnv in an envelope of the given header type for the channel
3810 func newConfigEnv(chainID string, headerType common.HeaderType, configUpdateEnv *common.ConfigUpdateEnvelope) *common.Envelope {
3811 return &common.Envelope{
3812 Payload: marshalOrPanic(&common.Payload{
3813 Header: &common.Header{
3814 ChannelHeader: marshalOrPanic(&common.ChannelHeader{
3815 Type: int32(headerType),
3816 ChannelId: chainID,
3817 }),
3818 },
3819 Data: marshalOrPanic(&common.ConfigEnvelope{
3820 LastUpdate: &common.Envelope{
3821 Payload: marshalOrPanic(&common.Payload{
3822 Header: &common.Header{
3823 ChannelHeader: marshalOrPanic(&common.ChannelHeader{
3824 Type: int32(common.HeaderType_CONFIG_UPDATE),
3825 ChannelId: chainID,
3826 }),
3827 },
3828 Data: marshalOrPanic(configUpdateEnv),
3829 }), // common.Payload
3830 }, // LastUpdate
3831 }),
3832 }),
3833 }
3834 }
3835 
3836 func newConfigUpdateEnv(chainID string, oldValues, newValues map[string]*common.ConfigValue) *common.ConfigUpdateEnvelope {
3837 return &common.ConfigUpdateEnvelope{
3838 ConfigUpdate: marshalOrPanic(&common.ConfigUpdate{
3839 ChannelId: chainID,
3840 ReadSet: &common.ConfigGroup{
3841 Groups: map[string]*common.ConfigGroup{
3842 
"Orderer": { 3843 Values: oldValues, 3844 }, 3845 }, 3846 }, 3847 WriteSet: &common.ConfigGroup{ 3848 Groups: map[string]*common.ConfigGroup{ 3849 "Orderer": { 3850 Values: newValues, 3851 }, 3852 }, 3853 }, // WriteSet 3854 }), 3855 } 3856 } 3857 3858 func getSeedBlock() *common.Block { 3859 return &common.Block{ 3860 Header: &common.BlockHeader{}, 3861 Data: &common.BlockData{Data: [][]byte{[]byte("foo")}}, 3862 Metadata: &common.BlockMetadata{Metadata: make([][]byte, 4)}, 3863 } 3864 } 3865 3866 func StateEqual(lead uint64, state raft.StateType) types.GomegaMatcher { 3867 return Equal(raft.SoftState{Lead: lead, RaftState: state}) 3868 } 3869 3870 func BeFollower() types.GomegaMatcher { 3871 return &StateMatcher{expect: raft.StateFollower} 3872 } 3873 3874 type StateMatcher struct { 3875 expect raft.StateType 3876 } 3877 3878 func (stmatcher *StateMatcher) Match(actual interface{}) (success bool, err error) { 3879 state, ok := actual.(raft.SoftState) 3880 if !ok { 3881 return false, errors.Errorf("StateMatcher expects a raft SoftState") 3882 } 3883 3884 return state.RaftState == stmatcher.expect, nil 3885 } 3886 3887 func (stmatcher *StateMatcher) FailureMessage(actual interface{}) (message string) { 3888 state, ok := actual.(raft.SoftState) 3889 if !ok { 3890 return "StateMatcher expects a raft SoftState" 3891 } 3892 3893 return fmt.Sprintf("Expected %s to be %s", state.RaftState, stmatcher.expect) 3894 } 3895 3896 func (stmatcher *StateMatcher) NegatedFailureMessage(actual interface{}) (message string) { 3897 state, ok := actual.(raft.SoftState) 3898 if !ok { 3899 return "StateMatcher expects a raft SoftState" 3900 } 3901 3902 return fmt.Sprintf("Expected %s not to be %s", state.RaftState, stmatcher.expect) 3903 } 3904 3905 func noOpBlockPuller() (etcdraft.BlockPuller, error) { 3906 bp := &mocks.FakeBlockPuller{} 3907 return bp, nil 3908 }