github.com/defanghe/fabric@v2.1.1+incompatible/orderer/consensus/etcdraft/chain_test.go

/*
Copyright IBM Corp. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package etcdraft_test

import (
	"encoding/pem"
	"fmt"
	"io/ioutil"
	"os"
	"os/user"
	"path"
	"sync"
	"time"

	"code.cloudfoundry.org/clock/fakeclock"
	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	raftprotos "github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
	"github.com/hyperledger/fabric/bccsp"
	"github.com/hyperledger/fabric/bccsp/factory"
	"github.com/hyperledger/fabric/bccsp/sw"
	"github.com/hyperledger/fabric/common/channelconfig"
	"github.com/hyperledger/fabric/common/crypto/tlsgen"
	"github.com/hyperledger/fabric/common/flogging"
	"github.com/hyperledger/fabric/orderer/common/cluster"
	"github.com/hyperledger/fabric/orderer/consensus/etcdraft"
	"github.com/hyperledger/fabric/orderer/consensus/etcdraft/mocks"
	consensusmocks "github.com/hyperledger/fabric/orderer/consensus/mocks"
	mockblockcutter "github.com/hyperledger/fabric/orderer/mocks/common/blockcutter"
	"github.com/hyperledger/fabric/protoutil"
	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
	"github.com/onsi/gomega/types"
	"github.com/pkg/errors"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/raftpb"
	"go.uber.org/zap"
)

const (
	interval            = 100 * time.Millisecond
	LongEventualTimeout = 10 * time.Second

	// 10 is the default setting of ELECTION_TICK.
	// We used to have a small number here (2) to reduce the time for tests - we don't
	// need to tick the node 10 times to trigger an election - however, we are now using
	// another mechanism to trigger it which does not depend on time: sending an
	// artificial MsgTimeoutNow to the node.
	ELECTION_TICK  = 10
	HEARTBEAT_TICK = 1
)

func init() {
	factory.InitFactories(nil)
}

func mockOrderer(batchTimeout time.Duration, metadata []byte) *mocks.OrdererConfig {
	mockOrderer := &mocks.OrdererConfig{}
	mockOrderer.BatchTimeoutReturns(batchTimeout)
	mockOrderer.ConsensusMetadataReturns(metadata)
	return mockOrderer
}

func mockOrdererWithTLSRootCert(batchTimeout time.Duration, metadata []byte, tlsCA tlsgen.CA) *mocks.OrdererConfig {
	mockOrderer := mockOrderer(batchTimeout, metadata)
	mockOrg := &mocks.OrdererOrg{}
	mockMSP := &mocks.MSP{}
	mockMSP.GetTLSRootCertsReturns([][]byte{tlsCA.CertBytes()})
	mockOrg.MSPReturns(mockMSP)
	mockOrderer.OrganizationsReturns(map[string]channelconfig.OrdererOrg{
		"fake-org": mockOrg,
	})
	return mockOrderer
}

// For some test cases we chmod a file/dir to test failures caused by exotic permissions.
// However, this does not work if tests are running as root, i.e. in a container.
func skipIfRoot() {
	u, err := user.Current()
	Expect(err).NotTo(HaveOccurred())
	if u.Uid == "0" {
		Skip("you are running test as root, there's no way to make files unreadable")
	}
}

var _ = Describe("Chain", func() {
	var (
		env       *common.Envelope
		channelID string
		tlsCA     tlsgen.CA
		logger    *flogging.FabricLogger
	)

	BeforeEach(func() {
		tlsCA, _ = tlsgen.NewCA()
		channelID = "test-channel"
		logger = flogging.NewFabricLogger(zap.NewExample())
		env = &common.Envelope{
			Payload: marshalOrPanic(&common.Payload{
				Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
				Data:   []byte("TEST_MESSAGE"),
			}),
		}
	})

	Describe("Single Raft node", func() {
		var (
			configurator      *mocks.FakeConfigurator
			consenterMetadata *raftprotos.ConfigMetadata
			consenters        map[uint64]*raftprotos.Consenter
			clock             *fakeclock.FakeClock
			opts              etcdraft.Options
			support           *consensusmocks.FakeConsenterSupport
			cutter            *mockblockcutter.Receiver
			storage           *raft.MemoryStorage
			observeC          chan raft.SoftState
			chain             *etcdraft.Chain
			dataDir           string
			walDir            string
			snapDir           string
			err               error
			fakeFields        *fakeMetricsFields
			cryptoProvider    bccsp.BCCSP
		)

		BeforeEach(func() {
			cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore())
			Expect(err).NotTo(HaveOccurred())

			configurator = &mocks.FakeConfigurator{}
			clock = fakeclock.NewFakeClock(time.Now())
			storage = raft.NewMemoryStorage()

			dataDir, err = ioutil.TempDir("", "wal-")
			Expect(err).NotTo(HaveOccurred())
			walDir = path.Join(dataDir, "wal")
			snapDir = path.Join(dataDir, "snapshot")

			observeC = make(chan raft.SoftState, 1)

			support = &consensusmocks.FakeConsenterSupport{}
			support.ChannelIDReturns(channelID)
			consenterMetadata = createMetadata(1, tlsCA)
			support.SharedConfigReturns(mockOrdererWithTLSRootCert(time.Hour, marshalOrPanic(consenterMetadata), tlsCA))

			cutter = mockblockcutter.NewReceiver()
			support.BlockCutterReturns(cutter)

			// for block creator initialization
			support.HeightReturns(1)
			support.BlockReturns(getSeedBlock())

			meta := &raftprotos.BlockMetadata{
				ConsenterIds:    make([]uint64, len(consenterMetadata.Consenters)),
				NextConsenterId: 1,
			}

			for i := range meta.ConsenterIds {
				meta.ConsenterIds[i] = meta.NextConsenterId
				meta.NextConsenterId++
			}

			consenters = map[uint64]*raftprotos.Consenter{}
			for i, c := range consenterMetadata.Consenters {
				consenters[meta.ConsenterIds[i]] = c
			}

			fakeFields = newFakeMetricsFields()

			opts = etcdraft.Options{
				RaftID:            1,
				Clock:             clock,
				TickInterval:      interval,
				ElectionTick:      ELECTION_TICK,
				HeartbeatTick:     HEARTBEAT_TICK,
				MaxSizePerMsg:     1024 * 1024,
				MaxInflightBlocks: 256,
				BlockMetadata:     meta,
				Consenters:        consenters,
				Logger:            logger,
				MemoryStorage:     storage,
				WALDir:            walDir,
				SnapDir:           snapDir,
				Metrics:           newFakeMetrics(fakeFields),
			}
		})

		campaign := func(c *etcdraft.Chain, observeC <-chan raft.SoftState) {
			Eventually(func() <-chan raft.SoftState {
				c.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow})}, 0)
				return observeC
			}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
		}

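		// A usage sketch (not part of the production flow): the tests below elect a
		// leader without advancing the fake clock through a full election timeout by
		// calling
		//
		//	campaign(chain, observeC)
		//
		// which feeds an artificial MsgTimeoutNow into the chain via Consensus and then
		// waits on the observe channel until the node reports raft.StateLeader.
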
JustBeforeEach(func() { 201 chain, err = etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC) 202 Expect(err).NotTo(HaveOccurred()) 203 204 chain.Start() 205 206 // When the Raft node bootstraps, it produces a ConfChange 207 // to add itself, which needs to be consumed with Ready(). 208 // If there are pending configuration changes in raft, 209 // it refuses to campaign, no matter how many ticks elapse. 210 // This is not a problem in the production code because raft.Ready 211 // will be consumed eventually, as the wall clock advances. 212 // 213 // However, this is problematic when using the fake clock and 214 // artificial ticks. Instead of ticking raft indefinitely until 215 // raft.Ready is consumed, this check is added to indirectly guarantee 216 // that the first ConfChange is actually consumed and we can safely 217 // proceed to tick the Raft FSM. 218 Eventually(func() error { 219 _, err := storage.Entries(1, 1, 1) 220 return err 221 }, LongEventualTimeout).ShouldNot(HaveOccurred()) 222 }) 223 224 AfterEach(func() { 225 chain.Halt() 226 Eventually(chain.Errored, LongEventualTimeout).Should(BeClosed()) 227 // Make sure no timer leak 228 Eventually(clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 229 os.RemoveAll(dataDir) 230 }) 231 232 Context("when a node starts up", func() { 233 It("properly configures the communication layer", func() { 234 expectedNodeConfig := nodeConfigFromMetadata(consenterMetadata) 235 Eventually(configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(1)) 236 _, arg2 := configurator.ConfigureArgsForCall(0) 237 Expect(arg2).To(Equal(expectedNodeConfig)) 238 }) 239 240 It("correctly sets the metrics labels and publishes requisite metrics", func() { 241 type withImplementers interface { 242 WithCallCount() int 243 WithArgsForCall(int) []string 244 } 245 metricsList := []withImplementers{ 246 fakeFields.fakeClusterSize, 247 fakeFields.fakeIsLeader, 248 fakeFields.fakeActiveNodes, 249 fakeFields.fakeCommittedBlockNumber, 250 fakeFields.fakeSnapshotBlockNumber, 251 fakeFields.fakeLeaderChanges, 252 fakeFields.fakeProposalFailures, 253 fakeFields.fakeDataPersistDuration, 254 fakeFields.fakeNormalProposalsReceived, 255 fakeFields.fakeConfigProposalsReceived, 256 } 257 for _, m := range metricsList { 258 Expect(m.WithCallCount()).To(Equal(1)) 259 Expect(func() string { 260 return m.WithArgsForCall(0)[1] 261 }()).To(Equal(channelID)) 262 } 263 264 Expect(fakeFields.fakeClusterSize.SetCallCount()).To(Equal(1)) 265 Expect(fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(1))) 266 Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(1)) 267 Expect(fakeFields.fakeIsLeader.SetArgsForCall(0)).To(Equal(float64(0))) 268 Expect(fakeFields.fakeActiveNodes.SetCallCount()).To(Equal(1)) 269 Expect(fakeFields.fakeActiveNodes.SetArgsForCall(0)).To(Equal(float64(0))) 270 }) 271 }) 272 273 Context("when no Raft leader is elected", func() { 274 It("fails to order envelope", func() { 275 err := chain.Order(env, 0) 276 Expect(err).To(MatchError("no Raft leader")) 277 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 278 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 279 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(0)) 280 Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1)) 281 Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1))) 282 }) 283 284 It("starts proactive campaign", 
			func() {
				// assert that even if the number of ticks supplied is less than the election timeout,
				// a leader can still be successfully elected.
				for i := 0; i < ELECTION_TICK; i++ {
					clock.Increment(interval)
					time.Sleep(10 * time.Millisecond)
				}
				Eventually(observeC, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
			})
		})

		Context("when Raft leader is elected", func() {
			JustBeforeEach(func() {
				campaign(chain, observeC)
			})

			It("updates metrics upon leader election", func() {
				Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(2))
				Expect(fakeFields.fakeIsLeader.SetArgsForCall(1)).To(Equal(float64(1)))
				Expect(fakeFields.fakeLeaderChanges.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeLeaderChanges.AddArgsForCall(0)).To(Equal(float64(1)))
			})

			It("fails to order envelope if chain is halted", func() {
				chain.Halt()
				err := chain.Order(env, 0)
				Expect(err).To(MatchError("chain is stopped"))
				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
				Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1)))
			})

			It("produces blocks following batch rules", func() {
				close(cutter.Block)

				By("cutting next batch directly")
				cutter.CutNext = true
				err := chain.Order(env, 0)
				Expect(err).NotTo(HaveOccurred())
				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
				Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call
				Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1)))

				// There are three calls to DataPersistDuration by now, corresponding to the following
				// three items arriving on the Ready channel:
				// 1. an EntryConfChange to let this node join the Raft cluster
				// 2. a SoftState and an associated increase of term in the HardState due to the node being elected leader
				// 3. a block being committed
				// The duration being emitted is zero since we don't tick the fake clock during this time
				Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(3))
				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(0)).Should(Equal(float64(0)))
				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(1)).Should(Equal(float64(0)))
				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(2)).Should(Equal(float64(0)))

				By("respecting batch timeout")
				cutter.CutNext = false
				timeout := time.Second
				support.SharedConfigReturns(mockOrderer(timeout, nil))
				err = chain.Order(env, 0)
				Expect(err).NotTo(HaveOccurred())
				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2))
				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1)))

				clock.WaitForNWatchersAndIncrement(timeout, 2)
				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
				Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl.
initial call 353 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2))) 354 Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(4)) 355 Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(3)).Should(Equal(float64(0))) 356 }) 357 358 It("does not reset timer for every envelope", func() { 359 close(cutter.Block) 360 361 timeout := time.Second 362 support.SharedConfigReturns(mockOrderer(timeout, nil)) 363 364 err := chain.Order(env, 0) 365 Expect(err).NotTo(HaveOccurred()) 366 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 367 368 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 369 370 err = chain.Order(env, 0) 371 Expect(err).NotTo(HaveOccurred()) 372 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(2)) 373 374 // the second envelope should not reset the timer; it should 375 // therefore expire if we increment it by just timeout/2 376 clock.Increment(timeout / 2) 377 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 378 }) 379 380 It("does not write a block if halted before timeout", func() { 381 close(cutter.Block) 382 timeout := time.Second 383 support.SharedConfigReturns(mockOrderer(timeout, nil)) 384 385 err := chain.Order(env, 0) 386 Expect(err).NotTo(HaveOccurred()) 387 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 388 389 // wait for timer to start 390 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 391 392 chain.Halt() 393 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 394 }) 395 396 It("stops the timer if a batch is cut", func() { 397 close(cutter.Block) 398 399 timeout := time.Second 400 support.SharedConfigReturns(mockOrderer(timeout, nil)) 401 402 err := chain.Order(env, 0) 403 Expect(err).NotTo(HaveOccurred()) 404 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 405 406 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 407 408 By("force a batch to be cut before timer expires") 409 cutter.CutNext = true 410 err = chain.Order(env, 0) 411 Expect(err).NotTo(HaveOccurred()) 412 413 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 414 b, _ := support.WriteBlockArgsForCall(0) 415 Expect(b.Data.Data).To(HaveLen(2)) 416 Expect(cutter.CurBatch()).To(HaveLen(0)) 417 418 // this should start a fresh timer 419 cutter.CutNext = false 420 err = chain.Order(env, 0) 421 Expect(err).NotTo(HaveOccurred()) 422 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 423 424 clock.WaitForNWatchersAndIncrement(timeout/2, 2) 425 Consistently(support.WriteBlockCallCount).Should(Equal(1)) 426 427 clock.Increment(timeout / 2) 428 429 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 430 b, _ = support.WriteBlockArgsForCall(1) 431 Expect(b.Data.Data).To(HaveLen(1)) 432 }) 433 434 It("cut two batches if incoming envelope does not fit into first batch", func() { 435 close(cutter.Block) 436 437 timeout := time.Second 438 support.SharedConfigReturns(mockOrderer(timeout, nil)) 439 440 err := chain.Order(env, 0) 441 Expect(err).NotTo(HaveOccurred()) 442 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 443 444 cutter.IsolatedTx = true 445 err = chain.Order(env, 0) 446 Expect(err).NotTo(HaveOccurred()) 447 448 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 449 }) 450 451 Context("revalidation", func() { 452 BeforeEach(func() { 453 close(cutter.Block) 454 455 timeout := time.Hour 456 
support.SharedConfigReturns(mockOrderer(timeout, nil)) 457 support.SequenceReturns(1) 458 }) 459 460 It("enqueue if envelope is still valid", func() { 461 support.ProcessNormalMsgReturns(1, nil) 462 463 err := chain.Order(env, 0) 464 Expect(err).NotTo(HaveOccurred()) 465 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 466 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 467 }) 468 469 It("does not enqueue if envelope is not valid", func() { 470 support.ProcessNormalMsgReturns(1, errors.Errorf("Envelope is invalid")) 471 472 err := chain.Order(env, 0) 473 Expect(err).NotTo(HaveOccurred()) 474 Consistently(cutter.CurBatch).Should(HaveLen(0)) 475 Consistently(clock.WatcherCount).Should(Equal(1)) 476 }) 477 }) 478 479 It("unblocks Errored if chain is halted", func() { 480 errorC := chain.Errored() 481 Expect(errorC).NotTo(BeClosed()) 482 chain.Halt() 483 Eventually(errorC, LongEventualTimeout).Should(BeClosed()) 484 }) 485 486 Describe("Config updates", func() { 487 var ( 488 configEnv *common.Envelope 489 configSeq uint64 490 ) 491 492 Context("when a type A config update comes", func() { 493 Context("for existing channel", func() { 494 // use to prepare the Orderer Values 495 BeforeEach(func() { 496 newValues := map[string]*common.ConfigValue{ 497 "BatchTimeout": { 498 Version: 1, 499 Value: marshalOrPanic(&orderer.BatchTimeout{ 500 Timeout: "3ms", 501 }), 502 }, 503 "ConsensusType": { 504 Version: 4, 505 }, 506 } 507 oldValues := map[string]*common.ConfigValue{ 508 "ConsensusType": { 509 Version: 4, 510 }, 511 } 512 configEnv = newConfigEnv(channelID, 513 common.HeaderType_CONFIG, 514 newConfigUpdateEnv(channelID, oldValues, newValues), 515 ) 516 configSeq = 0 517 }) // BeforeEach block 518 519 Context("without revalidation (i.e. correct config sequence)", func() { 520 Context("without pending normal envelope", func() { 521 It("should create a config block and no normal block", func() { 522 err := chain.Configure(configEnv, configSeq) 523 Expect(err).NotTo(HaveOccurred()) 524 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 525 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 526 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 527 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 528 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call 529 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1))) 530 }) 531 }) 532 533 Context("with pending normal envelope", func() { 534 It("should create a normal block and a config block", func() { 535 // We do not need to block the cutter from ordering in our test case and therefore close this channel. 
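									// (A note on the test double, as used throughout this file: the fake block
									// cutter is gated by its Block channel, which lets a test hold back ordering
									// until it has made its assertions. Closing the channel up front, as done on
									// the next line, simply lets every Ordered call proceed immediately.)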
536 close(cutter.Block) 537 538 By("adding a normal envelope") 539 err := chain.Order(env, 0) 540 Expect(err).NotTo(HaveOccurred()) 541 Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 542 Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 543 Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 544 545 By("adding a config envelope") 546 err = chain.Configure(configEnv, configSeq) 547 Expect(err).NotTo(HaveOccurred()) 548 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 549 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 550 551 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 552 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 553 Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call 554 Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2))) 555 }) 556 }) 557 }) 558 559 Context("with revalidation (i.e. incorrect config sequence)", func() { 560 BeforeEach(func() { 561 close(cutter.Block) 562 support.SequenceReturns(1) // this causes the revalidation 563 }) 564 565 It("should create config block upon correct revalidation", func() { 566 support.ProcessConfigMsgReturns(configEnv, 1, nil) // nil implies correct revalidation 567 568 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 569 Consistently(clock.WatcherCount).Should(Equal(1)) 570 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 571 }) 572 573 It("should not create config block upon incorrect revalidation", func() { 574 support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence")) 575 576 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 577 Consistently(clock.WatcherCount).Should(Equal(1)) 578 Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) // no call to WriteConfigBlock 579 }) 580 581 It("should not disturb current running timer upon incorrect revalidation", func() { 582 support.ProcessNormalMsgReturns(1, nil) 583 support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence")) 584 585 Expect(chain.Order(env, configSeq)).To(Succeed()) 586 Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2)) 587 588 clock.Increment(30 * time.Minute) 589 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 590 591 Expect(chain.Configure(configEnv, configSeq)).To(Succeed()) 592 Consistently(clock.WatcherCount).Should(Equal(2)) 593 594 Consistently(support.WriteBlockCallCount).Should(Equal(0)) 595 Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) 596 597 clock.Increment(30 * time.Minute) 598 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 599 }) 600 }) 601 }) 602 603 Context("for creating a new channel", func() { 604 // use to prepare the Orderer Values 605 BeforeEach(func() { 606 chainID := "mychannel" 607 values := make(map[string]*common.ConfigValue) 608 configEnv = newConfigEnv(chainID, 609 common.HeaderType_CONFIG, 610 newConfigUpdateEnv(chainID, nil, values), 611 ) 612 configSeq = 0 613 }) // BeforeEach block 614 615 It("should be able to create a channel", func() { 616 err := chain.Configure(configEnv, configSeq) 617 Expect(err).NotTo(HaveOccurred()) 618 Eventually(support.WriteConfigBlockCallCount, 
LongEventualTimeout).Should(Equal(1)) 619 }) 620 }) 621 }) // Context block for type A config 622 623 Context("when a type B config update comes", func() { 624 Context("updating protocol values", func() { 625 // use to prepare the Orderer Values 626 BeforeEach(func() { 627 values := map[string]*common.ConfigValue{ 628 "ConsensusType": { 629 Version: 1, 630 Value: marshalOrPanic(&orderer.ConsensusType{ 631 Metadata: marshalOrPanic(consenterMetadata), 632 }), 633 }, 634 } 635 configEnv = newConfigEnv(channelID, 636 common.HeaderType_CONFIG, 637 newConfigUpdateEnv(channelID, nil, values)) 638 configSeq = 0 639 640 }) // BeforeEach block 641 642 It("should be able to process config update of type B", func() { 643 err := chain.Configure(configEnv, configSeq) 644 Expect(err).NotTo(HaveOccurred()) 645 Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 646 Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 647 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 648 }) 649 }) 650 651 Context("updating consenters set by exactly one node", func() { 652 It("should be able to process config update adding single node", func() { 653 metadata := proto.Clone(consenterMetadata).(*raftprotos.ConfigMetadata) 654 metadata.Consenters = append(metadata.Consenters, &raftprotos.Consenter{ 655 Host: "localhost", 656 Port: 7050, 657 ServerTlsCert: serverTLSCert(tlsCA), 658 ClientTlsCert: clientTLSCert(tlsCA), 659 }) 660 661 values := map[string]*common.ConfigValue{ 662 "ConsensusType": { 663 Version: 1, 664 Value: marshalOrPanic(&orderer.ConsensusType{ 665 Metadata: marshalOrPanic(metadata), 666 }), 667 }, 668 } 669 configEnv = newConfigEnv(channelID, 670 common.HeaderType_CONFIG, 671 newConfigUpdateEnv(channelID, nil, values)) 672 configSeq = 0 673 674 err := chain.Configure(configEnv, configSeq) 675 Expect(err).NotTo(HaveOccurred()) 676 Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 677 }) 678 679 }) 680 }) 681 }) 682 683 Describe("Crash Fault Tolerance", func() { 684 var ( 685 raftMetadata *raftprotos.BlockMetadata 686 ) 687 688 BeforeEach(func() { 689 raftMetadata = &raftprotos.BlockMetadata{ 690 ConsenterIds: []uint64{1}, 691 NextConsenterId: 2, 692 } 693 }) 694 695 Describe("when a chain is started with existing WAL", func() { 696 var ( 697 m1 *raftprotos.BlockMetadata 698 m2 *raftprotos.BlockMetadata 699 ) 700 JustBeforeEach(func() { 701 // to generate WAL data, we start a chain, 702 // order several envelopes and then halt the chain. 
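						// (The replay tests below rely on the fact that each WriteBlock call carries the
						// marshaled raftprotos.BlockMetadata as its metadata argument; its RaftIndex is
						// captured here into m1/m2 and later fed back in via raftMetadata.RaftIndex to
						// control how many committed entries a restarted chain still replays.)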
						close(cutter.Block)
						cutter.CutNext = true

						// enqueue some data to be persisted on disk by raft
						err := chain.Order(env, uint64(0))
						Expect(err).NotTo(HaveOccurred())
						Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

						_, metadata := support.WriteBlockArgsForCall(0)
						m1 = &raftprotos.BlockMetadata{}
						proto.Unmarshal(metadata, m1)

						err = chain.Order(env, uint64(0))
						Expect(err).NotTo(HaveOccurred())
						Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))

						_, metadata = support.WriteBlockArgsForCall(1)
						m2 = &raftprotos.BlockMetadata{}
						proto.Unmarshal(metadata, m2)

						chain.Halt()
					})

					It("replays blocks from committed entries", func() {
						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
						c.init()
						c.Start()
						defer c.Halt()

						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))

						_, metadata := c.support.WriteBlockArgsForCall(0)
						m := &raftprotos.BlockMetadata{}
						proto.Unmarshal(metadata, m)
						Expect(m.RaftIndex).To(Equal(m1.RaftIndex))

						_, metadata = c.support.WriteBlockArgsForCall(1)
						m = &raftprotos.BlockMetadata{}
						proto.Unmarshal(metadata, m)
						Expect(m.RaftIndex).To(Equal(m2.RaftIndex))

						// chain should keep functioning
						campaign(c.Chain, c.observe)

						c.cutter.CutNext = true

						err := c.Order(env, uint64(0))
						Expect(err).NotTo(HaveOccurred())
						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
					})

					It("only replays blocks after Applied index", func() {
						raftMetadata.RaftIndex = m1.RaftIndex
						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
						c.support.WriteBlock(support.WriteBlockArgsForCall(0))

						c.init()
						c.Start()
						defer c.Halt()

						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))

						_, metadata := c.support.WriteBlockArgsForCall(1)
						m := &raftprotos.BlockMetadata{}
						proto.Unmarshal(metadata, m)
						Expect(m.RaftIndex).To(Equal(m2.RaftIndex))

						// chain should keep functioning
						campaign(c.Chain, c.observe)

						c.cutter.CutNext = true

						err := c.Order(env, uint64(0))
						Expect(err).NotTo(HaveOccurred())
						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
					})

					It("does not replay any block if already in sync", func() {
						raftMetadata.RaftIndex = m2.RaftIndex
						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
						c.init()
						c.Start()
						defer c.Halt()

						Consistently(c.support.WriteBlockCallCount).Should(Equal(0))

						// chain should keep functioning
						campaign(c.Chain, c.observe)

						c.cutter.CutNext = true

						err := c.Order(env, uint64(0))
						Expect(err).NotTo(HaveOccurred())
						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
					})

					Context("WAL file is not readable", func() {
						It("fails to load wal", func() {
							skipIfRoot()

							files, err := ioutil.ReadDir(walDir)
							Expect(err).NotTo(HaveOccurred())
							for _, f := range files {
								os.Chmod(path.Join(walDir, f.Name()), 0300)
							}

							c, err := etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC)
							Expect(c).To(BeNil())
Expect(err).To(MatchError(ContainSubstring("permission denied"))) 813 }) 814 }) 815 }) 816 817 Describe("when snapshotting is enabled (snapshot interval is not zero)", func() { 818 var ( 819 ledgerLock sync.Mutex 820 ledger map[uint64]*common.Block 821 ) 822 823 countFiles := func() int { 824 files, err := ioutil.ReadDir(snapDir) 825 Expect(err).NotTo(HaveOccurred()) 826 return len(files) 827 } 828 829 BeforeEach(func() { 830 opts.SnapshotCatchUpEntries = 2 831 832 close(cutter.Block) 833 cutter.CutNext = true 834 835 ledgerLock.Lock() 836 ledger = map[uint64]*common.Block{ 837 0: getSeedBlock(), // genesis block 838 } 839 ledgerLock.Unlock() 840 841 support.WriteBlockStub = func(block *common.Block, meta []byte) { 842 b := proto.Clone(block).(*common.Block) 843 844 bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 845 Expect(err).NotTo(HaveOccurred()) 846 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 847 848 ledgerLock.Lock() 849 defer ledgerLock.Unlock() 850 ledger[b.Header.Number] = b 851 } 852 853 support.HeightStub = func() uint64 { 854 ledgerLock.Lock() 855 defer ledgerLock.Unlock() 856 return uint64(len(ledger)) 857 } 858 }) 859 860 Context("Small SnapshotInterval", func() { 861 BeforeEach(func() { 862 opts.SnapshotIntervalSize = 1 863 }) 864 865 It("writes snapshot file to snapDir", func() { 866 // Scenario: start a chain with SnapInterval = 1 byte, expect it to take 867 // one snapshot for each block 868 869 i, _ := opts.MemoryStorage.FirstIndex() 870 871 Expect(chain.Order(env, uint64(0))).To(Succeed()) 872 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 873 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 874 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 875 Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(2)) // incl. initial call 876 s, _ := opts.MemoryStorage.Snapshot() 877 b := protoutil.UnmarshalBlockOrPanic(s.Data) 878 Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(1)).To(Equal(float64(b.Header.Number))) 879 880 i, _ = opts.MemoryStorage.FirstIndex() 881 882 Expect(chain.Order(env, uint64(0))).To(Succeed()) 883 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 884 885 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 886 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 887 Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(3)) // incl. 
initial call 888 s, _ = opts.MemoryStorage.Snapshot() 889 b = protoutil.UnmarshalBlockOrPanic(s.Data) 890 Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(2)).To(Equal(float64(b.Header.Number))) 891 }) 892 893 It("pauses chain if sync is in progress", func() { 894 // Scenario: 895 // after a snapshot is taken, reboot chain with raftIndex = 0 896 // chain should attempt to sync upon reboot, and blocks on 897 // `WaitReady` API 898 899 i, _ := opts.MemoryStorage.FirstIndex() 900 901 Expect(chain.Order(env, uint64(0))).To(Succeed()) 902 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 903 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 904 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 905 906 i, _ = opts.MemoryStorage.FirstIndex() 907 908 Expect(chain.Order(env, uint64(0))).To(Succeed()) 909 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 910 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 911 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 912 913 chain.Halt() 914 915 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 916 c.init() 917 918 signal := make(chan struct{}) 919 920 c.puller.PullBlockStub = func(i uint64) *common.Block { 921 <-signal // blocking for assertions 922 ledgerLock.Lock() 923 defer ledgerLock.Unlock() 924 if i >= uint64(len(ledger)) { 925 return nil 926 } 927 928 return ledger[i] 929 } 930 931 err := c.WaitReady() 932 Expect(err).To(MatchError("chain is not started")) 933 934 c.Start() 935 defer c.Halt() 936 937 // pull block is called, so chain should be catching up now, WaitReady should block 938 signal <- struct{}{} 939 940 done := make(chan error) 941 go func() { 942 done <- c.WaitReady() 943 }() 944 945 Consistently(done).ShouldNot(Receive()) 946 close(signal) // unblock block puller 947 Eventually(done).Should(Receive(nil)) // WaitReady should be unblocked 948 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 949 }) 950 951 It("restores snapshot w/o extra entries", func() { 952 // Scenario: 953 // after a snapshot is taken, no more entries are appended. 954 // then node is restarted, it loads snapshot, finds its term 955 // and index. While replaying WAL to memory storage, it should 956 // not append any entry because no extra entry was appended 957 // after snapshot was taken. 
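							// Concretely, with SnapshotCatchUpEntries = 2 (set in the BeforeEach of this
							// Describe), the arithmetic asserted below is roughly:
							//
							//	FirstIndex == snapshot.Metadata.Index - SnapshotCatchUpEntries + 1  (before restart)
							//	FirstIndex == snapshot.Metadata.Index + 1                           (after restart, snapshot only)
							//	LastIndex  == snapshot.Metadata.Index                               (after restart, no extra entries)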
958 959 Expect(chain.Order(env, uint64(0))).To(Succeed()) 960 Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 961 _, metadata := support.WriteBlockArgsForCall(0) 962 m := &raftprotos.BlockMetadata{} 963 proto.Unmarshal(metadata, m) 964 965 Eventually(countFiles, LongEventualTimeout).Should(Equal(1)) 966 Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1)) 967 snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created 968 Expect(err).NotTo(HaveOccurred()) 969 i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory 970 Expect(err).NotTo(HaveOccurred()) 971 972 // expect storage to preserve SnapshotCatchUpEntries entries before snapshot 973 Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1)) 974 975 chain.Halt() 976 977 raftMetadata.RaftIndex = m.RaftIndex 978 c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 979 c.opts.SnapshotIntervalSize = 1 980 981 c.init() 982 c.Start() 983 984 // following arithmetic reflects how etcdraft MemoryStorage is implemented 985 // when no entry is appended after snapshot being loaded. 986 Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1)) 987 Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index)) 988 989 // chain keeps functioning 990 Eventually(func() <-chan raft.SoftState { 991 c.clock.Increment(interval) 992 return c.observe 993 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 994 995 c.cutter.CutNext = true 996 err = c.Order(env, uint64(0)) 997 Expect(err).NotTo(HaveOccurred()) 998 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 999 1000 Eventually(countFiles, LongEventualTimeout).Should(Equal(2)) 1001 c.Halt() 1002 1003 _, metadata = c.support.WriteBlockArgsForCall(0) 1004 m = &raftprotos.BlockMetadata{} 1005 proto.Unmarshal(metadata, m) 1006 raftMetadata.RaftIndex = m.RaftIndex 1007 cx := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil) 1008 1009 cx.init() 1010 cx.Start() 1011 defer cx.Halt() 1012 1013 // chain keeps functioning 1014 Eventually(func() <-chan raft.SoftState { 1015 cx.clock.Increment(interval) 1016 return cx.observe 1017 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader))) 1018 }) 1019 }) 1020 1021 Context("Large SnapshotInterval", func() { 1022 BeforeEach(func() { 1023 opts.SnapshotIntervalSize = 1024 1024 }) 1025 1026 It("restores snapshot w/ extra entries", func() { 1027 // Scenario: 1028 // after a snapshot is taken, more entries are appended. 1029 // then node is restarted, it loads snapshot, finds its term 1030 // and index. While replaying WAL to memory storage, it should 1031 // append some entries. 
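							// With SnapshotIntervalSize = 1024 and ~500-byte payloads, a snapshot is only
							// expected once the two large envelopes below are committed; the extra small
							// envelope ordered afterwards appends entries beyond the snapshot, which is why
							// the restarted node's LastIndex is expected to land above snapshot.Metadata.Index.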

							largeEnv := &common.Envelope{
								Payload: marshalOrPanic(&common.Payload{
									Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
									Data:   make([]byte, 500),
								}),
							}

							By("Ordering two large envelopes to trigger snapshot")
							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))

							_, metadata := support.WriteBlockArgsForCall(1)
							m := &raftprotos.BlockMetadata{}
							proto.Unmarshal(metadata, m)

							// check snapshot does exist
							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1))
							snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created
							Expect(err).NotTo(HaveOccurred())
							i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory
							Expect(err).NotTo(HaveOccurred())

							// expect storage to preserve SnapshotCatchUpEntries entries before snapshot
							Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1))

							By("Ordering another envelope to append new data to memory after snapshot")
							Expect(chain.Order(env, uint64(0))).To(Succeed())
							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))

							lasti, _ := opts.MemoryStorage.LastIndex()

							chain.Halt()

							raftMetadata.RaftIndex = m.RaftIndex
							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
							cnt := support.WriteBlockCallCount()
							for i := 0; i < cnt; i++ {
								c.support.WriteBlock(support.WriteBlockArgsForCall(i))
							}

							By("Restarting the node")
							c.init()
							c.Start()
							defer c.Halt()

							By("Checking latest index is larger than index in snapshot")
							Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1))
							Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(lasti))
						})

						When("local ledger is in sync with snapshot", func() {
							It("does not pull blocks and still respects snapshot interval", func() {
								// Scenario:
								// - snapshot is taken at block 2
								// - order one more envelope (block 3)
								// - reboot chain at block 2
								// - block 3 should be replayed from wal
								// - order another envelope to trigger snapshot, containing block 3 & 4
								// Assertions:
								// - block puller should NOT be called
								// - chain should keep functioning after reboot
								// - chain should respect snapshot interval to trigger next snapshot

								largeEnv := &common.Envelope{
									Payload: marshalOrPanic(&common.Payload{
										Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
										Data:   make([]byte, 500),
									}),
								}

								By("Ordering two large envelopes to trigger snapshot")
								Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))

								Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))

								Eventually(countFiles, LongEventualTimeout).Should(Equal(1))

								_, metadata := support.WriteBlockArgsForCall(1)
								m := &raftprotos.BlockMetadata{}
								proto.Unmarshal(metadata, m)

								By("Cutting block [3]")
								// order another envelope. this should not trigger snapshot
								err = chain.Order(largeEnv, uint64(0))
								Expect(err).NotTo(HaveOccurred())
								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))

								chain.Halt()

								raftMetadata.RaftIndex = m.RaftIndex
								c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
								// replay blocks 1 & 2
								c.support.WriteBlock(support.WriteBlockArgsForCall(0))
								c.support.WriteBlock(support.WriteBlockArgsForCall(1))

								c.opts.SnapshotIntervalSize = 1024

								By("Restarting node at block [2]")
								c.init()
								c.Start()
								defer c.Halt()

								// elect leader
								campaign(c.Chain, c.observe)

								By("Ordering one more block to trigger snapshot")
								c.cutter.CutNext = true
								err = c.Order(largeEnv, uint64(0))
								Expect(err).NotTo(HaveOccurred())

								Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(4))
								Expect(c.puller.PullBlockCallCount()).Should(BeZero())
								// old snapshot file is retained
								Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
							})
						})

						It("respects snapshot interval after reboot", func() {
							largeEnv := &common.Envelope{
								Payload: marshalOrPanic(&common.Payload{
									Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
									Data:   make([]byte, 500),
								}),
							}

							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
							// check no snapshot is taken
							Consistently(countFiles).Should(Equal(0))

							_, metadata := support.WriteBlockArgsForCall(0)
							m := &raftprotos.BlockMetadata{}
							proto.Unmarshal(metadata, m)

							chain.Halt()

							raftMetadata.RaftIndex = m.RaftIndex
							c1 := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
							cnt := support.WriteBlockCallCount()
							for i := 0; i < cnt; i++ {
								c1.support.WriteBlock(support.WriteBlockArgsForCall(i))
							}
							c1.cutter.CutNext = true
							c1.opts.SnapshotIntervalSize = 1024

							By("Restarting chain")
							c1.init()
							c1.Start()
							// chain keeps functioning
							campaign(c1.Chain, c1.observe)

							Expect(c1.Order(largeEnv, uint64(0))).To(Succeed())
							Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
							// check snapshot does exist
							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
						})
					})
				})
			})

			Context("Invalid WAL dir", func() {
				var support = &consensusmocks.FakeConsenterSupport{}
				BeforeEach(func() {
					// for block creator initialization
					support.HeightReturns(1)
					support.BlockReturns(getSeedBlock())
				})

				When("WAL dir is a file", func() {
					It("replaces file with fresh WAL dir", func() {
						f, err := ioutil.TempFile("", "wal-")
						Expect(err).NotTo(HaveOccurred())
						defer os.RemoveAll(f.Name())

						chain, err := etcdraft.NewChain(
							support,
							etcdraft.Options{
								WALDir:        f.Name(),
								SnapDir:       snapDir,
Logger: logger, 1219 MemoryStorage: storage, 1220 BlockMetadata: &raftprotos.BlockMetadata{}, 1221 Metrics: newFakeMetrics(newFakeMetricsFields()), 1222 }, 1223 configurator, 1224 nil, 1225 cryptoProvider, 1226 nil, 1227 nil, 1228 observeC) 1229 Expect(chain).NotTo(BeNil()) 1230 Expect(err).NotTo(HaveOccurred()) 1231 1232 info, err := os.Stat(f.Name()) 1233 Expect(err).NotTo(HaveOccurred()) 1234 Expect(info.IsDir()).To(BeTrue()) 1235 }) 1236 }) 1237 1238 When("WAL dir is not writeable", func() { 1239 It("replace it with fresh WAL dir", func() { 1240 d, err := ioutil.TempDir("", "wal-") 1241 Expect(err).NotTo(HaveOccurred()) 1242 defer os.RemoveAll(d) 1243 1244 err = os.Chmod(d, 0500) 1245 Expect(err).NotTo(HaveOccurred()) 1246 1247 chain, err := etcdraft.NewChain( 1248 support, 1249 etcdraft.Options{ 1250 WALDir: d, 1251 SnapDir: snapDir, 1252 Logger: logger, 1253 MemoryStorage: storage, 1254 BlockMetadata: &raftprotos.BlockMetadata{}, 1255 Metrics: newFakeMetrics(newFakeMetricsFields()), 1256 }, 1257 nil, 1258 nil, 1259 cryptoProvider, 1260 noOpBlockPuller, 1261 nil, 1262 nil) 1263 Expect(chain).NotTo(BeNil()) 1264 Expect(err).NotTo(HaveOccurred()) 1265 }) 1266 }) 1267 1268 When("WAL parent dir is not writeable", func() { 1269 It("fails to bootstrap fresh raft node", func() { 1270 skipIfRoot() 1271 1272 d, err := ioutil.TempDir("", "wal-") 1273 Expect(err).NotTo(HaveOccurred()) 1274 defer os.RemoveAll(d) 1275 1276 err = os.Chmod(d, 0500) 1277 Expect(err).NotTo(HaveOccurred()) 1278 1279 chain, err := etcdraft.NewChain( 1280 support, 1281 etcdraft.Options{ 1282 WALDir: path.Join(d, "wal-dir"), 1283 SnapDir: snapDir, 1284 Logger: logger, 1285 BlockMetadata: &raftprotos.BlockMetadata{}, 1286 }, 1287 nil, 1288 nil, 1289 cryptoProvider, 1290 noOpBlockPuller, 1291 nil, 1292 nil) 1293 Expect(chain).To(BeNil()) 1294 Expect(err).To(MatchError(ContainSubstring("failed to initialize WAL: mkdir"))) 1295 }) 1296 }) 1297 }) 1298 }) 1299 }) 1300 1301 Describe("2-node Raft cluster", func() { 1302 var ( 1303 network *network 1304 channelID string 1305 timeout time.Duration 1306 dataDir string 1307 c1, c2 *chain 1308 raftMetadata *raftprotos.BlockMetadata 1309 consenters map[uint64]*raftprotos.Consenter 1310 configEnv *common.Envelope 1311 cryptoProvider bccsp.BCCSP 1312 ) 1313 BeforeEach(func() { 1314 var err error 1315 1316 channelID = "multi-node-channel" 1317 timeout = 10 * time.Second 1318 1319 dataDir, err = ioutil.TempDir("", "raft-test-") 1320 Expect(err).NotTo(HaveOccurred()) 1321 1322 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 1323 Expect(err).NotTo(HaveOccurred()) 1324 1325 raftMetadata = &raftprotos.BlockMetadata{ 1326 ConsenterIds: []uint64{1, 2}, 1327 NextConsenterId: 3, 1328 } 1329 1330 consenters = map[uint64]*raftprotos.Consenter{ 1331 1: { 1332 Host: "localhost", 1333 Port: 7051, 1334 ClientTlsCert: clientTLSCert(tlsCA), 1335 ServerTlsCert: serverTLSCert(tlsCA), 1336 }, 1337 2: { 1338 Host: "localhost", 1339 Port: 7051, 1340 ClientTlsCert: clientTLSCert(tlsCA), 1341 ServerTlsCert: serverTLSCert(tlsCA), 1342 }, 1343 } 1344 1345 metadata := &raftprotos.ConfigMetadata{ 1346 Options: &raftprotos.Options{ 1347 TickInterval: "500ms", 1348 ElectionTick: 10, 1349 HeartbeatTick: 1, 1350 MaxInflightBlocks: 5, 1351 SnapshotIntervalSize: 200, 1352 }, 1353 Consenters: []*raftprotos.Consenter{consenters[2]}, 1354 } 1355 value := map[string]*common.ConfigValue{ 1356 "ConsensusType": { 1357 Version: 1, 1358 Value: marshalOrPanic(&orderer.ConsensusType{ 1359 
Metadata: marshalOrPanic(metadata), 1360 }), 1361 }, 1362 } 1363 // prepare config update to remove 1 1364 configEnv = newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1365 1366 network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA) 1367 c1, c2 = network.chains[1], network.chains[2] 1368 c1.cutter.CutNext = true 1369 network.init() 1370 network.start() 1371 }) 1372 1373 AfterEach(func() { 1374 network.stop() 1375 network.exec(func(c *chain) { 1376 Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 1377 }) 1378 1379 os.RemoveAll(dataDir) 1380 }) 1381 1382 It("can remove leader by reconfiguring cluster", func() { 1383 network.elect(1) 1384 1385 // trigger status dissemination 1386 Eventually(func() int { 1387 c1.clock.Increment(interval) 1388 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1389 }, LongEventualTimeout).Should(Equal(2)) 1390 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1391 1392 By("Configuring cluster to remove node") 1393 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1394 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1395 c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2) 1396 1397 Eventually(func() <-chan raft.SoftState { 1398 c2.clock.Increment(interval) 1399 return c2.observe 1400 }, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateLeader))) 1401 1402 By("Asserting leader can still serve requests as single-node cluster") 1403 c2.cutter.CutNext = true 1404 Expect(c2.Order(env, 0)).To(Succeed()) 1405 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1406 }) 1407 1408 It("can remove leader by reconfiguring cluster even if leadership transfer fails", func() { 1409 network.elect(1) 1410 1411 step1 := c1.getStepFunc() 1412 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 1413 stepMsg := &raftpb.Message{} 1414 if err := proto.Unmarshal(msg.Payload, stepMsg); err != nil { 1415 return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err) 1416 } 1417 1418 if stepMsg.Type == raftpb.MsgTimeoutNow { 1419 return nil 1420 } 1421 1422 return step1(dest, msg) 1423 }) 1424 1425 By("Configuring cluster to remove node") 1426 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1427 Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1428 c2.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1429 Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1430 1431 c1.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1432 Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed()) 1433 close(c1.stopped) // mark c1 stopped in network 1434 1435 network.elect(2) 1436 1437 By("Asserting leader can still serve requests as single-node cluster") 1438 c2.cutter.CutNext = true 1439 Expect(c2.Order(env, 0)).To(Succeed()) 1440 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1441 }) 1442 1443 It("can remove follower by reconfiguring cluster", func() { 1444 network.elect(2) 1445 1446 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1447 network.exec(func(c *chain) { 1448 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1449 }) 1450 1451 Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1452 
Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed()) 1453 1454 By("Asserting leader can still serve requests as single-node cluster") 1455 c2.cutter.CutNext = true 1456 Expect(c2.Order(env, 0)).To(Succeed()) 1457 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1458 }) 1459 }) 1460 1461 Describe("3-node Raft cluster", func() { 1462 var ( 1463 network *network 1464 channelID string 1465 timeout time.Duration 1466 dataDir string 1467 c1, c2, c3 *chain 1468 raftMetadata *raftprotos.BlockMetadata 1469 consenters map[uint64]*raftprotos.Consenter 1470 cryptoProvider bccsp.BCCSP 1471 ) 1472 1473 BeforeEach(func() { 1474 var err error 1475 1476 channelID = "multi-node-channel" 1477 timeout = 10 * time.Second 1478 1479 dataDir, err = ioutil.TempDir("", "raft-test-") 1480 Expect(err).NotTo(HaveOccurred()) 1481 1482 raftMetadata = &raftprotos.BlockMetadata{ 1483 ConsenterIds: []uint64{1, 2, 3}, 1484 NextConsenterId: 4, 1485 } 1486 1487 cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore()) 1488 Expect(err).NotTo(HaveOccurred()) 1489 1490 consenters = map[uint64]*raftprotos.Consenter{ 1491 1: { 1492 Host: "localhost", 1493 Port: 7051, 1494 ClientTlsCert: clientTLSCert(tlsCA), 1495 ServerTlsCert: serverTLSCert(tlsCA), 1496 }, 1497 2: { 1498 Host: "localhost", 1499 Port: 7051, 1500 ClientTlsCert: clientTLSCert(tlsCA), 1501 ServerTlsCert: serverTLSCert(tlsCA), 1502 }, 1503 3: { 1504 Host: "localhost", 1505 Port: 7051, 1506 ClientTlsCert: clientTLSCert(tlsCA), 1507 ServerTlsCert: serverTLSCert(tlsCA), 1508 }, 1509 } 1510 1511 network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA) 1512 c1 = network.chains[1] 1513 c2 = network.chains[2] 1514 c3 = network.chains[3] 1515 }) 1516 1517 AfterEach(func() { 1518 network.stop() 1519 network.exec(func(c *chain) { 1520 Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero()) 1521 }) 1522 1523 os.RemoveAll(dataDir) 1524 }) 1525 1526 When("2/3 nodes are running", func() { 1527 It("late node can catch up", func() { 1528 network.init() 1529 network.start(1, 2) 1530 network.elect(1) 1531 1532 // trigger status dissemination 1533 Eventually(func() int { 1534 c1.clock.Increment(interval) 1535 return c2.fakeFields.fakeActiveNodes.SetCallCount() 1536 }, LongEventualTimeout).Should(Equal(2)) 1537 Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2))) 1538 1539 c1.cutter.CutNext = true 1540 err := c1.Order(env, 0) 1541 Expect(err).NotTo(HaveOccurred()) 1542 1543 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1544 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1545 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1546 1547 network.start(3) 1548 1549 c1.clock.Increment(interval) 1550 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1551 1552 network.stop() 1553 }) 1554 1555 It("late node receives snapshot from leader", func() { 1556 c1.opts.SnapshotIntervalSize = 1 1557 c1.opts.SnapshotCatchUpEntries = 1 1558 1559 c1.cutter.CutNext = true 1560 1561 var blocksLock sync.Mutex 1562 blocks := make(map[uint64]*common.Block) // storing written blocks for block puller 1563 1564 c1.support.WriteBlockStub = func(b *common.Block, meta []byte) { 1565 blocksLock.Lock() 1566 defer blocksLock.Unlock() 1567 
bytes, err := proto.Marshal(&common.Metadata{Value: meta}) 1568 Expect(err).NotTo(HaveOccurred()) 1569 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes 1570 blocks[b.Header.Number] = b 1571 } 1572 1573 c3.puller.PullBlockStub = func(i uint64) *common.Block { 1574 blocksLock.Lock() 1575 defer blocksLock.Unlock() 1576 b, exist := blocks[i] 1577 if !exist { 1578 return nil 1579 } 1580 1581 return b 1582 } 1583 1584 network.init() 1585 network.start(1, 2) 1586 network.elect(1) 1587 1588 err := c1.Order(env, 0) 1589 Expect(err).NotTo(HaveOccurred()) 1590 1591 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1592 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 1593 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1594 1595 err = c1.Order(env, 0) 1596 Expect(err).NotTo(HaveOccurred()) 1597 1598 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1599 Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1600 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 1601 1602 network.start(3) 1603 1604 c1.clock.Increment(interval) 1605 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2)) 1606 1607 network.stop() 1608 }) 1609 }) 1610 1611 When("reconfiguring raft cluster", func() { 1612 const ( 1613 defaultTimeout = 5 * time.Second 1614 ) 1615 var ( 1616 options = &raftprotos.Options{ 1617 TickInterval: "500ms", 1618 ElectionTick: 10, 1619 HeartbeatTick: 1, 1620 MaxInflightBlocks: 5, 1621 SnapshotIntervalSize: 200, 1622 } 1623 updateRaftConfigValue = func(metadata *raftprotos.ConfigMetadata) map[string]*common.ConfigValue { 1624 return map[string]*common.ConfigValue{ 1625 "ConsensusType": { 1626 Version: 1, 1627 Value: marshalOrPanic(&orderer.ConsensusType{ 1628 Metadata: marshalOrPanic(metadata), 1629 }), 1630 }, 1631 } 1632 } 1633 addConsenterConfigValue = func() map[string]*common.ConfigValue { 1634 metadata := &raftprotos.ConfigMetadata{Options: options} 1635 for _, consenter := range consenters { 1636 metadata.Consenters = append(metadata.Consenters, consenter) 1637 } 1638 1639 newConsenter := &raftprotos.Consenter{ 1640 Host: "localhost", 1641 Port: 7050, 1642 ServerTlsCert: serverTLSCert(tlsCA), 1643 ClientTlsCert: clientTLSCert(tlsCA), 1644 } 1645 metadata.Consenters = append(metadata.Consenters, newConsenter) 1646 return updateRaftConfigValue(metadata) 1647 } 1648 removeConsenterConfigValue = func(id uint64) map[string]*common.ConfigValue { 1649 metadata := &raftprotos.ConfigMetadata{Options: options} 1650 for nodeID, consenter := range consenters { 1651 if nodeID == id { 1652 continue 1653 } 1654 metadata.Consenters = append(metadata.Consenters, consenter) 1655 } 1656 return updateRaftConfigValue(metadata) 1657 } 1658 createChannelEnv = func(metadata *raftprotos.ConfigMetadata) *common.Envelope { 1659 configEnv := newConfigEnv("another-channel", 1660 common.HeaderType_CONFIG, 1661 newConfigUpdateEnv(channelID, nil, updateRaftConfigValue(metadata))) 1662 1663 // Wrap config env in Orderer transaction 1664 return &common.Envelope{ 1665 Payload: marshalOrPanic(&common.Payload{ 1666 Header: &common.Header{ 1667 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 1668 Type: 
int32(common.HeaderType_ORDERER_TRANSACTION), 1669 ChannelId: channelID, 1670 }), 1671 }, 1672 Data: marshalOrPanic(configEnv), 1673 }), 1674 } 1675 } 1676 ) 1677 1678 BeforeEach(func() { 1679 network.exec(func(c *chain) { 1680 c.opts.EvictionSuspicion = time.Millisecond * 100 1681 c.opts.LeaderCheckInterval = time.Millisecond * 100 1682 }) 1683 1684 network.init() 1685 network.start() 1686 network.elect(1) 1687 1688 By("Submitting first tx to cut the block") 1689 c1.cutter.CutNext = true 1690 err := c1.Order(env, 0) 1691 Expect(err).NotTo(HaveOccurred()) 1692 1693 c1.clock.Increment(interval) 1694 1695 network.exec( 1696 func(c *chain) { 1697 Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1)) 1698 }) 1699 }) 1700 1701 AfterEach(func() { 1702 network.stop() 1703 }) 1704 1705 Context("channel creation", func() { 1706 It("succeeds with valid config metadata", func() { 1707 metadata := &raftprotos.ConfigMetadata{Options: options} 1708 for _, consenter := range consenters { 1709 metadata.Consenters = append(metadata.Consenters, consenter) 1710 } 1711 1712 Expect(c1.Configure(createChannelEnv(metadata), 0)).To(Succeed()) 1713 network.exec(func(c *chain) { 1714 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1715 }) 1716 }) 1717 1718 }) 1719 1720 Context("reconfiguration", func() { 1721 It("can rotate certificate by adding and removing 1 node in one config update", func() { 1722 metadata := &raftprotos.ConfigMetadata{Options: options} 1723 for id, consenter := range consenters { 1724 if id == 2 { 1725 // remove second consenter 1726 continue 1727 } 1728 metadata.Consenters = append(metadata.Consenters, consenter) 1729 } 1730 1731 // add new consenter 1732 newConsenter := &raftprotos.Consenter{ 1733 Host: "localhost", 1734 Port: 7050, 1735 ServerTlsCert: serverTLSCert(tlsCA), 1736 ClientTlsCert: clientTLSCert(tlsCA), 1737 } 1738 metadata.Consenters = append(metadata.Consenters, newConsenter) 1739 1740 value := map[string]*common.ConfigValue{ 1741 "ConsensusType": { 1742 Version: 1, 1743 Value: marshalOrPanic(&orderer.ConsensusType{ 1744 Metadata: marshalOrPanic(metadata), 1745 }), 1746 }, 1747 } 1748 1749 By("creating new configuration with removed node and new one") 1750 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1751 c1.cutter.CutNext = true 1752 1753 By("sending config transaction") 1754 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1755 1756 network.exec(func(c *chain) { 1757 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1758 }) 1759 }) 1760 1761 It("rotates leader certificate and triggers leadership transfer", func() { 1762 metadata := &raftprotos.ConfigMetadata{Options: options} 1763 for id, consenter := range consenters { 1764 if id == 1 { 1765 // remove second consenter 1766 continue 1767 } 1768 metadata.Consenters = append(metadata.Consenters, consenter) 1769 } 1770 1771 // add new consenter 1772 newConsenter := &raftprotos.Consenter{ 1773 Host: "localhost", 1774 Port: 7050, 1775 ServerTlsCert: serverTLSCert(tlsCA), 1776 ClientTlsCert: clientTLSCert(tlsCA), 1777 } 1778 metadata.Consenters = append(metadata.Consenters, newConsenter) 1779 1780 value := map[string]*common.ConfigValue{ 1781 "ConsensusType": { 1782 Version: 1, 1783 Value: marshalOrPanic(&orderer.ConsensusType{ 1784 Metadata: marshalOrPanic(metadata), 1785 }), 1786 }, 1787 } 1788 1789 By("creating new configuration with removed node and new one") 1790 configEnv := 
newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1791 c1.cutter.CutNext = true 1792 1793 By("sending config transaction") 1794 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1795 1796 Eventually(c1.observe, LongEventualTimeout).Should(Receive(BeFollower())) 1797 network.exec(func(c *chain) { 1798 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1799 }) 1800 }) 1801 1802 When("Leader is disconnected after cert rotation", func() { 1803 It("still configures communication after failed leader transfer attempt", func() { 1804 metadata := &raftprotos.ConfigMetadata{Options: options} 1805 for id, consenter := range consenters { 1806 if id == 1 { 1807 // remove second consenter 1808 continue 1809 } 1810 metadata.Consenters = append(metadata.Consenters, consenter) 1811 } 1812 1813 // add new consenter 1814 newConsenter := &raftprotos.Consenter{ 1815 Host: "localhost", 1816 Port: 7050, 1817 ServerTlsCert: serverTLSCert(tlsCA), 1818 ClientTlsCert: clientTLSCert(tlsCA), 1819 } 1820 metadata.Consenters = append(metadata.Consenters, newConsenter) 1821 1822 value := map[string]*common.ConfigValue{ 1823 "ConsensusType": { 1824 Version: 1, 1825 Value: marshalOrPanic(&orderer.ConsensusType{ 1826 Metadata: marshalOrPanic(metadata), 1827 }), 1828 }, 1829 } 1830 1831 By("creating new configuration with removed node and new one") 1832 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1833 c1.cutter.CutNext = true 1834 1835 step1 := c1.getStepFunc() 1836 count := c1.rpc.SendConsensusCallCount() // record current step call count 1837 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 1838 // disconnect network after 4 MsgApp are sent by c1: 1839 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 1840 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 1841 if c1.rpc.SendConsensusCallCount() == count+4 { 1842 defer network.disconnect(1) 1843 } 1844 1845 return step1(dest, msg) 1846 }) 1847 1848 network.exec(func(c *chain) { 1849 Consistently(c.clock.WatcherCount).Should(Equal(1)) 1850 }) 1851 1852 By("sending config transaction") 1853 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1854 1855 Consistently(c1.observe).ShouldNot(Receive()) 1856 network.exec(func(c *chain) { 1857 // wait for timeout timer to start 1858 c.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2) 1859 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1860 }) 1861 }) 1862 }) 1863 1864 When("Follower is disconnected while leader cert is being rotated", func() { 1865 It("still configures communication and transfer leader", func() { 1866 metadata := &raftprotos.ConfigMetadata{Options: options} 1867 for id, consenter := range consenters { 1868 if id == 1 { 1869 // remove second consenter 1870 continue 1871 } 1872 metadata.Consenters = append(metadata.Consenters, consenter) 1873 } 1874 1875 // add new consenter 1876 newConsenter := &raftprotos.Consenter{ 1877 Host: "localhost", 1878 Port: 7050, 1879 ServerTlsCert: serverTLSCert(tlsCA), 1880 ClientTlsCert: clientTLSCert(tlsCA), 1881 } 1882 metadata.Consenters = append(metadata.Consenters, newConsenter) 1883 1884 value := map[string]*common.ConfigValue{ 1885 "ConsensusType": { 1886 Version: 1, 1887 Value: marshalOrPanic(&orderer.ConsensusType{ 1888 Metadata: marshalOrPanic(metadata), 1889 }), 1890 }, 1891 } 1892 1893 cnt := c1.rpc.SendConsensusCallCount() 1894 
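// Note: cnt records the SendConsensus call count here as a baseline, so the ">= cnt+5"
// assertion below only counts consensus messages (heartbeats) sent after node 3 is disconnected.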
network.disconnect(3) 1895 1896 // Trigger some heartbeats to be sent so that leader notices 1897 // failed message delivery to 3, and mark it as Paused. 1898 // This is to ensure leadership is transferred to 2. 1899 Eventually(func() int { 1900 c1.clock.Increment(interval) 1901 return c1.rpc.SendConsensusCallCount() 1902 }, LongEventualTimeout).Should(BeNumerically(">=", cnt+5)) 1903 1904 By("creating new configuration with removed node and new one") 1905 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value)) 1906 c1.cutter.CutNext = true 1907 1908 By("sending config transaction") 1909 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 1910 1911 Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateFollower))) 1912 network.Lock() 1913 network.leader = 2 // manually set network leader 1914 network.Unlock() 1915 network.disconnect(1) 1916 1917 network.exec(func(c *chain) { 1918 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1919 Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1920 }, 1, 2) 1921 1922 network.join(3, true) 1923 Eventually(c3.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 1924 Eventually(c3.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2)) 1925 1926 By("Ordering normal transaction") 1927 c2.cutter.CutNext = true 1928 Expect(c3.Order(env, 0)).To(Succeed()) 1929 network.exec(func(c *chain) { 1930 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 1931 }, 2, 3) 1932 }) 1933 }) 1934 1935 It("adding node to the cluster", func() { 1936 addConsenterUpdate := addConsenterConfigValue() 1937 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterUpdate)) 1938 c1.cutter.CutNext = true 1939 1940 By("sending config transaction") 1941 err := c1.Configure(configEnv, 0) 1942 Expect(err).NotTo(HaveOccurred()) 1943 Expect(c1.fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1)) 1944 Expect(c1.fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 1945 1946 network.exec(func(c *chain) { 1947 Eventually(c.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1)) 1948 Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2)) 1949 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(4))) 1950 }) 1951 1952 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 1953 meta := &common.Metadata{Value: raftmetabytes} 1954 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 1955 Expect(err).NotTo(HaveOccurred()) 1956 1957 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil) 1958 // if we join a node to existing network, it MUST already obtained blocks 1959 // till the config block that adds this node to cluster. 1960 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 1961 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 1962 c4.init() 1963 1964 network.addChain(c4) 1965 c4.Start() 1966 1967 // ConfChange is applied to etcd/raft asynchronously, meaning node 4 is not added 1968 // to leader's node list right away. An immediate tick does not trigger a heartbeat 1969 // being sent to node 4. Therefore, we repeatedly tick the leader until node 4 joins 1970 // the cluster successfully. 
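// (Roughly: the config block carries a raftpb.ConfChange entry; only after the leader applies it
// via ApplyConfChange does its progress tracker learn about node 4, at which point subsequent
// ticks start emitting heartbeats/MsgApp to the new node. That is what the tick-inside-Eventually
// loop below waits for.)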
1971 Eventually(func() <-chan raft.SoftState { 1972 c1.clock.Increment(interval) 1973 return c4.observe 1974 }, defaultTimeout).Should(Receive(Equal(raft.SoftState{Lead: 1, RaftState: raft.StateFollower}))) 1975 1976 Eventually(c4.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1)) 1977 Eventually(c4.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1)) 1978 1979 By("submitting new transaction to follower") 1980 c1.cutter.CutNext = true 1981 err = c4.Order(env, 0) 1982 Expect(err).NotTo(HaveOccurred()) 1983 Expect(c4.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 1984 Expect(c4.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 1985 1986 network.exec(func(c *chain) { 1987 Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(2)) 1988 }) 1989 }) 1990 1991 It("does not reconfigure raft cluster if it's a channel creation tx", func() { 1992 configEnv := newConfigEnv("another-channel", 1993 common.HeaderType_CONFIG, 1994 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(2))) 1995 1996 // Wrap config env in Orderer transaction 1997 channelCreationEnv := &common.Envelope{ 1998 Payload: marshalOrPanic(&common.Payload{ 1999 Header: &common.Header{ 2000 ChannelHeader: marshalOrPanic(&common.ChannelHeader{ 2001 Type: int32(common.HeaderType_ORDERER_TRANSACTION), 2002 ChannelId: channelID, 2003 }), 2004 }, 2005 Data: marshalOrPanic(configEnv), 2006 }), 2007 } 2008 2009 c1.cutter.CutNext = true 2010 2011 Expect(c1.Configure(channelCreationEnv, 0)).To(Succeed()) 2012 network.exec(func(c *chain) { 2013 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2014 }) 2015 2016 // assert c2 is not evicted 2017 Consistently(c2.Errored).ShouldNot(BeClosed()) 2018 Expect(c2.Order(env, 0)).To(Succeed()) 2019 2020 network.exec(func(c *chain) { 2021 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2022 }) 2023 }) 2024 2025 It("stop leader and continue reconfiguration failing over to new leader", func() { 2026 // Scenario: Start a replica set of 3 Raft nodes, electing node c1 to be the leader; 2027 // configure the chain support mock to disconnect c1 right after it writes the configuration block 2028 // into the ledger, to simulate failover. 2029 // Next, bootstrap a new node c4 to join the cluster and create a config transaction, submitting 2030 // it to the leader. Once the leader writes the configuration block it fails, and leadership is transferred to 2031 // c2. 2032 // The test asserts that the new node c4 will join the cluster and c2 will handle failover of 2033 // re-configuration. Later we connect c1 back and make sure it is capable of catching up with 2034 // the new configuration and successfully rejoins the replica set.
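// In outline, the steps exercised below are:
//   1. c1 (leader) commits the config block that adds c4, then is disconnected.
//   2. The surviving follower with the longer log (c2 or c3) is elected leader.
//   3. c4 is bootstrapped from the blocks c1 has written and joins the cluster.
//   4. A normal tx submitted via c4 is committed by c2, c3 and c4, but not by the still-disconnected c1.
//   5. c1 reconnects and catches up with the new configuration.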
2035 2036 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2037 c1.cutter.CutNext = true 2038 2039 step1 := c1.getStepFunc() 2040 count := c1.rpc.SendConsensusCallCount() // record current step call count 2041 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2042 // disconnect network after 4 MsgApp are sent by c1: 2043 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2044 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 2045 if c1.rpc.SendConsensusCallCount() == count+4 { 2046 defer network.disconnect(1) 2047 } 2048 2049 return step1(dest, msg) 2050 }) 2051 2052 By("sending config transaction") 2053 err := c1.Configure(configEnv, 0) 2054 Expect(err).NotTo(HaveOccurred()) 2055 2056 // every node has written config block to the OSN ledger 2057 network.exec( 2058 func(c *chain) { 2059 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2060 }) 2061 2062 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2063 c1.setStepFunc(step1) 2064 2065 // elect node with higher index 2066 i2, _ := c2.storage.LastIndex() // err is always nil 2067 i3, _ := c3.storage.LastIndex() 2068 candidate := uint64(2) 2069 if i3 > i2 { 2070 candidate = 3 2071 } 2072 network.chains[candidate].cutter.CutNext = true 2073 network.elect(candidate) 2074 2075 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2076 meta := &common.Metadata{Value: raftmetabytes} 2077 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2078 Expect(err).NotTo(HaveOccurred()) 2079 2080 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil) 2081 // if we join a node to existing network, it MUST already obtained blocks 2082 // till the config block that adds this node to cluster. 2083 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2084 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2085 c4.init() 2086 2087 network.addChain(c4) 2088 c4.start() 2089 Expect(c4.WaitReady()).To(Succeed()) 2090 network.join(4, true) 2091 2092 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2093 Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2094 2095 By("submitting new transaction to follower") 2096 err = c4.Order(env, 0) 2097 Expect(err).NotTo(HaveOccurred()) 2098 2099 // rest nodes are alive include a newly added, hence should write 2 blocks 2100 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2101 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2102 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2103 2104 // node 1 has been stopped should not write any block 2105 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2106 2107 network.join(1, true) 2108 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2109 }) 2110 2111 It("stop cluster quorum and continue reconfiguration after the restart", func() { 2112 // Scenario: Starting replica set of 3 Raft nodes, electing node c1 to be a leader 2113 // configure chain support mock to stop cluster after config block is committed. 2114 // Restart the cluster and ensure it picks up updates and capable to finish reconfiguration. 
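// Note (roughly): the raft ConfChange for a consenter-set change is proposed only after the
// config block has been written, so disconnecting all three nodes at that point drops the
// proposal. The steps below verify that reconfiguration still completes once the nodes
// reconnect and a new leader is elected, allowing c4 to join.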
2115 2116 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2117 c1.cutter.CutNext = true 2118 2119 step1 := c1.getStepFunc() 2120 count := c1.rpc.SendConsensusCallCount() // record current step call count 2121 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2122 // disconnect network after 4 MsgApp are sent by c1: 2123 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2124 // - 2 MsgApp to c2 & c3 that instructs followers to commit data 2125 if c1.rpc.SendConsensusCallCount() == count+4 { 2126 defer func() { 2127 network.disconnect(1) 2128 network.disconnect(2) 2129 network.disconnect(3) 2130 }() 2131 } 2132 2133 return step1(dest, msg) 2134 }) 2135 2136 By("sending config transaction") 2137 err := c1.Configure(configEnv, 0) 2138 Expect(err).NotTo(HaveOccurred()) 2139 2140 // every node has written config block to the OSN ledger 2141 network.exec( 2142 func(c *chain) { 2143 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2144 }) 2145 2146 // assert conf change proposals have been dropped, before proceed to reconnect network 2147 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2148 c1.setStepFunc(step1) 2149 2150 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2151 meta := &common.Metadata{Value: raftmetabytes} 2152 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2153 Expect(err).NotTo(HaveOccurred()) 2154 2155 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil) 2156 // if we join a node to existing network, it MUST already obtained blocks 2157 // till the config block that adds this node to cluster. 2158 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2159 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2160 c4.init() 2161 2162 network.addChain(c4) 2163 2164 By("reconnecting nodes back") 2165 for i := uint64(1); i < 4; i++ { 2166 network.connect(i) 2167 } 2168 2169 // elect node with higher index 2170 i2, _ := c2.storage.LastIndex() // err is always nil 2171 i3, _ := c3.storage.LastIndex() 2172 candidate := uint64(2) 2173 if i3 > i2 { 2174 candidate = 3 2175 } 2176 network.chains[candidate].cutter.CutNext = true 2177 network.elect(candidate) 2178 2179 c4.start() 2180 Expect(c4.WaitReady()).To(Succeed()) 2181 network.join(4, false) 2182 2183 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2184 Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2185 2186 By("submitting new transaction to follower") 2187 err = c4.Order(env, 0) 2188 Expect(err).NotTo(HaveOccurred()) 2189 2190 // rest nodes are alive include a newly added, hence should write 2 blocks 2191 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2192 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2193 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2194 Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2195 }) 2196 2197 It("ensures that despite leader failure cluster continue to process configuration to remove the leader", func() { 2198 // Scenario: Starting replica set of 3 nodes, electing nodeID = 1 to be the leader. 
// Prepare a config update transaction which removes the leader (nodeID = 1); the leader then 2200 // fails right after it commits the configuration block. 2201 2202 configEnv := newConfigEnv(channelID, 2203 common.HeaderType_CONFIG, 2204 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1 2205 2206 c1.cutter.CutNext = true 2207 2208 step1 := c1.getStepFunc() 2209 count := c1.rpc.SendConsensusCallCount() // record current step call count 2210 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2211 // disconnect network after 4 MsgApp are sent by c1: 2212 // - 2 MsgApp to c2 & c3 that replicate data to raft followers 2213 // - 2 MsgApp to c2 & c3 that instruct followers to commit data 2214 if c1.rpc.SendConsensusCallCount() == count+4 { 2215 defer network.disconnect(1) 2216 } 2217 2218 return step1(dest, msg) 2219 }) 2220 2221 By("sending config transaction") 2222 err := c1.Configure(configEnv, 0) 2223 Expect(err).NotTo(HaveOccurred()) 2224 2225 network.exec(func(c *chain) { 2226 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2227 }) 2228 2229 Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6)) 2230 c1.setStepFunc(step1) 2231 2232 // elect node with higher index 2233 i2, _ := c2.storage.LastIndex() // err is always nil 2234 i3, _ := c3.storage.LastIndex() 2235 candidate := uint64(2) 2236 if i3 > i2 { 2237 candidate = 3 2238 } 2239 network.chains[candidate].cutter.CutNext = true 2240 network.elect(candidate) 2241 2242 By("submitting new transaction to follower") 2243 err = c3.Order(env, 0) 2244 Expect(err).NotTo(HaveOccurred()) 2245 2246 // the remaining nodes are alive, hence each should write 2 blocks 2247 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2248 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2249 }) 2250 2251 It("removes leader from replica set", func() { 2252 // Scenario: Start a replica set of 3 nodes, electing nodeID = 1 to be the leader. 2253 // Prepare a config update transaction which removes the leader (nodeID = 1), to 2254 // ensure we handle re-configuration for node removal correctly and the remaining two 2255 // nodes are still capable of forming a functional quorum so Raft can make further progress. 2256 // Moreover, the test asserts that the removed node stops Rafting with the rest of the cluster, i.e. 2257 // it should not be able to get updates or forward transactions.
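// Note: the removed leader only learns of its eviction indirectly. After the config block
// removing node 1 is committed, the assertions below advance c1's fake clock by ELECTION_TICK
// intervals and then expect its Errored() channel to be closed, i.e. the chain to have halted.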
2258 2259 configEnv := newConfigEnv(channelID, 2260 common.HeaderType_CONFIG, 2261 newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1 2262 2263 c1.cutter.CutNext = true 2264 2265 By("sending config transaction") 2266 err := c1.Configure(configEnv, 0) 2267 Expect(err).NotTo(HaveOccurred()) 2268 2269 // every node has written config block to the OSN ledger 2270 network.exec( 2271 func(c *chain) { 2272 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2273 Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2)) 2274 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(2))) 2275 }) 2276 2277 // Assert c1 has exited 2278 c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2) 2279 Eventually(c1.Errored, LongEventualTimeout).Should(BeClosed()) 2280 close(c1.stopped) 2281 2282 var newLeader, remainingFollower *chain 2283 for newLeader == nil || remainingFollower == nil { 2284 var state raft.SoftState 2285 select { 2286 case state = <-c2.observe: 2287 case state = <-c3.observe: 2288 case <-time.After(LongEventualTimeout): 2289 Fail("Expected a new leader to present") 2290 } 2291 2292 if state.RaftState == raft.StateLeader && state.Lead != raft.None { 2293 newLeader = network.chains[state.Lead] 2294 } 2295 2296 if state.RaftState == raft.StateFollower && state.Lead != raft.None { 2297 remainingFollower = network.chains[state.Lead] 2298 } 2299 } 2300 2301 By("submitting transaction to new leader") 2302 newLeader.cutter.CutNext = true 2303 err = newLeader.Order(env, 0) 2304 Expect(err).NotTo(HaveOccurred()) 2305 2306 Eventually(newLeader.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2307 Eventually(remainingFollower.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2308 // node 1 has been stopped should not write any block 2309 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2310 2311 By("trying to submit to new node, expected to fail") 2312 c1.cutter.CutNext = true 2313 err = c1.Order(env, 0) 2314 Expect(err).To(HaveOccurred()) 2315 2316 // number of block writes should remain the same 2317 Consistently(newLeader.support.WriteBlockCallCount).Should(Equal(2)) 2318 Consistently(remainingFollower.support.WriteBlockCallCount).Should(Equal(2)) 2319 Consistently(c1.support.WriteBlockCallCount).Should(Equal(1)) 2320 }) 2321 2322 It("does not deadlock if leader steps down while config block is in-flight", func() { 2323 configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue())) 2324 c1.cutter.CutNext = true 2325 2326 signal := make(chan struct{}) 2327 stub := c1.support.WriteConfigBlockStub 2328 c1.support.WriteConfigBlockStub = func(b *common.Block, meta []byte) { 2329 signal <- struct{}{} 2330 <-signal 2331 stub(b, meta) 2332 } 2333 2334 By("Sending config transaction") 2335 Expect(c1.Configure(configEnv, 0)).To(Succeed()) 2336 2337 Eventually(signal, LongEventualTimeout).Should(Receive()) 2338 network.disconnect(1) 2339 2340 By("Ticking leader till it steps down") 2341 Eventually(func() raft.SoftState { 2342 c1.clock.Increment(interval) 2343 return c1.Node.Status().SoftState 2344 }, LongEventualTimeout).Should(StateEqual(0, raft.StateFollower)) 2345 2346 close(signal) 2347 2348 Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(0, raft.StateFollower))) 2349 2350 By("Re-electing 1 as leader") 2351 network.connect(1) 2352 
network.elect(1) 2353 2354 _, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0) 2355 meta := &common.Metadata{Value: raftmetabytes} 2356 raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil) 2357 Expect(err).NotTo(HaveOccurred()) 2358 2359 c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil) 2360 // if we join a node to existing network, it MUST already obtained blocks 2361 // till the config block that adds this node to cluster. 2362 c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0)) 2363 c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0)) 2364 c4.init() 2365 2366 network.addChain(c4) 2367 c4.Start() 2368 2369 Eventually(func() <-chan raft.SoftState { 2370 c1.clock.Increment(interval) 2371 return c4.observe 2372 }, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateFollower))) 2373 2374 By("Submitting tx to confirm network is still working") 2375 Expect(c1.Order(env, 0)).To(Succeed()) 2376 2377 network.exec(func(c *chain) { 2378 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2379 Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2380 }) 2381 }) 2382 }) 2383 }) 2384 2385 When("3/3 nodes are running", func() { 2386 JustBeforeEach(func() { 2387 network.init() 2388 network.start() 2389 network.elect(1) 2390 }) 2391 2392 AfterEach(func() { 2393 network.stop() 2394 }) 2395 2396 It("correctly sets the cluster size and leadership metrics", func() { 2397 // the network should see only one leadership change 2398 network.exec(func(c *chain) { 2399 Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(1)) 2400 Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(0)).Should(Equal(float64(1))) 2401 Expect(c.fakeFields.fakeClusterSize.SetCallCount()).Should(Equal(1)) 2402 Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(3))) 2403 }) 2404 // c1 should be the leader 2405 Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2)) 2406 Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1))) 2407 // c2 and c3 should continue to remain followers 2408 Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2409 Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0))) 2410 Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2411 Expect(c3.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0))) 2412 }) 2413 2414 It("orders envelope on leader", func() { 2415 By("instructed to cut next block") 2416 c1.cutter.CutNext = true 2417 err := c1.Order(env, 0) 2418 Expect(err).NotTo(HaveOccurred()) 2419 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2420 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2421 2422 network.exec( 2423 func(c *chain) { 2424 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2425 }) 2426 2427 By("respect batch timeout") 2428 c1.cutter.CutNext = false 2429 2430 err = c1.Order(env, 0) 2431 Expect(err).NotTo(HaveOccurred()) 2432 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 2433 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 2434 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 2435 2436 c1.clock.WaitForNWatchersAndIncrement(timeout, 2) 2437 network.exec( 2438 func(c *chain) { 2439 Eventually(c.support.WriteBlockCallCount, 
LongEventualTimeout).Should(Equal(2)) 2440 }) 2441 }) 2442 2443 It("orders envelope on follower", func() { 2444 By("instructed to cut next block") 2445 c1.cutter.CutNext = true 2446 err := c2.Order(env, 0) 2447 Expect(err).NotTo(HaveOccurred()) 2448 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2449 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2450 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2451 2452 network.exec( 2453 func(c *chain) { 2454 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2455 }) 2456 2457 By("respect batch timeout") 2458 c1.cutter.CutNext = false 2459 2460 err = c2.Order(env, 0) 2461 Expect(err).NotTo(HaveOccurred()) 2462 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2)) 2463 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1))) 2464 Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2465 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 2466 2467 c1.clock.WaitForNWatchersAndIncrement(timeout, 2) 2468 network.exec( 2469 func(c *chain) { 2470 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2471 }) 2472 }) 2473 2474 When("MaxInflightBlocks is reached", func() { 2475 BeforeEach(func() { 2476 network.exec(func(c *chain) { c.opts.MaxInflightBlocks = 1 }) 2477 }) 2478 2479 It("waits for in flight blocks to be committed", func() { 2480 c1.cutter.CutNext = true 2481 // disconnect c1 to disrupt consensus 2482 network.disconnect(1) 2483 2484 Expect(c1.Order(env, 0)).To(Succeed()) 2485 2486 doneProp := make(chan struct{}) 2487 go func() { 2488 defer GinkgoRecover() 2489 Expect(c1.Order(env, 0)).To(Succeed()) 2490 close(doneProp) 2491 }() 2492 // expect second `Order` to block 2493 Consistently(doneProp).ShouldNot(BeClosed()) 2494 network.exec(func(c *chain) { 2495 Consistently(c.support.WriteBlockCallCount).Should(BeZero()) 2496 }) 2497 2498 network.connect(1) 2499 c1.clock.Increment(interval) 2500 2501 Eventually(doneProp, LongEventualTimeout).Should(BeClosed()) 2502 network.exec(func(c *chain) { 2503 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2504 }) 2505 }) 2506 2507 It("resets block in flight when steps down from leader", func() { 2508 c1.cutter.CutNext = true 2509 c2.cutter.CutNext = true 2510 // disconnect c1 to disrupt consensus 2511 network.disconnect(1) 2512 2513 Expect(c1.Order(env, 0)).To(Succeed()) 2514 2515 doneProp := make(chan struct{}) 2516 go func() { 2517 defer GinkgoRecover() 2518 2519 Expect(c1.Order(env, 0)).To(Succeed()) 2520 close(doneProp) 2521 }() 2522 // expect second `Order` to block 2523 Consistently(doneProp).ShouldNot(BeClosed()) 2524 network.exec(func(c *chain) { 2525 Consistently(c.support.WriteBlockCallCount).Should(BeZero()) 2526 }) 2527 2528 network.elect(2) 2529 Expect(c3.Order(env, 0)).To(Succeed()) 2530 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2531 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2532 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2533 2534 network.connect(1) 2535 c2.clock.Increment(interval) 2536 2537 Eventually(doneProp, LongEventualTimeout).Should(BeClosed()) 2538 network.exec(func(c *chain) { 2539 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 
2540 }) 2541 }) 2542 }) 2543 2544 When("leader is disconnected", func() { 2545 It("proactively steps down to follower", func() { 2546 network.disconnect(1) 2547 2548 By("Ticking leader until it steps down") 2549 Eventually(func() <-chan raft.SoftState { 2550 c1.clock.Increment(interval) 2551 return c1.observe 2552 }, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StateFollower}))) 2553 2554 By("Ensuring it does not accept message due to the cluster being leaderless") 2555 err := c1.Order(env, 0) 2556 Expect(err).To(MatchError("no Raft leader")) 2557 2558 network.elect(2) 2559 2560 // c1 should have lost leadership 2561 Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(3)) 2562 Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(2)).Should(Equal(float64(0))) 2563 // c2 should become the leader 2564 Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2)) 2565 Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1))) 2566 // c2 should continue to remain follower 2567 Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1)) 2568 2569 network.join(1, true) 2570 network.exec(func(c *chain) { 2571 Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(3)) 2572 Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(2)).Should(Equal(float64(1))) 2573 }) 2574 2575 err = c1.Order(env, 0) 2576 Expect(err).NotTo(HaveOccurred()) 2577 }) 2578 2579 It("does not deadlock if propose is blocked", func() { 2580 signal := make(chan struct{}) 2581 c1.cutter.CutNext = true 2582 c1.support.SequenceStub = func() uint64 { 2583 signal <- struct{}{} 2584 <-signal 2585 return 0 2586 } 2587 2588 By("Sending a normal transaction") 2589 Expect(c1.Order(env, 0)).To(Succeed()) 2590 2591 Eventually(signal).Should(Receive()) 2592 network.disconnect(1) 2593 2594 By("Ticking leader till it steps down") 2595 Eventually(func() raft.SoftState { 2596 c1.clock.Increment(interval) 2597 return c1.Node.Status().SoftState 2598 }).Should(StateEqual(0, raft.StateFollower)) 2599 2600 close(signal) 2601 2602 Eventually(c1.observe).Should(Receive(StateEqual(0, raft.StateFollower))) 2603 c1.support.SequenceStub = nil 2604 network.exec(func(c *chain) { 2605 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2606 }) 2607 2608 By("Re-electing 1 as leader") 2609 network.connect(1) 2610 network.elect(1) 2611 2612 By("Sending another normal transaction") 2613 Expect(c1.Order(env, 0)).To(Succeed()) 2614 2615 network.exec(func(c *chain) { 2616 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2617 }) 2618 }) 2619 }) 2620 2621 When("follower is disconnected", func() { 2622 It("should return error when receiving an env", func() { 2623 network.disconnect(2) 2624 2625 errorC := c2.Errored() 2626 Consistently(errorC).ShouldNot(BeClosed()) // assert that errorC is not closed 2627 2628 By("Ticking node 2 until it becomes pre-candidate") 2629 Eventually(func() <-chan raft.SoftState { 2630 c2.clock.Increment(interval) 2631 return c2.observe 2632 }, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StatePreCandidate}))) 2633 2634 Eventually(errorC).Should(BeClosed()) 2635 err := c2.Order(env, 0) 2636 Expect(err).To(HaveOccurred()) 2637 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1)) 2638 Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1))) 2639 
Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0)) 2640 2641 network.connect(2) 2642 c1.clock.Increment(interval) 2643 Expect(errorC).To(BeClosed()) 2644 2645 Eventually(c2.Errored).ShouldNot(BeClosed()) 2646 }) 2647 }) 2648 2649 It("leader retransmits lost messages", func() { 2650 // This tests that heartbeats will trigger the leader to retransmit lost MsgApp 2651 2652 c1.cutter.CutNext = true 2653 2654 network.disconnect(1) // drop MsgApp 2655 2656 err := c1.Order(env, 0) 2657 Expect(err).NotTo(HaveOccurred()) 2658 2659 network.exec( 2660 func(c *chain) { 2661 Consistently(func() int { return c.support.WriteBlockCallCount() }).Should(Equal(0)) 2662 }) 2663 2664 network.connect(1) // reconnect leader 2665 2666 c1.clock.Increment(interval) // trigger a heartbeat 2667 network.exec( 2668 func(c *chain) { 2669 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2670 }) 2671 }) 2672 2673 It("allows the leader to create multiple normal blocks without having to wait for them to be written out", func() { 2674 // this ensures that the created blocks are not written out 2675 network.disconnect(1) 2676 2677 c1.cutter.CutNext = true 2678 for i := 0; i < 3; i++ { 2679 Expect(c1.Order(env, 0)).To(Succeed()) 2680 } 2681 2682 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 2683 2684 network.connect(1) 2685 2686 // After FAB-13722, the leader pauses replication if it gets notified that message 2687 // delivery to a certain node has failed, i.e. connection refused. Replication to that 2688 // follower is resumed once the leader receives a MsgHeartbeatResp from it. 2689 // We could certainly tick the leader repeatedly to trigger heartbeat broadcasts, but we 2690 // would also risk a slow leader stepping down due to excessive ticks. 2691 // 2692 // Instead, we can simply send an artificial MsgHeartbeatResp to the leader to resume replication. 2693 m2 := &raftpb.Message{To: c1.id, From: c2.id, Type: raftpb.MsgHeartbeatResp} 2694 c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m2)}, c2.id) 2695 m3 := &raftpb.Message{To: c1.id, From: c3.id, Type: raftpb.MsgHeartbeatResp} 2696 c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m3)}, c3.id) 2697 2698 network.exec(func(c *chain) { 2699 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 2700 }) 2701 }) 2702 2703 It("new leader should wait for in-flight blocks to commit before accepting new env", func() { 2704 // Scenario: when a node is elected as the new leader and there are still in-flight blocks, 2705 // it should not immediately start accepting new envelopes; instead it should wait for 2706 // those in-flight blocks to be committed, otherwise we may create an uncle block which 2707 // forks and panics the chain. 2708 // 2709 // Steps: 2710 // - start raft cluster with three nodes and genesis block0 2711 // - order env1 on c1, which creates block1 2712 // - drop MsgApp from 1 to 3 2713 // - drop second round of MsgApp sent from 1 to 2, so that block1 is only committed on c1 2714 // - disconnect c1 and elect c2 2715 // - order env2 on c2. This env must NOT be immediately accepted, otherwise c2 would create 2716 // an uncle block1 based on block0.
2717 // - c2 commits block1 2718 // - c2 accepts env2, and creates block2 2719 // - c2 commits block2 2720 c1.cutter.CutNext = true 2721 c2.cutter.CutNext = true 2722 2723 step1 := c1.getStepFunc() 2724 c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2725 stepMsg := &raftpb.Message{} 2726 Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred()) 2727 2728 if dest == 3 { 2729 return nil 2730 } 2731 2732 if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) == 0 { 2733 return nil 2734 } 2735 2736 return step1(dest, msg) 2737 }) 2738 2739 Expect(c1.Order(env, 0)).NotTo(HaveOccurred()) 2740 2741 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2742 Consistently(c2.support.WriteBlockCallCount).Should(Equal(0)) 2743 Consistently(c3.support.WriteBlockCallCount).Should(Equal(0)) 2744 2745 network.disconnect(1) 2746 2747 step2 := c2.getStepFunc() 2748 c2.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error { 2749 stepMsg := &raftpb.Message{} 2750 Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred()) 2751 2752 if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) != 0 && dest == 3 { 2753 for _, ent := range stepMsg.Entries { 2754 if len(ent.Data) != 0 { 2755 return nil 2756 } 2757 } 2758 } 2759 return step2(dest, msg) 2760 }) 2761 2762 network.elect(2) 2763 2764 go func() { 2765 defer GinkgoRecover() 2766 Expect(c2.Order(env, 0)).NotTo(HaveOccurred()) 2767 }() 2768 2769 Consistently(c2.support.WriteBlockCallCount).Should(Equal(0)) 2770 Consistently(c3.support.WriteBlockCallCount).Should(Equal(0)) 2771 2772 c2.setStepFunc(step2) 2773 c2.clock.Increment(interval) 2774 2775 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2776 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2777 2778 b, _ := c2.support.WriteBlockArgsForCall(0) 2779 Expect(b.Header.Number).To(Equal(uint64(1))) 2780 b, _ = c2.support.WriteBlockArgsForCall(1) 2781 Expect(b.Header.Number).To(Equal(uint64(2))) 2782 }) 2783 2784 Context("handling config blocks", func() { 2785 var configEnv *common.Envelope 2786 BeforeEach(func() { 2787 values := map[string]*common.ConfigValue{ 2788 "BatchTimeout": { 2789 Version: 1, 2790 Value: marshalOrPanic(&orderer.BatchTimeout{ 2791 Timeout: "3ms", 2792 }), 2793 }, 2794 } 2795 configEnv = newConfigEnv(channelID, 2796 common.HeaderType_CONFIG, 2797 newConfigUpdateEnv(channelID, nil, values), 2798 ) 2799 }) 2800 2801 It("holds up block creation on leader once a config block has been created and not written out", func() { 2802 // this ensures that the created blocks are not written out 2803 network.disconnect(1) 2804 2805 c1.cutter.CutNext = true 2806 // config block 2807 err := c1.Order(configEnv, 0) 2808 Expect(err).NotTo(HaveOccurred()) 2809 2810 // to avoid data races since we are accessing these within a goroutine 2811 tempEnv := env 2812 tempC1 := c1 2813 2814 done := make(chan struct{}) 2815 2816 // normal block 2817 go func() { 2818 defer GinkgoRecover() 2819 2820 // This should be blocked if config block is not committed 2821 err := tempC1.Order(tempEnv, 0) 2822 Expect(err).NotTo(HaveOccurred()) 2823 2824 close(done) 2825 }() 2826 2827 Consistently(done).ShouldNot(BeClosed()) 2828 2829 network.connect(1) 2830 c1.clock.Increment(interval) 2831 2832 network.exec( 2833 func(c *chain) { 2834 Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2835 }) 2836 2837 network.exec( 2838 
func(c *chain) { 2839 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2840 }) 2841 }) 2842 2843 It("continues creating blocks on leader after a config block has been successfully written out", func() { 2844 c1.cutter.CutNext = true 2845 // config block 2846 err := c1.Configure(configEnv, 0) 2847 Expect(err).NotTo(HaveOccurred()) 2848 network.exec( 2849 func(c *chain) { 2850 Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2851 }) 2852 2853 // normal block following config block 2854 err = c1.Order(env, 0) 2855 Expect(err).NotTo(HaveOccurred()) 2856 network.exec( 2857 func(c *chain) { 2858 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2859 }) 2860 }) 2861 }) 2862 2863 When("Snapshotting is enabled", func() { 2864 BeforeEach(func() { 2865 c1.opts.SnapshotIntervalSize = 1 2866 c1.opts.SnapshotCatchUpEntries = 1 2867 }) 2868 2869 It("keeps running if some entries in memory are purged", func() { 2870 // Scenario: snapshotting is enabled on node 1 and it purges memory storage 2871 // per every snapshot. Cluster should be correctly functioning. 2872 2873 i, err := c1.opts.MemoryStorage.FirstIndex() 2874 Expect(err).NotTo(HaveOccurred()) 2875 Expect(i).To(Equal(uint64(1))) 2876 2877 c1.cutter.CutNext = true 2878 2879 err = c1.Order(env, 0) 2880 Expect(err).NotTo(HaveOccurred()) 2881 2882 network.exec( 2883 func(c *chain) { 2884 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 2885 }) 2886 2887 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2888 i, err = c1.opts.MemoryStorage.FirstIndex() 2889 Expect(err).NotTo(HaveOccurred()) 2890 2891 err = c1.Order(env, 0) 2892 Expect(err).NotTo(HaveOccurred()) 2893 2894 network.exec( 2895 func(c *chain) { 2896 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2897 }) 2898 2899 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2900 i, err = c1.opts.MemoryStorage.FirstIndex() 2901 Expect(err).NotTo(HaveOccurred()) 2902 2903 err = c1.Order(env, 0) 2904 Expect(err).NotTo(HaveOccurred()) 2905 2906 network.exec( 2907 func(c *chain) { 2908 Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3)) 2909 }) 2910 2911 Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i)) 2912 }) 2913 2914 It("lagged node can catch up using snapshot", func() { 2915 network.disconnect(2) 2916 c1.cutter.CutNext = true 2917 2918 c2Lasti, _ := c2.opts.MemoryStorage.LastIndex() 2919 var blockCnt int 2920 // Order blocks until first index of c1 memory is greater than last index of c2, 2921 // so a snapshot will be sent to c2 when it rejoins network 2922 Eventually(func() bool { 2923 c1Firsti, _ := c1.opts.MemoryStorage.FirstIndex() 2924 if c1Firsti > c2Lasti+1 { 2925 return true 2926 } 2927 2928 Expect(c1.Order(env, 0)).To(Succeed()) 2929 blockCnt++ 2930 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 2931 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt)) 2932 return false 2933 }, LongEventualTimeout).Should(BeTrue()) 2934 2935 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2936 2937 network.join(2, false) 2938 2939 Eventually(c2.support.WriteBlockCallCount, 
LongEventualTimeout).Should(Equal(blockCnt)) 2940 indices := etcdraft.ListSnapshots(logger, c2.opts.SnapDir) 2941 Expect(indices).To(HaveLen(1)) 2942 gap := indices[0] - c2Lasti 2943 2944 // TODO In theory, "equal" is the accurate behavior we expect. However, eviction suspector, 2945 // which calls block puller, is still replying on real clock, and sometimes increment puller 2946 // call count. Therefore we are being more lenient here until suspector starts using fake clock 2947 // so we have more deterministic control over it. 2948 Expect(c2.puller.PullBlockCallCount()).To(BeNumerically(">=", int(gap))) 2949 2950 // chain should keeps functioning 2951 Expect(c2.Order(env, 0)).To(Succeed()) 2952 2953 network.exec( 2954 func(c *chain) { 2955 Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(blockCnt + 1)) 2956 }) 2957 }) 2958 }) 2959 2960 Context("failover", func() { 2961 It("follower should step up as leader upon failover", func() { 2962 network.stop(1) 2963 network.elect(2) 2964 2965 By("order envelope on new leader") 2966 c2.cutter.CutNext = true 2967 err := c2.Order(env, 0) 2968 Expect(err).NotTo(HaveOccurred()) 2969 2970 // block should not be produced on chain 1 2971 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2972 2973 // block should be produced on chain 2 & 3 2974 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2975 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2976 2977 By("order envelope on follower") 2978 err = c3.Order(env, 0) 2979 Expect(err).NotTo(HaveOccurred()) 2980 2981 // block should not be produced on chain 1 2982 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2983 2984 // block should be produced on chain 2 & 3 2985 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2986 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2)) 2987 }) 2988 2989 It("follower cannot be elected if its log is not up-to-date", func() { 2990 network.disconnect(2) 2991 2992 c1.cutter.CutNext = true 2993 err := c1.Order(env, 0) 2994 Expect(err).NotTo(HaveOccurred()) 2995 2996 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2997 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 2998 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 2999 3000 network.disconnect(1) 3001 network.connect(2) 3002 3003 // node 2 has not caught up with other nodes 3004 for tick := 0; tick < 2*ELECTION_TICK-1; tick++ { 3005 c2.clock.Increment(interval) 3006 Consistently(c2.observe).ShouldNot(Receive(Equal(2))) 3007 } 3008 3009 // When PreVote is enabled, node 2 would fail to collect enough 3010 // PreVote because its index is not up-to-date. Therefore, it 3011 // does not cause leader change on other nodes. 
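// (Roughly: with PreVote a node first runs a pre-election without incrementing its term;
// peers grant the pre-vote only if the candidate's log is at least as up-to-date as their own,
// so the lagging node 2 cannot force a term bump or disturb the current leader.)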
3012 Consistently(c3.observe).ShouldNot(Receive()) 3013 network.elect(3) // node 3 has newest logs among 2&3, so it can be elected 3014 }) 3015 3016 It("PreVote prevents reconnected node from disturbing network", func() { 3017 network.disconnect(2) 3018 3019 c1.cutter.CutNext = true 3020 err := c1.Order(env, 0) 3021 Expect(err).NotTo(HaveOccurred()) 3022 3023 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3024 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3025 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1)) 3026 3027 network.connect(2) 3028 3029 for tick := 0; tick < 2*ELECTION_TICK-1; tick++ { 3030 c2.clock.Increment(interval) 3031 Consistently(c2.observe).ShouldNot(Receive(Equal(2))) 3032 } 3033 3034 Consistently(c1.observe).ShouldNot(Receive()) 3035 Consistently(c3.observe).ShouldNot(Receive()) 3036 }) 3037 3038 It("follower can catch up and then campaign with success", func() { 3039 network.disconnect(2) 3040 3041 c1.cutter.CutNext = true 3042 for i := 0; i < 10; i++ { 3043 err := c1.Order(env, 0) 3044 Expect(err).NotTo(HaveOccurred()) 3045 } 3046 3047 Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3048 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0)) 3049 Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3050 3051 network.join(2, false) 3052 Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10)) 3053 3054 network.disconnect(1) 3055 network.elect(2) 3056 }) 3057 3058 It("purges blockcutter, stops timer and discards created blocks if leadership is lost", func() { 3059 // enqueue one transaction into 1's blockcutter to test for purging of block cutter 3060 c1.cutter.CutNext = false 3061 err := c1.Order(env, 0) 3062 Expect(err).NotTo(HaveOccurred()) 3063 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1)) 3064 3065 // no block should be written because env is not cut into block yet 3066 c1.clock.WaitForNWatchersAndIncrement(interval, 2) 3067 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 3068 3069 network.disconnect(1) 3070 network.elect(2) 3071 network.join(1, true) 3072 3073 Eventually(c1.clock.WatcherCount, LongEventualTimeout).Should(Equal(1)) // blockcutter time is stopped 3074 Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(0)) 3075 // the created block should be discarded since there is a leadership change 3076 Consistently(c1.support.WriteBlockCallCount).Should(Equal(0)) 3077 3078 network.disconnect(2) 3079 network.elect(1) 3080 3081 err = c1.Order(env, 0) 3082 Expect(err).NotTo(HaveOccurred()) 3083 3084 // The following group of assertions is redundant - it's here for completeness. 3085 // If the blockcutter has not been reset, fast-forwarding 1's clock to 'timeout', should result in the blockcutter firing. 3086 // If the blockcucter has been reset, fast-forwarding won't do anything. 
3087 // 3088 // Put differently: 3089 // 3090 // correct: 3091 // stop start fire 3092 // |--------------|---------------------------| 3093 // n*intervals timeout 3094 // (advanced in election) 3095 // 3096 // wrong: 3097 // unstop fire 3098 // |---------------------------| 3099 // timeout 3100 // 3101 // timeout-n*interval n*interval 3102 // |-----------|----------------| 3103 // ^ ^ 3104 // at this point of time it should fire 3105 // timer should not fire at this point 3106 3107 c1.clock.WaitForNWatchersAndIncrement(timeout-interval, 2) 3108 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 3109 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0)) 3110 3111 c1.clock.Increment(interval) 3112 Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3113 Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1)) 3114 }) 3115 3116 It("stale leader should not be able to propose block because of lagged term", func() { 3117 network.disconnect(1) 3118 network.elect(2) 3119 network.connect(1) 3120 3121 c1.cutter.CutNext = true 3122 err := c1.Order(env, 0) 3123 Expect(err).NotTo(HaveOccurred()) 3124 3125 network.exec( 3126 func(c *chain) { 3127 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 3128 }) 3129 }) 3130 3131 It("aborts waiting for block to be committed upon leadership lost", func() { 3132 network.disconnect(1) 3133 3134 c1.cutter.CutNext = true 3135 err := c1.Order(env, 0) 3136 Expect(err).NotTo(HaveOccurred()) 3137 3138 network.exec( 3139 func(c *chain) { 3140 Consistently(c.support.WriteBlockCallCount).Should(Equal(0)) 3141 }) 3142 3143 network.elect(2) 3144 network.connect(1) 3145 3146 c2.clock.Increment(interval) 3147 // this check guarantees that signal on resignC is consumed in commitBatches method. 
3148 Eventually(c1.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 2, RaftState: raft.StateFollower}))) 3149 }) 3150 }) 3151 }) 3152 }) 3153 }) 3154 3155 func nodeConfigFromMetadata(consenterMetadata *raftprotos.ConfigMetadata) []cluster.RemoteNode { 3156 var nodes []cluster.RemoteNode 3157 for i, consenter := range consenterMetadata.Consenters { 3158 // For now, skip ourselves 3159 if i == 0 { 3160 continue 3161 } 3162 serverDER, _ := pem.Decode(consenter.ServerTlsCert) 3163 clientDER, _ := pem.Decode(consenter.ClientTlsCert) 3164 node := cluster.RemoteNode{ 3165 ID: uint64(i + 1), 3166 Endpoint: "localhost:7050", 3167 ServerTLSCert: serverDER.Bytes, 3168 ClientTLSCert: clientDER.Bytes, 3169 } 3170 nodes = append(nodes, node) 3171 } 3172 return nodes 3173 } 3174 3175 func createMetadata(nodeCount int, tlsCA tlsgen.CA) *raftprotos.ConfigMetadata { 3176 md := &raftprotos.ConfigMetadata{Options: &raftprotos.Options{ 3177 TickInterval: time.Duration(interval).String(), 3178 ElectionTick: ELECTION_TICK, 3179 HeartbeatTick: HEARTBEAT_TICK, 3180 MaxInflightBlocks: 5, 3181 }} 3182 for i := 0; i < nodeCount; i++ { 3183 md.Consenters = append(md.Consenters, &raftprotos.Consenter{ 3184 Host: "localhost", 3185 Port: 7050, 3186 ServerTlsCert: serverTLSCert(tlsCA), 3187 ClientTlsCert: clientTLSCert(tlsCA), 3188 }) 3189 } 3190 return md 3191 } 3192 3193 func serverTLSCert(tlsCA tlsgen.CA) []byte { 3194 cert, err := tlsCA.NewServerCertKeyPair("localhost") 3195 if err != nil { 3196 panic(err) 3197 } 3198 return cert.Cert 3199 } 3200 3201 func clientTLSCert(tlsCA tlsgen.CA) []byte { 3202 cert, err := tlsCA.NewClientCertKeyPair() 3203 if err != nil { 3204 panic(err) 3205 } 3206 return cert.Cert 3207 } 3208 3209 // marshalOrPanic serializes a protobuf message and panics if this 3210 // operation fails 3211 func marshalOrPanic(pb proto.Message) []byte { 3212 data, err := proto.Marshal(pb) 3213 if err != nil { 3214 panic(err) 3215 } 3216 return data 3217 } 3218 3219 // helpers to facilitate tests 3220 type stepFunc func(dest uint64, msg *orderer.ConsensusRequest) error 3221 3222 type chain struct { 3223 id uint64 3224 3225 stepLock sync.Mutex 3226 step stepFunc 3227 3228 support *consensusmocks.FakeConsenterSupport 3229 cutter *mockblockcutter.Receiver 3230 configurator *mocks.FakeConfigurator 3231 rpc *mocks.FakeRPC 3232 storage *raft.MemoryStorage 3233 clock *fakeclock.FakeClock 3234 opts etcdraft.Options 3235 puller *mocks.FakeBlockPuller 3236 3237 // store written blocks to be returned by mock block puller 3238 ledgerLock sync.RWMutex 3239 ledger map[uint64]*common.Block 3240 ledgerHeight uint64 3241 lastConfigBlockNumber uint64 3242 3243 observe chan raft.SoftState 3244 unstarted chan struct{} 3245 stopped chan struct{} 3246 3247 fakeFields *fakeMetricsFields 3248 3249 *etcdraft.Chain 3250 3251 cryptoProvider bccsp.BCCSP 3252 } 3253 3254 func newChain( 3255 timeout time.Duration, 3256 channel, dataDir string, 3257 id uint64, 3258 raftMetadata *raftprotos.BlockMetadata, 3259 consenters map[uint64]*raftprotos.Consenter, 3260 cryptoProvider bccsp.BCCSP, 3261 support *consensusmocks.FakeConsenterSupport, 3262 ) *chain { 3263 rpc := &mocks.FakeRPC{} 3264 clock := fakeclock.NewFakeClock(time.Now()) 3265 storage := raft.NewMemoryStorage() 3266 3267 fakeFields := newFakeMetricsFields() 3268 3269 opts := etcdraft.Options{ 3270 RaftID: uint64(id), 3271 Clock: clock, 3272 TickInterval: interval, 3273 ElectionTick: ELECTION_TICK, 3274 HeartbeatTick: HEARTBEAT_TICK, 3275 MaxSizePerMsg: 1024 * 
3276 MaxInflightBlocks: 256,
3277 BlockMetadata: raftMetadata,
3278 LeaderCheckInterval: 500 * time.Millisecond,
3279 Consenters: consenters,
3280 Logger: flogging.NewFabricLogger(zap.NewExample()),
3281 MemoryStorage: storage,
3282 WALDir: path.Join(dataDir, "wal"),
3283 SnapDir: path.Join(dataDir, "snapshot"),
3284 Metrics: newFakeMetrics(fakeFields),
3285 }
3286
3287 if support == nil {
3288 support = &consensusmocks.FakeConsenterSupport{}
3289 support.ChannelIDReturns(channel)
3290 support.SharedConfigReturns(mockOrderer(timeout, nil))
3291 }
3292 cutter := mockblockcutter.NewReceiver()
3293 close(cutter.Block)
3294 support.BlockCutterReturns(cutter)
3295
3296 // upon leader change, lead is reset to 0 before being set to the actual
3297 // new leader, i.e. 1 -> 0 -> 2. Therefore two values are sent on this
3298 // chan, so its capacity needs to be 2
3299 observe := make(chan raft.SoftState, 2)
3300
3301 configurator := &mocks.FakeConfigurator{}
3302 puller := &mocks.FakeBlockPuller{}
3303
3304 ch := make(chan struct{})
3305 close(ch)
3306
3307 c := &chain{
3308 id: id,
3309 support: support,
3310 cutter: cutter,
3311 rpc: rpc,
3312 storage: storage,
3313 observe: observe,
3314 clock: clock,
3315 opts: opts,
3316 unstarted: ch,
3317 stopped: make(chan struct{}),
3318 configurator: configurator,
3319 puller: puller,
3320 ledger: map[uint64]*common.Block{
3321 0: getSeedBlock(), // Very first block
3322 },
3323 ledgerHeight: 1,
3324 fakeFields: fakeFields,
3325 cryptoProvider: cryptoProvider,
3326 }
3327
3328 // receives normal blocks and metadata and appends them to
3329 // the ledger struct to simulate write behaviour
3330 appendNormalBlockToLedger := func(b *common.Block, meta []byte) {
3331 c.ledgerLock.Lock()
3332 defer c.ledgerLock.Unlock()
3333
3334 b = proto.Clone(b).(*common.Block)
3335 bytes, err := proto.Marshal(&common.Metadata{Value: meta})
3336 Expect(err).NotTo(HaveOccurred())
3337 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
3338
3339 lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber})
3340 b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{
3341 Value: lastConfigValue,
3342 })
3343
3344 c.ledger[b.Header.Number] = b
3345 if c.ledgerHeight < b.Header.Number+1 {
3346 c.ledgerHeight = b.Header.Number + 1
3347 }
3348 }
3349
3350 // receives config blocks and metadata and appends them to
3351 // the ledger struct to simulate write behaviour
3352 appendConfigBlockToLedger := func(b *common.Block, meta []byte) {
3353 c.ledgerLock.Lock()
3354 defer c.ledgerLock.Unlock()
3355
3356 b = proto.Clone(b).(*common.Block)
3357 bytes, err := proto.Marshal(&common.Metadata{Value: meta})
3358 Expect(err).NotTo(HaveOccurred())
3359 b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
3360
3361 c.lastConfigBlockNumber = b.Header.Number
3362
3363 lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber})
3364 b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{
3365 Value: lastConfigValue,
3366 })
3367
3368 c.ledger[b.Header.Number] = b
3369 if c.ledgerHeight < b.Header.Number+1 {
3370 c.ledgerHeight = b.Header.Number + 1
3371 }
3372 }
3373
3374 c.support.WriteBlockStub = appendNormalBlockToLedger
3375 c.support.WriteConfigBlockStub = appendConfigBlockToLedger
3376
3377 // returns the current ledger height
3378 c.support.HeightStub = func() uint64 {
3379 c.ledgerLock.RLock()
3380 defer c.ledgerLock.RUnlock()
3381 return c.ledgerHeight
3382 }
3383
3384 // reads a block from the ledger
3385 c.support.BlockStub = func(number uint64) *common.Block {
3386 c.ledgerLock.RLock()
3387 defer c.ledgerLock.RUnlock()
3388 return c.ledger[number]
3389 }
3390
3391 return c
3392 }
3393
3394 func (c *chain) init() {
3395 ch, err := etcdraft.NewChain(
3396 c.support,
3397 c.opts,
3398 c.configurator,
3399 c.rpc,
3400 c.cryptoProvider,
3401 func() (etcdraft.BlockPuller, error) { return c.puller, nil },
3402 nil,
3403 c.observe,
3404 )
3405 Expect(err).NotTo(HaveOccurred())
3406 c.Chain = ch
3407 }
3408
3409 func (c *chain) start() {
3410 c.unstarted = nil
3411 c.Start()
3412 }
3413
3414 func (c *chain) setStepFunc(f stepFunc) {
3415 c.stepLock.Lock()
3416 c.step = f
3417 c.stepLock.Unlock()
3418 }
3419
3420 func (c *chain) getStepFunc() stepFunc {
3421 c.stepLock.Lock()
3422 defer c.stepLock.Unlock()
3423 return c.step
3424 }
3425
3426 type network struct {
3427 sync.RWMutex
3428
3429 leader uint64
3430 chains map[uint64]*chain
3431
3432 // links simulates the configuration of the comm layer (links are bi-directional).
3433 // if links[left][right] == true, right can send msg to left.
3434 links map[uint64]map[uint64]bool
3435 // connectivity determines if a node is connected to the network. This is used for tests
3436 // to simulate a network partition.
3437 connectivity map[uint64]bool
3438 }
3439
3440 func (n *network) link(from []uint64, to uint64) {
3441 links := make(map[uint64]bool)
3442 for _, id := range from {
3443 links[id] = true
3444 }
3445
3446 n.Lock()
3447 defer n.Unlock()
3448
3449 n.links[to] = links
3450 }
3451
3452 func (n *network) linked(from, to uint64) bool {
3453 n.RLock()
3454 defer n.RUnlock()
3455
3456 return n.links[to][from]
3457 }
3458
3459 func (n *network) connect(id uint64) {
3460 n.Lock()
3461 defer n.Unlock()
3462
3463 n.connectivity[id] = true
3464 }
3465
3466 func (n *network) disconnect(id uint64) {
3467 n.Lock()
3468 defer n.Unlock()
3469
3470 n.connectivity[id] = false
3471 }
3472
3473 func (n *network) connected(id uint64) bool {
3474 n.RLock()
3475 defer n.RUnlock()
3476
3477 return n.connectivity[id]
3478 }
3479
3480 func (n *network) addChain(c *chain) {
3481 n.connect(c.id) // chain is connected by default
3482
3483 c.step = func(dest uint64, msg *orderer.ConsensusRequest) error {
3484 if !n.linked(c.id, dest) {
3485 return errors.Errorf("connection refused")
3486 }
3487
3488 if !n.connected(c.id) || !n.connected(dest) {
3489 return errors.Errorf("connection lost")
3490 }
3491
3492 n.RLock()
3493 target := n.chains[dest]
3494 n.RUnlock()
3495 go func() {
3496 defer GinkgoRecover()
3497 target.Consensus(msg, c.id)
3498 }()
3499 return nil
3500 }
3501
3502 c.rpc.SendConsensusStub = func(dest uint64, msg *orderer.ConsensusRequest) error {
3503 c.stepLock.Lock()
3504 defer c.stepLock.Unlock()
3505 return c.step(dest, msg)
3506 }
3507
3508 c.rpc.SendSubmitStub = func(dest uint64, msg *orderer.SubmitRequest) error {
3509 if !n.linked(c.id, dest) {
3510 return errors.Errorf("connection refused")
3511 }
3512
3513 if !n.connected(c.id) || !n.connected(dest) {
3514 return errors.Errorf("connection lost")
3515 }
3516
3517 n.RLock()
3518 target := n.chains[dest]
3519 n.RUnlock()
3520 go func() {
3521 defer GinkgoRecover()
3522 target.Submit(msg, c.id)
3523 }()
3524 return nil
3525 }
3526
3527 c.puller.PullBlockStub = func(i uint64) *common.Block {
3528 n.RLock()
3529 leaderChain := n.chains[n.leader]
3530 n.RUnlock()
3531
3532 leaderChain.ledgerLock.RLock()
3533 defer leaderChain.ledgerLock.RUnlock()
3534 block := leaderChain.ledger[i]
3535 return block
3536 }
3537
3538 c.puller.HeightsByEndpointsStub = func() (map[string]uint64, error) {
3539 n.RLock()
3540 leader := n.chains[n.leader]
3541 n.RUnlock()
3542
3543 if leader == nil {
3544 return nil, errors.Errorf("ledger not available")
3545 }
3546
3547 leader.ledgerLock.RLock()
3548 defer leader.ledgerLock.RUnlock()
3549 return map[string]uint64{"leader": leader.ledgerHeight}, nil
3550 }
3551
3552 c.configurator.ConfigureCalls(func(channel string, nodes []cluster.RemoteNode) {
3553 var ids []uint64
3554 for _, node := range nodes {
3555 ids = append(ids, node.ID)
3556 }
3557 n.link(ids, c.id)
3558 })
3559
3560 n.Lock()
3561 defer n.Unlock()
3562 n.chains[c.id] = c
3563 }
3564
3565 func createNetwork(
3566 timeout time.Duration,
3567 channel, dataDir string,
3568 raftMetadata *raftprotos.BlockMetadata,
3569 consenters map[uint64]*raftprotos.Consenter,
3570 cryptoProvider bccsp.BCCSP,
3571 tlsCA tlsgen.CA,
3572 ) *network {
3573 n := &network{
3574 chains: make(map[uint64]*chain),
3575 connectivity: make(map[uint64]bool),
3576 links: make(map[uint64]map[uint64]bool),
3577 }
3578
3579 for _, nodeID := range raftMetadata.ConsenterIds {
3580 dir, err := ioutil.TempDir(dataDir, fmt.Sprintf("node-%d-", nodeID))
3581 Expect(err).NotTo(HaveOccurred())
3582
3583 m := proto.Clone(raftMetadata).(*raftprotos.BlockMetadata)
3584 support := &consensusmocks.FakeConsenterSupport{}
3585 support.ChannelIDReturns(channel)
3586 support.SharedConfigReturns(mockOrderer(timeout, nil))
3587 mockOrdererConfig := mockOrdererWithTLSRootCert(timeout, nil, tlsCA)
3588 support.SharedConfigReturns(mockOrdererConfig)
3589 n.addChain(newChain(timeout, channel, dir, nodeID, m, consenters, cryptoProvider, support))
3590 }
3591
3592 return n
3593 }
3594
3595 // tests can alter the configuration of a chain before creating it
3596 func (n *network) init() {
3597 n.exec(func(c *chain) { c.init() })
3598 }
3599
3600 func (n *network) start(ids ...uint64) {
3601 nodes := ids
3602 if len(nodes) == 0 {
3603 for i := range n.chains {
3604 nodes = append(nodes, i)
3605 }
3606 }
3607
3608 for _, id := range nodes {
3609 n.chains[id].start()
3610
3611 // When the Raft node bootstraps, it produces a ConfChange
3612 // to add itself, which needs to be consumed with Ready().
3613 // If there are pending configuration changes in raft,
3614 // it refuses to campaign, no matter how many ticks are supplied.
3615 // This is not a problem in production code because eventually
3616 // raft.Ready will be consumed as real time goes by.
3617 //
3618 // However, this is problematic when using a fake clock and artificial
3619 // ticks. Instead of ticking raft indefinitely until raft.Ready is
3620 // consumed, this check is added to indirectly guarantee
3621 // that the first ConfChange is actually consumed, so we can safely
3622 // proceed to tick raft.
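// (The Entries(1, 1, 1) probe below should keep returning an error until raft has
// appended its first entry, the bootstrap ConfChange, to the memory storage.)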
3623 Eventually(func() error {
3624 _, err := n.chains[id].storage.Entries(1, 1, 1)
3625 return err
3626 }, LongEventualTimeout).ShouldNot(HaveOccurred())
3627 Eventually(n.chains[id].WaitReady, LongEventualTimeout).ShouldNot(HaveOccurred())
3628 }
3629 }
3630
3631 func (n *network) stop(ids ...uint64) {
3632 nodes := ids
3633 if len(nodes) == 0 {
3634 for i := range n.chains {
3635 nodes = append(nodes, i)
3636 }
3637 }
3638
3639 for _, id := range nodes {
3640 c := n.chains[id]
3641 c.Halt()
3642 Eventually(c.Errored).Should(BeClosed())
3643 select {
3644 case <-c.stopped:
3645 default:
3646 close(c.stopped)
3647 }
3648 }
3649 }
3650
3651 func (n *network) exec(f func(c *chain), ids ...uint64) {
3652 if len(ids) == 0 {
3653 for _, c := range n.chains {
3654 f(c)
3655 }
3656
3657 return
3658 }
3659
3660 for _, i := range ids {
3661 f(n.chains[i])
3662 }
3663 }
3664
3665 // connect a node to the network and tick the leader to trigger
3666 // a heartbeat so the newly joined node can detect the leader.
3667 //
3668 // expectLeaderChange controls whether a leader change should
3669 // be observed on the newly joined node.
3670 // - it should be true if the newly joined node was the leader
3671 // - it should be false if the newly joined node was a follower and
3672 // already knows the leader.
3673 func (n *network) join(id uint64, expectLeaderChange bool) {
3674 n.connect(id)
3675
3676 n.RLock()
3677 leader, follower := n.chains[n.leader], n.chains[id]
3678 n.RUnlock()
3679
3680 step := leader.getStepFunc()
3681 signal := make(chan struct{})
3682 leader.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
3683 if dest == id {
3684 // close the signal channel when a message targeting the newly
3685 // joined node is observed on the wire.
3686 select {
3687 case <-signal:
3688 default:
3689 close(signal)
3690 }
3691 }
3692
3693 return step(dest, msg)
3694 })
3695
3696 // Tick the leader so it sends out a heartbeat to the new node.
3697 // One tick _may_ not be enough because the leader might be busy
3698 // and this tick is dropped on the floor.
3699 Eventually(func() <-chan struct{} {
3700 leader.clock.Increment(interval)
3701 return signal
3702 }, LongEventualTimeout, 100*time.Millisecond).Should(BeClosed())
3703
3704 leader.setStepFunc(step)
3705
3706 if expectLeaderChange {
3707 Eventually(follower.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: n.leader, RaftState: raft.StateFollower})))
3708 }
3709
3710 // wait for the newly joined node to catch up with the leader
3711 i, err := n.chains[n.leader].opts.MemoryStorage.LastIndex()
3712 Expect(err).NotTo(HaveOccurred())
3713 Eventually(n.chains[id].opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(i))
3714 }
3715
3716 // elect deterministically elects a node as leader
3717 func (n *network) elect(id uint64) {
3718 n.RLock()
3719 // skip observing leader change on followers if the same leader is elected as the previous one,
3720 // because this may happen too quickly from a slow follower's point of view, and the 0 -> X transition
3721 // may not be emitted at all.
3722 observeFollowers := id != n.leader
3723 candidate := n.chains[id]
3724 var followers []*chain
3725 for _, c := range n.chains {
3726 if c.id != id {
3727 followers = append(followers, c)
3728 }
3729 }
3730 n.RUnlock()
3731
3732 // Send the node an artificial MsgTimeoutNow to emulate leadership transfer.
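// (In etcd/raft, a node that receives MsgTimeoutNow campaigns immediately instead of
// waiting for its election timeout to elapse, so no fake-clock ticks are needed here.)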
3733 fmt.Fprintf(GinkgoWriter, "Send artificial MsgTimeoutNow to elect node %d\n", id)
3734 candidate.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow})}, 0)
3735 Eventually(candidate.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateLeader)))
3736
3737 n.Lock()
3738 n.leader = id
3739 n.Unlock()
3740
3741 if !observeFollowers {
3742 return
3743 }
3744
3745 // now observe the leader change on the other nodes
3746 for _, c := range followers {
3747 if c.id == id {
3748 continue
3749 }
3750
3751 select {
3752 case <-c.stopped: // skip check if node is stopped
3753 case <-c.unstarted: // skip check if node is not started yet
3754 default:
3755 if n.linked(c.id, id) && n.connected(c.id) {
3756 Eventually(c.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateFollower)))
3757 }
3758 }
3759 }
3760
3761 }
3762
3763 // newConfigEnv builds the config envelope that is assigned to the configEnv var declared above
3764 func newConfigEnv(chainID string, headerType common.HeaderType, configUpdateEnv *common.ConfigUpdateEnvelope) *common.Envelope {
3765 return &common.Envelope{
3766 Payload: marshalOrPanic(&common.Payload{
3767 Header: &common.Header{
3768 ChannelHeader: marshalOrPanic(&common.ChannelHeader{
3769 Type: int32(headerType),
3770 ChannelId: chainID,
3771 }),
3772 },
3773 Data: marshalOrPanic(&common.ConfigEnvelope{
3774 LastUpdate: &common.Envelope{
3775 Payload: marshalOrPanic(&common.Payload{
3776 Header: &common.Header{
3777 ChannelHeader: marshalOrPanic(&common.ChannelHeader{
3778 Type: int32(common.HeaderType_CONFIG_UPDATE),
3779 ChannelId: chainID,
3780 }),
3781 },
3782 Data: marshalOrPanic(configUpdateEnv),
3783 }), // common.Payload
3784 }, // LastUpdate
3785 }),
3786 }),
3787 }
3788 }
3789
3790 func newConfigUpdateEnv(chainID string, oldValues, newValues map[string]*common.ConfigValue) *common.ConfigUpdateEnvelope {
3791 return &common.ConfigUpdateEnvelope{
3792 ConfigUpdate: marshalOrPanic(&common.ConfigUpdate{
3793 ChannelId: chainID,
3794 ReadSet: &common.ConfigGroup{
3795 Groups: map[string]*common.ConfigGroup{
3796 "Orderer": {
3797 Values: oldValues,
3798 },
3799 },
3800 },
3801 WriteSet: &common.ConfigGroup{
3802 Groups: map[string]*common.ConfigGroup{
3803 "Orderer": {
3804 Values: newValues,
3805 },
3806 },
3807 }, // WriteSet
3808 }),
3809 }
3810 }
3811
3812 func getSeedBlock() *common.Block {
3813 return &common.Block{
3814 Header: &common.BlockHeader{},
3815 Data: &common.BlockData{Data: [][]byte{[]byte("foo")}},
3816 Metadata: &common.BlockMetadata{Metadata: make([][]byte, 4)},
3817 }
3818 }
3819
3820 func StateEqual(lead uint64, state raft.StateType) types.GomegaMatcher {
3821 return Equal(raft.SoftState{Lead: lead, RaftState: state})
3822 }
3823
3824 func BeLeader() types.GomegaMatcher {
3825 return &StateMatcher{expect: raft.StateLeader}
3826 }
3827
3828 func BeFollower() types.GomegaMatcher {
3829 return &StateMatcher{expect: raft.StateFollower}
3830 }
3831
3832 type StateMatcher struct {
3833 expect raft.StateType
3834 }
3835
3836 func (stmatcher *StateMatcher) Match(actual interface{}) (success bool, err error) {
3837 state, ok := actual.(raft.SoftState)
3838 if !ok {
3839 return false, errors.Errorf("StateMatcher expects a raft SoftState")
3840 }
3841
3842 return state.RaftState == stmatcher.expect, nil
3843 }
3844
3845 func (stmatcher *StateMatcher) FailureMessage(actual interface{}) (message string) {
3846 state, ok := actual.(raft.SoftState)
3847 if !ok {
3848 return "StateMatcher expects a raft SoftState"
3849 }
3850
3851 return fmt.Sprintf("Expected %s to be %s", state.RaftState, stmatcher.expect)
3852 }
3853
3854 func (stmatcher *StateMatcher) NegatedFailureMessage(actual interface{}) (message string) {
3855 state, ok := actual.(raft.SoftState)
3856 if !ok {
3857 return "StateMatcher expects a raft SoftState"
3858 }
3859
3860 return fmt.Sprintf("Expected %s not to be %s", state.RaftState, stmatcher.expect)
3861 }
3862
3863 func noOpBlockPuller() (etcdraft.BlockPuller, error) {
3864 bp := &mocks.FakeBlockPuller{}
3865 return bp, nil
3866 }
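// BeCandidate is an illustrative sketch and is not used elsewhere in this suite: a
// hypothetical convenience matcher in the same style as BeLeader and BeFollower above,
// assuming a test also wanted to assert on the transient candidate state exposed by
// go.etcd.io/etcd/raft. Usage would mirror the existing matchers, e.g.
// Eventually(c.observe, LongEventualTimeout).Should(Receive(BeCandidate())).
func BeCandidate() types.GomegaMatcher {
	return &StateMatcher{expect: raft.StateCandidate}
}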