github.com/osdi23p228/fabric@v0.0.0-20221218062954-77808885f5db/orderer/consensus/etcdraft/chain_test.go

     1  /*
     2  Copyright IBM Corp. All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package etcdraft_test
     8  
     9  import (
    10  	"encoding/pem"
    11  	"fmt"
    12  	"io/ioutil"
    13  	"os"
    14  	"os/user"
    15  	"path"
    16  	"sync"
    17  	"time"
    18  
    19  	"code.cloudfoundry.org/clock/fakeclock"
    20  	"github.com/golang/protobuf/proto"
    21  	"github.com/hyperledger/fabric-protos-go/common"
    22  	"github.com/hyperledger/fabric-protos-go/orderer"
    23  	raftprotos "github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
    24  	"github.com/osdi23p228/fabric/bccsp"
    25  	"github.com/osdi23p228/fabric/bccsp/factory"
    26  	"github.com/osdi23p228/fabric/bccsp/sw"
    27  	"github.com/osdi23p228/fabric/common/channelconfig"
    28  	"github.com/osdi23p228/fabric/common/crypto/tlsgen"
    29  	"github.com/osdi23p228/fabric/common/flogging"
    30  	"github.com/osdi23p228/fabric/orderer/common/cluster"
    31  	orderer_types "github.com/osdi23p228/fabric/orderer/common/types"
    32  	"github.com/osdi23p228/fabric/orderer/consensus/etcdraft"
    33  	"github.com/osdi23p228/fabric/orderer/consensus/etcdraft/mocks"
    34  	consensusmocks "github.com/osdi23p228/fabric/orderer/consensus/mocks"
    35  	mockblockcutter "github.com/osdi23p228/fabric/orderer/mocks/common/blockcutter"
    36  	"github.com/osdi23p228/fabric/protoutil"
    37  	. "github.com/onsi/ginkgo"
    38  	. "github.com/onsi/gomega"
    39  	"github.com/onsi/gomega/types"
    40  	"github.com/pkg/errors"
    41  	"go.etcd.io/etcd/raft"
    42  	"go.etcd.io/etcd/raft/raftpb"
    43  	"go.uber.org/zap"
    44  )
    45  
    46  const (
    47  	interval            = 100 * time.Millisecond
    48  	LongEventualTimeout = 10 * time.Second
    49  
    50  	// 10 is the default setting of ELECTION_TICK.
     51  	// We used to use a smaller number here (2) to reduce test time, since we didn't
     52  	// need to tick the node 10 times to trigger an election. However, we now trigger
     53  	// elections with another mechanism that does not depend on time: sending an
     54  	// artificial MsgTimeoutNow to the node.
    55  	ELECTION_TICK  = 10
    56  	HEARTBEAT_TICK = 1
    57  )
    58  
    59  func init() {
    60  	factory.InitFactories(nil)
    61  }
    62  
    63  func mockOrderer(metadata []byte) *mocks.OrdererConfig {
    64  	return mockOrdererWithBatchTimeout(time.Second, metadata)
    65  }
    66  
    67  func mockOrdererWithBatchTimeout(batchTimeout time.Duration, metadata []byte) *mocks.OrdererConfig {
    68  	mockOrderer := &mocks.OrdererConfig{}
    69  	mockOrderer.BatchTimeoutReturns(batchTimeout)
    70  	mockOrderer.ConsensusMetadataReturns(metadata)
    71  	return mockOrderer
    72  }
    73  
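         // mockOrdererWithTLSRootCert builds on mockOrdererWithBatchTimeout and additionally
         // registers a fake orderer org ("fake-org") whose MSP returns the given CA's
         // certificate as its TLS root cert.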
    74  func mockOrdererWithTLSRootCert(batchTimeout time.Duration, metadata []byte, tlsCA tlsgen.CA) *mocks.OrdererConfig {
    75  	mockOrderer := mockOrdererWithBatchTimeout(batchTimeout, metadata)
    76  	mockOrg := &mocks.OrdererOrg{}
    77  	mockMSP := &mocks.MSP{}
    78  	mockMSP.GetTLSRootCertsReturns([][]byte{tlsCA.CertBytes()})
    79  	mockOrg.MSPReturns(mockMSP)
    80  	mockOrderer.OrganizationsReturns(map[string]channelconfig.OrdererOrg{
    81  		"fake-org": mockOrg,
    82  	})
    83  	return mockOrderer
    84  }
    85  
    86  // for some test cases we chmod file/dir to test failures caused by exotic permissions.
    87  // however this does not work if tests are running as root, i.e. in a container.
    88  func skipIfRoot() {
    89  	u, err := user.Current()
    90  	Expect(err).NotTo(HaveOccurred())
    91  	if u.Uid == "0" {
    92  		Skip("you are running test as root, there's no way to make files unreadable")
    93  	}
    94  }
    95  
    96  var _ = Describe("Chain", func() {
    97  	var (
    98  		env       *common.Envelope
    99  		channelID string
   100  		tlsCA     tlsgen.CA
   101  		logger    *flogging.FabricLogger
   102  	)
   103  
   104  	BeforeEach(func() {
   105  		tlsCA, _ = tlsgen.NewCA()
   106  		channelID = "test-channel"
   107  		logger = flogging.NewFabricLogger(zap.NewExample())
   108  		env = &common.Envelope{
   109  			Payload: marshalOrPanic(&common.Payload{
   110  				Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
   111  				Data:   []byte("TEST_MESSAGE"),
   112  			}),
   113  		}
   114  	})
   115  
   116  	Describe("Single Raft node", func() {
   117  		var (
   118  			configurator      *mocks.FakeConfigurator
   119  			consenterMetadata *raftprotos.ConfigMetadata
   120  			consenters        map[uint64]*raftprotos.Consenter
   121  			clock             *fakeclock.FakeClock
   122  			opts              etcdraft.Options
   123  			support           *consensusmocks.FakeConsenterSupport
   124  			cutter            *mockblockcutter.Receiver
   125  			storage           *raft.MemoryStorage
   126  			observeC          chan raft.SoftState
   127  			chain             *etcdraft.Chain
   128  			dataDir           string
   129  			walDir            string
   130  			snapDir           string
   131  			err               error
   132  			fakeFields        *fakeMetricsFields
   133  			cryptoProvider    bccsp.BCCSP
   134  		)
   135  
   136  		BeforeEach(func() {
   137  			cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore())
   138  			Expect(err).NotTo(HaveOccurred())
   139  
   140  			configurator = &mocks.FakeConfigurator{}
   141  			clock = fakeclock.NewFakeClock(time.Now())
   142  			storage = raft.NewMemoryStorage()
   143  
   144  			dataDir, err = ioutil.TempDir("", "wal-")
   145  			Expect(err).NotTo(HaveOccurred())
   146  			walDir = path.Join(dataDir, "wal")
   147  			snapDir = path.Join(dataDir, "snapshot")
   148  
   149  			observeC = make(chan raft.SoftState, 1)
   150  
   151  			support = &consensusmocks.FakeConsenterSupport{}
   152  			support.ChannelIDReturns(channelID)
   153  			consenterMetadata = createMetadata(1, tlsCA)
   154  			support.SharedConfigReturns(mockOrdererWithTLSRootCert(time.Hour, marshalOrPanic(consenterMetadata), tlsCA))
   155  
   156  			cutter = mockblockcutter.NewReceiver()
   157  			support.BlockCutterReturns(cutter)
   158  
   159  			// for block creator initialization
   160  			support.HeightReturns(1)
   161  			support.BlockReturns(getSeedBlock())
   162  
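         			// derive the initial Raft BlockMetadata from the consenter metadata,
         			// assigning sequential consenter IDs starting from 1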
   163  			meta := &raftprotos.BlockMetadata{
   164  				ConsenterIds:    make([]uint64, len(consenterMetadata.Consenters)),
   165  				NextConsenterId: 1,
   166  			}
   167  
   168  			for i := range meta.ConsenterIds {
   169  				meta.ConsenterIds[i] = meta.NextConsenterId
   170  				meta.NextConsenterId++
   171  			}
   172  
   173  			consenters = map[uint64]*raftprotos.Consenter{}
   174  			for i, c := range consenterMetadata.Consenters {
   175  				consenters[meta.ConsenterIds[i]] = c
   176  			}
   177  
   178  			fakeFields = newFakeMetricsFields()
   179  
   180  			opts = etcdraft.Options{
   181  				RaftID:            1,
   182  				Clock:             clock,
   183  				TickInterval:      interval,
   184  				ElectionTick:      ELECTION_TICK,
   185  				HeartbeatTick:     HEARTBEAT_TICK,
   186  				MaxSizePerMsg:     1024 * 1024,
   187  				MaxInflightBlocks: 256,
   188  				BlockMetadata:     meta,
   189  				Consenters:        consenters,
   190  				Logger:            logger,
   191  				MemoryStorage:     storage,
   192  				WALDir:            walDir,
   193  				SnapDir:           snapDir,
   194  				Metrics:           newFakeMetrics(fakeFields),
   195  			}
   196  		})
   197  
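         		// campaign drives node 1 to become leader by injecting an artificial
         		// MsgTimeoutNow message through the chain's Consensus method and waiting
         		// until the observation channel reports it in StateLeader.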
   198  		campaign := func(c *etcdraft.Chain, observeC <-chan raft.SoftState) {
   199  			Eventually(func() <-chan raft.SoftState {
   200  				c.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow, To: 1})}, 0)
   201  				return observeC
   202  			}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
   203  		}
   204  
   205  		JustBeforeEach(func() {
   206  			chain, err = etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC)
   207  			Expect(err).NotTo(HaveOccurred())
   208  
   209  			chain.Start()
   210  			cRel, status := chain.StatusReport()
   211  			Expect(cRel).To(Equal(orderer_types.ClusterRelationMember))
   212  			Expect(status).To(Equal(orderer_types.StatusActive))
   213  
   214  			// When the Raft node bootstraps, it produces a ConfChange
   215  			// to add itself, which needs to be consumed with Ready().
   216  			// If there are pending configuration changes in raft,
   217  			// it refuses to campaign, no matter how many ticks elapse.
   218  			// This is not a problem in the production code because raft.Ready
   219  			// will be consumed eventually, as the wall clock advances.
   220  			//
   221  			// However, this is problematic when using the fake clock and
   222  			// artificial ticks. Instead of ticking raft indefinitely until
   223  			// raft.Ready is consumed, this check is added to indirectly guarantee
   224  			// that the first ConfChange is actually consumed and we can safely
   225  			// proceed to tick the Raft FSM.
   226  			Eventually(func() error {
   227  				_, err := storage.Entries(1, 1, 1)
   228  				return err
   229  			}, LongEventualTimeout).ShouldNot(HaveOccurred())
   230  		})
   231  
   232  		AfterEach(func() {
   233  			chain.Halt()
   234  			Eventually(chain.Errored, LongEventualTimeout).Should(BeClosed())
   235  			// Make sure no timer leak
   236  			Eventually(clock.WatcherCount, LongEventualTimeout).Should(BeZero())
   237  			os.RemoveAll(dataDir)
   238  		})
   239  
   240  		Context("when a node starts up", func() {
   241  			It("properly configures the communication layer", func() {
   242  				expectedNodeConfig := nodeConfigFromMetadata(consenterMetadata)
   243  				Eventually(configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(1))
   244  				_, arg2 := configurator.ConfigureArgsForCall(0)
   245  				Expect(arg2).To(Equal(expectedNodeConfig))
   246  			})
   247  
   248  			It("correctly sets the metrics labels and publishes requisite metrics", func() {
   249  				type withImplementers interface {
   250  					WithCallCount() int
   251  					WithArgsForCall(int) []string
   252  				}
   253  				metricsList := []withImplementers{
   254  					fakeFields.fakeClusterSize,
   255  					fakeFields.fakeIsLeader,
   256  					fakeFields.fakeActiveNodes,
   257  					fakeFields.fakeCommittedBlockNumber,
   258  					fakeFields.fakeSnapshotBlockNumber,
   259  					fakeFields.fakeLeaderChanges,
   260  					fakeFields.fakeProposalFailures,
   261  					fakeFields.fakeDataPersistDuration,
   262  					fakeFields.fakeNormalProposalsReceived,
   263  					fakeFields.fakeConfigProposalsReceived,
   264  				}
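         				// each metric above should be instantiated exactly once and labeled
         				// with this channel (the channel label is the second With argument)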
   265  				for _, m := range metricsList {
   266  					Expect(m.WithCallCount()).To(Equal(1))
   267  					Expect(func() string {
   268  						return m.WithArgsForCall(0)[1]
   269  					}()).To(Equal(channelID))
   270  				}
   271  
   272  				Expect(fakeFields.fakeClusterSize.SetCallCount()).To(Equal(1))
   273  				Expect(fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(1)))
   274  				Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(1))
   275  				Expect(fakeFields.fakeIsLeader.SetArgsForCall(0)).To(Equal(float64(0)))
   276  				Expect(fakeFields.fakeActiveNodes.SetCallCount()).To(Equal(1))
   277  				Expect(fakeFields.fakeActiveNodes.SetArgsForCall(0)).To(Equal(float64(0)))
   278  			})
   279  		})
   280  
   281  		Context("when no Raft leader is elected", func() {
   282  			It("fails to order envelope", func() {
   283  				err := chain.Order(env, 0)
   284  				Expect(err).To(MatchError("no Raft leader"))
   285  				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
   286  				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   287  				Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(0))
   288  				Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1))
   289  				Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1)))
   290  			})
   291  
   292  			It("starts proactive campaign", func() {
    293  				// assert that even when the ticks supplied are fewer than the election timeout,
    294  				// a leader can still be successfully elected.
   295  				for i := 0; i < ELECTION_TICK; i++ {
   296  					clock.Increment(interval)
   297  					time.Sleep(10 * time.Millisecond)
   298  				}
   299  				Eventually(observeC, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
   300  			})
   301  		})
   302  
   303  		Context("when Raft leader is elected", func() {
   304  			JustBeforeEach(func() {
   305  				campaign(chain, observeC)
   306  			})
   307  
   308  			It("updates metrics upon leader election", func() {
   309  				Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(2))
   310  				Expect(fakeFields.fakeIsLeader.SetArgsForCall(1)).To(Equal(float64(1)))
   311  				Expect(fakeFields.fakeLeaderChanges.AddCallCount()).To(Equal(1))
   312  				Expect(fakeFields.fakeLeaderChanges.AddArgsForCall(0)).To(Equal(float64(1)))
   313  			})
   314  
   315  			It("fails to order envelope if chain is halted", func() {
   316  				chain.Halt()
   317  				err := chain.Order(env, 0)
   318  				Expect(err).To(MatchError("chain is stopped"))
   319  				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
   320  				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   321  				Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1))
   322  				Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1)))
   323  			})
   324  
   325  			It("produces blocks following batch rules", func() {
   326  				close(cutter.Block)
   327  
   328  				By("cutting next batch directly")
   329  				cutter.CutNext = true
   330  				err := chain.Order(env, 0)
   331  				Expect(err).NotTo(HaveOccurred())
   332  				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
   333  				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   334  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   335  				Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call
   336  				Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1)))
   337  
   338  				// There are three calls to DataPersistDuration by now corresponding to the following three
   339  				// arriving on the Ready channel:
   340  				// 1. an EntryConfChange to let this node join the Raft cluster
   341  				// 2. a SoftState and an associated increase of term in the HardState due to the node being elected leader
   342  				// 3. a block being committed
   343  				// The duration being emitted is zero since we don't tick the fake clock during this time
   344  				Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(3))
   345  				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(0)).Should(Equal(float64(0)))
   346  				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(1)).Should(Equal(float64(0)))
   347  				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(2)).Should(Equal(float64(0)))
   348  
   349  				By("respecting batch timeout")
   350  				cutter.CutNext = false
   351  				timeout := time.Second
   352  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   353  				err = chain.Order(env, 0)
   354  				Expect(err).NotTo(HaveOccurred())
   355  				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2))
   356  				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1)))
   357  
   358  				clock.WaitForNWatchersAndIncrement(timeout, 2)
   359  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   360  				Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call
   361  				Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2)))
   362  				Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(4))
   363  				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(3)).Should(Equal(float64(0)))
   364  			})
   365  
   366  			It("does not reset timer for every envelope", func() {
   367  				close(cutter.Block)
   368  
   369  				timeout := time.Second
   370  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   371  
   372  				err := chain.Order(env, 0)
   373  				Expect(err).NotTo(HaveOccurred())
   374  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   375  
   376  				clock.WaitForNWatchersAndIncrement(timeout/2, 2)
   377  
   378  				err = chain.Order(env, 0)
   379  				Expect(err).NotTo(HaveOccurred())
   380  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(2))
   381  
   382  				// the second envelope should not reset the timer; it should
   383  				// therefore expire if we increment it by just timeout/2
   384  				clock.Increment(timeout / 2)
   385  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   386  			})
   387  
   388  			It("does not write a block if halted before timeout", func() {
   389  				close(cutter.Block)
   390  				timeout := time.Second
   391  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   392  
   393  				err := chain.Order(env, 0)
   394  				Expect(err).NotTo(HaveOccurred())
   395  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   396  
   397  				// wait for timer to start
   398  				Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2))
   399  
   400  				chain.Halt()
   401  				Consistently(support.WriteBlockCallCount).Should(Equal(0))
   402  			})
   403  
   404  			It("stops the timer if a batch is cut", func() {
   405  				close(cutter.Block)
   406  
   407  				timeout := time.Second
   408  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   409  
   410  				err := chain.Order(env, 0)
   411  				Expect(err).NotTo(HaveOccurred())
   412  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   413  
   414  				clock.WaitForNWatchersAndIncrement(timeout/2, 2)
   415  
    416  				By("forcing a batch to be cut before the timer expires")
   417  				cutter.CutNext = true
   418  				err = chain.Order(env, 0)
   419  				Expect(err).NotTo(HaveOccurred())
   420  
   421  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   422  				b, _ := support.WriteBlockArgsForCall(0)
   423  				Expect(b.Data.Data).To(HaveLen(2))
   424  				Expect(cutter.CurBatch()).To(HaveLen(0))
   425  
   426  				// this should start a fresh timer
   427  				cutter.CutNext = false
   428  				err = chain.Order(env, 0)
   429  				Expect(err).NotTo(HaveOccurred())
   430  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   431  
   432  				clock.WaitForNWatchersAndIncrement(timeout/2, 2)
   433  				Consistently(support.WriteBlockCallCount).Should(Equal(1))
   434  
   435  				clock.Increment(timeout / 2)
   436  
   437  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   438  				b, _ = support.WriteBlockArgsForCall(1)
   439  				Expect(b.Data.Data).To(HaveLen(1))
   440  			})
   441  
    442  			It("cuts two batches if the incoming envelope does not fit into the first batch", func() {
   443  				close(cutter.Block)
   444  
   445  				timeout := time.Second
   446  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   447  
   448  				err := chain.Order(env, 0)
   449  				Expect(err).NotTo(HaveOccurred())
   450  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   451  
   452  				cutter.IsolatedTx = true
   453  				err = chain.Order(env, 0)
   454  				Expect(err).NotTo(HaveOccurred())
   455  
   456  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   457  			})
   458  
   459  			Context("revalidation", func() {
   460  				BeforeEach(func() {
   461  					close(cutter.Block)
   462  
   463  					timeout := time.Hour
   464  					support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   465  					support.SequenceReturns(1)
   466  				})
   467  
    468  				It("enqueues if envelope is still valid", func() {
   469  					support.ProcessNormalMsgReturns(1, nil)
   470  
   471  					err := chain.Order(env, 0)
   472  					Expect(err).NotTo(HaveOccurred())
   473  					Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   474  					Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2))
   475  				})
   476  
   477  				It("does not enqueue if envelope is not valid", func() {
   478  					support.ProcessNormalMsgReturns(1, errors.Errorf("Envelope is invalid"))
   479  
   480  					err := chain.Order(env, 0)
   481  					Expect(err).NotTo(HaveOccurred())
   482  					Consistently(cutter.CurBatch).Should(HaveLen(0))
   483  					Consistently(clock.WatcherCount).Should(Equal(1))
   484  				})
   485  			})
   486  
   487  			It("unblocks Errored if chain is halted", func() {
   488  				errorC := chain.Errored()
   489  				Expect(errorC).NotTo(BeClosed())
   490  				chain.Halt()
   491  				Eventually(errorC, LongEventualTimeout).Should(BeClosed())
   492  			})
   493  
   494  			Describe("Config updates", func() {
   495  				var (
   496  					configEnv *common.Envelope
   497  					configSeq uint64
   498  				)
   499  
   500  				Context("when a type A config update comes", func() {
   501  					Context("for existing channel", func() {
    502  						// used to prepare the Orderer Values
   503  						BeforeEach(func() {
   504  							newValues := map[string]*common.ConfigValue{
   505  								"BatchTimeout": {
   506  									Version: 1,
   507  									Value: marshalOrPanic(&orderer.BatchTimeout{
   508  										Timeout: "3ms",
   509  									}),
   510  								},
   511  								"ConsensusType": {
   512  									Version: 4,
   513  								},
   514  							}
   515  							oldValues := map[string]*common.ConfigValue{
   516  								"ConsensusType": {
   517  									Version: 4,
   518  								},
   519  							}
   520  							configEnv = newConfigEnv(channelID,
   521  								common.HeaderType_CONFIG,
   522  								newConfigUpdateEnv(channelID, oldValues, newValues),
   523  							)
   524  							configSeq = 0
   525  						}) // BeforeEach block
   526  
   527  						Context("without revalidation (i.e. correct config sequence)", func() {
   528  							Context("without pending normal envelope", func() {
   529  								It("should create a config block and no normal block", func() {
   530  									err := chain.Configure(configEnv, configSeq)
   531  									Expect(err).NotTo(HaveOccurred())
   532  									Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1))
   533  									Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   534  									Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   535  									Consistently(support.WriteBlockCallCount).Should(Equal(0))
   536  									Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call
   537  									Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1)))
   538  								})
   539  							})
   540  
   541  							Context("with pending normal envelope", func() {
   542  								It("should create a normal block and a config block", func() {
   543  									// We do not need to block the cutter from ordering in our test case and therefore close this channel.
   544  									close(cutter.Block)
   545  
   546  									By("adding a normal envelope")
   547  									err := chain.Order(env, 0)
   548  									Expect(err).NotTo(HaveOccurred())
   549  									Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
   550  									Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   551  									Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   552  
   553  									By("adding a config envelope")
   554  									err = chain.Configure(configEnv, configSeq)
   555  									Expect(err).NotTo(HaveOccurred())
   556  									Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1))
   557  									Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   558  
   559  									Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   560  									Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   561  									Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call
   562  									Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2)))
   563  								})
   564  							})
   565  						})
   566  
   567  						Context("with revalidation (i.e. incorrect config sequence)", func() {
   568  							BeforeEach(func() {
   569  								close(cutter.Block)
   570  								support.SequenceReturns(1) // this causes the revalidation
   571  							})
   572  
   573  							It("should create config block upon correct revalidation", func() {
   574  								support.ProcessConfigMsgReturns(configEnv, 1, nil) // nil implies correct revalidation
   575  
   576  								Expect(chain.Configure(configEnv, configSeq)).To(Succeed())
   577  								Consistently(clock.WatcherCount).Should(Equal(1))
   578  								Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   579  							})
   580  
   581  							It("should not create config block upon incorrect revalidation", func() {
   582  								support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence"))
   583  
   584  								Expect(chain.Configure(configEnv, configSeq)).To(Succeed())
   585  								Consistently(clock.WatcherCount).Should(Equal(1))
   586  								Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) // no call to WriteConfigBlock
   587  							})
   588  
   589  							It("should not disturb current running timer upon incorrect revalidation", func() {
   590  								support.ProcessNormalMsgReturns(1, nil)
   591  								support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence"))
   592  
   593  								Expect(chain.Order(env, configSeq)).To(Succeed())
   594  								Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2))
   595  
   596  								clock.Increment(30 * time.Minute)
   597  								Consistently(support.WriteBlockCallCount).Should(Equal(0))
   598  
   599  								Expect(chain.Configure(configEnv, configSeq)).To(Succeed())
   600  								Consistently(clock.WatcherCount).Should(Equal(2))
   601  
   602  								Consistently(support.WriteBlockCallCount).Should(Equal(0))
   603  								Consistently(support.WriteConfigBlockCallCount).Should(Equal(0))
   604  
   605  								clock.Increment(30 * time.Minute)
   606  								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   607  							})
   608  						})
   609  					})
   610  
   611  					Context("for creating a new channel", func() {
    612  						// used to prepare the Orderer Values
   613  						BeforeEach(func() {
   614  							chainID := "mychannel"
   615  							values := make(map[string]*common.ConfigValue)
   616  							configEnv = newConfigEnv(chainID,
   617  								common.HeaderType_CONFIG,
   618  								newConfigUpdateEnv(chainID, nil, values),
   619  							)
   620  							configSeq = 0
   621  						}) // BeforeEach block
   622  
   623  						It("should be able to create a channel", func() {
   624  							err := chain.Configure(configEnv, configSeq)
   625  							Expect(err).NotTo(HaveOccurred())
   626  							Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   627  						})
   628  					})
   629  				}) // Context block for type A config
   630  
   631  				Context("when a type B config update comes", func() {
   632  					Context("updating protocol values", func() {
    633  						// used to prepare the Orderer Values
   634  						BeforeEach(func() {
   635  							values := map[string]*common.ConfigValue{
   636  								"ConsensusType": {
   637  									Version: 1,
   638  									Value: marshalOrPanic(&orderer.ConsensusType{
   639  										Metadata: marshalOrPanic(consenterMetadata),
   640  									}),
   641  								},
   642  							}
   643  							configEnv = newConfigEnv(channelID,
   644  								common.HeaderType_CONFIG,
   645  								newConfigUpdateEnv(channelID, nil, values))
   646  							configSeq = 0
   647  
   648  						}) // BeforeEach block
   649  
   650  						It("should be able to process config update of type B", func() {
   651  							err := chain.Configure(configEnv, configSeq)
   652  							Expect(err).NotTo(HaveOccurred())
   653  							Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1))
   654  							Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   655  							Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   656  						})
   657  					})
   658  
   659  					Context("updating consenters set by exactly one node", func() {
   660  						It("should be able to process config update adding single node", func() {
   661  							metadata := proto.Clone(consenterMetadata).(*raftprotos.ConfigMetadata)
   662  							metadata.Consenters = append(metadata.Consenters, &raftprotos.Consenter{
   663  								Host:          "localhost",
   664  								Port:          7050,
   665  								ServerTlsCert: serverTLSCert(tlsCA),
   666  								ClientTlsCert: clientTLSCert(tlsCA),
   667  							})
   668  
   669  							values := map[string]*common.ConfigValue{
   670  								"ConsensusType": {
   671  									Version: 1,
   672  									Value: marshalOrPanic(&orderer.ConsensusType{
   673  										Metadata: marshalOrPanic(metadata),
   674  									}),
   675  								},
   676  							}
   677  							configEnv = newConfigEnv(channelID,
   678  								common.HeaderType_CONFIG,
   679  								newConfigUpdateEnv(channelID, nil, values))
   680  							configSeq = 0
   681  
   682  							err := chain.Configure(configEnv, configSeq)
   683  							Expect(err).NotTo(HaveOccurred())
   684  							Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   685  						})
   686  
   687  					})
   688  				})
   689  			})
   690  
   691  			Describe("Crash Fault Tolerance", func() {
   692  				var (
   693  					raftMetadata *raftprotos.BlockMetadata
   694  				)
   695  
   696  				BeforeEach(func() {
   697  					raftMetadata = &raftprotos.BlockMetadata{
   698  						ConsenterIds:    []uint64{1},
   699  						NextConsenterId: 2,
   700  					}
   701  				})
   702  
   703  				Describe("when a chain is started with existing WAL", func() {
   704  					var (
   705  						m1 *raftprotos.BlockMetadata
   706  						m2 *raftprotos.BlockMetadata
   707  					)
   708  					JustBeforeEach(func() {
   709  						// to generate WAL data, we start a chain,
   710  						// order several envelopes and then halt the chain.
   711  						close(cutter.Block)
   712  						cutter.CutNext = true
   713  
    714  						// enqueue some data to be persisted on disk by raft
   715  						err := chain.Order(env, uint64(0))
   716  						Expect(err).NotTo(HaveOccurred())
   717  						Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   718  
   719  						_, metadata := support.WriteBlockArgsForCall(0)
   720  						m1 = &raftprotos.BlockMetadata{}
   721  						proto.Unmarshal(metadata, m1)
   722  
   723  						err = chain.Order(env, uint64(0))
   724  						Expect(err).NotTo(HaveOccurred())
   725  						Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   726  
   727  						_, metadata = support.WriteBlockArgsForCall(1)
   728  						m2 = &raftprotos.BlockMetadata{}
   729  						proto.Unmarshal(metadata, m2)
   730  
   731  						chain.Halt()
   732  					})
   733  
   734  					It("replays blocks from committed entries", func() {
   735  						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
   736  						c.init()
   737  						c.Start()
   738  						defer c.Halt()
   739  
   740  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   741  
   742  						_, metadata := c.support.WriteBlockArgsForCall(0)
   743  						m := &raftprotos.BlockMetadata{}
   744  						proto.Unmarshal(metadata, m)
   745  						Expect(m.RaftIndex).To(Equal(m1.RaftIndex))
   746  
   747  						_, metadata = c.support.WriteBlockArgsForCall(1)
   748  						m = &raftprotos.BlockMetadata{}
   749  						proto.Unmarshal(metadata, m)
   750  						Expect(m.RaftIndex).To(Equal(m2.RaftIndex))
   751  
   752  						// chain should keep functioning
   753  						campaign(c.Chain, c.observe)
   754  
   755  						c.cutter.CutNext = true
   756  
   757  						err := c.Order(env, uint64(0))
   758  						Expect(err).NotTo(HaveOccurred())
   759  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
   760  
   761  					})
   762  
   763  					It("only replays blocks after Applied index", func() {
   764  						raftMetadata.RaftIndex = m1.RaftIndex
   765  						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
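         						// pre-populate the restarted chain's ledger with block 1, which
         						// corresponds to the applied Raft index recorded in raftMetadata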
   766  						c.support.WriteBlock(support.WriteBlockArgsForCall(0))
   767  
   768  						c.init()
   769  						c.Start()
   770  						defer c.Halt()
   771  
   772  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   773  
   774  						_, metadata := c.support.WriteBlockArgsForCall(1)
   775  						m := &raftprotos.BlockMetadata{}
   776  						proto.Unmarshal(metadata, m)
   777  						Expect(m.RaftIndex).To(Equal(m2.RaftIndex))
   778  
   779  						// chain should keep functioning
   780  						campaign(c.Chain, c.observe)
   781  
   782  						c.cutter.CutNext = true
   783  
   784  						err := c.Order(env, uint64(0))
   785  						Expect(err).NotTo(HaveOccurred())
   786  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
   787  					})
   788  
   789  					It("does not replay any block if already in sync", func() {
   790  						raftMetadata.RaftIndex = m2.RaftIndex
   791  						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
   792  						c.init()
   793  						c.Start()
   794  						defer c.Halt()
   795  
   796  						Consistently(c.support.WriteBlockCallCount).Should(Equal(0))
   797  
   798  						// chain should keep functioning
   799  						campaign(c.Chain, c.observe)
   800  
   801  						c.cutter.CutNext = true
   802  
   803  						err := c.Order(env, uint64(0))
   804  						Expect(err).NotTo(HaveOccurred())
   805  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   806  					})
   807  
   808  					Context("WAL file is not readable", func() {
   809  						It("fails to load wal", func() {
   810  							skipIfRoot()
   811  
   812  							files, err := ioutil.ReadDir(walDir)
   813  							Expect(err).NotTo(HaveOccurred())
   814  							for _, f := range files {
   815  								os.Chmod(path.Join(walDir, f.Name()), 0300)
   816  							}
   817  
   818  							c, err := etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC)
   819  							Expect(c).To(BeNil())
   820  							Expect(err).To(MatchError(ContainSubstring("permission denied")))
   821  						})
   822  					})
   823  				})
   824  
   825  				Describe("when snapshotting is enabled (snapshot interval is not zero)", func() {
   826  					var (
   827  						ledgerLock sync.Mutex
   828  						ledger     map[uint64]*common.Block
   829  					)
   830  
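         					// countFiles reports how many snapshot files currently exist in snapDir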
   831  					countFiles := func() int {
   832  						files, err := ioutil.ReadDir(snapDir)
   833  						Expect(err).NotTo(HaveOccurred())
   834  						return len(files)
   835  					}
   836  
   837  					BeforeEach(func() {
   838  						opts.SnapshotCatchUpEntries = 2
   839  
   840  						close(cutter.Block)
   841  						cutter.CutNext = true
   842  
   843  						ledgerLock.Lock()
   844  						ledger = map[uint64]*common.Block{
   845  							0: getSeedBlock(), // genesis block
   846  						}
   847  						ledgerLock.Unlock()
   848  
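         						// mimic a ledger in memory: every written block is stored, with its
         						// orderer metadata attached, keyed by block number; Height reports
         						// the number of blocks stored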
   849  						support.WriteBlockStub = func(block *common.Block, meta []byte) {
   850  							b := proto.Clone(block).(*common.Block)
   851  
   852  							bytes, err := proto.Marshal(&common.Metadata{Value: meta})
   853  							Expect(err).NotTo(HaveOccurred())
   854  							b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
   855  
   856  							ledgerLock.Lock()
   857  							defer ledgerLock.Unlock()
   858  							ledger[b.Header.Number] = b
   859  						}
   860  
   861  						support.HeightStub = func() uint64 {
   862  							ledgerLock.Lock()
   863  							defer ledgerLock.Unlock()
   864  							return uint64(len(ledger))
   865  						}
   866  					})
   867  
   868  					Context("Small SnapshotInterval", func() {
   869  						BeforeEach(func() {
   870  							opts.SnapshotIntervalSize = 1
   871  						})
   872  
   873  						It("writes snapshot file to snapDir", func() {
    874  							// Scenario: start a chain with SnapshotIntervalSize = 1 byte and expect
    875  							// it to take one snapshot for each block
   876  
   877  							i, _ := opts.MemoryStorage.FirstIndex()
   878  
   879  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   880  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   881  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
   882  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
   883  							Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(2)) // incl. initial call
   884  							s, _ := opts.MemoryStorage.Snapshot()
   885  							b := protoutil.UnmarshalBlockOrPanic(s.Data)
   886  							Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(1)).To(Equal(float64(b.Header.Number)))
   887  
   888  							i, _ = opts.MemoryStorage.FirstIndex()
   889  
   890  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   891  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   892  
   893  							Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
   894  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
   895  							Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(3)) // incl. initial call
   896  							s, _ = opts.MemoryStorage.Snapshot()
   897  							b = protoutil.UnmarshalBlockOrPanic(s.Data)
   898  							Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(2)).To(Equal(float64(b.Header.Number)))
   899  						})
   900  
   901  						It("pauses chain if sync is in progress", func() {
   902  							// Scenario:
   903  							// after a snapshot is taken, reboot chain with raftIndex = 0
    904  							// the chain should attempt to sync upon reboot and block on the
    905  							// `WaitReady` API
   906  
   907  							i, _ := opts.MemoryStorage.FirstIndex()
   908  
   909  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   910  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   911  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
   912  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
   913  
   914  							i, _ = opts.MemoryStorage.FirstIndex()
   915  
   916  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   917  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   918  							Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
   919  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
   920  
   921  							chain.Halt()
   922  
   923  							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
   924  							c.init()
   925  
   926  							signal := make(chan struct{})
   927  
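         							// the fake puller serves blocks from the in-memory ledger, but only
         							// after the test sends on `signal`, so assertions can observe the
         							// chain being blocked while catching up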
   928  							c.puller.PullBlockStub = func(i uint64) *common.Block {
   929  								<-signal // blocking for assertions
   930  								ledgerLock.Lock()
   931  								defer ledgerLock.Unlock()
   932  								if i >= uint64(len(ledger)) {
   933  									return nil
   934  								}
   935  
    936  								// This is a false assumption - a single node shouldn't be able to pull blocks from anywhere.
    937  								// However, this test mainly asserts that the chain attempts catch-up upon start,
    938  								// so we can live with it.
   939  								return ledger[i]
   940  							}
   941  
   942  							err := c.WaitReady()
   943  							Expect(err).To(MatchError("chain is not started"))
   944  
   945  							c.Start()
   946  							defer c.Halt()
   947  
    948  							// PullBlock is called, so the chain should be catching up now and WaitReady should block
   949  							signal <- struct{}{}
   950  
   951  							done := make(chan error)
   952  							go func() {
   953  								done <- c.WaitReady()
   954  							}()
   955  
   956  							Consistently(done).ShouldNot(Receive())
   957  							close(signal)                         // unblock block puller
   958  							Eventually(done).Should(Receive(nil)) // WaitReady should be unblocked
   959  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   960  						})
   961  
   962  						It("commits block from snapshot if it's missing from ledger", func() {
   963  							// Scenario:
    964  							// A single node exits right after a snapshot is taken, while the block
    965  							// in it hasn't yet been successfully persisted into the ledger (there can
    966  							// be one async block write in-flight). Then the node is restarted, and
    967  							// catches up using the block in the snapshot.
   968  
   969  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   970  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   971  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
   972  
   973  							chain.Halt()
   974  
   975  							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
   976  							c.init()
   977  							c.Start()
   978  							defer c.Halt()
   979  
   980  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   981  						})
   982  
   983  						It("restores snapshot w/o extra entries", func() {
   984  							// Scenario:
   985  							// after a snapshot is taken, no more entries are appended.
    986  							// then the node is restarted; it loads the snapshot and finds its term
    987  							// and index. While replaying the WAL to memory storage, it should
    988  							// not append any entry because no extra entry was appended
    989  							// after the snapshot was taken.
   990  
   991  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   992  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   993  							_, metadata := support.WriteBlockArgsForCall(0)
   994  							m := &raftprotos.BlockMetadata{}
   995  							proto.Unmarshal(metadata, m)
   996  
   997  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
   998  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1))
   999  							snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created
  1000  							Expect(err).NotTo(HaveOccurred())
  1001  							i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory
  1002  							Expect(err).NotTo(HaveOccurred())
  1003  
  1004  							// expect storage to preserve SnapshotCatchUpEntries entries before snapshot
  1005  							Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1))
  1006  
  1007  							chain.Halt()
  1008  
  1009  							raftMetadata.RaftIndex = m.RaftIndex
  1010  							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
  1011  							c.opts.SnapshotIntervalSize = 1
  1012  
  1013  							c.init()
  1014  							c.Start()
  1015  
   1016  							// the following arithmetic reflects how the etcdraft MemoryStorage is implemented
   1017  							// when no entry is appended after the snapshot is loaded.
  1018  							Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1))
  1019  							Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index))
  1020  
  1021  							// chain keeps functioning
  1022  							Eventually(func() <-chan raft.SoftState {
  1023  								c.clock.Increment(interval)
  1024  								return c.observe
  1025  							}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
  1026  
  1027  							c.cutter.CutNext = true
  1028  							err = c.Order(env, uint64(0))
  1029  							Expect(err).NotTo(HaveOccurred())
  1030  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1031  
  1032  							Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
  1033  							c.Halt()
  1034  
  1035  							_, metadata = c.support.WriteBlockArgsForCall(0)
  1036  							m = &raftprotos.BlockMetadata{}
  1037  							proto.Unmarshal(metadata, m)
  1038  							raftMetadata.RaftIndex = m.RaftIndex
  1039  							cx := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
  1040  
  1041  							cx.init()
  1042  							cx.Start()
  1043  							defer cx.Halt()
  1044  
  1045  							// chain keeps functioning
  1046  							Eventually(func() <-chan raft.SoftState {
  1047  								cx.clock.Increment(interval)
  1048  								return cx.observe
  1049  							}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
  1050  						})
  1051  					})
  1052  
  1053  					Context("Large SnapshotInterval", func() {
  1054  						BeforeEach(func() {
  1055  							opts.SnapshotIntervalSize = 1024
  1056  						})
  1057  
  1058  						It("restores snapshot w/ extra entries", func() {
  1059  							// Scenario:
  1060  							// after a snapshot is taken, more entries are appended.
   1061  							// then the node is restarted; it loads the snapshot and finds its term
   1062  							// and index. While replaying the WAL to memory storage, it should
   1063  							// append some entries.
  1064  
  1065  							largeEnv := &common.Envelope{
  1066  								Payload: marshalOrPanic(&common.Payload{
  1067  									Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
  1068  									Data:   make([]byte, 500),
  1069  								}),
  1070  							}
  1071  
  1072  							By("Ordering two large envelopes to trigger snapshot")
  1073  							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1074  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1075  
  1076  							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1077  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  1078  
  1079  							_, metadata := support.WriteBlockArgsForCall(1)
  1080  							m := &raftprotos.BlockMetadata{}
  1081  							proto.Unmarshal(metadata, m)
  1082  
   1083  							// check the snapshot does exist
  1084  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
  1085  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1))
  1086  							snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created
  1087  							Expect(err).NotTo(HaveOccurred())
  1088  							i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory
  1089  							Expect(err).NotTo(HaveOccurred())
  1090  
  1091  							// expect storage to preserve SnapshotCatchUpEntries entries before snapshot
  1092  							Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1))
  1093  
   1094  							By("Ordering another envelope to append new data to memory after snapshot")
  1095  							Expect(chain.Order(env, uint64(0))).To(Succeed())
  1096  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
  1097  
  1098  							lasti, _ := opts.MemoryStorage.LastIndex()
  1099  
  1100  							chain.Halt()
  1101  
  1102  							raftMetadata.RaftIndex = m.RaftIndex
  1103  							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
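         							// replay all previously committed blocks into the restarted chain's ledger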
  1104  							cnt := support.WriteBlockCallCount()
  1105  							for i := 0; i < cnt; i++ {
  1106  								c.support.WriteBlock(support.WriteBlockArgsForCall(i))
  1107  							}
  1108  
  1109  							By("Restarting the node")
  1110  							c.init()
  1111  							c.Start()
  1112  							defer c.Halt()
  1113  
  1114  							By("Checking latest index is larger than index in snapshot")
  1115  							Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1))
  1116  							Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(lasti))
  1117  						})
  1118  
  1119  						When("local ledger is in sync with snapshot", func() {
  1120  							It("does not pull blocks and still respects snapshot interval", func() {
  1121  								// Scenario:
  1122  								// - snapshot is taken at block 2
  1123  								// - order one more envelope (block 3)
  1124  								// - reboot chain at block 2
  1125  								// - block 3 should be replayed from wal
  1126  								// - order another envelope to trigger snapshot, containing block 3 & 4
  1127  								// Assertions:
  1128  								// - block puller should NOT be called
  1129  								// - chain should keep functioning after reboot
  1130  								// - chain should respect snapshot interval to trigger next snapshot
  1131  
  1132  								largeEnv := &common.Envelope{
  1133  									Payload: marshalOrPanic(&common.Payload{
  1134  										Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
  1135  										Data:   make([]byte, 500),
  1136  									}),
  1137  								}
  1138  
  1139  								By("Ordering two large envelopes to trigger snapshot")
  1140  								Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1141  								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1142  
  1143  								Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1144  								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  1145  
  1146  								Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
  1147  
  1148  								_, metadata := support.WriteBlockArgsForCall(1)
  1149  								m := &raftprotos.BlockMetadata{}
  1150  								proto.Unmarshal(metadata, m)
  1151  
  1152  								By("Cutting block [3]")
  1153  								// order another envelope. this should not trigger snapshot
  1154  								err = chain.Order(largeEnv, uint64(0))
  1155  								Expect(err).NotTo(HaveOccurred())
  1156  								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
  1157  
  1158  								chain.Halt()
  1159  
  1160  								raftMetadata.RaftIndex = m.RaftIndex
  1161  								c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
  1162  								// replay block 1&2
  1163  								c.support.WriteBlock(support.WriteBlockArgsForCall(0))
  1164  								c.support.WriteBlock(support.WriteBlockArgsForCall(1))
  1165  
  1166  								c.opts.SnapshotIntervalSize = 1024
  1167  
  1168  								By("Restarting node at block [2]")
  1169  								c.init()
  1170  								c.Start()
  1171  								defer c.Halt()
  1172  
  1173  								// elect leader
  1174  								campaign(c.Chain, c.observe)
  1175  
  1176  								By("Ordering one more block to trigger snapshot")
  1177  								c.cutter.CutNext = true
  1178  								err = c.Order(largeEnv, uint64(0))
  1179  								Expect(err).NotTo(HaveOccurred())
  1180  
  1181  								Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(4))
  1182  								Expect(c.puller.PullBlockCallCount()).Should(BeZero())
  1183  								// old snapshot file is retained
  1184  								Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
  1185  							})
  1186  						})
  1187  
  1188  						It("respects snapshot interval after reboot", func() {
  1189  							largeEnv := &common.Envelope{
  1190  								Payload: marshalOrPanic(&common.Payload{
  1191  									Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
  1192  									Data:   make([]byte, 500),
  1193  								}),
  1194  							}
  1195  
  1196  							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1197  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1198  							// check no snapshot is taken
  1199  							Consistently(countFiles).Should(Equal(0))
  1200  
  1201  							_, metadata := support.WriteBlockArgsForCall(0)
  1202  							m := &raftprotos.BlockMetadata{}
  1203  							proto.Unmarshal(metadata, m)
  1204  
  1205  							chain.Halt()
  1206  
  1207  							raftMetadata.RaftIndex = m.RaftIndex
  1208  							c1 := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil)
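         							// replay all previously committed blocks into the restarted chain's ledger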
  1209  							cnt := support.WriteBlockCallCount()
  1210  							for i := 0; i < cnt; i++ {
  1211  								c1.support.WriteBlock(support.WriteBlockArgsForCall(i))
  1212  							}
  1213  							c1.cutter.CutNext = true
  1214  							c1.opts.SnapshotIntervalSize = 1024
  1215  
  1216  							By("Restarting chain")
  1217  							c1.init()
  1218  							c1.Start()
  1219  							// chain keeps functioning
  1220  							campaign(c1.Chain, c1.observe)
  1221  
  1222  							Expect(c1.Order(largeEnv, uint64(0))).To(Succeed())
  1223  							Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   1224  							// check the snapshot does exist
  1225  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
  1226  						})
  1227  					})
  1228  				})
  1229  			})
  1230  
  1231  			Context("Invalid WAL dir", func() {
  1232  				var support = &consensusmocks.FakeConsenterSupport{}
  1233  				BeforeEach(func() {
  1234  					// for block creator initialization
  1235  					support.HeightReturns(1)
  1236  					support.BlockReturns(getSeedBlock())
  1237  				})
  1238  
  1239  				When("WAL dir is a file", func() {
  1240  					It("replaces file with fresh WAL dir", func() {
  1241  						f, err := ioutil.TempFile("", "wal-")
  1242  						Expect(err).NotTo(HaveOccurred())
  1243  						defer os.RemoveAll(f.Name())
  1244  
  1245  						chain, err := etcdraft.NewChain(
  1246  							support,
  1247  							etcdraft.Options{
  1248  								WALDir:        f.Name(),
  1249  								SnapDir:       snapDir,
  1250  								Logger:        logger,
  1251  								MemoryStorage: storage,
  1252  								BlockMetadata: &raftprotos.BlockMetadata{},
  1253  								Metrics:       newFakeMetrics(newFakeMetricsFields()),
  1254  							},
  1255  							configurator,
  1256  							nil,
  1257  							cryptoProvider,
  1258  							nil,
  1259  							nil,
  1260  							observeC)
  1261  						Expect(chain).NotTo(BeNil())
  1262  						Expect(err).NotTo(HaveOccurred())
  1263  
  1264  						info, err := os.Stat(f.Name())
  1265  						Expect(err).NotTo(HaveOccurred())
  1266  						Expect(info.IsDir()).To(BeTrue())
  1267  					})
  1268  				})
  1269  
  1270  				When("WAL dir is not writeable", func() {
  1271  					It("replaces it with a fresh WAL dir", func() {
  1272  						d, err := ioutil.TempDir("", "wal-")
  1273  						Expect(err).NotTo(HaveOccurred())
  1274  						defer os.RemoveAll(d)
  1275  
  1276  						err = os.Chmod(d, 0500)
  1277  						Expect(err).NotTo(HaveOccurred())
  1278  
  1279  						chain, err := etcdraft.NewChain(
  1280  							support,
  1281  							etcdraft.Options{
  1282  								WALDir:        d,
  1283  								SnapDir:       snapDir,
  1284  								Logger:        logger,
  1285  								MemoryStorage: storage,
  1286  								BlockMetadata: &raftprotos.BlockMetadata{},
  1287  								Metrics:       newFakeMetrics(newFakeMetricsFields()),
  1288  							},
  1289  							nil,
  1290  							nil,
  1291  							cryptoProvider,
  1292  							noOpBlockPuller,
  1293  							nil,
  1294  							nil)
  1295  						Expect(chain).NotTo(BeNil())
  1296  						Expect(err).NotTo(HaveOccurred())
  1297  					})
  1298  				})
  1299  
  1300  				When("WAL parent dir is not writeable", func() {
  1301  					It("fails to bootstrap fresh raft node", func() {
  1302  						skipIfRoot()
  1303  
  1304  						d, err := ioutil.TempDir("", "wal-")
  1305  						Expect(err).NotTo(HaveOccurred())
  1306  						defer os.RemoveAll(d)
  1307  
  1308  						err = os.Chmod(d, 0500)
  1309  						Expect(err).NotTo(HaveOccurred())
  1310  
  1311  						chain, err := etcdraft.NewChain(
  1312  							support,
  1313  							etcdraft.Options{
  1314  								WALDir:        path.Join(d, "wal-dir"),
  1315  								SnapDir:       snapDir,
  1316  								Logger:        logger,
  1317  								BlockMetadata: &raftprotos.BlockMetadata{},
  1318  							},
  1319  							nil,
  1320  							nil,
  1321  							cryptoProvider,
  1322  							noOpBlockPuller,
  1323  							nil,
  1324  							nil)
  1325  						Expect(chain).To(BeNil())
  1326  						Expect(err).To(MatchError(ContainSubstring("failed to initialize WAL: mkdir")))
  1327  					})
  1328  				})
  1329  			})
  1330  		})
  1331  	})
  1332  
  1333  	Describe("2-node Raft cluster", func() {
  1334  		var (
  1335  			network        *network
  1336  			channelID      string
  1337  			timeout        time.Duration
  1338  			dataDir        string
  1339  			c1, c2         *chain
  1340  			raftMetadata   *raftprotos.BlockMetadata
  1341  			consenters     map[uint64]*raftprotos.Consenter
  1342  			configEnv      *common.Envelope
  1343  			cryptoProvider bccsp.BCCSP
  1344  		)
  1345  		BeforeEach(func() {
  1346  			var err error
  1347  
  1348  			channelID = "multi-node-channel"
  1349  			timeout = 10 * time.Second
  1350  
  1351  			dataDir, err = ioutil.TempDir("", "raft-test-")
  1352  			Expect(err).NotTo(HaveOccurred())
  1353  
  1354  			cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore())
  1355  			Expect(err).NotTo(HaveOccurred())
  1356  
  1357  			raftMetadata = &raftprotos.BlockMetadata{
  1358  				ConsenterIds:    []uint64{1, 2},
  1359  				NextConsenterId: 3,
  1360  			}
  1361  
  1362  			consenters = map[uint64]*raftprotos.Consenter{
  1363  				1: {
  1364  					Host:          "localhost",
  1365  					Port:          7051,
  1366  					ClientTlsCert: clientTLSCert(tlsCA),
  1367  					ServerTlsCert: serverTLSCert(tlsCA),
  1368  				},
  1369  				2: {
  1370  					Host:          "localhost",
  1371  					Port:          7051,
  1372  					ClientTlsCert: clientTLSCert(tlsCA),
  1373  					ServerTlsCert: serverTLSCert(tlsCA),
  1374  				},
  1375  			}
  1376  
  1377  			metadata := &raftprotos.ConfigMetadata{
  1378  				Options: &raftprotos.Options{
  1379  					TickInterval:         "500ms",
  1380  					ElectionTick:         10,
  1381  					HeartbeatTick:        1,
  1382  					MaxInflightBlocks:    5,
  1383  					SnapshotIntervalSize: 200,
  1384  				},
  1385  				Consenters: []*raftprotos.Consenter{consenters[2]},
  1386  			}
  1387  			value := map[string]*common.ConfigValue{
  1388  				"ConsensusType": {
  1389  					Version: 1,
  1390  					Value: marshalOrPanic(&orderer.ConsensusType{
  1391  						Metadata: marshalOrPanic(metadata),
  1392  					}),
  1393  				},
  1394  			}
  1395  			// prepare config update to remove 1
  1396  			configEnv = newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
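        			// The ConsensusType metadata above lists only consenters[2], so applying this
        			// update shrinks the consenter set to a single node, removing node 1.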
  1397  
  1398  			network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA)
  1399  			c1, c2 = network.chains[1], network.chains[2]
  1400  			c1.cutter.CutNext = true
  1401  			network.init()
  1402  			network.start()
  1403  		})
  1404  
  1405  		AfterEach(func() {
  1406  			network.stop()
  1407  			network.exec(func(c *chain) {
  1408  				Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero())
  1409  			})
  1410  
  1411  			os.RemoveAll(dataDir)
  1412  		})
  1413  
  1414  		It("can remove leader by reconfiguring cluster", func() {
  1415  			network.elect(1)
  1416  
  1417  			// trigger status dissemination
  1418  			Eventually(func() int {
  1419  				c1.clock.Increment(interval)
  1420  				return c2.fakeFields.fakeActiveNodes.SetCallCount()
  1421  			}, LongEventualTimeout).Should(Equal(2))
  1422  			Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2)))
  1423  
  1424  			By("Configuring cluster to remove node")
  1425  			Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1426  			Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1427  			c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2)
  1428  
  1429  			Eventually(func() <-chan raft.SoftState {
  1430  				c2.clock.Increment(interval)
  1431  				return c2.observe
  1432  			}, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateLeader)))
  1433  
  1434  			By("Asserting leader can still serve requests as single-node cluster")
  1435  			c2.cutter.CutNext = true
  1436  			Expect(c2.Order(env, 0)).To(Succeed())
  1437  			Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1438  		})
  1439  
  1440  		It("can remove leader by reconfiguring cluster even if leadership transfer fails", func() {
  1441  			network.elect(1)
  1442  
  1443  			step1 := c1.getStepFunc()
  1444  			c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  1445  				stepMsg := &raftpb.Message{}
  1446  				if err := proto.Unmarshal(msg.Payload, stepMsg); err != nil {
  1447  					return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err)
  1448  				}
  1449  
  1450  				if stepMsg.Type == raftpb.MsgTimeoutNow {
  1451  					return nil
  1452  				}
  1453  
  1454  				return step1(dest, msg)
  1455  			})
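        			// The step function above swallows MsgTimeoutNow, so the leadership transfer that the
        			// removed leader initiates never completes; node 2 has to win a regular election instead,
        			// which the test drives below with election-timeout ticks and an explicit network.elect(2).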
  1456  
  1457  			By("Configuring cluster to remove node")
  1458  			Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1459  			Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1460  			c2.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2)
  1461  			Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1462  
  1463  			c1.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2)
  1464  			Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed())
  1465  			close(c1.stopped) // mark c1 stopped in network
  1466  
  1467  			network.elect(2)
  1468  
  1469  			By("Asserting leader can still serve requests as single-node cluster")
  1470  			c2.cutter.CutNext = true
  1471  			Expect(c2.Order(env, 0)).To(Succeed())
  1472  			Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1473  		})
  1474  
  1475  		It("can remove follower by reconfiguring cluster", func() {
  1476  			network.elect(2)
  1477  
  1478  			Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1479  			network.exec(func(c *chain) {
  1480  				Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1481  			})
  1482  
  1483  			Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1484  			Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed())
  1485  
  1486  			By("Asserting leader can still serve requests as single-node cluster")
  1487  			c2.cutter.CutNext = true
  1488  			Expect(c2.Order(env, 0)).To(Succeed())
  1489  			Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1490  		})
  1491  	})
  1492  
  1493  	Describe("3-node Raft cluster", func() {
  1494  		var (
  1495  			network        *network
  1496  			channelID      string
  1497  			timeout        time.Duration
  1498  			dataDir        string
  1499  			c1, c2, c3     *chain
  1500  			raftMetadata   *raftprotos.BlockMetadata
  1501  			consenters     map[uint64]*raftprotos.Consenter
  1502  			cryptoProvider bccsp.BCCSP
  1503  		)
  1504  
  1505  		BeforeEach(func() {
  1506  			var err error
  1507  
  1508  			channelID = "multi-node-channel"
  1509  			timeout = 10 * time.Second
  1510  
  1511  			dataDir, err = ioutil.TempDir("", "raft-test-")
  1512  			Expect(err).NotTo(HaveOccurred())
  1513  
  1514  			raftMetadata = &raftprotos.BlockMetadata{
  1515  				ConsenterIds:    []uint64{1, 2, 3},
  1516  				NextConsenterId: 4,
  1517  			}
  1518  
  1519  			cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore())
  1520  			Expect(err).NotTo(HaveOccurred())
  1521  
  1522  			consenters = map[uint64]*raftprotos.Consenter{
  1523  				1: {
  1524  					Host:          "localhost",
  1525  					Port:          7051,
  1526  					ClientTlsCert: clientTLSCert(tlsCA),
  1527  					ServerTlsCert: serverTLSCert(tlsCA),
  1528  				},
  1529  				2: {
  1530  					Host:          "localhost",
  1531  					Port:          7051,
  1532  					ClientTlsCert: clientTLSCert(tlsCA),
  1533  					ServerTlsCert: serverTLSCert(tlsCA),
  1534  				},
  1535  				3: {
  1536  					Host:          "localhost",
  1537  					Port:          7051,
  1538  					ClientTlsCert: clientTLSCert(tlsCA),
  1539  					ServerTlsCert: serverTLSCert(tlsCA),
  1540  				},
  1541  			}
  1542  
  1543  			network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA)
  1544  			c1 = network.chains[1]
  1545  			c2 = network.chains[2]
  1546  			c3 = network.chains[3]
  1547  		})
  1548  
  1549  		AfterEach(func() {
  1550  			network.stop()
  1551  			network.exec(func(c *chain) {
  1552  				Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero())
  1553  			})
  1554  
  1555  			os.RemoveAll(dataDir)
  1556  		})
  1557  
  1558  		When("2/3 nodes are running", func() {
  1559  			It("late node can catch up", func() {
  1560  				network.init()
  1561  				network.start(1, 2)
  1562  				network.elect(1)
  1563  
  1564  				// trigger status dissemination
  1565  				Eventually(func() int {
  1566  					c1.clock.Increment(interval)
  1567  					return c2.fakeFields.fakeActiveNodes.SetCallCount()
  1568  				}, LongEventualTimeout).Should(Equal(2))
  1569  				Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2)))
  1570  
  1571  				c1.cutter.CutNext = true
  1572  				err := c1.Order(env, 0)
  1573  				Expect(err).NotTo(HaveOccurred())
  1574  
  1575  				Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1576  				Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1577  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  1578  
  1579  				network.start(3)
  1580  
  1581  				c1.clock.Increment(interval)
  1582  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1583  
  1584  				network.stop()
  1585  			})
  1586  
  1587  			It("late node receives snapshot from leader", func() {
  1588  				c1.opts.SnapshotIntervalSize = 1
  1589  				c1.opts.SnapshotCatchUpEntries = 1
  1590  
  1591  				c1.cutter.CutNext = true
  1592  
  1593  				var blocksLock sync.Mutex
  1594  				blocks := make(map[uint64]*common.Block) // storing written blocks for block puller
  1595  
  1596  				c1.support.WriteBlockStub = func(b *common.Block, meta []byte) {
  1597  					blocksLock.Lock()
  1598  					defer blocksLock.Unlock()
  1599  					bytes, err := proto.Marshal(&common.Metadata{Value: meta})
  1600  					Expect(err).NotTo(HaveOccurred())
  1601  					b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
  1602  					blocks[b.Header.Number] = b
  1603  				}
  1604  
  1605  				c3.puller.PullBlockStub = func(i uint64) *common.Block {
  1606  					blocksLock.Lock()
  1607  					defer blocksLock.Unlock()
  1608  					b, exist := blocks[i]
  1609  					if !exist {
  1610  						return nil
  1611  					}
  1612  
  1613  					return b
  1614  				}
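        				// The two stubs above wire c1's written blocks into c3's block puller: once the lagging
        				// node receives a snapshot from the leader, it can pull the blocks it is missing from
        				// this in-memory map instead of a real ledger.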
  1615  
  1616  				network.init()
  1617  				network.start(1, 2)
  1618  				network.elect(1)
  1619  
  1620  				err := c1.Order(env, 0)
  1621  				Expect(err).NotTo(HaveOccurred())
  1622  
  1623  				Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1624  				Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1625  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  1626  
  1627  				err = c1.Order(env, 0)
  1628  				Expect(err).NotTo(HaveOccurred())
  1629  
  1630  				Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2))
  1631  				Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2))
  1632  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  1633  
  1634  				network.start(3)
  1635  
  1636  				c1.clock.Increment(interval)
  1637  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2))
  1638  
  1639  				network.stop()
  1640  			})
  1641  		})
  1642  
  1643  		When("reconfiguring raft cluster", func() {
  1644  			const (
  1645  				defaultTimeout = 5 * time.Second
  1646  			)
  1647  			var (
  1648  				options = &raftprotos.Options{
  1649  					TickInterval:         "500ms",
  1650  					ElectionTick:         10,
  1651  					HeartbeatTick:        1,
  1652  					MaxInflightBlocks:    5,
  1653  					SnapshotIntervalSize: 200,
  1654  				}
  1655  				updateRaftConfigValue = func(metadata *raftprotos.ConfigMetadata) map[string]*common.ConfigValue {
  1656  					return map[string]*common.ConfigValue{
  1657  						"ConsensusType": {
  1658  							Version: 1,
  1659  							Value: marshalOrPanic(&orderer.ConsensusType{
  1660  								Metadata: marshalOrPanic(metadata),
  1661  							}),
  1662  						},
  1663  					}
  1664  				}
  1665  				addConsenterConfigValue = func() map[string]*common.ConfigValue {
  1666  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1667  					for _, consenter := range consenters {
  1668  						metadata.Consenters = append(metadata.Consenters, consenter)
  1669  					}
  1670  
  1671  					newConsenter := &raftprotos.Consenter{
  1672  						Host:          "localhost",
  1673  						Port:          7050,
  1674  						ServerTlsCert: serverTLSCert(tlsCA),
  1675  						ClientTlsCert: clientTLSCert(tlsCA),
  1676  					}
  1677  					metadata.Consenters = append(metadata.Consenters, newConsenter)
  1678  					return updateRaftConfigValue(metadata)
  1679  				}
  1680  				removeConsenterConfigValue = func(id uint64) map[string]*common.ConfigValue {
  1681  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1682  					for nodeID, consenter := range consenters {
  1683  						if nodeID == id {
  1684  							continue
  1685  						}
  1686  						metadata.Consenters = append(metadata.Consenters, consenter)
  1687  					}
  1688  					return updateRaftConfigValue(metadata)
  1689  				}
  1690  				createChannelEnv = func(metadata *raftprotos.ConfigMetadata) *common.Envelope {
  1691  					configEnv := newConfigEnv("another-channel",
  1692  						common.HeaderType_CONFIG,
  1693  						newConfigUpdateEnv(channelID, nil, updateRaftConfigValue(metadata)))
  1694  
  1695  					// Wrap config env in Orderer transaction
  1696  					return &common.Envelope{
  1697  						Payload: marshalOrPanic(&common.Payload{
  1698  							Header: &common.Header{
  1699  								ChannelHeader: marshalOrPanic(&common.ChannelHeader{
  1700  									Type:      int32(common.HeaderType_ORDERER_TRANSACTION),
  1701  									ChannelId: channelID,
  1702  								}),
  1703  							},
  1704  							Data: marshalOrPanic(configEnv),
  1705  						}),
  1706  					}
  1707  				}
  1708  			)
  1709  
  1710  			BeforeEach(func() {
  1711  				network.exec(func(c *chain) {
  1712  					c.opts.EvictionSuspicion = time.Millisecond * 100
  1713  					c.opts.LeaderCheckInterval = time.Millisecond * 100
  1714  				})
  1715  
  1716  				network.init()
  1717  				network.start()
  1718  				network.elect(1)
  1719  
  1720  				By("Submitting first tx to cut the block")
  1721  				c1.cutter.CutNext = true
  1722  				err := c1.Order(env, 0)
  1723  				Expect(err).NotTo(HaveOccurred())
  1724  
  1725  				c1.clock.Increment(interval)
  1726  
  1727  				network.exec(
  1728  					func(c *chain) {
  1729  						Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1))
  1730  					})
  1731  			})
  1732  
  1733  			AfterEach(func() {
  1734  				network.stop()
  1735  			})
  1736  
  1737  			Context("channel creation", func() {
  1738  				It("succeeds with valid config metadata", func() {
  1739  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1740  					for _, consenter := range consenters {
  1741  						metadata.Consenters = append(metadata.Consenters, consenter)
  1742  					}
  1743  
  1744  					Expect(c1.Configure(createChannelEnv(metadata), 0)).To(Succeed())
  1745  					network.exec(func(c *chain) {
  1746  						Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1747  					})
  1748  				})
  1749  
  1750  			})
  1751  
  1752  			Context("reconfiguration", func() {
  1753  				It("can rotate certificate by adding and removing 1 node in one config update", func() {
  1754  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1755  					for id, consenter := range consenters {
  1756  						if id == 2 {
  1757  							// remove second consenter
  1758  							continue
  1759  						}
  1760  						metadata.Consenters = append(metadata.Consenters, consenter)
  1761  					}
  1762  
  1763  					// add new consenter
  1764  					newConsenter := &raftprotos.Consenter{
  1765  						Host:          "localhost",
  1766  						Port:          7050,
  1767  						ServerTlsCert: serverTLSCert(tlsCA),
  1768  						ClientTlsCert: clientTLSCert(tlsCA),
  1769  					}
  1770  					metadata.Consenters = append(metadata.Consenters, newConsenter)
  1771  
  1772  					value := map[string]*common.ConfigValue{
  1773  						"ConsensusType": {
  1774  							Version: 1,
  1775  							Value: marshalOrPanic(&orderer.ConsensusType{
  1776  								Metadata: marshalOrPanic(metadata),
  1777  							}),
  1778  						},
  1779  					}
  1780  
  1781  					By("creating new configuration with removed node and new one")
  1782  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  1783  					c1.cutter.CutNext = true
  1784  
  1785  					By("sending config transaction")
  1786  					Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1787  
  1788  					network.exec(func(c *chain) {
  1789  						Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1790  					})
  1791  				})
  1792  
  1793  				It("rotates leader certificate and triggers leadership transfer", func() {
  1794  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1795  					for id, consenter := range consenters {
  1796  						if id == 1 {
  1797  							// remove first consenter (the leader)
  1798  							continue
  1799  						}
  1800  						metadata.Consenters = append(metadata.Consenters, consenter)
  1801  					}
  1802  
  1803  					// add new consenter
  1804  					newConsenter := &raftprotos.Consenter{
  1805  						Host:          "localhost",
  1806  						Port:          7050,
  1807  						ServerTlsCert: serverTLSCert(tlsCA),
  1808  						ClientTlsCert: clientTLSCert(tlsCA),
  1809  					}
  1810  					metadata.Consenters = append(metadata.Consenters, newConsenter)
  1811  
  1812  					value := map[string]*common.ConfigValue{
  1813  						"ConsensusType": {
  1814  							Version: 1,
  1815  							Value: marshalOrPanic(&orderer.ConsensusType{
  1816  								Metadata: marshalOrPanic(metadata),
  1817  							}),
  1818  						},
  1819  					}
  1820  
  1821  					By("creating new configuration with removed node and new one")
  1822  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  1823  					c1.cutter.CutNext = true
  1824  
  1825  					By("sending config transaction")
  1826  					Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1827  
  1828  					Eventually(c1.observe, LongEventualTimeout).Should(Receive(BeFollower()))
  1829  					network.exec(func(c *chain) {
  1830  						Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1831  					})
  1832  				})
  1833  
  1834  				When("Leader is disconnected after cert rotation", func() {
  1835  					It("still configures communication after failed leader transfer attempt", func() {
  1836  						metadata := &raftprotos.ConfigMetadata{Options: options}
  1837  						for id, consenter := range consenters {
  1838  							if id == 1 {
  1839  								// remove first consenter (the leader)
  1840  								continue
  1841  							}
  1842  							metadata.Consenters = append(metadata.Consenters, consenter)
  1843  						}
  1844  
  1845  						// add new consenter
  1846  						newConsenter := &raftprotos.Consenter{
  1847  							Host:          "localhost",
  1848  							Port:          7050,
  1849  							ServerTlsCert: serverTLSCert(tlsCA),
  1850  							ClientTlsCert: clientTLSCert(tlsCA),
  1851  						}
  1852  						metadata.Consenters = append(metadata.Consenters, newConsenter)
  1853  
  1854  						value := map[string]*common.ConfigValue{
  1855  							"ConsensusType": {
  1856  								Version: 1,
  1857  								Value: marshalOrPanic(&orderer.ConsensusType{
  1858  									Metadata: marshalOrPanic(metadata),
  1859  								}),
  1860  							},
  1861  						}
  1862  
  1863  						By("creating new configuration with removed node and new one")
  1864  						configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  1865  						c1.cutter.CutNext = true
  1866  
  1867  						step1 := c1.getStepFunc()
  1868  						count := c1.rpc.SendConsensusCallCount() // record current step call count
  1869  						c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  1870  							// disconnect network after 4 MsgApp are sent by c1:
  1871  							// - 2 MsgApp to c2 & c3 that replicate data to raft followers
  1872  							// - 2 MsgApp to c2 & c3 that instruct followers to commit data
  1873  							if c1.rpc.SendConsensusCallCount() == count+4 {
  1874  								defer network.disconnect(1)
  1875  							}
  1876  
  1877  							return step1(dest, msg)
  1878  						})
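        						// Disconnecting the leader right after the config block has been replicated means its
        						// attempt to transfer leadership cannot complete; the test then checks that every node
        						// still reconfigures communication on its own once the election timeout elapses.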
  1879  
  1880  						network.exec(func(c *chain) {
  1881  							Consistently(c.clock.WatcherCount).Should(Equal(1))
  1882  						})
  1883  
  1884  						By("sending config transaction")
  1885  						Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1886  
  1887  						Consistently(c1.observe).ShouldNot(Receive())
  1888  						network.exec(func(c *chain) {
  1889  							// wait for timeout timer to start
  1890  							c.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2)
  1891  							Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1892  						})
  1893  					})
  1894  				})
  1895  
  1896  				When("Follower is disconnected while leader cert is being rotated", func() {
  1897  					It("still configures communication and transfers leader", func() {
  1898  						metadata := &raftprotos.ConfigMetadata{Options: options}
  1899  						for id, consenter := range consenters {
  1900  							if id == 1 {
  1901  								// remove first consenter (the leader)
  1902  								continue
  1903  							}
  1904  							metadata.Consenters = append(metadata.Consenters, consenter)
  1905  						}
  1906  
  1907  						// add new consenter
  1908  						newConsenter := &raftprotos.Consenter{
  1909  							Host:          "localhost",
  1910  							Port:          7050,
  1911  							ServerTlsCert: serverTLSCert(tlsCA),
  1912  							ClientTlsCert: clientTLSCert(tlsCA),
  1913  						}
  1914  						metadata.Consenters = append(metadata.Consenters, newConsenter)
  1915  
  1916  						value := map[string]*common.ConfigValue{
  1917  							"ConsensusType": {
  1918  								Version: 1,
  1919  								Value: marshalOrPanic(&orderer.ConsensusType{
  1920  									Metadata: marshalOrPanic(metadata),
  1921  								}),
  1922  							},
  1923  						}
  1924  
  1925  						cnt := c1.rpc.SendConsensusCallCount()
  1926  						network.disconnect(3)
  1927  
  1928  						// Trigger some heartbeats to be sent so that the leader notices
  1929  						// the failed message delivery to node 3 and marks it as Paused.
  1930  						// This is to ensure leadership is transferred to node 2.
  1931  						Eventually(func() int {
  1932  							c1.clock.Increment(interval)
  1933  							return c1.rpc.SendConsensusCallCount()
  1934  						}, LongEventualTimeout).Should(BeNumerically(">=", cnt+5))
  1935  
  1936  						By("creating new configuration with removed node and new one")
  1937  						configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  1938  						c1.cutter.CutNext = true
  1939  
  1940  						By("sending config transaction")
  1941  						Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1942  
  1943  						Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateFollower)))
  1944  						network.Lock()
  1945  						network.leader = 2 // manually set network leader
  1946  						network.Unlock()
  1947  						network.disconnect(1)
  1948  
  1949  						network.exec(func(c *chain) {
  1950  							Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1951  							Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1952  						}, 1, 2)
  1953  
  1954  						network.join(3, true)
  1955  						Eventually(c3.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1956  						Eventually(c3.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1957  
  1958  						By("Ordering normal transaction")
  1959  						c2.cutter.CutNext = true
  1960  						Expect(c3.Order(env, 0)).To(Succeed())
  1961  						network.exec(func(c *chain) {
  1962  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  1963  						}, 2, 3)
  1964  					})
  1965  				})
  1966  
  1967  				It("adds node to the cluster", func() {
  1968  					addConsenterUpdate := addConsenterConfigValue()
  1969  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterUpdate))
  1970  					c1.cutter.CutNext = true
  1971  
  1972  					By("sending config transaction")
  1973  					err := c1.Configure(configEnv, 0)
  1974  					Expect(err).NotTo(HaveOccurred())
  1975  					Expect(c1.fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1))
  1976  					Expect(c1.fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  1977  
  1978  					network.exec(func(c *chain) {
  1979  						Eventually(c.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1))
  1980  						Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2))
  1981  						Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(4)))
  1982  					})
  1983  
  1984  					_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
  1985  					meta := &common.Metadata{Value: raftmetabytes}
  1986  					raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil)
  1987  					Expect(err).NotTo(HaveOccurred())
  1988  
  1989  					c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil)
  1990  					// if we join a node to an existing network, it MUST have already obtained blocks
  1991  					// up to the config block that adds this node to the cluster.
  1992  					c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0))
  1993  					c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0))
  1994  					c4.init()
  1995  
  1996  					network.addChain(c4)
  1997  					c4.Start()
  1998  
  1999  					// ConfChange is applied to etcd/raft asynchronously, meaning node 4 is not added
  2000  					// to leader's node list right away. An immediate tick does not trigger a heartbeat
  2001  					// being sent to node 4. Therefore, we repeatedly tick the leader until node 4 joins
  2002  					// the cluster successfully.
  2003  					Eventually(func() <-chan raft.SoftState {
  2004  						c1.clock.Increment(interval)
  2005  						return c4.observe
  2006  					}, defaultTimeout).Should(Receive(Equal(raft.SoftState{Lead: 1, RaftState: raft.StateFollower})))
  2007  
  2008  					Eventually(c4.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1))
  2009  					Eventually(c4.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1))
  2010  
  2011  					By("submitting new transaction to follower")
  2012  					c1.cutter.CutNext = true
  2013  					err = c4.Order(env, 0)
  2014  					Expect(err).NotTo(HaveOccurred())
  2015  					Expect(c4.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
  2016  					Expect(c4.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2017  
  2018  					network.exec(func(c *chain) {
  2019  						Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(2))
  2020  					})
  2021  				})
  2022  
  2023  				It("does not reconfigure raft cluster if it's a channel creation tx", func() {
  2024  					configEnv := newConfigEnv("another-channel",
  2025  						common.HeaderType_CONFIG,
  2026  						newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(2)))
  2027  
  2028  					// Wrap config env in Orderer transaction
  2029  					channelCreationEnv := &common.Envelope{
  2030  						Payload: marshalOrPanic(&common.Payload{
  2031  							Header: &common.Header{
  2032  								ChannelHeader: marshalOrPanic(&common.ChannelHeader{
  2033  									Type:      int32(common.HeaderType_ORDERER_TRANSACTION),
  2034  									ChannelId: channelID,
  2035  								}),
  2036  							},
  2037  							Data: marshalOrPanic(configEnv),
  2038  						}),
  2039  					}
  2040  
  2041  					c1.cutter.CutNext = true
  2042  
  2043  					Expect(c1.Configure(channelCreationEnv, 0)).To(Succeed())
  2044  					network.exec(func(c *chain) {
  2045  						Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2046  					})
  2047  
  2048  					// assert c2 is not evicted
  2049  					Consistently(c2.Errored).ShouldNot(BeClosed())
  2050  					Expect(c2.Order(env, 0)).To(Succeed())
  2051  
  2052  					network.exec(func(c *chain) {
  2053  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2054  					})
  2055  				})
  2056  
  2057  				It("stops leader and continues reconfiguration, failing over to new leader", func() {
  2058  					// Scenario: start a replica set of 3 Raft nodes and elect node c1 as leader.
  2059  					// Configure the chain support mock to disconnect c1 right after it writes the configuration
  2060  					// block into the ledger; this simulates a failover.
  2061  					// Next, bootstrap a new node c4 to join the cluster and create a config transaction, submitting
  2062  					// it to the leader. Once the leader writes the configuration block it fails, and leadership is
  2063  					// transferred to c2.
  2064  					// The test asserts that the new node c4 joins the cluster and that c2 handles the failover of
  2065  					// the re-configuration. Later we connect c1 back and make sure it is capable of catching up
  2066  					// with the new configuration and successfully rejoins the replica set.
  2067  
  2068  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue()))
  2069  					c1.cutter.CutNext = true
  2070  
  2071  					step1 := c1.getStepFunc()
  2072  					count := c1.rpc.SendConsensusCallCount() // record current step call count
  2073  					c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2074  						// disconnect network after 4 MsgApp are sent by c1:
  2075  						// - 2 MsgApp to c2 & c3 that replicate data to raft followers
  2076  						// - 2 MsgApp to c2 & c3 that instruct followers to commit data
  2077  						if c1.rpc.SendConsensusCallCount() == count+4 {
  2078  							defer network.disconnect(1)
  2079  						}
  2080  
  2081  						return step1(dest, msg)
  2082  					})
  2083  
  2084  					By("sending config transaction")
  2085  					err := c1.Configure(configEnv, 0)
  2086  					Expect(err).NotTo(HaveOccurred())
  2087  
  2088  					// every node has written config block to the OSN ledger
  2089  					network.exec(
  2090  						func(c *chain) {
  2091  							Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2092  						})
  2093  
  2094  					Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6))
  2095  					c1.setStepFunc(step1)
  2096  
  2097  					// elect node with higher index
  2098  					i2, _ := c2.storage.LastIndex() // err is always nil
  2099  					i3, _ := c3.storage.LastIndex()
  2100  					candidate := uint64(2)
  2101  					if i3 > i2 {
  2102  						candidate = 3
  2103  					}
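        					// Raft grants votes only to a candidate whose log is at least as up-to-date as the
        					// voter's, so the follower with the higher last index is elected to make sure the
        					// campaign succeeds after the old leader was cut off mid-replication.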
  2104  					network.chains[candidate].cutter.CutNext = true
  2105  					network.elect(candidate)
  2106  
  2107  					_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
  2108  					meta := &common.Metadata{Value: raftmetabytes}
  2109  					raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil)
  2110  					Expect(err).NotTo(HaveOccurred())
  2111  
  2112  					c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil)
  2113  					// if we join a node to an existing network, it MUST have already obtained blocks
  2114  					// up to the config block that adds this node to the cluster.
  2115  					c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0))
  2116  					c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0))
  2117  					c4.init()
  2118  
  2119  					network.addChain(c4)
  2120  					c4.start()
  2121  					Expect(c4.WaitReady()).To(Succeed())
  2122  					network.join(4, true)
  2123  
  2124  					Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2125  					Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2126  
  2127  					By("submitting new transaction to follower")
  2128  					err = c4.Order(env, 0)
  2129  					Expect(err).NotTo(HaveOccurred())
  2130  
  2131  					// the remaining nodes are alive, including the newly added one, hence should write 2 blocks
  2132  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2133  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2134  					Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2135  
  2136  					// node 1 has been stopped and should not write any block
  2137  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))
  2138  
  2139  					network.join(1, true)
  2140  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2141  				})
  2142  
  2143  				It("stops cluster quorum and continues reconfiguration after the restart", func() {
  2144  					// Scenario: start a replica set of 3 Raft nodes and elect node c1 as leader.
  2145  					// Configure the chain support mock to stop the cluster after the config block is committed.
  2146  					// Restart the cluster and ensure it picks up the updates and is able to finish the reconfiguration.
  2147  
  2148  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue()))
  2149  					c1.cutter.CutNext = true
  2150  
  2151  					step1 := c1.getStepFunc()
  2152  					count := c1.rpc.SendConsensusCallCount() // record current step call count
  2153  					c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2154  						// disconnect network after 4 MsgApp are sent by c1:
  2155  						// - 2 MsgApp to c2 & c3 that replicate data to raft followers
  2156  						// - 2 MsgApp to c2 & c3 that instruct followers to commit data
  2157  						if c1.rpc.SendConsensusCallCount() == count+4 {
  2158  							defer func() {
  2159  								network.disconnect(1)
  2160  								network.disconnect(2)
  2161  								network.disconnect(3)
  2162  							}()
  2163  						}
  2164  
  2165  						return step1(dest, msg)
  2166  					})
  2167  
  2168  					By("sending config transaction")
  2169  					err := c1.Configure(configEnv, 0)
  2170  					Expect(err).NotTo(HaveOccurred())
  2171  
  2172  					// every node has written config block to the OSN ledger
  2173  					network.exec(
  2174  						func(c *chain) {
  2175  							Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2176  						})
  2177  
  2178  					// assert conf change proposals have been dropped before proceeding to reconnect the network
  2179  					Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6))
  2180  					c1.setStepFunc(step1)
  2181  
  2182  					_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
  2183  					meta := &common.Metadata{Value: raftmetabytes}
  2184  					raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil)
  2185  					Expect(err).NotTo(HaveOccurred())
  2186  
  2187  					c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil)
  2188  					// if we join a node to an existing network, it MUST have already obtained blocks
  2189  					// up to the config block that adds this node to the cluster.
  2190  					c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0))
  2191  					c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0))
  2192  					c4.init()
  2193  
  2194  					network.addChain(c4)
  2195  
  2196  					By("reconnecting nodes back")
  2197  					for i := uint64(1); i < 4; i++ {
  2198  						network.connect(i)
  2199  					}
  2200  
  2201  					// elect node with higher index
  2202  					i2, _ := c2.storage.LastIndex() // err is always nil
  2203  					i3, _ := c3.storage.LastIndex()
  2204  					candidate := uint64(2)
  2205  					if i3 > i2 {
  2206  						candidate = 3
  2207  					}
  2208  					network.chains[candidate].cutter.CutNext = true
  2209  					network.elect(candidate)
  2210  
  2211  					c4.start()
  2212  					Expect(c4.WaitReady()).To(Succeed())
  2213  					network.join(4, false)
  2214  
  2215  					Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2216  					Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2217  
  2218  					By("submitting new transaction to follower")
  2219  					err = c4.Order(env, 0)
  2220  					Expect(err).NotTo(HaveOccurred())
  2221  
  2222  					// the remaining nodes are alive, including the newly added one, hence should write 2 blocks
  2223  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2224  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2225  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2226  					Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2227  				})
  2228  
  2229  				It("ensures that despite leader failure the cluster continues to process configuration to remove the leader", func() {
  2230  					// Scenario: start a replica set of 3 nodes and elect nodeID = 1 as the leader.
  2231  					// Prepare a config update transaction which removes the leader (nodeID = 1); the leader
  2232  					// then fails right after it commits the configuration block.
  2233  
  2234  					configEnv := newConfigEnv(channelID,
  2235  						common.HeaderType_CONFIG,
  2236  						newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1
  2237  
  2238  					c1.cutter.CutNext = true
  2239  
  2240  					step1 := c1.getStepFunc()
  2241  					count := c1.rpc.SendConsensusCallCount() // record current step call count
  2242  					c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2243  						// disconnect network after 4 MsgApp are sent by c1:
  2244  						// - 2 MsgApp to c2 & c3 that replicate data to raft followers
  2245  						// - 2 MsgApp to c2 & c3 that instruct followers to commit data
  2246  						if c1.rpc.SendConsensusCallCount() == count+4 {
  2247  							defer network.disconnect(1)
  2248  						}
  2249  
  2250  						return step1(dest, msg)
  2251  					})
  2252  
  2253  					By("sending config transaction")
  2254  					err := c1.Configure(configEnv, 0)
  2255  					Expect(err).NotTo(HaveOccurred())
  2256  
  2257  					network.exec(func(c *chain) {
  2258  						Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2259  					})
  2260  
  2261  					Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6))
  2262  					c1.setStepFunc(step1)
  2263  
  2264  					// elect node with higher index
  2265  					i2, _ := c2.storage.LastIndex() // err is always nil
  2266  					i3, _ := c3.storage.LastIndex()
  2267  					candidate := uint64(2)
  2268  					if i3 > i2 {
  2269  						candidate = 3
  2270  					}
  2271  					network.chains[candidate].cutter.CutNext = true
  2272  					network.elect(candidate)
  2273  
  2274  					By("submitting new transaction to follower")
  2275  					err = c3.Order(env, 0)
  2276  					Expect(err).NotTo(HaveOccurred())
  2277  
  2278  					// the remaining nodes are alive, including the newly added one, hence should write 2 blocks
  2279  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2280  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2281  				})
  2282  
  2283  				It("removes leader from replica set", func() {
  2284  					// Scenario: start a replica set of 3 nodes and elect nodeID = 1 as the leader.
  2285  					// Prepare a config update transaction which removes the leader (nodeID = 1); this is to
  2286  					// ensure we handle re-configuration for node removal correctly and that the remaining two
  2287  					// nodes are still capable of forming a functional quorum, with Raft able to make further progress.
  2288  					// Moreover, the test asserts that the removed node stops Rafting with the rest of the cluster,
  2289  					// i.e. it is not able to get updates or forward transactions.
  2290  
  2291  					configEnv := newConfigEnv(channelID,
  2292  						common.HeaderType_CONFIG,
  2293  						newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1
  2294  
  2295  					c1.cutter.CutNext = true
  2296  
  2297  					By("sending config transaction")
  2298  					err := c1.Configure(configEnv, 0)
  2299  					Expect(err).NotTo(HaveOccurred())
  2300  
  2301  					// every node has written config block to the OSN ledger
  2302  					network.exec(
  2303  						func(c *chain) {
  2304  							Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2305  							Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2))
  2306  							Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(2)))
  2307  						})
  2308  
  2309  					// Assert c1 has exited
  2310  					c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2)
  2311  					Eventually(c1.Errored, LongEventualTimeout).Should(BeClosed())
  2312  					close(c1.stopped)
  2313  
  2314  					var newLeader, remainingFollower *chain
  2315  					for newLeader == nil || remainingFollower == nil {
  2316  						var state raft.SoftState
  2317  						select {
  2318  						case state = <-c2.observe:
  2319  						case state = <-c3.observe:
  2320  						case <-time.After(LongEventualTimeout):
  2321  							Fail("Expected a new leader to be present")
  2322  						}
  2323  
  2324  						if state.RaftState == raft.StateLeader && state.Lead != raft.None {
  2325  							newLeader = network.chains[state.Lead]
  2326  						}
  2327  
  2328  						if state.RaftState == raft.StateFollower && state.Lead != raft.None {
  2329  							remainingFollower = network.chains[state.Lead]
  2330  						}
  2331  					}
  2332  
  2333  					By("submitting transaction to new leader")
  2334  					newLeader.cutter.CutNext = true
  2335  					err = newLeader.Order(env, 0)
  2336  					Expect(err).NotTo(HaveOccurred())
  2337  
  2338  					Eventually(newLeader.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2339  					Eventually(remainingFollower.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2340  					// node 1 has been stopped and should not write any block
  2341  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))
  2342  
  2343  					By("trying to submit to the removed node, expected to fail")
  2344  					c1.cutter.CutNext = true
  2345  					err = c1.Order(env, 0)
  2346  					Expect(err).To(HaveOccurred())
  2347  
  2348  					// number of block writes should remain the same
  2349  					Consistently(newLeader.support.WriteBlockCallCount).Should(Equal(2))
  2350  					Consistently(remainingFollower.support.WriteBlockCallCount).Should(Equal(2))
  2351  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))
  2352  				})
  2353  
  2354  				It("does not deadlock if leader steps down while config block is in-flight", func() {
  2355  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue()))
  2356  					c1.cutter.CutNext = true
  2357  
  2358  					signal := make(chan struct{})
  2359  					stub := c1.support.WriteConfigBlockStub
  2360  					c1.support.WriteConfigBlockStub = func(b *common.Block, meta []byte) {
  2361  						signal <- struct{}{}
  2362  						<-signal
  2363  						stub(b, meta)
  2364  					}
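        					// The stub above parks WriteConfigBlock on the signal channel, keeping the config block
        					// in flight while the leader is disconnected and ticked into stepping down; the test
        					// verifies this combination does not deadlock the chain.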
  2365  
  2366  					By("Sending config transaction")
  2367  					Expect(c1.Configure(configEnv, 0)).To(Succeed())
  2368  
  2369  					Eventually(signal, LongEventualTimeout).Should(Receive())
  2370  					network.disconnect(1)
  2371  
  2372  					By("Ticking leader till it steps down")
  2373  					Eventually(func() raft.SoftState {
  2374  						c1.clock.Increment(interval)
  2375  						return c1.Node.Status().SoftState
  2376  					}, LongEventualTimeout).Should(StateEqual(0, raft.StateFollower))
  2377  
  2378  					close(signal)
  2379  
  2380  					Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(0, raft.StateFollower)))
  2381  
  2382  					By("Re-electing 1 as leader")
  2383  					network.connect(1)
  2384  					network.elect(1)
  2385  
  2386  					_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
  2387  					meta := &common.Metadata{Value: raftmetabytes}
  2388  					raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil)
  2389  					Expect(err).NotTo(HaveOccurred())
  2390  
  2391  					c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil)
  2392  					// if we join a node to an existing network, it MUST have already obtained blocks
  2393  					// up to the config block that adds this node to the cluster.
  2394  					c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0))
  2395  					c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0))
  2396  					c4.init()
  2397  
  2398  					network.addChain(c4)
  2399  					c4.Start()
  2400  
  2401  					Eventually(func() <-chan raft.SoftState {
  2402  						c1.clock.Increment(interval)
  2403  						return c4.observe
  2404  					}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateFollower)))
  2405  
  2406  					By("Submitting tx to confirm network is still working")
  2407  					Expect(c1.Order(env, 0)).To(Succeed())
  2408  
  2409  					network.exec(func(c *chain) {
  2410  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2411  						Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2412  					})
  2413  				})
  2414  			})
  2415  		})
  2416  
  2417  		When("3/3 nodes are running", func() {
  2418  			JustBeforeEach(func() {
  2419  				network.init()
  2420  				network.start()
  2421  				network.elect(1)
  2422  			})
  2423  
  2424  			AfterEach(func() {
  2425  				network.stop()
  2426  			})
  2427  
  2428  			It("correctly sets the cluster size and leadership metrics", func() {
  2429  				// the network should see only one leadership change
  2430  				network.exec(func(c *chain) {
  2431  					Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(1))
  2432  					Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(0)).Should(Equal(float64(1)))
  2433  					Expect(c.fakeFields.fakeClusterSize.SetCallCount()).Should(Equal(1))
  2434  					Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(3)))
  2435  				})
  2436  				// c1 should be the leader
  2437  				Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2))
  2438  				Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1)))
  2439  				// c2 and c3 should continue to remain followers
  2440  				Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1))
  2441  				Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0)))
  2442  				Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1))
  2443  				Expect(c3.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0)))
  2444  			})
  2445  
  2446  			It("orders envelope on leader", func() {
  2447  				By("instructed to cut next block")
  2448  				c1.cutter.CutNext = true
  2449  				err := c1.Order(env, 0)
  2450  				Expect(err).NotTo(HaveOccurred())
  2451  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
  2452  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2453  
  2454  				network.exec(
  2455  					func(c *chain) {
  2456  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2457  					})
  2458  
  2459  				By("respect batch timeout")
  2460  				c1.cutter.CutNext = false
  2461  
  2462  				err = c1.Order(env, 0)
  2463  				Expect(err).NotTo(HaveOccurred())
  2464  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2))
  2465  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1)))
  2466  				Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
  2467  
  2468  				c1.clock.WaitForNWatchersAndIncrement(timeout, 2)
  2469  				network.exec(
  2470  					func(c *chain) {
  2471  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2472  					})
  2473  			})
  2474  
  2475  			It("orders envelope on follower", func() {
  2476  				By("instructed to cut next block")
  2477  				c1.cutter.CutNext = true
  2478  				err := c2.Order(env, 0)
  2479  				Expect(err).NotTo(HaveOccurred())
  2480  				Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
  2481  				Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2482  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0))
  2483  
  2484  				network.exec(
  2485  					func(c *chain) {
  2486  						Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2487  					})
  2488  
  2489  				By("respect batch timeout")
  2490  				c1.cutter.CutNext = false
  2491  
  2492  				err = c2.Order(env, 0)
  2493  				Expect(err).NotTo(HaveOccurred())
  2494  				Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2))
  2495  				Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1)))
  2496  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0))
  2497  				Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
  2498  
  2499  				c1.clock.WaitForNWatchersAndIncrement(timeout, 2)
  2500  				network.exec(
  2501  					func(c *chain) {
  2502  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2503  					})
  2504  			})
  2505  
  2506  			When("MaxInflightBlocks is reached", func() {
  2507  				BeforeEach(func() {
  2508  					network.exec(func(c *chain) { c.opts.MaxInflightBlocks = 1 })
  2509  				})
  2510  
  2511  				It("waits for in flight blocks to be committed", func() {
  2512  					c1.cutter.CutNext = true
  2513  					// disconnect c1 to disrupt consensus
  2514  					network.disconnect(1)
  2515  
  2516  					Expect(c1.Order(env, 0)).To(Succeed())
  2517  
  2518  					doneProp := make(chan struct{})
  2519  					go func() {
  2520  						defer GinkgoRecover()
  2521  						Expect(c1.Order(env, 0)).To(Succeed())
  2522  						close(doneProp)
  2523  					}()
  2524  					// expect second `Order` to block
  2525  					Consistently(doneProp).ShouldNot(BeClosed())
  2526  					network.exec(func(c *chain) {
  2527  						Consistently(c.support.WriteBlockCallCount).Should(BeZero())
  2528  					})
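        					// With MaxInflightBlocks = 1 the leader is expected to hold back further proposals until
        					// the in-flight block commits; nothing can commit while c1 is disconnected, so the second
        					// Order call stays blocked until connectivity is restored below.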
  2529  
  2530  					network.connect(1)
  2531  					c1.clock.Increment(interval)
  2532  
  2533  					Eventually(doneProp, LongEventualTimeout).Should(BeClosed())
  2534  					network.exec(func(c *chain) {
  2535  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2536  					})
  2537  				})
  2538  
  2539  				It("resets blocks in flight when it steps down as leader", func() {
  2540  					c1.cutter.CutNext = true
  2541  					c2.cutter.CutNext = true
  2542  					// disconnect c1 to disrupt consensus
  2543  					network.disconnect(1)
  2544  
  2545  					Expect(c1.Order(env, 0)).To(Succeed())
  2546  
  2547  					doneProp := make(chan struct{})
  2548  					go func() {
  2549  						defer GinkgoRecover()
  2550  
  2551  						Expect(c1.Order(env, 0)).To(Succeed())
  2552  						close(doneProp)
  2553  					}()
  2554  					// expect second `Order` to block
  2555  					Consistently(doneProp).ShouldNot(BeClosed())
  2556  					network.exec(func(c *chain) {
  2557  						Consistently(c.support.WriteBlockCallCount).Should(BeZero())
  2558  					})
  2559  
  2560  					network.elect(2)
  2561  					Expect(c3.Order(env, 0)).To(Succeed())
  2562  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  2563  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2564  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2565  
  2566  					network.connect(1)
  2567  					c2.clock.Increment(interval)
  2568  
  2569  					Eventually(doneProp, LongEventualTimeout).Should(BeClosed())
  2570  					network.exec(func(c *chain) {
  2571  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2572  					})
  2573  				})
  2574  			})
  2575  
  2576  			When("leader is disconnected", func() {
  2577  				It("proactively steps down to follower", func() {
  2578  					network.disconnect(1)
  2579  
  2580  					By("Ticking leader until it steps down")
  2581  					Eventually(func() <-chan raft.SoftState {
  2582  						c1.clock.Increment(interval)
  2583  						return c1.observe
  2584  					}, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StateFollower})))
  2585  
  2586  					By("Ensuring it does not accept messages due to the cluster being leaderless")
  2587  					err := c1.Order(env, 0)
  2588  					Expect(err).To(MatchError("no Raft leader"))
  2589  
  2590  					network.elect(2)
  2591  
  2592  					// c1 should have lost leadership
  2593  					Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(3))
  2594  					Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(2)).Should(Equal(float64(0)))
  2595  					// c2 should become the leader
  2596  					Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2))
  2597  					Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1)))
  2598  					// c3 should remain a follower
  2599  					Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1))
  2600  
  2601  					network.join(1, true)
  2602  					network.exec(func(c *chain) {
  2603  						Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(3))
  2604  						Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(2)).Should(Equal(float64(1)))
  2605  					})
  2606  
  2607  					err = c1.Order(env, 0)
  2608  					Expect(err).NotTo(HaveOccurred())
  2609  				})
  2610  
  2611  				It("does not deadlock if propose is blocked", func() {
  2612  					signal := make(chan struct{})
  2613  					c1.cutter.CutNext = true
  2614  					c1.support.SequenceStub = func() uint64 {
  2615  						signal <- struct{}{}
  2616  						<-signal
  2617  						return 0
  2618  					}
  2619  
  2620  					By("Sending a normal transaction")
  2621  					Expect(c1.Order(env, 0)).To(Succeed())
  2622  
  2623  					Eventually(signal).Should(Receive())
  2624  					network.disconnect(1)
  2625  
  2626  					By("Ticking leader till it steps down")
  2627  					Eventually(func() raft.SoftState {
  2628  						c1.clock.Increment(interval)
  2629  						return c1.Node.Status().SoftState
  2630  					}).Should(StateEqual(0, raft.StateFollower))
  2631  
  2632  					close(signal)
  2633  
  2634  					Eventually(c1.observe).Should(Receive(StateEqual(0, raft.StateFollower)))
  2635  					c1.support.SequenceStub = nil
  2636  					network.exec(func(c *chain) {
  2637  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  2638  					})
  2639  
  2640  					By("Re-electing 1 as leader")
  2641  					network.connect(1)
  2642  					network.elect(1)
  2643  
  2644  					By("Sending another normal transaction")
  2645  					Expect(c1.Order(env, 0)).To(Succeed())
  2646  
  2647  					network.exec(func(c *chain) {
  2648  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2649  					})
  2650  				})
  2651  			})
  2652  
  2653  			When("follower is disconnected", func() {
  2654  				It("should return error when receiving an env", func() {
  2655  					network.disconnect(2)
  2656  
  2657  					errorC := c2.Errored()
  2658  					Consistently(errorC).ShouldNot(BeClosed()) // assert that errorC is not closed
  2659  
  2660  					By("Ticking node 2 until it becomes pre-candidate")
  2661  					Eventually(func() <-chan raft.SoftState {
  2662  						c2.clock.Increment(interval)
  2663  						return c2.observe
  2664  					}, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StatePreCandidate})))
  2665  
  2666  					Eventually(errorC).Should(BeClosed())
  2667  					err := c2.Order(env, 0)
  2668  					Expect(err).To(HaveOccurred())
  2669  					Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
  2670  					Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2671  					Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0))
  2672  
  2673  					network.connect(2)
  2674  					c1.clock.Increment(interval)
  2675  					Expect(errorC).To(BeClosed())
  2676  
  2677  					Eventually(c2.Errored).ShouldNot(BeClosed())
  2678  				})
  2679  			})
  2680  
  2681  			It("leader retransmits lost messages", func() {
  2682  				// This tests that heartbeats will trigger leader to retransmit lost MsgApp
  2683  
  2684  				c1.cutter.CutNext = true
  2685  
  2686  				network.disconnect(1) // drop MsgApp
  2687  
  2688  				err := c1.Order(env, 0)
  2689  				Expect(err).NotTo(HaveOccurred())
  2690  
  2691  				network.exec(
  2692  					func(c *chain) {
  2693  						Consistently(func() int { return c.support.WriteBlockCallCount() }).Should(Equal(0))
  2694  					})
  2695  
  2696  				network.connect(1) // reconnect leader
  2697  
  2698  				c1.clock.Increment(interval) // trigger a heartbeat
  2699  				network.exec(
  2700  					func(c *chain) {
  2701  						Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2702  					})
  2703  			})
  2704  
  2705  			It("allows the leader to create multiple normal blocks without having to wait for them to be written out", func() {
  2706  				// this ensures that the created blocks are not written out
  2707  				network.disconnect(1)
  2708  
  2709  				c1.cutter.CutNext = true
  2710  				for i := 0; i < 3; i++ {
  2711  					Expect(c1.Order(env, 0)).To(Succeed())
  2712  				}
  2713  
  2714  				Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))
  2715  
  2716  				network.connect(1)
  2717  
  2718  				// After FAB-13722, the leader pauses replication if it is notified that message
  2719  				// delivery to a certain node has failed, e.g. connection refused. Replication to that
  2720  				// follower is resumed once the leader receives a MsgHeartbeatResp from it.
  2721  				// We could certainly tick the leader repeatedly to trigger heartbeat broadcasts, but we
  2722  				// would also risk a slow leader stepping down due to excessive ticks.
  2723  				//
  2724  				// Instead, we can simply send artificial MsgHeartbeatResp to leader to resume.
  2725  				m2 := &raftpb.Message{To: c1.id, From: c2.id, Type: raftpb.MsgHeartbeatResp}
  2726  				c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m2)}, c2.id)
  2727  				m3 := &raftpb.Message{To: c1.id, From: c3.id, Type: raftpb.MsgHeartbeatResp}
  2728  				c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m3)}, c3.id)
  2729  
  2730  				network.exec(func(c *chain) {
  2731  					Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
  2732  				})
  2733  			})
  2734  
  2735  			It("new leader should wait for in-flight blocks to commit before accepting new env", func() {
  2736  				// Scenario: when a node is elected as new leader and there are still in-flight blocks,
  2737  				// it should not immediately start accepting new envelopes, instead it should wait for
  2738  				// those in-flight blocks to be committed, otherwise we may create an uncle block which
  2739  				// forks and panics the chain.
  2740  				//
  2741  				// Steps:
  2742  				// - start raft cluster with three nodes and genesis block0
  2743  				// - order env1 on c1, which creates block1
  2744  				// - drop MsgApp from 1 to 3
  2745  				// - drop second round of MsgApp sent from 1 to 2, so that block1 is only committed on c1
  2746  				// - disconnect c1 and elect c2
  2747  				// - order env2 on c2. This env must NOT be immediately accepted, otherwise c2 would create
  2748  				//   an uncle block1 based on block0.
  2749  				// - c2 commits block1
  2750  				// - c2 accepts env2, and creates block2
  2751  				// - c2 commits block2
  2752  				c1.cutter.CutNext = true
  2753  				c2.cutter.CutNext = true
  2754  
  2755  				step1 := c1.getStepFunc()
  2756  				c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2757  					stepMsg := &raftpb.Message{}
  2758  					Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred())
  2759  
  2760  					if dest == 3 {
  2761  						return nil
  2762  					}
  2763  
  2764  					if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) == 0 {
  2765  						return nil
  2766  					}
  2767  
  2768  					return step1(dest, msg)
  2769  				})
  2770  
  2771  				Expect(c1.Order(env, 0)).NotTo(HaveOccurred())
  2772  
  2773  				Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2774  				Consistently(c2.support.WriteBlockCallCount).Should(Equal(0))
  2775  				Consistently(c3.support.WriteBlockCallCount).Should(Equal(0))
  2776  
  2777  				network.disconnect(1)
  2778  
  2779  				step2 := c2.getStepFunc()
  2780  				c2.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2781  					stepMsg := &raftpb.Message{}
  2782  					Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred())
  2783  
  2784  					if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) != 0 && dest == 3 {
  2785  						for _, ent := range stepMsg.Entries {
  2786  							if len(ent.Data) != 0 {
  2787  								return nil
  2788  							}
  2789  						}
  2790  					}
  2791  					return step2(dest, msg)
  2792  				})
  2793  
  2794  				network.elect(2)
  2795  
  2796  				go func() {
  2797  					defer GinkgoRecover()
  2798  					Expect(c2.Order(env, 0)).NotTo(HaveOccurred())
  2799  				}()
  2800  
  2801  				Consistently(c2.support.WriteBlockCallCount).Should(Equal(0))
  2802  				Consistently(c3.support.WriteBlockCallCount).Should(Equal(0))
  2803  
  2804  				c2.setStepFunc(step2)
  2805  				c2.clock.Increment(interval)
  2806  
  2807  				Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2808  				Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2809  
  2810  				b, _ := c2.support.WriteBlockArgsForCall(0)
  2811  				Expect(b.Header.Number).To(Equal(uint64(1)))
  2812  				b, _ = c2.support.WriteBlockArgsForCall(1)
  2813  				Expect(b.Header.Number).To(Equal(uint64(2)))
  2814  			})
  2815  
  2816  			Context("handling config blocks", func() {
  2817  				var configEnv *common.Envelope
  2818  				BeforeEach(func() {
  2819  					values := map[string]*common.ConfigValue{
  2820  						"BatchTimeout": {
  2821  							Version: 1,
  2822  							Value: marshalOrPanic(&orderer.BatchTimeout{
  2823  								Timeout: "3ms",
  2824  							}),
  2825  						},
  2826  					}
  2827  					configEnv = newConfigEnv(channelID,
  2828  						common.HeaderType_CONFIG,
  2829  						newConfigUpdateEnv(channelID, nil, values),
  2830  					)
  2831  				})
  2832  
  2833  				It("holds up block creation on leader once a config block has been created and not written out", func() {
  2834  					// this ensures that the created blocks are not written out
  2835  					network.disconnect(1)
  2836  
  2837  					c1.cutter.CutNext = true
  2838  					// config block
  2839  					err := c1.Order(configEnv, 0)
  2840  					Expect(err).NotTo(HaveOccurred())
  2841  
  2842  					// to avoid data races since we are accessing these within a goroutine
  2843  					tempEnv := env
  2844  					tempC1 := c1
  2845  
  2846  					done := make(chan struct{})
  2847  
  2848  					// normal block
  2849  					go func() {
  2850  						defer GinkgoRecover()
  2851  
  2852  						// This should be blocked if config block is not committed
  2853  						err := tempC1.Order(tempEnv, 0)
  2854  						Expect(err).NotTo(HaveOccurred())
  2855  
  2856  						close(done)
  2857  					}()
  2858  
  2859  					Consistently(done).ShouldNot(BeClosed())
  2860  
  2861  					network.connect(1)
  2862  					c1.clock.Increment(interval)
  2863  
  2864  					network.exec(
  2865  						func(c *chain) {
  2866  							Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2867  						})
  2868  
  2869  					network.exec(
  2870  						func(c *chain) {
  2871  							Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2872  						})
  2873  				})
  2874  
  2875  				It("continues creating blocks on leader after a config block has been successfully written out", func() {
  2876  					c1.cutter.CutNext = true
  2877  					// config block
  2878  					err := c1.Configure(configEnv, 0)
  2879  					Expect(err).NotTo(HaveOccurred())
  2880  					network.exec(
  2881  						func(c *chain) {
  2882  							Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2883  						})
  2884  
  2885  					// normal block following config block
  2886  					err = c1.Order(env, 0)
  2887  					Expect(err).NotTo(HaveOccurred())
  2888  					network.exec(
  2889  						func(c *chain) {
  2890  							Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2891  						})
  2892  				})
  2893  			})
  2894  
  2895  			When("Snapshotting is enabled", func() {
  2896  				BeforeEach(func() {
  2897  					c1.opts.SnapshotIntervalSize = 1
  2898  					c1.opts.SnapshotCatchUpEntries = 1
  2899  				})
  2900  
  2901  				It("keeps running if some entries in memory are purged", func() {
  2902  					// Scenario: snapshotting is enabled on node 1 and it purges memory storage
  2903  					// after every snapshot. The cluster should keep functioning correctly.
  2904  
  2905  					i, err := c1.opts.MemoryStorage.FirstIndex()
  2906  					Expect(err).NotTo(HaveOccurred())
  2907  					Expect(i).To(Equal(uint64(1)))
  2908  
  2909  					c1.cutter.CutNext = true
  2910  
  2911  					err = c1.Order(env, 0)
  2912  					Expect(err).NotTo(HaveOccurred())
  2913  
  2914  					network.exec(
  2915  						func(c *chain) {
  2916  							Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2917  						})
  2918  
  2919  					Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
  2920  					i, err = c1.opts.MemoryStorage.FirstIndex()
  2921  					Expect(err).NotTo(HaveOccurred())
  2922  
  2923  					err = c1.Order(env, 0)
  2924  					Expect(err).NotTo(HaveOccurred())
  2925  
  2926  					network.exec(
  2927  						func(c *chain) {
  2928  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2929  						})
  2930  
  2931  					Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
  2932  					i, err = c1.opts.MemoryStorage.FirstIndex()
  2933  					Expect(err).NotTo(HaveOccurred())
  2934  
  2935  					err = c1.Order(env, 0)
  2936  					Expect(err).NotTo(HaveOccurred())
  2937  
  2938  					network.exec(
  2939  						func(c *chain) {
  2940  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
  2941  						})
  2942  
  2943  					Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
  2944  				})
  2945  
  2946  				It("lagged node can catch up using snapshot", func() {
  2947  					network.disconnect(2)
  2948  					c1.cutter.CutNext = true
  2949  
  2950  					c2Lasti, _ := c2.opts.MemoryStorage.LastIndex()
  2951  					var blockCnt int
  2952  					// Order blocks until first index of c1 memory is greater than last index of c2,
  2953  					// so a snapshot will be sent to c2 when it rejoins network
  2954  					Eventually(func() bool {
  2955  						c1Firsti, _ := c1.opts.MemoryStorage.FirstIndex()
  2956  						if c1Firsti > c2Lasti+1 {
  2957  							return true
  2958  						}
  2959  
  2960  						Expect(c1.Order(env, 0)).To(Succeed())
  2961  						blockCnt++
  2962  						Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt))
  2963  						Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt))
  2964  						return false
  2965  					}, LongEventualTimeout).Should(BeTrue())
  2966  
  2967  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  2968  
  2969  					network.join(2, false)
  2970  
  2971  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt))
  2972  					indices := etcdraft.ListSnapshots(logger, c2.opts.SnapDir)
  2973  					Expect(indices).To(HaveLen(1))
  2974  					gap := indices[0] - c2Lasti
  2975  
  2976  					// TODO In theory, "equal" is the accurate behavior we expect. However, eviction suspector,
  2977  					// which calls the block puller, still relies on the real clock and sometimes increments the puller
  2978  					// call count. Therefore we are being more lenient here until the suspector starts using a fake clock,
  2979  					// so we have more deterministic control over it.
  2980  					Expect(c2.puller.PullBlockCallCount()).To(BeNumerically(">=", int(gap)))
  2981  
  2982  					// chain should keep functioning
  2983  					Expect(c2.Order(env, 0)).To(Succeed())
  2984  
  2985  					network.exec(
  2986  						func(c *chain) {
  2987  							Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(blockCnt + 1))
  2988  						})
  2989  				})
  2990  			})
  2991  
  2992  			Context("failover", func() {
  2993  				It("follower should step up as leader upon failover", func() {
  2994  					network.stop(1)
  2995  					network.elect(2)
  2996  
  2997  					By("order envelope on new leader")
  2998  					c2.cutter.CutNext = true
  2999  					err := c2.Order(env, 0)
  3000  					Expect(err).NotTo(HaveOccurred())
  3001  
  3002  					// block should not be produced on chain 1
  3003  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3004  
  3005  					// block should be produced on chain 2 & 3
  3006  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3007  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3008  
  3009  					By("order envelope on follower")
  3010  					err = c3.Order(env, 0)
  3011  					Expect(err).NotTo(HaveOccurred())
  3012  
  3013  					// block should not be produced on chain 1
  3014  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3015  
  3016  					// block should be produced on chain 2 & 3
  3017  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  3018  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  3019  				})
  3020  
  3021  				It("follower cannot be elected if its log is not up-to-date", func() {
  3022  					network.disconnect(2)
  3023  
  3024  					c1.cutter.CutNext = true
  3025  					err := c1.Order(env, 0)
  3026  					Expect(err).NotTo(HaveOccurred())
  3027  
  3028  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3029  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3030  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3031  
  3032  					network.disconnect(1)
  3033  					network.connect(2)
  3034  
  3035  					// node 2 has not caught up with other nodes
  3036  					for tick := 0; tick < 2*ELECTION_TICK-1; tick++ {
  3037  						c2.clock.Increment(interval)
  3038  						Consistently(c2.observe).ShouldNot(Receive(Equal(2)))
  3039  					}
  3040  
  3041  					// When PreVote is enabled, node 2 would fail to collect enough
  3042  					// PreVote because its index is not up-to-date. Therefore, it
  3043  					// does not cause leader change on other nodes.
  3044  					Consistently(c3.observe).ShouldNot(Receive())
  3045  					network.elect(3) // node 3 has newest logs among 2&3, so it can be elected
  3046  				})
  3047  
  3048  				It("PreVote prevents reconnected node from disturbing network", func() {
  3049  					network.disconnect(2)
  3050  
  3051  					c1.cutter.CutNext = true
  3052  					err := c1.Order(env, 0)
  3053  					Expect(err).NotTo(HaveOccurred())
  3054  
  3055  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3056  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3057  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3058  
  3059  					network.connect(2)
  3060  
  3061  					for tick := 0; tick < 2*ELECTION_TICK-1; tick++ {
  3062  						c2.clock.Increment(interval)
  3063  						Consistently(c2.observe).ShouldNot(Receive(Equal(2)))
  3064  					}
  3065  
  3066  					Consistently(c1.observe).ShouldNot(Receive())
  3067  					Consistently(c3.observe).ShouldNot(Receive())
  3068  				})
  3069  
  3070  				It("follower can catch up and then campaign with success", func() {
  3071  					network.disconnect(2)
  3072  
  3073  					c1.cutter.CutNext = true
  3074  					for i := 0; i < 10; i++ {
  3075  						err := c1.Order(env, 0)
  3076  						Expect(err).NotTo(HaveOccurred())
  3077  					}
  3078  
  3079  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))
  3080  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3081  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))
  3082  
  3083  					network.join(2, false)
  3084  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))
  3085  
  3086  					network.disconnect(1)
  3087  					network.elect(2)
  3088  				})
  3089  
  3090  				It("purges blockcutter, stops timer and discards created blocks if leadership is lost", func() {
  3091  					// enqueue one transaction into node 1's blockcutter to test that the blockcutter is purged
  3092  					c1.cutter.CutNext = false
  3093  					err := c1.Order(env, 0)
  3094  					Expect(err).NotTo(HaveOccurred())
  3095  					Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
  3096  
  3097  					// no block should be written because env is not cut into block yet
  3098  					c1.clock.WaitForNWatchersAndIncrement(interval, 2)
  3099  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))
  3100  
  3101  					network.disconnect(1)
  3102  					network.elect(2)
  3103  					network.join(1, true)
  3104  
  3105  					Eventually(c1.clock.WatcherCount, LongEventualTimeout).Should(Equal(1)) // blockcutter timer is stopped
  3106  					Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(0))
  3107  					// the created block should be discarded since there is a leadership change
  3108  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))
  3109  
  3110  					network.disconnect(2)
  3111  					network.elect(1)
  3112  
  3113  					err = c1.Order(env, 0)
  3114  					Expect(err).NotTo(HaveOccurred())
  3115  
  3116  					// The following group of assertions is redundant - it's here for completeness.
  3117  					// If the blockcutter has not been reset, fast-forwarding 1's clock to 'timeout' should result in the blockcutter firing.
  3118  					// If the blockcutter has been reset, fast-forwarding won't do anything.
  3119  					//
  3120  					// Put differently:
  3121  					//
  3122  					// correct:
  3123  					// stop         start                      fire
  3124  					// |--------------|---------------------------|
  3125  					//    n*intervals              timeout
  3126  					// (advanced in election)
  3127  					//
  3128  					// wrong:
  3129  					// unstop                   fire
  3130  					// |---------------------------|
  3131  					//          timeout
  3132  					//
  3133  					//              timeout-n*interval   n*interval
  3134  					//                 |-----------|----------------|
  3135  					//                             ^                ^
  3136  					//                at this point of time     it should fire
  3137  					//                timer should not fire     at this point
  3138  
  3139  					c1.clock.WaitForNWatchersAndIncrement(timeout-interval, 2)
  3140  					Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  3141  					Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  3142  
  3143  					c1.clock.Increment(interval)
  3144  					Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  3145  					Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  3146  				})
  3147  
  3148  				It("stale leader should not be able to propose block because of lagged term", func() {
  3149  					network.disconnect(1)
  3150  					network.elect(2)
  3151  					network.connect(1)
  3152  
  3153  					c1.cutter.CutNext = true
  3154  					err := c1.Order(env, 0)
  3155  					Expect(err).NotTo(HaveOccurred())
  3156  
  3157  					network.exec(
  3158  						func(c *chain) {
  3159  							Consistently(c.support.WriteBlockCallCount).Should(Equal(0))
  3160  						})
  3161  				})
  3162  
  3163  				It("aborts waiting for block to be committed upon leadership lost", func() {
  3164  					network.disconnect(1)
  3165  
  3166  					c1.cutter.CutNext = true
  3167  					err := c1.Order(env, 0)
  3168  					Expect(err).NotTo(HaveOccurred())
  3169  
  3170  					network.exec(
  3171  						func(c *chain) {
  3172  							Consistently(c.support.WriteBlockCallCount).Should(Equal(0))
  3173  						})
  3174  
  3175  					network.elect(2)
  3176  					network.connect(1)
  3177  
  3178  					c2.clock.Increment(interval)
  3179  					// this check guarantees that signal on resignC is consumed in commitBatches method.
  3180  					Eventually(c1.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 2, RaftState: raft.StateFollower})))
  3181  				})
  3182  			})
  3183  		})
  3184  	})
  3185  })
  3186  
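        // nodeConfigFromMetadata converts consenter metadata into cluster.RemoteNode entries
        // for the communication layer, skipping the first consenter (the local node) and
        // decoding the PEM-encoded TLS certificates into raw DER bytes.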
  3187  func nodeConfigFromMetadata(consenterMetadata *raftprotos.ConfigMetadata) []cluster.RemoteNode {
  3188  	var nodes []cluster.RemoteNode
  3189  	for i, consenter := range consenterMetadata.Consenters {
  3190  		// For now, skip ourselves
  3191  		if i == 0 {
  3192  			continue
  3193  		}
  3194  		serverDER, _ := pem.Decode(consenter.ServerTlsCert)
  3195  		clientDER, _ := pem.Decode(consenter.ClientTlsCert)
  3196  		node := cluster.RemoteNode{
  3197  			ID:            uint64(i + 1),
  3198  			Endpoint:      "localhost:7050",
  3199  			ServerTLSCert: serverDER.Bytes,
  3200  			ClientTLSCert: clientDER.Bytes,
  3201  		}
  3202  		nodes = append(nodes, node)
  3203  	}
  3204  	return nodes
  3205  }
  3206  
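        // createMetadata builds a ConfigMetadata with the test Raft options and nodeCount
        // consenters, each carrying fresh server and client TLS certificates issued by tlsCA.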
  3207  func createMetadata(nodeCount int, tlsCA tlsgen.CA) *raftprotos.ConfigMetadata {
  3208  	md := &raftprotos.ConfigMetadata{Options: &raftprotos.Options{
  3209  		TickInterval:      time.Duration(interval).String(),
  3210  		ElectionTick:      ELECTION_TICK,
  3211  		HeartbeatTick:     HEARTBEAT_TICK,
  3212  		MaxInflightBlocks: 5,
  3213  	}}
  3214  	for i := 0; i < nodeCount; i++ {
  3215  		md.Consenters = append(md.Consenters, &raftprotos.Consenter{
  3216  			Host:          "localhost",
  3217  			Port:          7050,
  3218  			ServerTlsCert: serverTLSCert(tlsCA),
  3219  			ClientTlsCert: clientTLSCert(tlsCA),
  3220  		})
  3221  	}
  3222  	return md
  3223  }
  3224  
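        // serverTLSCert issues a new server TLS certificate for localhost from tlsCA, panicking on failure.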
  3225  func serverTLSCert(tlsCA tlsgen.CA) []byte {
  3226  	cert, err := tlsCA.NewServerCertKeyPair("localhost")
  3227  	if err != nil {
  3228  		panic(err)
  3229  	}
  3230  	return cert.Cert
  3231  }
  3232  
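        // clientTLSCert issues a new client TLS certificate from tlsCA, panicking on failure.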
  3233  func clientTLSCert(tlsCA tlsgen.CA) []byte {
  3234  	cert, err := tlsCA.NewClientCertKeyPair()
  3235  	if err != nil {
  3236  		panic(err)
  3237  	}
  3238  	return cert.Cert
  3239  }
  3240  
  3241  // marshalOrPanic serializes a protobuf message and panics if this
  3242  // operation fails
  3243  func marshalOrPanic(pb proto.Message) []byte {
  3244  	data, err := proto.Marshal(pb)
  3245  	if err != nil {
  3246  		panic(err)
  3247  	}
  3248  	return data
  3249  }
  3250  
  3251  // helpers to facilitate tests
  3252  type stepFunc func(dest uint64, msg *orderer.ConsensusRequest) error
  3253  
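        // chain wraps an etcdraft.Chain together with the fakes it is wired to (consenter
        // support, block cutter, RPC, clock, block puller) and an in-memory ledger, so tests
        // can drive a single node and observe its behaviour.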
  3254  type chain struct {
  3255  	id uint64
  3256  
  3257  	stepLock sync.Mutex
  3258  	step     stepFunc
  3259  
  3260  	// msgBuffer serializes ingress messages for a chain
  3261  	// so they are delivered in the same order
  3262  	msgBuffer chan *msg
  3263  
  3264  	support      *consensusmocks.FakeConsenterSupport
  3265  	cutter       *mockblockcutter.Receiver
  3266  	configurator *mocks.FakeConfigurator
  3267  	rpc          *mocks.FakeRPC
  3268  	storage      *raft.MemoryStorage
  3269  	clock        *fakeclock.FakeClock
  3270  	opts         etcdraft.Options
  3271  	puller       *mocks.FakeBlockPuller
  3272  
  3273  	// store written blocks to be returned by mock block puller
  3274  	ledgerLock            sync.RWMutex
  3275  	ledger                map[uint64]*common.Block
  3276  	ledgerHeight          uint64
  3277  	lastConfigBlockNumber uint64
  3278  
  3279  	observe   chan raft.SoftState
  3280  	unstarted chan struct{}
  3281  	stopped   chan struct{}
  3282  
  3283  	fakeFields *fakeMetricsFields
  3284  
  3285  	*etcdraft.Chain
  3286  
  3287  	cryptoProvider bccsp.BCCSP
  3288  }
  3289  
  3290  type msg struct {
  3291  	req    *orderer.ConsensusRequest
  3292  	sender uint64
  3293  }
  3294  
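        // newChain creates a chain backed entirely by fakes: a fake clock, in-memory Raft
        // storage, a mock block cutter, and WriteBlock/WriteConfigBlock stubs that append
        // blocks to the in-memory ledger. A dedicated goroutine drains msgBuffer into
        // Consensus so ingress messages are processed in order.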
  3295  func newChain(
  3296  	timeout time.Duration,
  3297  	channel, dataDir string,
  3298  	id uint64,
  3299  	raftMetadata *raftprotos.BlockMetadata,
  3300  	consenters map[uint64]*raftprotos.Consenter,
  3301  	cryptoProvider bccsp.BCCSP,
  3302  	support *consensusmocks.FakeConsenterSupport,
  3303  ) *chain {
  3304  	rpc := &mocks.FakeRPC{}
  3305  	clock := fakeclock.NewFakeClock(time.Now())
  3306  	storage := raft.NewMemoryStorage()
  3307  
  3308  	fakeFields := newFakeMetricsFields()
  3309  
  3310  	opts := etcdraft.Options{
  3311  		RaftID:              uint64(id),
  3312  		Clock:               clock,
  3313  		TickInterval:        interval,
  3314  		ElectionTick:        ELECTION_TICK,
  3315  		HeartbeatTick:       HEARTBEAT_TICK,
  3316  		MaxSizePerMsg:       1024 * 1024,
  3317  		MaxInflightBlocks:   256,
  3318  		BlockMetadata:       raftMetadata,
  3319  		LeaderCheckInterval: 500 * time.Millisecond,
  3320  		Consenters:          consenters,
  3321  		Logger:              flogging.NewFabricLogger(zap.NewExample()),
  3322  		MemoryStorage:       storage,
  3323  		WALDir:              path.Join(dataDir, "wal"),
  3324  		SnapDir:             path.Join(dataDir, "snapshot"),
  3325  		Metrics:             newFakeMetrics(fakeFields),
  3326  	}
  3327  
  3328  	if support == nil {
  3329  		support = &consensusmocks.FakeConsenterSupport{}
  3330  		support.ChannelIDReturns(channel)
  3331  		support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
  3332  	}
  3333  	cutter := mockblockcutter.NewReceiver()
  3334  	close(cutter.Block)
  3335  	support.BlockCutterReturns(cutter)
  3336  
  3337  	// upon leader change, lead is reset to 0 before being set to the actual
  3338  	// new leader, i.e. 1 -> 0 -> 2. Therefore 2 values will be
  3339  	// sent on this chan, so we need its size to be 2
  3340  	observe := make(chan raft.SoftState, 2)
  3341  
  3342  	configurator := &mocks.FakeConfigurator{}
  3343  	puller := &mocks.FakeBlockPuller{}
  3344  
  3345  	ch := make(chan struct{})
  3346  	close(ch)
  3347  
  3348  	c := &chain{
  3349  		id:           id,
  3350  		support:      support,
  3351  		cutter:       cutter,
  3352  		rpc:          rpc,
  3353  		storage:      storage,
  3354  		observe:      observe,
  3355  		clock:        clock,
  3356  		opts:         opts,
  3357  		unstarted:    ch,
  3358  		stopped:      make(chan struct{}),
  3359  		configurator: configurator,
  3360  		puller:       puller,
  3361  		ledger: map[uint64]*common.Block{
  3362  			0: getSeedBlock(), // Very first block
  3363  		},
  3364  		ledgerHeight:   1,
  3365  		fakeFields:     fakeFields,
  3366  		cryptoProvider: cryptoProvider,
  3367  		msgBuffer:      make(chan *msg, 500),
  3368  	}
  3369  
  3370  	// receives normal blocks and metadata and appends them to
  3371  	// the in-memory ledger to simulate write behaviour
  3372  	appendNormalBlockToLedger := func(b *common.Block, meta []byte) {
  3373  		c.ledgerLock.Lock()
  3374  		defer c.ledgerLock.Unlock()
  3375  
  3376  		b = proto.Clone(b).(*common.Block)
  3377  		bytes, err := proto.Marshal(&common.Metadata{Value: meta})
  3378  		Expect(err).NotTo(HaveOccurred())
  3379  		b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
  3380  
  3381  		lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber})
  3382  		b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{
  3383  			Value: lastConfigValue,
  3384  		})
  3385  
  3386  		c.ledger[b.Header.Number] = b
  3387  		if c.ledgerHeight < b.Header.Number+1 {
  3388  			c.ledgerHeight = b.Header.Number + 1
  3389  		}
  3390  	}
  3391  
  3392  	// receives config blocks and metadata and appends them to
  3393  	// the in-memory ledger to simulate write behaviour
  3394  	appendConfigBlockToLedger := func(b *common.Block, meta []byte) {
  3395  		c.ledgerLock.Lock()
  3396  		defer c.ledgerLock.Unlock()
  3397  
  3398  		b = proto.Clone(b).(*common.Block)
  3399  		bytes, err := proto.Marshal(&common.Metadata{Value: meta})
  3400  		Expect(err).NotTo(HaveOccurred())
  3401  		b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
  3402  
  3403  		c.lastConfigBlockNumber = b.Header.Number
  3404  
  3405  		lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber})
  3406  		b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{
  3407  			Value: lastConfigValue,
  3408  		})
  3409  
  3410  		c.ledger[b.Header.Number] = b
  3411  		if c.ledgerHeight < b.Header.Number+1 {
  3412  			c.ledgerHeight = b.Header.Number + 1
  3413  		}
  3414  	}
  3415  
  3416  	c.support.WriteBlockStub = appendNormalBlockToLedger
  3417  	c.support.WriteConfigBlockStub = appendConfigBlockToLedger
  3418  
  3419  	// returns current ledger height
  3420  	c.support.HeightStub = func() uint64 {
  3421  		c.ledgerLock.RLock()
  3422  		defer c.ledgerLock.RUnlock()
  3423  		return c.ledgerHeight
  3424  	}
  3425  
  3426  	// reads block from the ledger
  3427  	c.support.BlockStub = func(number uint64) *common.Block {
  3428  		c.ledgerLock.RLock()
  3429  		defer c.ledgerLock.RUnlock()
  3430  		return c.ledger[number]
  3431  	}
  3432  
  3433  	// consume ingress messages for chain
  3434  	go func() {
  3435  		for msg := range c.msgBuffer {
  3436  			c.Consensus(msg.req, msg.sender)
  3437  		}
  3438  	}()
  3439  
  3440  	return c
  3441  }
  3442  
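        // init constructs the underlying etcdraft.Chain from the chain's options and fakes.
        // It is kept separate from newChain so tests can tweak options beforehand.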
  3443  func (c *chain) init() {
  3444  	ch, err := etcdraft.NewChain(
  3445  		c.support,
  3446  		c.opts,
  3447  		c.configurator,
  3448  		c.rpc,
  3449  		c.cryptoProvider,
  3450  		func() (etcdraft.BlockPuller, error) { return c.puller, nil },
  3451  		nil,
  3452  		c.observe,
  3453  	)
  3454  	Expect(err).NotTo(HaveOccurred())
  3455  	c.Chain = ch
  3456  }
  3457  
  3458  func (c *chain) start() {
  3459  	c.unstarted = nil
  3460  	c.Start()
  3461  }
  3462  
  3463  func (c *chain) setStepFunc(f stepFunc) {
  3464  	c.stepLock.Lock()
  3465  	c.step = f
  3466  	c.stepLock.Unlock()
  3467  }
  3468  
  3469  func (c *chain) getStepFunc() stepFunc {
  3470  	c.stepLock.Lock()
  3471  	defer c.stepLock.Unlock()
  3472  	return c.step
  3473  }
  3474  
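        // network groups the chains of a test cluster and tracks per-node connectivity and
        // directional links, so tests can simulate partitions and dropped messages.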
  3475  type network struct {
  3476  	sync.RWMutex
  3477  
  3478  	leader uint64
  3479  	chains map[uint64]*chain
  3480  
  3481  	// links simulates the configuration of the comm layer (links are bi-directional).
  3482  	// If links[left][right] == true, right can send messages to left.
  3483  	links map[uint64]map[uint64]bool
  3484  	// connectivity determines if a node is connected to the network. This is used by tests
  3485  	// to simulate network partitions.
  3486  	connectivity map[uint64]bool
  3487  }
  3488  
  3489  func (n *network) link(from []uint64, to uint64) {
  3490  	links := make(map[uint64]bool)
  3491  	for _, id := range from {
  3492  		links[id] = true
  3493  	}
  3494  
  3495  	n.Lock()
  3496  	defer n.Unlock()
  3497  
  3498  	n.links[to] = links
  3499  }
  3500  
  3501  func (n *network) linked(from, to uint64) bool {
  3502  	n.RLock()
  3503  	defer n.RUnlock()
  3504  
  3505  	return n.links[to][from]
  3506  }
  3507  
  3508  func (n *network) connect(id uint64) {
  3509  	n.Lock()
  3510  	defer n.Unlock()
  3511  
  3512  	n.connectivity[id] = true
  3513  }
  3514  
  3515  func (n *network) disconnect(id uint64) {
  3516  	n.Lock()
  3517  	defer n.Unlock()
  3518  
  3519  	n.connectivity[id] = false
  3520  }
  3521  
  3522  func (n *network) connected(id uint64) bool {
  3523  	n.RLock()
  3524  	defer n.RUnlock()
  3525  
  3526  	return n.connectivity[id]
  3527  }
  3528  
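        // addChain registers the chain with the network and installs step/SendConsensus/SendSubmit
        // stubs that deliver messages to other chains only while both endpoints are linked and
        // connected. It also wires the fake block puller to read blocks from the current leader's ledger.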
  3529  func (n *network) addChain(c *chain) {
  3530  	n.connect(c.id) // chain is connected by default
  3531  
  3532  	c.step = func(dest uint64, req *orderer.ConsensusRequest) error {
  3533  		if !n.linked(c.id, dest) {
  3534  			return errors.Errorf("connection refused")
  3535  		}
  3536  
  3537  		if !n.connected(c.id) || !n.connected(dest) {
  3538  			return errors.Errorf("connection lost")
  3539  		}
  3540  
  3541  		n.RLock()
  3542  		target := n.chains[dest]
  3543  		n.RUnlock()
  3544  		target.msgBuffer <- &msg{req: req, sender: c.id}
  3545  		return nil
  3546  	}
  3547  
  3548  	c.rpc.SendConsensusStub = func(dest uint64, msg *orderer.ConsensusRequest) error {
  3549  		c.stepLock.Lock()
  3550  		defer c.stepLock.Unlock()
  3551  		return c.step(dest, msg)
  3552  	}
  3553  
  3554  	c.rpc.SendSubmitStub = func(dest uint64, msg *orderer.SubmitRequest) error {
  3555  		if !n.linked(c.id, dest) {
  3556  			return errors.Errorf("connection refused")
  3557  		}
  3558  
  3559  		if !n.connected(c.id) || !n.connected(dest) {
  3560  			return errors.Errorf("connection lost")
  3561  		}
  3562  
  3563  		n.RLock()
  3564  		target := n.chains[dest]
  3565  		n.RUnlock()
  3566  		go func() {
  3567  			defer GinkgoRecover()
  3568  			target.Submit(msg, c.id)
  3569  		}()
  3570  		return nil
  3571  	}
  3572  
  3573  	c.puller.PullBlockStub = func(i uint64) *common.Block {
  3574  		n.RLock()
  3575  		leaderChain := n.chains[n.leader]
  3576  		n.RUnlock()
  3577  
  3578  		leaderChain.ledgerLock.RLock()
  3579  		defer leaderChain.ledgerLock.RUnlock()
  3580  		block := leaderChain.ledger[i]
  3581  		return block
  3582  	}
  3583  
  3584  	c.puller.HeightsByEndpointsStub = func() (map[string]uint64, error) {
  3585  		n.RLock()
  3586  		leader := n.chains[n.leader]
  3587  		n.RUnlock()
  3588  
  3589  		if leader == nil {
  3590  			return nil, errors.Errorf("ledger not available")
  3591  		}
  3592  
  3593  		leader.ledgerLock.RLock()
  3594  		defer leader.ledgerLock.RUnlock()
  3595  		return map[string]uint64{"leader": leader.ledgerHeight}, nil
  3596  	}
  3597  
  3598  	c.configurator.ConfigureCalls(func(channel string, nodes []cluster.RemoteNode) {
  3599  		var ids []uint64
  3600  		for _, node := range nodes {
  3601  			ids = append(ids, node.ID)
  3602  		}
  3603  		n.link(ids, c.id)
  3604  	})
  3605  
  3606  	n.Lock()
  3607  	defer n.Unlock()
  3608  	n.chains[c.id] = c
  3609  }
  3610  
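        // createNetwork builds one chain per consenter ID in raftMetadata, each with its own
        // temporary data directory and a mock orderer config carrying the TLS root cert.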
  3611  func createNetwork(
  3612  	timeout time.Duration,
  3613  	channel, dataDir string,
  3614  	raftMetadata *raftprotos.BlockMetadata,
  3615  	consenters map[uint64]*raftprotos.Consenter,
  3616  	cryptoProvider bccsp.BCCSP,
  3617  	tlsCA tlsgen.CA,
  3618  ) *network {
  3619  	n := &network{
  3620  		chains:       make(map[uint64]*chain),
  3621  		connectivity: make(map[uint64]bool),
  3622  		links:        make(map[uint64]map[uint64]bool),
  3623  	}
  3624  
  3625  	for _, nodeID := range raftMetadata.ConsenterIds {
  3626  		dir, err := ioutil.TempDir(dataDir, fmt.Sprintf("node-%d-", nodeID))
  3627  		Expect(err).NotTo(HaveOccurred())
  3628  
  3629  		m := proto.Clone(raftMetadata).(*raftprotos.BlockMetadata)
  3630  		support := &consensusmocks.FakeConsenterSupport{}
  3631  		support.ChannelIDReturns(channel)
  3632  		support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
  3633  		mockOrdererConfig := mockOrdererWithTLSRootCert(timeout, nil, tlsCA)
  3634  		support.SharedConfigReturns(mockOrdererConfig)
  3635  		n.addChain(newChain(timeout, channel, dir, nodeID, m, consenters, cryptoProvider, support))
  3636  	}
  3637  
  3638  	return n
  3639  }
  3640  
  3641  // init is kept separate from createNetwork so tests can alter a chain's configuration before creating it
  3642  func (n *network) init() {
  3643  	n.exec(func(c *chain) { c.init() })
  3644  }
  3645  
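        // start starts the given chains (or all of them when no ids are passed) and waits until
        // each node has consumed its bootstrap ConfChange and reports ready.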
  3646  func (n *network) start(ids ...uint64) {
  3647  	nodes := ids
  3648  	if len(nodes) == 0 {
  3649  		for i := range n.chains {
  3650  			nodes = append(nodes, i)
  3651  		}
  3652  	}
  3653  
  3654  	for _, id := range nodes {
  3655  		n.chains[id].start()
  3656  
  3657  		// When the Raft node bootstraps, it produces a ConfChange
  3658  		// to add itself, which needs to be consumed with Ready().
  3659  		// If there are pending configuration changes in raft,
  3660  		// it refuses to campaign, no matter how many ticks are supplied.
  3661  		// This is not a problem in production code because eventually
  3662  		// raft.Ready will be consumed as real time goes by.
  3663  		//
  3664  		// However, this is problematic when using a fake clock and artificial
  3665  		// ticks. Instead of ticking raft indefinitely until raft.Ready is
  3666  		// consumed, this check is added to indirectly guarantee
  3667  		// that the first ConfChange is actually consumed and we can safely
  3668  		// proceed to tick raft.
  3669  		Eventually(func() error {
  3670  			_, err := n.chains[id].storage.Entries(1, 1, 1)
  3671  			return err
  3672  		}, LongEventualTimeout).ShouldNot(HaveOccurred())
  3673  		Eventually(n.chains[id].WaitReady, LongEventualTimeout).ShouldNot(HaveOccurred())
  3674  	}
  3675  }
  3676  
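        // stop halts the given chains (or all of them when no ids are passed), waits for their
        // error channels to close, and marks them as stopped.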
  3677  func (n *network) stop(ids ...uint64) {
  3678  	nodes := ids
  3679  	if len(nodes) == 0 {
  3680  		for i := range n.chains {
  3681  			nodes = append(nodes, i)
  3682  		}
  3683  	}
  3684  
  3685  	for _, id := range nodes {
  3686  		c := n.chains[id]
  3687  		c.Halt()
  3688  		Eventually(c.Errored).Should(BeClosed())
  3689  		select {
  3690  		case <-c.stopped:
  3691  		default:
  3692  			close(c.stopped)
  3693  		}
  3694  	}
  3695  }
  3696  
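        // exec runs f on the chains with the given ids, or on every chain when no ids are passed.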
  3697  func (n *network) exec(f func(c *chain), ids ...uint64) {
  3698  	if len(ids) == 0 {
  3699  		for _, c := range n.chains {
  3700  			f(c)
  3701  		}
  3702  
  3703  		return
  3704  	}
  3705  
  3706  	for _, i := range ids {
  3707  		f(n.chains[i])
  3708  	}
  3709  }
  3710  
  3711  // connect a node to network and tick leader to trigger
  3712  // a heartbeat so newly joined node can detect leader.
  3713  //
  3714  // expectLeaderChange controls whether leader change should
  3715  // be observed on newly joined node.
  3716  // - it should be true if newly joined node was leader
  3717  // - it should be false if newly joined node was follower, and
  3718  //   already knows the leader.
  3719  func (n *network) join(id uint64, expectLeaderChange bool) {
  3720  	n.connect(id)
  3721  
  3722  	n.RLock()
  3723  	leader, follower := n.chains[n.leader], n.chains[id]
  3724  	n.RUnlock()
  3725  
  3726  	step := leader.getStepFunc()
  3727  	signal := make(chan struct{})
  3728  	leader.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  3729  		if dest == id {
  3730  			// close signal channel when a message targeting newly
  3731  			// joined node is observed on wire.
  3732  			select {
  3733  			case <-signal:
  3734  			default:
  3735  				close(signal)
  3736  			}
  3737  		}
  3738  
  3739  		return step(dest, msg)
  3740  	})
  3741  
  3742  	// Tick the leader so it sends out a heartbeat to the new node.
  3743  	// One tick _may_ not be enough because the leader might be busy
  3744  	// and this tick may be dropped on the floor.
  3745  	Eventually(func() <-chan struct{} {
  3746  		leader.clock.Increment(interval)
  3747  		return signal
  3748  	}, LongEventualTimeout, 100*time.Millisecond).Should(BeClosed())
  3749  
  3750  	leader.setStepFunc(step)
  3751  
  3752  	if expectLeaderChange {
  3753  		Eventually(follower.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: n.leader, RaftState: raft.StateFollower})))
  3754  	}
  3755  
  3756  	// wait for newly joined node to catch up with leader
  3757  	i, err := n.chains[n.leader].opts.MemoryStorage.LastIndex()
  3758  	Expect(err).NotTo(HaveOccurred())
  3759  	Eventually(n.chains[id].opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(i))
  3760  }
  3761  
  3762  // elect deterministically elects a node as leader
  3763  func (n *network) elect(id uint64) {
  3764  	n.RLock()
  3765  	// skip observing leader change on followers if the same leader is elected as the previous one,
  3766  	// because this may happen too quickly from a slow follower's point of view, and the 0 -> X transition
  3767  	// may not be emitted at all.
  3768  	observeFollowers := id != n.leader
  3769  	candidate := n.chains[id]
  3770  	var followers []*chain
  3771  	for _, c := range n.chains {
  3772  		if c.id != id {
  3773  			followers = append(followers, c)
  3774  		}
  3775  	}
  3776  	n.RUnlock()
  3777  
  3778  	// Send node an artificial MsgTimeoutNow to emulate leadership transfer.
  3779  	fmt.Fprintf(GinkgoWriter, "Send artificial MsgTimeoutNow to elect node %d\n", id)
  3780  	candidate.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow, To: id})}, 0)
  3781  	Eventually(candidate.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateLeader)))
  3782  
  3783  	n.Lock()
  3784  	n.leader = id
  3785  	n.Unlock()
  3786  
  3787  	if !observeFollowers {
  3788  		return
  3789  	}
  3790  
  3791  	// now observe leader change on other nodes
  3792  	for _, c := range followers {
  3793  		if c.id == id {
  3794  			continue
  3795  		}
  3796  
  3797  		select {
  3798  		case <-c.stopped: // skip check if node is stopped
  3799  		case <-c.unstarted: // skip check if node is not started yet
  3800  		default:
  3801  			if n.linked(c.id, id) && n.connected(c.id) {
  3802  				Eventually(c.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateFollower)))
  3803  			}
  3804  		}
  3805  	}
  3806  
  3807  }
  3808  
  3809  // newConfigEnv builds a config envelope of the given header type that wraps the supplied config update
  3810  func newConfigEnv(chainID string, headerType common.HeaderType, configUpdateEnv *common.ConfigUpdateEnvelope) *common.Envelope {
  3811  	return &common.Envelope{
  3812  		Payload: marshalOrPanic(&common.Payload{
  3813  			Header: &common.Header{
  3814  				ChannelHeader: marshalOrPanic(&common.ChannelHeader{
  3815  					Type:      int32(headerType),
  3816  					ChannelId: chainID,
  3817  				}),
  3818  			},
  3819  			Data: marshalOrPanic(&common.ConfigEnvelope{
  3820  				LastUpdate: &common.Envelope{
  3821  					Payload: marshalOrPanic(&common.Payload{
  3822  						Header: &common.Header{
  3823  							ChannelHeader: marshalOrPanic(&common.ChannelHeader{
  3824  								Type:      int32(common.HeaderType_CONFIG_UPDATE),
  3825  								ChannelId: chainID,
  3826  							}),
  3827  						},
  3828  						Data: marshalOrPanic(configUpdateEnv),
  3829  					}), // common.Payload
  3830  				}, // LastUpdate
  3831  			}),
  3832  		}),
  3833  	}
  3834  }
  3835  
  3836  func newConfigUpdateEnv(chainID string, oldValues, newValues map[string]*common.ConfigValue) *common.ConfigUpdateEnvelope {
  3837  	return &common.ConfigUpdateEnvelope{
  3838  		ConfigUpdate: marshalOrPanic(&common.ConfigUpdate{
  3839  			ChannelId: chainID,
  3840  			ReadSet: &common.ConfigGroup{
  3841  				Groups: map[string]*common.ConfigGroup{
  3842  					"Orderer": {
  3843  						Values: oldValues,
  3844  					},
  3845  				},
  3846  			},
  3847  			WriteSet: &common.ConfigGroup{
  3848  				Groups: map[string]*common.ConfigGroup{
  3849  					"Orderer": {
  3850  						Values: newValues,
  3851  					},
  3852  				},
  3853  			}, // WriteSet
  3854  		}),
  3855  	}
  3856  }
  3857  
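        // getSeedBlock returns a minimal genesis block used to seed each chain's in-memory ledger.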
  3858  func getSeedBlock() *common.Block {
  3859  	return &common.Block{
  3860  		Header:   &common.BlockHeader{},
  3861  		Data:     &common.BlockData{Data: [][]byte{[]byte("foo")}},
  3862  		Metadata: &common.BlockMetadata{Metadata: make([][]byte, 4)},
  3863  	}
  3864  }
  3865  
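        // StateEqual returns a matcher for a raft.SoftState with the given leader and state.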
  3866  func StateEqual(lead uint64, state raft.StateType) types.GomegaMatcher {
  3867  	return Equal(raft.SoftState{Lead: lead, RaftState: state})
  3868  }
  3869  
  3870  func BeFollower() types.GomegaMatcher {
  3871  	return &StateMatcher{expect: raft.StateFollower}
  3872  }
  3873  
  3874  type StateMatcher struct {
  3875  	expect raft.StateType
  3876  }
  3877  
  3878  func (stmatcher *StateMatcher) Match(actual interface{}) (success bool, err error) {
  3879  	state, ok := actual.(raft.SoftState)
  3880  	if !ok {
  3881  		return false, errors.Errorf("StateMatcher expects a raft SoftState")
  3882  	}
  3883  
  3884  	return state.RaftState == stmatcher.expect, nil
  3885  }
  3886  
  3887  func (stmatcher *StateMatcher) FailureMessage(actual interface{}) (message string) {
  3888  	state, ok := actual.(raft.SoftState)
  3889  	if !ok {
  3890  		return "StateMatcher expects a raft SoftState"
  3891  	}
  3892  
  3893  	return fmt.Sprintf("Expected %s to be %s", state.RaftState, stmatcher.expect)
  3894  }
  3895  
  3896  func (stmatcher *StateMatcher) NegatedFailureMessage(actual interface{}) (message string) {
  3897  	state, ok := actual.(raft.SoftState)
  3898  	if !ok {
  3899  		return "StateMatcher expects a raft SoftState"
  3900  	}
  3901  
  3902  	return fmt.Sprintf("Expected %s not to be %s", state.RaftState, stmatcher.expect)
  3903  }
  3904  
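        // noOpBlockPuller returns a fake block puller, used where a BlockPuller factory is required.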
  3905  func noOpBlockPuller() (etcdraft.BlockPuller, error) {
  3906  	bp := &mocks.FakeBlockPuller{}
  3907  	return bp, nil
  3908  }